{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 13650, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0003664178079054642, "grad_norm": 27.061571424931305, "learning_rate": 0.0, "loss": 1.9327, "num_tokens": 808898.0, "step": 1 }, { "epoch": 0.0007328356158109284, "grad_norm": 28.121328330684193, "learning_rate": 9.756097560975611e-08, "loss": 2.0095, "num_tokens": 1633576.0, "step": 2 }, { "epoch": 0.0010992534237163926, "grad_norm": 27.412358900123284, "learning_rate": 1.9512195121951221e-07, "loss": 1.9944, "num_tokens": 2415919.0, "step": 3 }, { "epoch": 0.0014656712316218569, "grad_norm": 25.995865822983315, "learning_rate": 2.926829268292683e-07, "loss": 1.9674, "num_tokens": 3200365.0, "step": 4 }, { "epoch": 0.001832089039527321, "grad_norm": 28.944944811933837, "learning_rate": 3.9024390243902443e-07, "loss": 1.9762, "num_tokens": 4010484.0, "step": 5 }, { "epoch": 0.002198506847432785, "grad_norm": 28.2541796331682, "learning_rate": 4.878048780487805e-07, "loss": 1.9844, "num_tokens": 4809207.0, "step": 6 }, { "epoch": 0.0025649246553382495, "grad_norm": 30.448583025885355, "learning_rate": 5.853658536585366e-07, "loss": 1.9598, "num_tokens": 5524360.0, "step": 7 }, { "epoch": 0.0029313424632437138, "grad_norm": 37.30989135588656, "learning_rate": 6.829268292682928e-07, "loss": 1.8908, "num_tokens": 6271979.0, "step": 8 }, { "epoch": 0.003297760271149178, "grad_norm": 39.040548093841345, "learning_rate": 7.804878048780489e-07, "loss": 1.8639, "num_tokens": 6931130.0, "step": 9 }, { "epoch": 0.003664178079054642, "grad_norm": 41.68890237673831, "learning_rate": 8.780487804878049e-07, "loss": 2.0607, "num_tokens": 7659887.0, "step": 10 }, { "epoch": 0.004030595886960107, "grad_norm": 58.87308630191245, "learning_rate": 9.75609756097561e-07, "loss": 1.9239, "num_tokens": 8368687.0, "step": 11 }, { "epoch": 0.00439701369486557, "grad_norm": 228.28786406444578, "learning_rate": 1.0731707317073172e-06, "loss": 1.8335, "num_tokens": 9189664.0, "step": 12 }, { "epoch": 0.004763431502771034, "grad_norm": 140.90431703637702, "learning_rate": 1.1707317073170732e-06, "loss": 1.769, "num_tokens": 9954704.0, "step": 13 }, { "epoch": 0.005129849310676499, "grad_norm": 59.81234952299779, "learning_rate": 1.2682926829268293e-06, "loss": 1.7606, "num_tokens": 10667942.0, "step": 14 }, { "epoch": 0.005496267118581963, "grad_norm": 40.947269921841986, "learning_rate": 1.3658536585365856e-06, "loss": 1.6263, "num_tokens": 11387981.0, "step": 15 }, { "epoch": 0.0058626849264874275, "grad_norm": 34.196316146708554, "learning_rate": 1.4634146341463414e-06, "loss": 1.6374, "num_tokens": 12014178.0, "step": 16 }, { "epoch": 0.006229102734392891, "grad_norm": 29.52440348523295, "learning_rate": 1.5609756097560977e-06, "loss": 1.5535, "num_tokens": 12734796.0, "step": 17 }, { "epoch": 0.006595520542298356, "grad_norm": 50.471054036513664, "learning_rate": 1.6585365853658538e-06, "loss": 1.6264, "num_tokens": 13430639.0, "step": 18 }, { "epoch": 0.00696193835020382, "grad_norm": 96.77902535391084, "learning_rate": 1.7560975609756098e-06, "loss": 1.5852, "num_tokens": 14156060.0, "step": 19 }, { "epoch": 0.007328356158109284, "grad_norm": 227.30405546549738, "learning_rate": 1.853658536585366e-06, "loss": 1.5323, "num_tokens": 14877624.0, "step": 20 }, { "epoch": 0.0076947739660147485, "grad_norm": 346.20930550868917, "learning_rate": 1.951219512195122e-06, "loss": 1.4175, "num_tokens": 15659647.0, "step": 21 }, { "epoch": 0.008061191773920213, "grad_norm": 359.1073255336916, "learning_rate": 2.048780487804878e-06, "loss": 1.4903, "num_tokens": 16433227.0, "step": 22 }, { "epoch": 0.008427609581825677, "grad_norm": 280.63322056721586, "learning_rate": 2.1463414634146343e-06, "loss": 1.4609, "num_tokens": 17222312.0, "step": 23 }, { "epoch": 0.00879402738973114, "grad_norm": 385.5774359631995, "learning_rate": 2.2439024390243906e-06, "loss": 1.4565, "num_tokens": 17939183.0, "step": 24 }, { "epoch": 0.009160445197636605, "grad_norm": 196.43022100736457, "learning_rate": 2.3414634146341465e-06, "loss": 1.4236, "num_tokens": 18720096.0, "step": 25 }, { "epoch": 0.009526863005542069, "grad_norm": 158.92269007231545, "learning_rate": 2.4390243902439027e-06, "loss": 1.3825, "num_tokens": 19410539.0, "step": 26 }, { "epoch": 0.009893280813447534, "grad_norm": 150.31459325307603, "learning_rate": 2.5365853658536586e-06, "loss": 1.3628, "num_tokens": 20192089.0, "step": 27 }, { "epoch": 0.010259698621352998, "grad_norm": 121.50055369182209, "learning_rate": 2.634146341463415e-06, "loss": 1.3057, "num_tokens": 20987096.0, "step": 28 }, { "epoch": 0.010626116429258462, "grad_norm": 40.437729047087046, "learning_rate": 2.731707317073171e-06, "loss": 1.3315, "num_tokens": 21684259.0, "step": 29 }, { "epoch": 0.010992534237163926, "grad_norm": 19.167610373297755, "learning_rate": 2.8292682926829266e-06, "loss": 1.3564, "num_tokens": 22382076.0, "step": 30 }, { "epoch": 0.011358952045069391, "grad_norm": 6.488202538135345, "learning_rate": 2.926829268292683e-06, "loss": 1.2952, "num_tokens": 23171914.0, "step": 31 }, { "epoch": 0.011725369852974855, "grad_norm": 6.061055012776076, "learning_rate": 3.024390243902439e-06, "loss": 1.1835, "num_tokens": 23848301.0, "step": 32 }, { "epoch": 0.012091787660880319, "grad_norm": 4.773685218811565, "learning_rate": 3.1219512195121954e-06, "loss": 1.2554, "num_tokens": 24585219.0, "step": 33 }, { "epoch": 0.012458205468785783, "grad_norm": 8.706878577889922, "learning_rate": 3.2195121951219517e-06, "loss": 1.1384, "num_tokens": 25463577.0, "step": 34 }, { "epoch": 0.012824623276691247, "grad_norm": 6.1479565685539, "learning_rate": 3.3170731707317076e-06, "loss": 1.0915, "num_tokens": 26226389.0, "step": 35 }, { "epoch": 0.013191041084596712, "grad_norm": 3.0389159195403197, "learning_rate": 3.414634146341464e-06, "loss": 1.124, "num_tokens": 26827255.0, "step": 36 }, { "epoch": 0.013557458892502176, "grad_norm": 2.5038758841904634, "learning_rate": 3.5121951219512197e-06, "loss": 1.0382, "num_tokens": 27560024.0, "step": 37 }, { "epoch": 0.01392387670040764, "grad_norm": 3.8221425491508287, "learning_rate": 3.609756097560976e-06, "loss": 1.1, "num_tokens": 28274117.0, "step": 38 }, { "epoch": 0.014290294508313104, "grad_norm": 2.895312502719736, "learning_rate": 3.707317073170732e-06, "loss": 0.9963, "num_tokens": 29069824.0, "step": 39 }, { "epoch": 0.014656712316218568, "grad_norm": 2.75592186898017, "learning_rate": 3.804878048780488e-06, "loss": 1.013, "num_tokens": 29821112.0, "step": 40 }, { "epoch": 0.015023130124124033, "grad_norm": 1.7426562670573311, "learning_rate": 3.902439024390244e-06, "loss": 0.9839, "num_tokens": 30565995.0, "step": 41 }, { "epoch": 0.015389547932029497, "grad_norm": 1.66036920154137, "learning_rate": 4.000000000000001e-06, "loss": 0.9663, "num_tokens": 31304348.0, "step": 42 }, { "epoch": 0.01575596573993496, "grad_norm": 1.1289689679198138, "learning_rate": 4.097560975609756e-06, "loss": 0.9476, "num_tokens": 32021227.0, "step": 43 }, { "epoch": 0.016122383547840426, "grad_norm": 0.961692689240107, "learning_rate": 4.195121951219512e-06, "loss": 0.9077, "num_tokens": 32731694.0, "step": 44 }, { "epoch": 0.01648880135574589, "grad_norm": 0.9587247032866075, "learning_rate": 4.292682926829269e-06, "loss": 0.9191, "num_tokens": 33535371.0, "step": 45 }, { "epoch": 0.016855219163651354, "grad_norm": 0.9704028133462127, "learning_rate": 4.390243902439025e-06, "loss": 0.9135, "num_tokens": 34261186.0, "step": 46 }, { "epoch": 0.017221636971556816, "grad_norm": 0.9179028297553896, "learning_rate": 4.487804878048781e-06, "loss": 0.8952, "num_tokens": 35008491.0, "step": 47 }, { "epoch": 0.01758805477946228, "grad_norm": 0.7191428273099728, "learning_rate": 4.5853658536585375e-06, "loss": 0.872, "num_tokens": 35798768.0, "step": 48 }, { "epoch": 0.017954472587367747, "grad_norm": 0.961475792606748, "learning_rate": 4.682926829268293e-06, "loss": 0.8929, "num_tokens": 36529460.0, "step": 49 }, { "epoch": 0.01832089039527321, "grad_norm": 0.7754565101778618, "learning_rate": 4.780487804878049e-06, "loss": 0.8485, "num_tokens": 37282936.0, "step": 50 }, { "epoch": 0.018687308203178675, "grad_norm": 1.3108490339676668, "learning_rate": 4.8780487804878055e-06, "loss": 0.8569, "num_tokens": 38020861.0, "step": 51 }, { "epoch": 0.019053726011084137, "grad_norm": 0.7020461480835559, "learning_rate": 4.975609756097562e-06, "loss": 0.8706, "num_tokens": 38732272.0, "step": 52 }, { "epoch": 0.019420143818989603, "grad_norm": 0.7274244535793231, "learning_rate": 5.073170731707317e-06, "loss": 0.8136, "num_tokens": 39441436.0, "step": 53 }, { "epoch": 0.019786561626895068, "grad_norm": 0.7022085810732956, "learning_rate": 5.1707317073170735e-06, "loss": 0.8458, "num_tokens": 40191622.0, "step": 54 }, { "epoch": 0.02015297943480053, "grad_norm": 0.5017956226399719, "learning_rate": 5.26829268292683e-06, "loss": 0.799, "num_tokens": 40992390.0, "step": 55 }, { "epoch": 0.020519397242705996, "grad_norm": 0.44437836384775786, "learning_rate": 5.365853658536586e-06, "loss": 0.811, "num_tokens": 41783754.0, "step": 56 }, { "epoch": 0.020885815050611458, "grad_norm": 0.42162394244082246, "learning_rate": 5.463414634146342e-06, "loss": 0.783, "num_tokens": 42590712.0, "step": 57 }, { "epoch": 0.021252232858516924, "grad_norm": 0.46177347265629365, "learning_rate": 5.560975609756099e-06, "loss": 0.8182, "num_tokens": 43392751.0, "step": 58 }, { "epoch": 0.02161865066642239, "grad_norm": 0.4199928654312008, "learning_rate": 5.658536585365853e-06, "loss": 0.7732, "num_tokens": 44105538.0, "step": 59 }, { "epoch": 0.02198506847432785, "grad_norm": 0.44071064670169247, "learning_rate": 5.7560975609756095e-06, "loss": 0.8121, "num_tokens": 44887999.0, "step": 60 }, { "epoch": 0.022351486282233317, "grad_norm": 0.4402263857880696, "learning_rate": 5.853658536585366e-06, "loss": 0.8122, "num_tokens": 45537031.0, "step": 61 }, { "epoch": 0.022717904090138782, "grad_norm": 0.37686655827573423, "learning_rate": 5.951219512195122e-06, "loss": 0.767, "num_tokens": 46231789.0, "step": 62 }, { "epoch": 0.023084321898044245, "grad_norm": 0.4241979917782198, "learning_rate": 6.048780487804878e-06, "loss": 0.7891, "num_tokens": 46984617.0, "step": 63 }, { "epoch": 0.02345073970594971, "grad_norm": 0.47590301810089636, "learning_rate": 6.1463414634146346e-06, "loss": 0.7946, "num_tokens": 47618345.0, "step": 64 }, { "epoch": 0.023817157513855172, "grad_norm": 0.41017435601419844, "learning_rate": 6.243902439024391e-06, "loss": 0.8047, "num_tokens": 48371736.0, "step": 65 }, { "epoch": 0.024183575321760638, "grad_norm": 0.38789443132853924, "learning_rate": 6.341463414634147e-06, "loss": 0.7654, "num_tokens": 49171983.0, "step": 66 }, { "epoch": 0.024549993129666103, "grad_norm": 0.42350360784428587, "learning_rate": 6.439024390243903e-06, "loss": 0.7535, "num_tokens": 49816662.0, "step": 67 }, { "epoch": 0.024916410937571565, "grad_norm": 0.468847962913629, "learning_rate": 6.53658536585366e-06, "loss": 0.7576, "num_tokens": 50707092.0, "step": 68 }, { "epoch": 0.02528282874547703, "grad_norm": 0.3921231619499579, "learning_rate": 6.634146341463415e-06, "loss": 0.7508, "num_tokens": 51549285.0, "step": 69 }, { "epoch": 0.025649246553382493, "grad_norm": 0.4385701682877042, "learning_rate": 6.731707317073171e-06, "loss": 0.7429, "num_tokens": 52375044.0, "step": 70 }, { "epoch": 0.02601566436128796, "grad_norm": 0.4110938236660965, "learning_rate": 6.829268292682928e-06, "loss": 0.7727, "num_tokens": 53060652.0, "step": 71 }, { "epoch": 0.026382082169193424, "grad_norm": 0.3827188097574585, "learning_rate": 6.926829268292683e-06, "loss": 0.7401, "num_tokens": 53763468.0, "step": 72 }, { "epoch": 0.026748499977098886, "grad_norm": 0.3694082149968567, "learning_rate": 7.024390243902439e-06, "loss": 0.764, "num_tokens": 54601789.0, "step": 73 }, { "epoch": 0.027114917785004352, "grad_norm": 0.3846752622734582, "learning_rate": 7.121951219512196e-06, "loss": 0.7652, "num_tokens": 55423465.0, "step": 74 }, { "epoch": 0.027481335592909814, "grad_norm": 0.3445741048835456, "learning_rate": 7.219512195121952e-06, "loss": 0.7452, "num_tokens": 56103819.0, "step": 75 }, { "epoch": 0.02784775340081528, "grad_norm": 0.35215676164980164, "learning_rate": 7.317073170731707e-06, "loss": 0.7452, "num_tokens": 56981554.0, "step": 76 }, { "epoch": 0.028214171208720745, "grad_norm": 0.3728599545809349, "learning_rate": 7.414634146341464e-06, "loss": 0.7462, "num_tokens": 57753882.0, "step": 77 }, { "epoch": 0.028580589016626207, "grad_norm": 0.3628297372286606, "learning_rate": 7.51219512195122e-06, "loss": 0.7854, "num_tokens": 58621580.0, "step": 78 }, { "epoch": 0.028947006824531673, "grad_norm": 0.3433326912147645, "learning_rate": 7.609756097560976e-06, "loss": 0.745, "num_tokens": 59400372.0, "step": 79 }, { "epoch": 0.029313424632437135, "grad_norm": 0.3899831397556686, "learning_rate": 7.707317073170732e-06, "loss": 0.758, "num_tokens": 60065986.0, "step": 80 }, { "epoch": 0.0296798424403426, "grad_norm": 0.3687945061403439, "learning_rate": 7.804878048780489e-06, "loss": 0.7547, "num_tokens": 60874296.0, "step": 81 }, { "epoch": 0.030046260248248066, "grad_norm": 0.34405365332594745, "learning_rate": 7.902439024390245e-06, "loss": 0.6773, "num_tokens": 61639218.0, "step": 82 }, { "epoch": 0.03041267805615353, "grad_norm": 0.3636132891052512, "learning_rate": 8.000000000000001e-06, "loss": 0.706, "num_tokens": 62385776.0, "step": 83 }, { "epoch": 0.030779095864058994, "grad_norm": 0.3118340113929443, "learning_rate": 8.097560975609758e-06, "loss": 0.6943, "num_tokens": 63233638.0, "step": 84 }, { "epoch": 0.031145513671964456, "grad_norm": 0.33236436583962586, "learning_rate": 8.195121951219512e-06, "loss": 0.7225, "num_tokens": 64041717.0, "step": 85 }, { "epoch": 0.03151193147986992, "grad_norm": 0.38325379113892044, "learning_rate": 8.292682926829268e-06, "loss": 0.6775, "num_tokens": 64728455.0, "step": 86 }, { "epoch": 0.03187834928777539, "grad_norm": 0.3921007178175507, "learning_rate": 8.390243902439025e-06, "loss": 0.698, "num_tokens": 65506275.0, "step": 87 }, { "epoch": 0.03224476709568085, "grad_norm": 0.41839648655679784, "learning_rate": 8.487804878048781e-06, "loss": 0.7411, "num_tokens": 66204779.0, "step": 88 }, { "epoch": 0.03261118490358631, "grad_norm": 0.37708220585805, "learning_rate": 8.585365853658537e-06, "loss": 0.7241, "num_tokens": 67110724.0, "step": 89 }, { "epoch": 0.03297760271149178, "grad_norm": 0.4373350858762525, "learning_rate": 8.682926829268294e-06, "loss": 0.684, "num_tokens": 67766403.0, "step": 90 }, { "epoch": 0.03334402051939724, "grad_norm": 0.3682279116494388, "learning_rate": 8.78048780487805e-06, "loss": 0.7072, "num_tokens": 68677848.0, "step": 91 }, { "epoch": 0.03371043832730271, "grad_norm": 0.3856572870875986, "learning_rate": 8.878048780487806e-06, "loss": 0.6993, "num_tokens": 69419290.0, "step": 92 }, { "epoch": 0.034076856135208174, "grad_norm": 0.3575918701167241, "learning_rate": 8.975609756097562e-06, "loss": 0.7364, "num_tokens": 70082144.0, "step": 93 }, { "epoch": 0.03444327394311363, "grad_norm": 0.3504994585120699, "learning_rate": 9.073170731707319e-06, "loss": 0.7085, "num_tokens": 70833584.0, "step": 94 }, { "epoch": 0.0348096917510191, "grad_norm": 0.4958455327819232, "learning_rate": 9.170731707317075e-06, "loss": 0.7248, "num_tokens": 71456177.0, "step": 95 }, { "epoch": 0.03517610955892456, "grad_norm": 0.4679383758341906, "learning_rate": 9.268292682926831e-06, "loss": 0.6605, "num_tokens": 72207679.0, "step": 96 }, { "epoch": 0.03554252736683003, "grad_norm": 0.3776813159503037, "learning_rate": 9.365853658536586e-06, "loss": 0.7295, "num_tokens": 73054967.0, "step": 97 }, { "epoch": 0.035908945174735495, "grad_norm": 0.48835617783607116, "learning_rate": 9.463414634146342e-06, "loss": 0.7236, "num_tokens": 73834024.0, "step": 98 }, { "epoch": 0.03627536298264095, "grad_norm": 0.3903899622286641, "learning_rate": 9.560975609756098e-06, "loss": 0.6994, "num_tokens": 74518020.0, "step": 99 }, { "epoch": 0.03664178079054642, "grad_norm": 0.3155260622969738, "learning_rate": 9.658536585365855e-06, "loss": 0.7249, "num_tokens": 75250404.0, "step": 100 }, { "epoch": 0.037008198598451884, "grad_norm": 0.3361549284048753, "learning_rate": 9.756097560975611e-06, "loss": 0.7026, "num_tokens": 76048415.0, "step": 101 }, { "epoch": 0.03737461640635735, "grad_norm": 0.36483174849560845, "learning_rate": 9.853658536585367e-06, "loss": 0.6849, "num_tokens": 76852888.0, "step": 102 }, { "epoch": 0.037741034214262816, "grad_norm": 0.38806565478495564, "learning_rate": 9.951219512195124e-06, "loss": 0.6865, "num_tokens": 77687458.0, "step": 103 }, { "epoch": 0.038107452022168274, "grad_norm": 0.379731073080182, "learning_rate": 1.0048780487804878e-05, "loss": 0.7009, "num_tokens": 78558398.0, "step": 104 }, { "epoch": 0.03847386983007374, "grad_norm": 0.39510031528945183, "learning_rate": 1.0146341463414634e-05, "loss": 0.7194, "num_tokens": 79240541.0, "step": 105 }, { "epoch": 0.038840287637979205, "grad_norm": 0.3365088612552052, "learning_rate": 1.024390243902439e-05, "loss": 0.6716, "num_tokens": 80045054.0, "step": 106 }, { "epoch": 0.03920670544588467, "grad_norm": 0.3333359630798628, "learning_rate": 1.0341463414634147e-05, "loss": 0.689, "num_tokens": 80737524.0, "step": 107 }, { "epoch": 0.039573123253790136, "grad_norm": 0.2921428216837956, "learning_rate": 1.0439024390243903e-05, "loss": 0.7126, "num_tokens": 81583910.0, "step": 108 }, { "epoch": 0.039939541061695595, "grad_norm": 0.41762011499841867, "learning_rate": 1.053658536585366e-05, "loss": 0.6759, "num_tokens": 82410111.0, "step": 109 }, { "epoch": 0.04030595886960106, "grad_norm": 0.35019711798658293, "learning_rate": 1.0634146341463416e-05, "loss": 0.7116, "num_tokens": 83106403.0, "step": 110 }, { "epoch": 0.040672376677506526, "grad_norm": 0.305095100102837, "learning_rate": 1.0731707317073172e-05, "loss": 0.6778, "num_tokens": 83824328.0, "step": 111 }, { "epoch": 0.04103879448541199, "grad_norm": 0.34899527830492927, "learning_rate": 1.0829268292682928e-05, "loss": 0.7091, "num_tokens": 84686885.0, "step": 112 }, { "epoch": 0.04140521229331746, "grad_norm": 0.4144242784321613, "learning_rate": 1.0926829268292685e-05, "loss": 0.669, "num_tokens": 85480522.0, "step": 113 }, { "epoch": 0.041771630101222916, "grad_norm": 0.292476278765836, "learning_rate": 1.1024390243902441e-05, "loss": 0.6842, "num_tokens": 86339756.0, "step": 114 }, { "epoch": 0.04213804790912838, "grad_norm": 0.375691212426354, "learning_rate": 1.1121951219512197e-05, "loss": 0.7057, "num_tokens": 87133206.0, "step": 115 }, { "epoch": 0.04250446571703385, "grad_norm": 0.29951666480380923, "learning_rate": 1.1219512195121953e-05, "loss": 0.649, "num_tokens": 87881801.0, "step": 116 }, { "epoch": 0.04287088352493931, "grad_norm": 0.3192409647987107, "learning_rate": 1.1317073170731706e-05, "loss": 0.6727, "num_tokens": 88751007.0, "step": 117 }, { "epoch": 0.04323730133284478, "grad_norm": 0.3502014548741017, "learning_rate": 1.1414634146341463e-05, "loss": 0.7023, "num_tokens": 89508242.0, "step": 118 }, { "epoch": 0.043603719140750244, "grad_norm": 0.3818391731389477, "learning_rate": 1.1512195121951219e-05, "loss": 0.7224, "num_tokens": 90305433.0, "step": 119 }, { "epoch": 0.0439701369486557, "grad_norm": 0.3227242364826719, "learning_rate": 1.1609756097560975e-05, "loss": 0.7118, "num_tokens": 91090167.0, "step": 120 }, { "epoch": 0.04433655475656117, "grad_norm": 0.32802683680246286, "learning_rate": 1.1707317073170731e-05, "loss": 0.7049, "num_tokens": 91887451.0, "step": 121 }, { "epoch": 0.044702972564466634, "grad_norm": 0.3954327670928302, "learning_rate": 1.1804878048780488e-05, "loss": 0.7235, "num_tokens": 92760469.0, "step": 122 }, { "epoch": 0.0450693903723721, "grad_norm": 0.34476361265112465, "learning_rate": 1.1902439024390244e-05, "loss": 0.68, "num_tokens": 93499888.0, "step": 123 }, { "epoch": 0.045435808180277565, "grad_norm": 0.4247971053827936, "learning_rate": 1.2e-05, "loss": 0.6686, "num_tokens": 94273409.0, "step": 124 }, { "epoch": 0.045802225988183023, "grad_norm": 0.44407092393893677, "learning_rate": 1.2097560975609757e-05, "loss": 0.6989, "num_tokens": 95054561.0, "step": 125 }, { "epoch": 0.04616864379608849, "grad_norm": 0.4201767603061677, "learning_rate": 1.2195121951219513e-05, "loss": 0.7012, "num_tokens": 95809904.0, "step": 126 }, { "epoch": 0.046535061603993955, "grad_norm": 0.37543533792023176, "learning_rate": 1.2292682926829269e-05, "loss": 0.6881, "num_tokens": 96666555.0, "step": 127 }, { "epoch": 0.04690147941189942, "grad_norm": 0.38720452463431354, "learning_rate": 1.2390243902439025e-05, "loss": 0.687, "num_tokens": 97497091.0, "step": 128 }, { "epoch": 0.047267897219804886, "grad_norm": 0.36297336098066035, "learning_rate": 1.2487804878048782e-05, "loss": 0.6821, "num_tokens": 98226670.0, "step": 129 }, { "epoch": 0.047634315027710344, "grad_norm": 0.35514096412637325, "learning_rate": 1.2585365853658538e-05, "loss": 0.6873, "num_tokens": 98927118.0, "step": 130 }, { "epoch": 0.04800073283561581, "grad_norm": 0.47148452851261174, "learning_rate": 1.2682926829268294e-05, "loss": 0.6786, "num_tokens": 99578636.0, "step": 131 }, { "epoch": 0.048367150643521276, "grad_norm": 0.3772426401119523, "learning_rate": 1.278048780487805e-05, "loss": 0.7063, "num_tokens": 100307573.0, "step": 132 }, { "epoch": 0.04873356845142674, "grad_norm": 0.49411228619978226, "learning_rate": 1.2878048780487807e-05, "loss": 0.6735, "num_tokens": 101082875.0, "step": 133 }, { "epoch": 0.04909998625933221, "grad_norm": 0.5731011108574258, "learning_rate": 1.2975609756097563e-05, "loss": 0.6904, "num_tokens": 101964515.0, "step": 134 }, { "epoch": 0.049466404067237665, "grad_norm": 0.39558250282655344, "learning_rate": 1.307317073170732e-05, "loss": 0.6966, "num_tokens": 102753864.0, "step": 135 }, { "epoch": 0.04983282187514313, "grad_norm": 0.43897788650484637, "learning_rate": 1.3170731707317076e-05, "loss": 0.6729, "num_tokens": 103396002.0, "step": 136 }, { "epoch": 0.050199239683048597, "grad_norm": 0.4326527195030328, "learning_rate": 1.326829268292683e-05, "loss": 0.7, "num_tokens": 104144316.0, "step": 137 }, { "epoch": 0.05056565749095406, "grad_norm": 0.36071012658803875, "learning_rate": 1.3365853658536587e-05, "loss": 0.6377, "num_tokens": 104902692.0, "step": 138 }, { "epoch": 0.05093207529885953, "grad_norm": 0.3516324368911853, "learning_rate": 1.3463414634146343e-05, "loss": 0.6651, "num_tokens": 105544499.0, "step": 139 }, { "epoch": 0.051298493106764986, "grad_norm": 0.3789861589924934, "learning_rate": 1.3560975609756099e-05, "loss": 0.6989, "num_tokens": 106286179.0, "step": 140 }, { "epoch": 0.05166491091467045, "grad_norm": 0.41129173604536895, "learning_rate": 1.3658536585365855e-05, "loss": 0.6651, "num_tokens": 107064158.0, "step": 141 }, { "epoch": 0.05203132872257592, "grad_norm": 0.396935898383318, "learning_rate": 1.375609756097561e-05, "loss": 0.7054, "num_tokens": 107959668.0, "step": 142 }, { "epoch": 0.05239774653048138, "grad_norm": 0.33991285004422067, "learning_rate": 1.3853658536585366e-05, "loss": 0.6853, "num_tokens": 108710225.0, "step": 143 }, { "epoch": 0.05276416433838685, "grad_norm": 0.39943797032514206, "learning_rate": 1.3951219512195122e-05, "loss": 0.6269, "num_tokens": 109428456.0, "step": 144 }, { "epoch": 0.05313058214629231, "grad_norm": 0.38953915807902495, "learning_rate": 1.4048780487804879e-05, "loss": 0.6772, "num_tokens": 110294544.0, "step": 145 }, { "epoch": 0.05349699995419777, "grad_norm": 0.44446394919289395, "learning_rate": 1.4146341463414635e-05, "loss": 0.7041, "num_tokens": 110979749.0, "step": 146 }, { "epoch": 0.05386341776210324, "grad_norm": 0.3533094108322274, "learning_rate": 1.4243902439024391e-05, "loss": 0.6665, "num_tokens": 111802187.0, "step": 147 }, { "epoch": 0.054229835570008704, "grad_norm": 0.43030473003148234, "learning_rate": 1.4341463414634148e-05, "loss": 0.6742, "num_tokens": 112496963.0, "step": 148 }, { "epoch": 0.05459625337791417, "grad_norm": 0.3270631314669412, "learning_rate": 1.4439024390243904e-05, "loss": 0.6608, "num_tokens": 113323934.0, "step": 149 }, { "epoch": 0.05496267118581963, "grad_norm": 0.4256549674890246, "learning_rate": 1.4536585365853658e-05, "loss": 0.6481, "num_tokens": 114035785.0, "step": 150 }, { "epoch": 0.055329088993725094, "grad_norm": 0.47978675753484695, "learning_rate": 1.4634146341463415e-05, "loss": 0.6717, "num_tokens": 114670483.0, "step": 151 }, { "epoch": 0.05569550680163056, "grad_norm": 0.3429012072265794, "learning_rate": 1.4731707317073171e-05, "loss": 0.6879, "num_tokens": 115421255.0, "step": 152 }, { "epoch": 0.056061924609536025, "grad_norm": 0.4381899037868102, "learning_rate": 1.4829268292682927e-05, "loss": 0.6931, "num_tokens": 116155695.0, "step": 153 }, { "epoch": 0.05642834241744149, "grad_norm": 0.3848889761186483, "learning_rate": 1.4926829268292684e-05, "loss": 0.6686, "num_tokens": 116927360.0, "step": 154 }, { "epoch": 0.05679476022534695, "grad_norm": 0.3879521121112494, "learning_rate": 1.502439024390244e-05, "loss": 0.6889, "num_tokens": 117696782.0, "step": 155 }, { "epoch": 0.057161178033252415, "grad_norm": 0.31736784293314035, "learning_rate": 1.5121951219512196e-05, "loss": 0.6262, "num_tokens": 118447116.0, "step": 156 }, { "epoch": 0.05752759584115788, "grad_norm": 0.4052057806913101, "learning_rate": 1.5219512195121952e-05, "loss": 0.6756, "num_tokens": 119167399.0, "step": 157 }, { "epoch": 0.057894013649063346, "grad_norm": 0.3943230203608488, "learning_rate": 1.531707317073171e-05, "loss": 0.6655, "num_tokens": 119841793.0, "step": 158 }, { "epoch": 0.05826043145696881, "grad_norm": 0.43859204589035994, "learning_rate": 1.5414634146341465e-05, "loss": 0.716, "num_tokens": 120620110.0, "step": 159 }, { "epoch": 0.05862684926487427, "grad_norm": 0.3372223436420471, "learning_rate": 1.551219512195122e-05, "loss": 0.6455, "num_tokens": 121439994.0, "step": 160 }, { "epoch": 0.058993267072779736, "grad_norm": 0.42235762507886404, "learning_rate": 1.5609756097560978e-05, "loss": 0.6805, "num_tokens": 122217317.0, "step": 161 }, { "epoch": 0.0593596848806852, "grad_norm": 0.371764303383993, "learning_rate": 1.5707317073170732e-05, "loss": 0.6857, "num_tokens": 123002424.0, "step": 162 }, { "epoch": 0.05972610268859067, "grad_norm": 0.471525833976362, "learning_rate": 1.580487804878049e-05, "loss": 0.6572, "num_tokens": 123836974.0, "step": 163 }, { "epoch": 0.06009252049649613, "grad_norm": 0.3334616158732852, "learning_rate": 1.5902439024390245e-05, "loss": 0.6455, "num_tokens": 124618696.0, "step": 164 }, { "epoch": 0.06045893830440159, "grad_norm": 0.38221399222912533, "learning_rate": 1.6000000000000003e-05, "loss": 0.6778, "num_tokens": 125462355.0, "step": 165 }, { "epoch": 0.06082535611230706, "grad_norm": 0.33744025142581546, "learning_rate": 1.6097560975609757e-05, "loss": 0.652, "num_tokens": 126149202.0, "step": 166 }, { "epoch": 0.06119177392021252, "grad_norm": 0.4174642576537427, "learning_rate": 1.6195121951219515e-05, "loss": 0.6958, "num_tokens": 126824113.0, "step": 167 }, { "epoch": 0.06155819172811799, "grad_norm": 0.33824141434443555, "learning_rate": 1.629268292682927e-05, "loss": 0.6589, "num_tokens": 127722502.0, "step": 168 }, { "epoch": 0.06192460953602345, "grad_norm": 0.4635665308167126, "learning_rate": 1.6390243902439024e-05, "loss": 0.6806, "num_tokens": 128429764.0, "step": 169 }, { "epoch": 0.06229102734392891, "grad_norm": 0.42181395441975883, "learning_rate": 1.6487804878048782e-05, "loss": 0.6863, "num_tokens": 129147616.0, "step": 170 }, { "epoch": 0.06265744515183438, "grad_norm": 0.4149902978830228, "learning_rate": 1.6585365853658537e-05, "loss": 0.7182, "num_tokens": 129865980.0, "step": 171 }, { "epoch": 0.06302386295973984, "grad_norm": 0.5634436134841048, "learning_rate": 1.6682926829268295e-05, "loss": 0.6496, "num_tokens": 130608569.0, "step": 172 }, { "epoch": 0.0633902807676453, "grad_norm": 0.4896243994171541, "learning_rate": 1.678048780487805e-05, "loss": 0.6724, "num_tokens": 131314958.0, "step": 173 }, { "epoch": 0.06375669857555077, "grad_norm": 0.344978142843496, "learning_rate": 1.6878048780487804e-05, "loss": 0.689, "num_tokens": 132122025.0, "step": 174 }, { "epoch": 0.06412311638345623, "grad_norm": 0.48125843154042297, "learning_rate": 1.6975609756097562e-05, "loss": 0.664, "num_tokens": 132891841.0, "step": 175 }, { "epoch": 0.0644895341913617, "grad_norm": 0.3923196984437137, "learning_rate": 1.7073170731707317e-05, "loss": 0.6673, "num_tokens": 133607627.0, "step": 176 }, { "epoch": 0.06485595199926716, "grad_norm": 0.37454739057559117, "learning_rate": 1.7170731707317075e-05, "loss": 0.6863, "num_tokens": 134441565.0, "step": 177 }, { "epoch": 0.06522236980717262, "grad_norm": 0.4004005245879078, "learning_rate": 1.726829268292683e-05, "loss": 0.6678, "num_tokens": 135245108.0, "step": 178 }, { "epoch": 0.0655887876150781, "grad_norm": 0.3624474557265517, "learning_rate": 1.7365853658536587e-05, "loss": 0.6563, "num_tokens": 136077581.0, "step": 179 }, { "epoch": 0.06595520542298355, "grad_norm": 0.3785478126959272, "learning_rate": 1.7463414634146342e-05, "loss": 0.6851, "num_tokens": 136964155.0, "step": 180 }, { "epoch": 0.06632162323088903, "grad_norm": 0.411554574491304, "learning_rate": 1.75609756097561e-05, "loss": 0.6567, "num_tokens": 137722931.0, "step": 181 }, { "epoch": 0.06668804103879448, "grad_norm": 0.3213308831902322, "learning_rate": 1.7658536585365854e-05, "loss": 0.6544, "num_tokens": 138458733.0, "step": 182 }, { "epoch": 0.06705445884669994, "grad_norm": 0.3795888808135746, "learning_rate": 1.7756097560975612e-05, "loss": 0.6996, "num_tokens": 139131015.0, "step": 183 }, { "epoch": 0.06742087665460542, "grad_norm": 0.3687217642735718, "learning_rate": 1.7853658536585367e-05, "loss": 0.6574, "num_tokens": 139993882.0, "step": 184 }, { "epoch": 0.06778729446251087, "grad_norm": 0.3409627486742385, "learning_rate": 1.7951219512195125e-05, "loss": 0.6891, "num_tokens": 140743238.0, "step": 185 }, { "epoch": 0.06815371227041635, "grad_norm": 0.38112164870148124, "learning_rate": 1.804878048780488e-05, "loss": 0.6575, "num_tokens": 141579738.0, "step": 186 }, { "epoch": 0.0685201300783218, "grad_norm": 0.3843330330995901, "learning_rate": 1.8146341463414637e-05, "loss": 0.6564, "num_tokens": 142363893.0, "step": 187 }, { "epoch": 0.06888654788622726, "grad_norm": 0.5086739946126669, "learning_rate": 1.8243902439024392e-05, "loss": 0.664, "num_tokens": 143125822.0, "step": 188 }, { "epoch": 0.06925296569413274, "grad_norm": 0.3586947514697586, "learning_rate": 1.834146341463415e-05, "loss": 0.6546, "num_tokens": 143987839.0, "step": 189 }, { "epoch": 0.0696193835020382, "grad_norm": 0.5559344568250809, "learning_rate": 1.8439024390243905e-05, "loss": 0.6864, "num_tokens": 144814305.0, "step": 190 }, { "epoch": 0.06998580130994367, "grad_norm": 0.5362670916496263, "learning_rate": 1.8536585365853663e-05, "loss": 0.6362, "num_tokens": 145510930.0, "step": 191 }, { "epoch": 0.07035221911784913, "grad_norm": 0.46093274497736497, "learning_rate": 1.8634146341463417e-05, "loss": 0.7072, "num_tokens": 146234240.0, "step": 192 }, { "epoch": 0.07071863692575459, "grad_norm": 0.32815619844555965, "learning_rate": 1.8731707317073172e-05, "loss": 0.6574, "num_tokens": 146989181.0, "step": 193 }, { "epoch": 0.07108505473366006, "grad_norm": 0.4023130377771441, "learning_rate": 1.8829268292682926e-05, "loss": 0.637, "num_tokens": 147787846.0, "step": 194 }, { "epoch": 0.07145147254156552, "grad_norm": 0.34347282203263546, "learning_rate": 1.8926829268292684e-05, "loss": 0.6679, "num_tokens": 148446816.0, "step": 195 }, { "epoch": 0.07181789034947099, "grad_norm": 0.4004606686063848, "learning_rate": 1.902439024390244e-05, "loss": 0.6739, "num_tokens": 149234369.0, "step": 196 }, { "epoch": 0.07218430815737645, "grad_norm": 0.35034530650828044, "learning_rate": 1.9121951219512197e-05, "loss": 0.675, "num_tokens": 150070215.0, "step": 197 }, { "epoch": 0.0725507259652819, "grad_norm": 0.44694515906446725, "learning_rate": 1.921951219512195e-05, "loss": 0.6974, "num_tokens": 150789111.0, "step": 198 }, { "epoch": 0.07291714377318738, "grad_norm": 0.3650601260394466, "learning_rate": 1.931707317073171e-05, "loss": 0.6869, "num_tokens": 151424743.0, "step": 199 }, { "epoch": 0.07328356158109284, "grad_norm": 0.4196664545532189, "learning_rate": 1.9414634146341464e-05, "loss": 0.6492, "num_tokens": 152273810.0, "step": 200 }, { "epoch": 0.07364997938899831, "grad_norm": 0.32055924675774383, "learning_rate": 1.9512195121951222e-05, "loss": 0.6173, "num_tokens": 153118653.0, "step": 201 }, { "epoch": 0.07401639719690377, "grad_norm": 0.3009554533625609, "learning_rate": 1.9609756097560977e-05, "loss": 0.6286, "num_tokens": 153781626.0, "step": 202 }, { "epoch": 0.07438281500480923, "grad_norm": 0.40335932852313583, "learning_rate": 1.9707317073170734e-05, "loss": 0.6509, "num_tokens": 154573175.0, "step": 203 }, { "epoch": 0.0747492328127147, "grad_norm": 0.42699522438566895, "learning_rate": 1.980487804878049e-05, "loss": 0.674, "num_tokens": 155489650.0, "step": 204 }, { "epoch": 0.07511565062062016, "grad_norm": 0.3630586201827205, "learning_rate": 1.9902439024390247e-05, "loss": 0.6401, "num_tokens": 156293098.0, "step": 205 }, { "epoch": 0.07548206842852563, "grad_norm": 0.4796052946046007, "learning_rate": 2e-05, "loss": 0.6468, "num_tokens": 157186824.0, "step": 206 }, { "epoch": 0.07584848623643109, "grad_norm": 0.3460758724487627, "learning_rate": 2.0097560975609756e-05, "loss": 0.651, "num_tokens": 158011759.0, "step": 207 }, { "epoch": 0.07621490404433655, "grad_norm": 0.4162787732951245, "learning_rate": 2.0195121951219514e-05, "loss": 0.6674, "num_tokens": 158748271.0, "step": 208 }, { "epoch": 0.07658132185224202, "grad_norm": 0.34908201236317243, "learning_rate": 2.029268292682927e-05, "loss": 0.7055, "num_tokens": 159404316.0, "step": 209 }, { "epoch": 0.07694773966014748, "grad_norm": 0.31662500512289166, "learning_rate": 2.0390243902439027e-05, "loss": 0.7102, "num_tokens": 160134220.0, "step": 210 }, { "epoch": 0.07731415746805295, "grad_norm": 0.42146978933401175, "learning_rate": 2.048780487804878e-05, "loss": 0.6456, "num_tokens": 160930254.0, "step": 211 }, { "epoch": 0.07768057527595841, "grad_norm": 0.3557728383643591, "learning_rate": 2.058536585365854e-05, "loss": 0.6476, "num_tokens": 161794371.0, "step": 212 }, { "epoch": 0.07804699308386387, "grad_norm": 0.43972213487440975, "learning_rate": 2.0682926829268294e-05, "loss": 0.653, "num_tokens": 162505977.0, "step": 213 }, { "epoch": 0.07841341089176934, "grad_norm": 0.5027643258686102, "learning_rate": 2.0780487804878052e-05, "loss": 0.6896, "num_tokens": 163213283.0, "step": 214 }, { "epoch": 0.0787798286996748, "grad_norm": 0.42259381409260816, "learning_rate": 2.0878048780487806e-05, "loss": 0.6806, "num_tokens": 164006178.0, "step": 215 }, { "epoch": 0.07914624650758027, "grad_norm": 0.5540885583365167, "learning_rate": 2.0975609756097564e-05, "loss": 0.655, "num_tokens": 164767273.0, "step": 216 }, { "epoch": 0.07951266431548573, "grad_norm": 0.47645938287594397, "learning_rate": 2.107317073170732e-05, "loss": 0.6909, "num_tokens": 165535095.0, "step": 217 }, { "epoch": 0.07987908212339119, "grad_norm": 0.45153382086385924, "learning_rate": 2.1170731707317077e-05, "loss": 0.637, "num_tokens": 166382070.0, "step": 218 }, { "epoch": 0.08024549993129666, "grad_norm": 0.4438172391957941, "learning_rate": 2.126829268292683e-05, "loss": 0.654, "num_tokens": 167128083.0, "step": 219 }, { "epoch": 0.08061191773920212, "grad_norm": 0.40303997818925846, "learning_rate": 2.136585365853659e-05, "loss": 0.6183, "num_tokens": 167815503.0, "step": 220 }, { "epoch": 0.0809783355471076, "grad_norm": 0.41670612447825983, "learning_rate": 2.1463414634146344e-05, "loss": 0.6793, "num_tokens": 168615033.0, "step": 221 }, { "epoch": 0.08134475335501305, "grad_norm": 0.516019737706915, "learning_rate": 2.1560975609756102e-05, "loss": 0.6763, "num_tokens": 169365676.0, "step": 222 }, { "epoch": 0.08171117116291851, "grad_norm": 0.44541540421145615, "learning_rate": 2.1658536585365857e-05, "loss": 0.7019, "num_tokens": 170094264.0, "step": 223 }, { "epoch": 0.08207758897082398, "grad_norm": 0.4031820988185169, "learning_rate": 2.1756097560975615e-05, "loss": 0.6416, "num_tokens": 170976711.0, "step": 224 }, { "epoch": 0.08244400677872944, "grad_norm": 0.49254999638078284, "learning_rate": 2.185365853658537e-05, "loss": 0.6875, "num_tokens": 171670022.0, "step": 225 }, { "epoch": 0.08281042458663491, "grad_norm": 0.34191995869366926, "learning_rate": 2.1951219512195124e-05, "loss": 0.5914, "num_tokens": 172437007.0, "step": 226 }, { "epoch": 0.08317684239454037, "grad_norm": 0.33804588391786117, "learning_rate": 2.2048780487804882e-05, "loss": 0.6583, "num_tokens": 173096005.0, "step": 227 }, { "epoch": 0.08354326020244583, "grad_norm": 0.3754375494082089, "learning_rate": 2.2146341463414636e-05, "loss": 0.6686, "num_tokens": 173888016.0, "step": 228 }, { "epoch": 0.0839096780103513, "grad_norm": 0.3697887806049264, "learning_rate": 2.2243902439024394e-05, "loss": 0.6495, "num_tokens": 174539683.0, "step": 229 }, { "epoch": 0.08427609581825676, "grad_norm": 0.40864144912868705, "learning_rate": 2.234146341463415e-05, "loss": 0.6773, "num_tokens": 175338001.0, "step": 230 }, { "epoch": 0.08464251362616224, "grad_norm": 0.4230300167226326, "learning_rate": 2.2439024390243907e-05, "loss": 0.7071, "num_tokens": 176067959.0, "step": 231 }, { "epoch": 0.0850089314340677, "grad_norm": 0.42728314766802183, "learning_rate": 2.2536585365853658e-05, "loss": 0.6603, "num_tokens": 176817946.0, "step": 232 }, { "epoch": 0.08537534924197315, "grad_norm": 0.36160597599921124, "learning_rate": 2.2634146341463413e-05, "loss": 0.6418, "num_tokens": 177637322.0, "step": 233 }, { "epoch": 0.08574176704987863, "grad_norm": 0.41713699925451186, "learning_rate": 2.273170731707317e-05, "loss": 0.6848, "num_tokens": 178362015.0, "step": 234 }, { "epoch": 0.08610818485778408, "grad_norm": 0.3640385152125941, "learning_rate": 2.2829268292682925e-05, "loss": 0.669, "num_tokens": 179231034.0, "step": 235 }, { "epoch": 0.08647460266568956, "grad_norm": 0.38764366455406524, "learning_rate": 2.2926829268292683e-05, "loss": 0.6842, "num_tokens": 179999550.0, "step": 236 }, { "epoch": 0.08684102047359502, "grad_norm": 0.3223489612083334, "learning_rate": 2.3024390243902438e-05, "loss": 0.6696, "num_tokens": 180676918.0, "step": 237 }, { "epoch": 0.08720743828150049, "grad_norm": 0.38523008446103635, "learning_rate": 2.3121951219512196e-05, "loss": 0.6454, "num_tokens": 181584895.0, "step": 238 }, { "epoch": 0.08757385608940595, "grad_norm": 0.3204479523147045, "learning_rate": 2.321951219512195e-05, "loss": 0.6595, "num_tokens": 182285444.0, "step": 239 }, { "epoch": 0.0879402738973114, "grad_norm": 0.3821009347900929, "learning_rate": 2.331707317073171e-05, "loss": 0.6063, "num_tokens": 182977031.0, "step": 240 }, { "epoch": 0.08830669170521688, "grad_norm": 0.3622187715064899, "learning_rate": 2.3414634146341463e-05, "loss": 0.6325, "num_tokens": 183742993.0, "step": 241 }, { "epoch": 0.08867310951312234, "grad_norm": 0.5194323895553912, "learning_rate": 2.351219512195122e-05, "loss": 0.6825, "num_tokens": 184428353.0, "step": 242 }, { "epoch": 0.08903952732102781, "grad_norm": 0.3801717200507722, "learning_rate": 2.3609756097560975e-05, "loss": 0.6594, "num_tokens": 185219076.0, "step": 243 }, { "epoch": 0.08940594512893327, "grad_norm": 0.3875903129411112, "learning_rate": 2.3707317073170733e-05, "loss": 0.6717, "num_tokens": 185868112.0, "step": 244 }, { "epoch": 0.08977236293683873, "grad_norm": 0.44372959308854, "learning_rate": 2.3804878048780488e-05, "loss": 0.6651, "num_tokens": 186609045.0, "step": 245 }, { "epoch": 0.0901387807447442, "grad_norm": 0.34673796725148154, "learning_rate": 2.3902439024390246e-05, "loss": 0.7082, "num_tokens": 187365361.0, "step": 246 }, { "epoch": 0.09050519855264966, "grad_norm": 0.3985568596593128, "learning_rate": 2.4e-05, "loss": 0.686, "num_tokens": 188067685.0, "step": 247 }, { "epoch": 0.09087161636055513, "grad_norm": 0.37516535510659593, "learning_rate": 2.409756097560976e-05, "loss": 0.6462, "num_tokens": 188836650.0, "step": 248 }, { "epoch": 0.09123803416846059, "grad_norm": 0.38499416718833274, "learning_rate": 2.4195121951219513e-05, "loss": 0.6853, "num_tokens": 189583375.0, "step": 249 }, { "epoch": 0.09160445197636605, "grad_norm": 0.47661641387008874, "learning_rate": 2.429268292682927e-05, "loss": 0.6903, "num_tokens": 190447741.0, "step": 250 }, { "epoch": 0.09197086978427152, "grad_norm": 0.3674228444888806, "learning_rate": 2.4390243902439026e-05, "loss": 0.6655, "num_tokens": 191367387.0, "step": 251 }, { "epoch": 0.09233728759217698, "grad_norm": 0.43169516143136427, "learning_rate": 2.4487804878048784e-05, "loss": 0.6441, "num_tokens": 192258586.0, "step": 252 }, { "epoch": 0.09270370540008245, "grad_norm": 0.4105848814672189, "learning_rate": 2.4585365853658538e-05, "loss": 0.6279, "num_tokens": 193014801.0, "step": 253 }, { "epoch": 0.09307012320798791, "grad_norm": 0.4078537057144698, "learning_rate": 2.4682926829268293e-05, "loss": 0.6468, "num_tokens": 193766722.0, "step": 254 }, { "epoch": 0.09343654101589337, "grad_norm": 0.41077988008926475, "learning_rate": 2.478048780487805e-05, "loss": 0.6251, "num_tokens": 194571636.0, "step": 255 }, { "epoch": 0.09380295882379884, "grad_norm": 0.3519319781077189, "learning_rate": 2.4878048780487805e-05, "loss": 0.6583, "num_tokens": 195245644.0, "step": 256 }, { "epoch": 0.0941693766317043, "grad_norm": 0.3954293676345662, "learning_rate": 2.4975609756097563e-05, "loss": 0.6452, "num_tokens": 196080310.0, "step": 257 }, { "epoch": 0.09453579443960977, "grad_norm": 0.41108215309650986, "learning_rate": 2.5073170731707318e-05, "loss": 0.6579, "num_tokens": 196899262.0, "step": 258 }, { "epoch": 0.09490221224751523, "grad_norm": 0.33637571756786616, "learning_rate": 2.5170731707317076e-05, "loss": 0.6449, "num_tokens": 197598047.0, "step": 259 }, { "epoch": 0.09526863005542069, "grad_norm": 0.5144211683049846, "learning_rate": 2.526829268292683e-05, "loss": 0.7005, "num_tokens": 198273874.0, "step": 260 }, { "epoch": 0.09563504786332616, "grad_norm": 0.4117704880714939, "learning_rate": 2.536585365853659e-05, "loss": 0.6252, "num_tokens": 198995512.0, "step": 261 }, { "epoch": 0.09600146567123162, "grad_norm": 0.3929637363045247, "learning_rate": 2.5463414634146343e-05, "loss": 0.6623, "num_tokens": 199779426.0, "step": 262 }, { "epoch": 0.09636788347913709, "grad_norm": 0.5029281639586253, "learning_rate": 2.55609756097561e-05, "loss": 0.6719, "num_tokens": 200445863.0, "step": 263 }, { "epoch": 0.09673430128704255, "grad_norm": 0.40372479777540105, "learning_rate": 2.5658536585365856e-05, "loss": 0.6169, "num_tokens": 201154106.0, "step": 264 }, { "epoch": 0.09710071909494801, "grad_norm": 0.488859245679032, "learning_rate": 2.5756097560975614e-05, "loss": 0.6312, "num_tokens": 202061799.0, "step": 265 }, { "epoch": 0.09746713690285348, "grad_norm": 0.5130925468656765, "learning_rate": 2.5853658536585368e-05, "loss": 0.6627, "num_tokens": 202911122.0, "step": 266 }, { "epoch": 0.09783355471075894, "grad_norm": 0.35744242606092524, "learning_rate": 2.5951219512195126e-05, "loss": 0.6859, "num_tokens": 203577140.0, "step": 267 }, { "epoch": 0.09819997251866441, "grad_norm": 0.4858005461750126, "learning_rate": 2.604878048780488e-05, "loss": 0.6636, "num_tokens": 204340244.0, "step": 268 }, { "epoch": 0.09856639032656987, "grad_norm": 0.4217897565846461, "learning_rate": 2.614634146341464e-05, "loss": 0.6471, "num_tokens": 205110003.0, "step": 269 }, { "epoch": 0.09893280813447533, "grad_norm": 0.38878819435058704, "learning_rate": 2.6243902439024393e-05, "loss": 0.6474, "num_tokens": 205947325.0, "step": 270 }, { "epoch": 0.0992992259423808, "grad_norm": 0.31774037858913196, "learning_rate": 2.634146341463415e-05, "loss": 0.6341, "num_tokens": 206671450.0, "step": 271 }, { "epoch": 0.09966564375028626, "grad_norm": 0.4367350323925787, "learning_rate": 2.6439024390243906e-05, "loss": 0.684, "num_tokens": 207440905.0, "step": 272 }, { "epoch": 0.10003206155819173, "grad_norm": 0.35936421738856855, "learning_rate": 2.653658536585366e-05, "loss": 0.662, "num_tokens": 208094937.0, "step": 273 }, { "epoch": 0.10039847936609719, "grad_norm": 0.4521905931558904, "learning_rate": 2.663414634146342e-05, "loss": 0.6484, "num_tokens": 208911565.0, "step": 274 }, { "epoch": 0.10076489717400265, "grad_norm": 0.42554967348745476, "learning_rate": 2.6731707317073173e-05, "loss": 0.6396, "num_tokens": 209714988.0, "step": 275 }, { "epoch": 0.10113131498190812, "grad_norm": 0.3845579113807808, "learning_rate": 2.682926829268293e-05, "loss": 0.6427, "num_tokens": 210468334.0, "step": 276 }, { "epoch": 0.10149773278981358, "grad_norm": 0.47599832866852, "learning_rate": 2.6926829268292686e-05, "loss": 0.6418, "num_tokens": 211207341.0, "step": 277 }, { "epoch": 0.10186415059771906, "grad_norm": 0.31889195698413714, "learning_rate": 2.7024390243902444e-05, "loss": 0.6197, "num_tokens": 211867164.0, "step": 278 }, { "epoch": 0.10223056840562451, "grad_norm": 0.5178337038928075, "learning_rate": 2.7121951219512198e-05, "loss": 0.6753, "num_tokens": 212523684.0, "step": 279 }, { "epoch": 0.10259698621352997, "grad_norm": 0.4353418719479532, "learning_rate": 2.7219512195121956e-05, "loss": 0.6463, "num_tokens": 213298792.0, "step": 280 }, { "epoch": 0.10296340402143545, "grad_norm": 0.4676811819090702, "learning_rate": 2.731707317073171e-05, "loss": 0.6224, "num_tokens": 214078928.0, "step": 281 }, { "epoch": 0.1033298218293409, "grad_norm": 0.4465441670364666, "learning_rate": 2.741463414634147e-05, "loss": 0.6366, "num_tokens": 214869280.0, "step": 282 }, { "epoch": 0.10369623963724638, "grad_norm": 0.39479893778993264, "learning_rate": 2.751219512195122e-05, "loss": 0.605, "num_tokens": 215625519.0, "step": 283 }, { "epoch": 0.10406265744515183, "grad_norm": 0.36837775689155994, "learning_rate": 2.7609756097560974e-05, "loss": 0.6525, "num_tokens": 216379198.0, "step": 284 }, { "epoch": 0.1044290752530573, "grad_norm": 0.4044722557831474, "learning_rate": 2.7707317073170732e-05, "loss": 0.6763, "num_tokens": 217146741.0, "step": 285 }, { "epoch": 0.10479549306096277, "grad_norm": 0.2789626797022916, "learning_rate": 2.7804878048780487e-05, "loss": 0.6209, "num_tokens": 217913791.0, "step": 286 }, { "epoch": 0.10516191086886822, "grad_norm": 0.4369695413148876, "learning_rate": 2.7902439024390245e-05, "loss": 0.6755, "num_tokens": 218614419.0, "step": 287 }, { "epoch": 0.1055283286767737, "grad_norm": 0.36496519992943427, "learning_rate": 2.8e-05, "loss": 0.6546, "num_tokens": 219358198.0, "step": 288 }, { "epoch": 0.10589474648467916, "grad_norm": 0.47848073277383407, "learning_rate": 2.8097560975609758e-05, "loss": 0.6441, "num_tokens": 220260786.0, "step": 289 }, { "epoch": 0.10626116429258461, "grad_norm": 0.3961739426731916, "learning_rate": 2.8195121951219512e-05, "loss": 0.6501, "num_tokens": 220924922.0, "step": 290 }, { "epoch": 0.10662758210049009, "grad_norm": 0.4394110206310609, "learning_rate": 2.829268292682927e-05, "loss": 0.6896, "num_tokens": 221730574.0, "step": 291 }, { "epoch": 0.10699399990839555, "grad_norm": 0.5302252803231946, "learning_rate": 2.8390243902439025e-05, "loss": 0.6466, "num_tokens": 222492286.0, "step": 292 }, { "epoch": 0.10736041771630102, "grad_norm": 0.4266863470185275, "learning_rate": 2.8487804878048783e-05, "loss": 0.6084, "num_tokens": 223298986.0, "step": 293 }, { "epoch": 0.10772683552420648, "grad_norm": 0.45074900844345345, "learning_rate": 2.8585365853658537e-05, "loss": 0.6354, "num_tokens": 224132333.0, "step": 294 }, { "epoch": 0.10809325333211194, "grad_norm": 0.47314390585877286, "learning_rate": 2.8682926829268295e-05, "loss": 0.678, "num_tokens": 224997815.0, "step": 295 }, { "epoch": 0.10845967114001741, "grad_norm": 0.3338304790759442, "learning_rate": 2.878048780487805e-05, "loss": 0.6543, "num_tokens": 225834880.0, "step": 296 }, { "epoch": 0.10882608894792287, "grad_norm": 0.42164815080215584, "learning_rate": 2.8878048780487808e-05, "loss": 0.6169, "num_tokens": 226657860.0, "step": 297 }, { "epoch": 0.10919250675582834, "grad_norm": 0.3446951741577162, "learning_rate": 2.8975609756097562e-05, "loss": 0.6231, "num_tokens": 227555213.0, "step": 298 }, { "epoch": 0.1095589245637338, "grad_norm": 0.4045184441134088, "learning_rate": 2.9073170731707317e-05, "loss": 0.6514, "num_tokens": 228272061.0, "step": 299 }, { "epoch": 0.10992534237163926, "grad_norm": 0.32935283962824446, "learning_rate": 2.9170731707317075e-05, "loss": 0.65, "num_tokens": 229147131.0, "step": 300 }, { "epoch": 0.11029176017954473, "grad_norm": 0.33269063214313177, "learning_rate": 2.926829268292683e-05, "loss": 0.6369, "num_tokens": 229946705.0, "step": 301 }, { "epoch": 0.11065817798745019, "grad_norm": 0.4375289909904364, "learning_rate": 2.9365853658536587e-05, "loss": 0.6861, "num_tokens": 230687641.0, "step": 302 }, { "epoch": 0.11102459579535566, "grad_norm": 0.4143426752007671, "learning_rate": 2.9463414634146342e-05, "loss": 0.658, "num_tokens": 231572154.0, "step": 303 }, { "epoch": 0.11139101360326112, "grad_norm": 0.4016735945667528, "learning_rate": 2.95609756097561e-05, "loss": 0.6266, "num_tokens": 232463203.0, "step": 304 }, { "epoch": 0.11175743141116658, "grad_norm": 0.4690687583084336, "learning_rate": 2.9658536585365855e-05, "loss": 0.6796, "num_tokens": 233243786.0, "step": 305 }, { "epoch": 0.11212384921907205, "grad_norm": 0.3991722394572876, "learning_rate": 2.9756097560975613e-05, "loss": 0.6516, "num_tokens": 234031223.0, "step": 306 }, { "epoch": 0.11249026702697751, "grad_norm": 0.44897102562877517, "learning_rate": 2.9853658536585367e-05, "loss": 0.6558, "num_tokens": 234686695.0, "step": 307 }, { "epoch": 0.11285668483488298, "grad_norm": 0.4987117245177475, "learning_rate": 2.9951219512195125e-05, "loss": 0.6728, "num_tokens": 235441481.0, "step": 308 }, { "epoch": 0.11322310264278844, "grad_norm": 0.4318423075146508, "learning_rate": 3.004878048780488e-05, "loss": 0.6308, "num_tokens": 236175526.0, "step": 309 }, { "epoch": 0.1135895204506939, "grad_norm": 0.33782410407415503, "learning_rate": 3.0146341463414638e-05, "loss": 0.6114, "num_tokens": 236928120.0, "step": 310 }, { "epoch": 0.11395593825859937, "grad_norm": 0.4970831094826414, "learning_rate": 3.0243902439024392e-05, "loss": 0.6555, "num_tokens": 237709865.0, "step": 311 }, { "epoch": 0.11432235606650483, "grad_norm": 0.4153633336381283, "learning_rate": 3.034146341463415e-05, "loss": 0.6288, "num_tokens": 238529198.0, "step": 312 }, { "epoch": 0.1146887738744103, "grad_norm": 0.42332956508043856, "learning_rate": 3.0439024390243905e-05, "loss": 0.6591, "num_tokens": 239336575.0, "step": 313 }, { "epoch": 0.11505519168231576, "grad_norm": 0.39739859797633303, "learning_rate": 3.053658536585366e-05, "loss": 0.6293, "num_tokens": 240133305.0, "step": 314 }, { "epoch": 0.11542160949022122, "grad_norm": 0.42572563896567195, "learning_rate": 3.063414634146342e-05, "loss": 0.6515, "num_tokens": 240921606.0, "step": 315 }, { "epoch": 0.11578802729812669, "grad_norm": 0.4068452872267471, "learning_rate": 3.073170731707317e-05, "loss": 0.6253, "num_tokens": 241755941.0, "step": 316 }, { "epoch": 0.11615444510603215, "grad_norm": 0.42081427835471913, "learning_rate": 3.082926829268293e-05, "loss": 0.6297, "num_tokens": 242504638.0, "step": 317 }, { "epoch": 0.11652086291393762, "grad_norm": 0.360466656389546, "learning_rate": 3.092682926829269e-05, "loss": 0.6529, "num_tokens": 243323397.0, "step": 318 }, { "epoch": 0.11688728072184308, "grad_norm": 0.5176188632354743, "learning_rate": 3.102439024390244e-05, "loss": 0.6815, "num_tokens": 244101061.0, "step": 319 }, { "epoch": 0.11725369852974854, "grad_norm": 0.3544596690819067, "learning_rate": 3.11219512195122e-05, "loss": 0.6162, "num_tokens": 244786397.0, "step": 320 }, { "epoch": 0.11762011633765401, "grad_norm": 0.48107284420609603, "learning_rate": 3.1219512195121955e-05, "loss": 0.6561, "num_tokens": 245488244.0, "step": 321 }, { "epoch": 0.11798653414555947, "grad_norm": 0.4699955626178778, "learning_rate": 3.131707317073171e-05, "loss": 0.6636, "num_tokens": 246342184.0, "step": 322 }, { "epoch": 0.11835295195346494, "grad_norm": 0.4122617622143294, "learning_rate": 3.1414634146341464e-05, "loss": 0.6583, "num_tokens": 247064455.0, "step": 323 }, { "epoch": 0.1187193697613704, "grad_norm": 0.47292140916988884, "learning_rate": 3.151219512195122e-05, "loss": 0.6258, "num_tokens": 247921983.0, "step": 324 }, { "epoch": 0.11908578756927586, "grad_norm": 0.4376302562916287, "learning_rate": 3.160975609756098e-05, "loss": 0.611, "num_tokens": 248625196.0, "step": 325 }, { "epoch": 0.11945220537718133, "grad_norm": 0.4622887856690259, "learning_rate": 3.170731707317074e-05, "loss": 0.6444, "num_tokens": 249363689.0, "step": 326 }, { "epoch": 0.11981862318508679, "grad_norm": 0.36405389144641737, "learning_rate": 3.180487804878049e-05, "loss": 0.6246, "num_tokens": 250273715.0, "step": 327 }, { "epoch": 0.12018504099299226, "grad_norm": 0.38704680719329065, "learning_rate": 3.190243902439025e-05, "loss": 0.6497, "num_tokens": 251055821.0, "step": 328 }, { "epoch": 0.12055145880089772, "grad_norm": 0.34952762358640926, "learning_rate": 3.2000000000000005e-05, "loss": 0.6571, "num_tokens": 251865728.0, "step": 329 }, { "epoch": 0.12091787660880318, "grad_norm": 0.34201066699989324, "learning_rate": 3.209756097560976e-05, "loss": 0.6618, "num_tokens": 252647137.0, "step": 330 }, { "epoch": 0.12128429441670865, "grad_norm": 0.469085906429019, "learning_rate": 3.2195121951219514e-05, "loss": 0.6493, "num_tokens": 253369766.0, "step": 331 }, { "epoch": 0.12165071222461411, "grad_norm": 0.32571572796829606, "learning_rate": 3.229268292682927e-05, "loss": 0.6634, "num_tokens": 254152471.0, "step": 332 }, { "epoch": 0.12201713003251959, "grad_norm": 0.44617699476071265, "learning_rate": 3.239024390243903e-05, "loss": 0.6367, "num_tokens": 254958817.0, "step": 333 }, { "epoch": 0.12238354784042504, "grad_norm": 0.30475366409651183, "learning_rate": 3.248780487804879e-05, "loss": 0.6307, "num_tokens": 255739151.0, "step": 334 }, { "epoch": 0.1227499656483305, "grad_norm": 0.47997525557708565, "learning_rate": 3.258536585365854e-05, "loss": 0.677, "num_tokens": 256596305.0, "step": 335 }, { "epoch": 0.12311638345623598, "grad_norm": 0.3293706758226393, "learning_rate": 3.268292682926829e-05, "loss": 0.6337, "num_tokens": 257456555.0, "step": 336 }, { "epoch": 0.12348280126414143, "grad_norm": 0.5136763672614253, "learning_rate": 3.278048780487805e-05, "loss": 0.6543, "num_tokens": 258213762.0, "step": 337 }, { "epoch": 0.1238492190720469, "grad_norm": 0.45957568019526496, "learning_rate": 3.287804878048781e-05, "loss": 0.6616, "num_tokens": 258991521.0, "step": 338 }, { "epoch": 0.12421563687995237, "grad_norm": 0.40745501475907414, "learning_rate": 3.2975609756097565e-05, "loss": 0.6411, "num_tokens": 259680233.0, "step": 339 }, { "epoch": 0.12458205468785782, "grad_norm": 0.4586562353796359, "learning_rate": 3.3073170731707316e-05, "loss": 0.6316, "num_tokens": 260653330.0, "step": 340 }, { "epoch": 0.1249484724957633, "grad_norm": 0.35660588655697634, "learning_rate": 3.3170731707317074e-05, "loss": 0.6604, "num_tokens": 261419446.0, "step": 341 }, { "epoch": 0.12531489030366877, "grad_norm": 0.4136051279291867, "learning_rate": 3.326829268292683e-05, "loss": 0.6087, "num_tokens": 262099424.0, "step": 342 }, { "epoch": 0.1256813081115742, "grad_norm": 0.32759355284060127, "learning_rate": 3.336585365853659e-05, "loss": 0.5736, "num_tokens": 262931639.0, "step": 343 }, { "epoch": 0.1260477259194797, "grad_norm": 0.35984957122928746, "learning_rate": 3.346341463414634e-05, "loss": 0.6532, "num_tokens": 263730618.0, "step": 344 }, { "epoch": 0.12641414372738516, "grad_norm": 0.36387922295263797, "learning_rate": 3.35609756097561e-05, "loss": 0.6263, "num_tokens": 264518553.0, "step": 345 }, { "epoch": 0.1267805615352906, "grad_norm": 0.4164714500788403, "learning_rate": 3.365853658536586e-05, "loss": 0.6573, "num_tokens": 265249601.0, "step": 346 }, { "epoch": 0.12714697934319608, "grad_norm": 0.37970383352820186, "learning_rate": 3.375609756097561e-05, "loss": 0.6527, "num_tokens": 265936557.0, "step": 347 }, { "epoch": 0.12751339715110155, "grad_norm": 0.4292273749805531, "learning_rate": 3.3853658536585366e-05, "loss": 0.6567, "num_tokens": 266701844.0, "step": 348 }, { "epoch": 0.12787981495900702, "grad_norm": 0.38839708219779995, "learning_rate": 3.3951219512195124e-05, "loss": 0.6178, "num_tokens": 267574395.0, "step": 349 }, { "epoch": 0.12824623276691247, "grad_norm": 0.4746392172252568, "learning_rate": 3.404878048780488e-05, "loss": 0.6452, "num_tokens": 268405583.0, "step": 350 }, { "epoch": 0.12861265057481794, "grad_norm": 0.3969831168554217, "learning_rate": 3.414634146341463e-05, "loss": 0.655, "num_tokens": 269286241.0, "step": 351 }, { "epoch": 0.1289790683827234, "grad_norm": 0.42152769425806197, "learning_rate": 3.424390243902439e-05, "loss": 0.6319, "num_tokens": 270048597.0, "step": 352 }, { "epoch": 0.12934548619062886, "grad_norm": 0.4762532472799411, "learning_rate": 3.434146341463415e-05, "loss": 0.6472, "num_tokens": 270851699.0, "step": 353 }, { "epoch": 0.12971190399853433, "grad_norm": 0.3443956662794948, "learning_rate": 3.443902439024391e-05, "loss": 0.6054, "num_tokens": 271689329.0, "step": 354 }, { "epoch": 0.1300783218064398, "grad_norm": 0.3809846382233422, "learning_rate": 3.453658536585366e-05, "loss": 0.603, "num_tokens": 272626464.0, "step": 355 }, { "epoch": 0.13044473961434525, "grad_norm": 0.3890982557392055, "learning_rate": 3.4634146341463416e-05, "loss": 0.6362, "num_tokens": 273424191.0, "step": 356 }, { "epoch": 0.13081115742225072, "grad_norm": 0.3884322447362833, "learning_rate": 3.4731707317073174e-05, "loss": 0.6392, "num_tokens": 274286828.0, "step": 357 }, { "epoch": 0.1311775752301562, "grad_norm": 0.45747096306585916, "learning_rate": 3.482926829268293e-05, "loss": 0.6384, "num_tokens": 274972873.0, "step": 358 }, { "epoch": 0.13154399303806166, "grad_norm": 0.4562284299273014, "learning_rate": 3.4926829268292684e-05, "loss": 0.6838, "num_tokens": 275775230.0, "step": 359 }, { "epoch": 0.1319104108459671, "grad_norm": 0.43354727330186266, "learning_rate": 3.502439024390244e-05, "loss": 0.6542, "num_tokens": 276604831.0, "step": 360 }, { "epoch": 0.13227682865387258, "grad_norm": 0.37984236468004984, "learning_rate": 3.51219512195122e-05, "loss": 0.6302, "num_tokens": 277331703.0, "step": 361 }, { "epoch": 0.13264324646177805, "grad_norm": 0.4906784538438972, "learning_rate": 3.521951219512196e-05, "loss": 0.6541, "num_tokens": 277976006.0, "step": 362 }, { "epoch": 0.1330096642696835, "grad_norm": 0.43883487187601605, "learning_rate": 3.531707317073171e-05, "loss": 0.6504, "num_tokens": 278715074.0, "step": 363 }, { "epoch": 0.13337608207758897, "grad_norm": 0.38715899693702194, "learning_rate": 3.541463414634147e-05, "loss": 0.6607, "num_tokens": 279508893.0, "step": 364 }, { "epoch": 0.13374249988549444, "grad_norm": 0.46785663130062, "learning_rate": 3.5512195121951225e-05, "loss": 0.6378, "num_tokens": 280352641.0, "step": 365 }, { "epoch": 0.1341089176933999, "grad_norm": 0.3350149253738511, "learning_rate": 3.5609756097560976e-05, "loss": 0.6379, "num_tokens": 281298849.0, "step": 366 }, { "epoch": 0.13447533550130536, "grad_norm": 0.3972147125375943, "learning_rate": 3.5707317073170734e-05, "loss": 0.621, "num_tokens": 282056594.0, "step": 367 }, { "epoch": 0.13484175330921083, "grad_norm": 0.35049276583562744, "learning_rate": 3.580487804878049e-05, "loss": 0.6493, "num_tokens": 282783093.0, "step": 368 }, { "epoch": 0.1352081711171163, "grad_norm": 0.4867012130776034, "learning_rate": 3.590243902439025e-05, "loss": 0.6822, "num_tokens": 283575849.0, "step": 369 }, { "epoch": 0.13557458892502175, "grad_norm": 0.45293640324013046, "learning_rate": 3.6e-05, "loss": 0.6744, "num_tokens": 284363876.0, "step": 370 }, { "epoch": 0.13594100673292722, "grad_norm": 0.3970641845293089, "learning_rate": 3.609756097560976e-05, "loss": 0.6413, "num_tokens": 285163944.0, "step": 371 }, { "epoch": 0.1363074245408327, "grad_norm": 0.3518858603463989, "learning_rate": 3.619512195121952e-05, "loss": 0.6318, "num_tokens": 285929305.0, "step": 372 }, { "epoch": 0.13667384234873814, "grad_norm": 0.4523490691713423, "learning_rate": 3.6292682926829275e-05, "loss": 0.6428, "num_tokens": 286725053.0, "step": 373 }, { "epoch": 0.1370402601566436, "grad_norm": 0.33824526030462987, "learning_rate": 3.6390243902439026e-05, "loss": 0.6242, "num_tokens": 287515243.0, "step": 374 }, { "epoch": 0.13740667796454908, "grad_norm": 0.3525163524224056, "learning_rate": 3.6487804878048784e-05, "loss": 0.6329, "num_tokens": 288309533.0, "step": 375 }, { "epoch": 0.13777309577245453, "grad_norm": 0.3907414455863394, "learning_rate": 3.658536585365854e-05, "loss": 0.5959, "num_tokens": 289035198.0, "step": 376 }, { "epoch": 0.13813951358036, "grad_norm": 0.3725231476206113, "learning_rate": 3.66829268292683e-05, "loss": 0.6257, "num_tokens": 289836856.0, "step": 377 }, { "epoch": 0.13850593138826547, "grad_norm": 0.45362356742855536, "learning_rate": 3.678048780487805e-05, "loss": 0.6049, "num_tokens": 290588577.0, "step": 378 }, { "epoch": 0.13887234919617095, "grad_norm": 0.3127959257231738, "learning_rate": 3.687804878048781e-05, "loss": 0.6186, "num_tokens": 291245596.0, "step": 379 }, { "epoch": 0.1392387670040764, "grad_norm": 0.41026928959938286, "learning_rate": 3.697560975609757e-05, "loss": 0.6365, "num_tokens": 292051779.0, "step": 380 }, { "epoch": 0.13960518481198186, "grad_norm": 0.3245289447274128, "learning_rate": 3.7073170731707325e-05, "loss": 0.6396, "num_tokens": 292860259.0, "step": 381 }, { "epoch": 0.13997160261988734, "grad_norm": 0.41831923289030726, "learning_rate": 3.7170731707317076e-05, "loss": 0.6456, "num_tokens": 293514896.0, "step": 382 }, { "epoch": 0.14033802042779278, "grad_norm": 0.3293144772076955, "learning_rate": 3.7268292682926834e-05, "loss": 0.6257, "num_tokens": 294272268.0, "step": 383 }, { "epoch": 0.14070443823569825, "grad_norm": 0.38891186066744554, "learning_rate": 3.736585365853659e-05, "loss": 0.6667, "num_tokens": 294982353.0, "step": 384 }, { "epoch": 0.14107085604360373, "grad_norm": 0.3328924199911974, "learning_rate": 3.7463414634146343e-05, "loss": 0.6506, "num_tokens": 295550144.0, "step": 385 }, { "epoch": 0.14143727385150917, "grad_norm": 0.41954417309992603, "learning_rate": 3.75609756097561e-05, "loss": 0.6423, "num_tokens": 296190536.0, "step": 386 }, { "epoch": 0.14180369165941464, "grad_norm": 0.3587367752904023, "learning_rate": 3.765853658536585e-05, "loss": 0.5913, "num_tokens": 296981421.0, "step": 387 }, { "epoch": 0.14217010946732012, "grad_norm": 0.4961427072295045, "learning_rate": 3.775609756097561e-05, "loss": 0.6372, "num_tokens": 297731700.0, "step": 388 }, { "epoch": 0.1425365272752256, "grad_norm": 0.4797599667846196, "learning_rate": 3.785365853658537e-05, "loss": 0.615, "num_tokens": 298532675.0, "step": 389 }, { "epoch": 0.14290294508313103, "grad_norm": 0.40324839570323034, "learning_rate": 3.7951219512195126e-05, "loss": 0.5907, "num_tokens": 299282903.0, "step": 390 }, { "epoch": 0.1432693628910365, "grad_norm": 0.3716634987032932, "learning_rate": 3.804878048780488e-05, "loss": 0.6199, "num_tokens": 300073951.0, "step": 391 }, { "epoch": 0.14363578069894198, "grad_norm": 0.3778810803901277, "learning_rate": 3.8146341463414636e-05, "loss": 0.6148, "num_tokens": 300805995.0, "step": 392 }, { "epoch": 0.14400219850684742, "grad_norm": 0.39001237281638373, "learning_rate": 3.8243902439024394e-05, "loss": 0.611, "num_tokens": 301596677.0, "step": 393 }, { "epoch": 0.1443686163147529, "grad_norm": 0.3199621980905258, "learning_rate": 3.8341463414634145e-05, "loss": 0.6324, "num_tokens": 302352594.0, "step": 394 }, { "epoch": 0.14473503412265837, "grad_norm": 0.35282953215054863, "learning_rate": 3.84390243902439e-05, "loss": 0.6454, "num_tokens": 303102748.0, "step": 395 }, { "epoch": 0.1451014519305638, "grad_norm": 0.4220903793576959, "learning_rate": 3.853658536585366e-05, "loss": 0.6388, "num_tokens": 303900717.0, "step": 396 }, { "epoch": 0.14546786973846929, "grad_norm": 0.34977127354719606, "learning_rate": 3.863414634146342e-05, "loss": 0.615, "num_tokens": 304741061.0, "step": 397 }, { "epoch": 0.14583428754637476, "grad_norm": 0.4539447940929313, "learning_rate": 3.873170731707317e-05, "loss": 0.6147, "num_tokens": 305543455.0, "step": 398 }, { "epoch": 0.14620070535428023, "grad_norm": 0.4098335803621343, "learning_rate": 3.882926829268293e-05, "loss": 0.6184, "num_tokens": 306399274.0, "step": 399 }, { "epoch": 0.14656712316218568, "grad_norm": 0.3606468464921341, "learning_rate": 3.8926829268292686e-05, "loss": 0.6164, "num_tokens": 307193480.0, "step": 400 }, { "epoch": 0.14693354097009115, "grad_norm": 0.328782283694742, "learning_rate": 3.9024390243902444e-05, "loss": 0.5825, "num_tokens": 307951908.0, "step": 401 }, { "epoch": 0.14729995877799662, "grad_norm": 0.4217526739642736, "learning_rate": 3.9121951219512195e-05, "loss": 0.6608, "num_tokens": 308812309.0, "step": 402 }, { "epoch": 0.14766637658590206, "grad_norm": 0.37209852828261286, "learning_rate": 3.921951219512195e-05, "loss": 0.6176, "num_tokens": 309477187.0, "step": 403 }, { "epoch": 0.14803279439380754, "grad_norm": 0.4148989986471944, "learning_rate": 3.931707317073171e-05, "loss": 0.6614, "num_tokens": 310205035.0, "step": 404 }, { "epoch": 0.148399212201713, "grad_norm": 0.3856781529545393, "learning_rate": 3.941463414634147e-05, "loss": 0.6772, "num_tokens": 310872526.0, "step": 405 }, { "epoch": 0.14876563000961845, "grad_norm": 0.383028579374726, "learning_rate": 3.951219512195122e-05, "loss": 0.6366, "num_tokens": 311506139.0, "step": 406 }, { "epoch": 0.14913204781752393, "grad_norm": 0.37287209982697417, "learning_rate": 3.960975609756098e-05, "loss": 0.6188, "num_tokens": 312235797.0, "step": 407 }, { "epoch": 0.1494984656254294, "grad_norm": 0.3355733489236959, "learning_rate": 3.9707317073170736e-05, "loss": 0.6503, "num_tokens": 312970139.0, "step": 408 }, { "epoch": 0.14986488343333487, "grad_norm": 0.40024168064758225, "learning_rate": 3.9804878048780494e-05, "loss": 0.635, "num_tokens": 313838939.0, "step": 409 }, { "epoch": 0.15023130124124032, "grad_norm": 0.36937613357315807, "learning_rate": 3.9902439024390245e-05, "loss": 0.6724, "num_tokens": 314632742.0, "step": 410 }, { "epoch": 0.1505977190491458, "grad_norm": 0.35149849348869994, "learning_rate": 4e-05, "loss": 0.6141, "num_tokens": 315551965.0, "step": 411 }, { "epoch": 0.15096413685705126, "grad_norm": 0.3242102264541031, "learning_rate": 3.9999999493282065e-05, "loss": 0.6274, "num_tokens": 316277107.0, "step": 412 }, { "epoch": 0.1513305546649567, "grad_norm": 0.2985425035919905, "learning_rate": 3.9999997973128264e-05, "loss": 0.6312, "num_tokens": 317016710.0, "step": 413 }, { "epoch": 0.15169697247286218, "grad_norm": 0.3191411735340872, "learning_rate": 3.9999995439538694e-05, "loss": 0.6175, "num_tokens": 317847276.0, "step": 414 }, { "epoch": 0.15206339028076765, "grad_norm": 0.3247865682687521, "learning_rate": 3.99999918925135e-05, "loss": 0.6285, "num_tokens": 318654306.0, "step": 415 }, { "epoch": 0.1524298080886731, "grad_norm": 0.39339806244236447, "learning_rate": 3.999998733205287e-05, "loss": 0.6297, "num_tokens": 319498679.0, "step": 416 }, { "epoch": 0.15279622589657857, "grad_norm": 0.3488463895649198, "learning_rate": 3.999998175815707e-05, "loss": 0.6246, "num_tokens": 320347644.0, "step": 417 }, { "epoch": 0.15316264370448404, "grad_norm": 0.3004032792440151, "learning_rate": 3.9999975170826415e-05, "loss": 0.6185, "num_tokens": 321225763.0, "step": 418 }, { "epoch": 0.15352906151238951, "grad_norm": 0.3629200127640311, "learning_rate": 3.999996757006127e-05, "loss": 0.6676, "num_tokens": 321985238.0, "step": 419 }, { "epoch": 0.15389547932029496, "grad_norm": 0.3373991437417486, "learning_rate": 3.9999958955862066e-05, "loss": 0.6506, "num_tokens": 322674199.0, "step": 420 }, { "epoch": 0.15426189712820043, "grad_norm": 0.34516246412757273, "learning_rate": 3.999994932822929e-05, "loss": 0.6266, "num_tokens": 323383391.0, "step": 421 }, { "epoch": 0.1546283149361059, "grad_norm": 0.33503062388556876, "learning_rate": 3.999993868716348e-05, "loss": 0.6577, "num_tokens": 324105360.0, "step": 422 }, { "epoch": 0.15499473274401135, "grad_norm": 0.38019179386932245, "learning_rate": 3.9999927032665246e-05, "loss": 0.6511, "num_tokens": 324866210.0, "step": 423 }, { "epoch": 0.15536115055191682, "grad_norm": 0.36541348409200763, "learning_rate": 3.999991436473522e-05, "loss": 0.6678, "num_tokens": 325614888.0, "step": 424 }, { "epoch": 0.1557275683598223, "grad_norm": 0.33569069851025723, "learning_rate": 3.9999900683374143e-05, "loss": 0.609, "num_tokens": 326250635.0, "step": 425 }, { "epoch": 0.15609398616772774, "grad_norm": 0.35925466378597753, "learning_rate": 3.9999885988582766e-05, "loss": 0.6361, "num_tokens": 327139623.0, "step": 426 }, { "epoch": 0.1564604039756332, "grad_norm": 0.3760345057784179, "learning_rate": 3.999987028036193e-05, "loss": 0.6553, "num_tokens": 327940910.0, "step": 427 }, { "epoch": 0.15682682178353868, "grad_norm": 0.33318577016007533, "learning_rate": 3.999985355871251e-05, "loss": 0.6628, "num_tokens": 328573404.0, "step": 428 }, { "epoch": 0.15719323959144416, "grad_norm": 0.3918820911934162, "learning_rate": 3.999983582363544e-05, "loss": 0.6398, "num_tokens": 329337362.0, "step": 429 }, { "epoch": 0.1575596573993496, "grad_norm": 0.28609446048308806, "learning_rate": 3.9999817075131744e-05, "loss": 0.6332, "num_tokens": 330032926.0, "step": 430 }, { "epoch": 0.15792607520725507, "grad_norm": 0.36118722964276795, "learning_rate": 3.999979731320245e-05, "loss": 0.6306, "num_tokens": 330851931.0, "step": 431 }, { "epoch": 0.15829249301516055, "grad_norm": 0.3101631318937685, "learning_rate": 3.9999776537848687e-05, "loss": 0.6431, "num_tokens": 331600981.0, "step": 432 }, { "epoch": 0.158658910823066, "grad_norm": 0.29707002389118853, "learning_rate": 3.999975474907162e-05, "loss": 0.6687, "num_tokens": 332368353.0, "step": 433 }, { "epoch": 0.15902532863097146, "grad_norm": 0.34356633361061123, "learning_rate": 3.999973194687248e-05, "loss": 0.6182, "num_tokens": 333070144.0, "step": 434 }, { "epoch": 0.15939174643887694, "grad_norm": 0.3112616252396475, "learning_rate": 3.9999708131252544e-05, "loss": 0.636, "num_tokens": 333829879.0, "step": 435 }, { "epoch": 0.15975816424678238, "grad_norm": 0.30922075572725716, "learning_rate": 3.999968330221315e-05, "loss": 0.6206, "num_tokens": 334560909.0, "step": 436 }, { "epoch": 0.16012458205468785, "grad_norm": 0.4154001024098811, "learning_rate": 3.999965745975571e-05, "loss": 0.6507, "num_tokens": 335216698.0, "step": 437 }, { "epoch": 0.16049099986259333, "grad_norm": 0.3951558230779705, "learning_rate": 3.999963060388167e-05, "loss": 0.6152, "num_tokens": 336040813.0, "step": 438 }, { "epoch": 0.1608574176704988, "grad_norm": 0.325072753774644, "learning_rate": 3.999960273459254e-05, "loss": 0.6249, "num_tokens": 336712483.0, "step": 439 }, { "epoch": 0.16122383547840424, "grad_norm": 0.3670078686485625, "learning_rate": 3.999957385188989e-05, "loss": 0.6245, "num_tokens": 337541884.0, "step": 440 }, { "epoch": 0.16159025328630972, "grad_norm": 0.29796211555913993, "learning_rate": 3.9999543955775355e-05, "loss": 0.6517, "num_tokens": 338430378.0, "step": 441 }, { "epoch": 0.1619566710942152, "grad_norm": 0.2956903901473169, "learning_rate": 3.9999513046250605e-05, "loss": 0.662, "num_tokens": 339128250.0, "step": 442 }, { "epoch": 0.16232308890212063, "grad_norm": 0.3083215519345638, "learning_rate": 3.999948112331739e-05, "loss": 0.6246, "num_tokens": 340005128.0, "step": 443 }, { "epoch": 0.1626895067100261, "grad_norm": 0.2966513425749513, "learning_rate": 3.999944818697751e-05, "loss": 0.6381, "num_tokens": 340796378.0, "step": 444 }, { "epoch": 0.16305592451793158, "grad_norm": 0.386783086107881, "learning_rate": 3.99994142372328e-05, "loss": 0.6197, "num_tokens": 341664108.0, "step": 445 }, { "epoch": 0.16342234232583702, "grad_norm": 0.31312219828060733, "learning_rate": 3.9999379274085196e-05, "loss": 0.6355, "num_tokens": 342493431.0, "step": 446 }, { "epoch": 0.1637887601337425, "grad_norm": 0.5049474872930326, "learning_rate": 3.9999343297536646e-05, "loss": 0.6323, "num_tokens": 343212769.0, "step": 447 }, { "epoch": 0.16415517794164797, "grad_norm": 0.4293014113194709, "learning_rate": 3.9999306307589184e-05, "loss": 0.6211, "num_tokens": 344014849.0, "step": 448 }, { "epoch": 0.16452159574955344, "grad_norm": 0.41323646187821844, "learning_rate": 3.99992683042449e-05, "loss": 0.6491, "num_tokens": 344851829.0, "step": 449 }, { "epoch": 0.16488801355745888, "grad_norm": 0.34424106116432573, "learning_rate": 3.999922928750592e-05, "loss": 0.6075, "num_tokens": 345662132.0, "step": 450 }, { "epoch": 0.16525443136536436, "grad_norm": 0.374672184659871, "learning_rate": 3.9999189257374456e-05, "loss": 0.62, "num_tokens": 346474376.0, "step": 451 }, { "epoch": 0.16562084917326983, "grad_norm": 0.33893399842380495, "learning_rate": 3.9999148213852745e-05, "loss": 0.666, "num_tokens": 347240660.0, "step": 452 }, { "epoch": 0.16598726698117527, "grad_norm": 0.3976455121056353, "learning_rate": 3.9999106156943114e-05, "loss": 0.6378, "num_tokens": 348015525.0, "step": 453 }, { "epoch": 0.16635368478908075, "grad_norm": 0.3436530892939768, "learning_rate": 3.999906308664792e-05, "loss": 0.6449, "num_tokens": 348845218.0, "step": 454 }, { "epoch": 0.16672010259698622, "grad_norm": 0.37562014316910197, "learning_rate": 3.9999019002969596e-05, "loss": 0.6786, "num_tokens": 349509444.0, "step": 455 }, { "epoch": 0.16708652040489166, "grad_norm": 0.2983055521086259, "learning_rate": 3.999897390591061e-05, "loss": 0.5846, "num_tokens": 350310959.0, "step": 456 }, { "epoch": 0.16745293821279714, "grad_norm": 0.3023082974350931, "learning_rate": 3.999892779547352e-05, "loss": 0.6452, "num_tokens": 351214656.0, "step": 457 }, { "epoch": 0.1678193560207026, "grad_norm": 0.3350504089803749, "learning_rate": 3.99988806716609e-05, "loss": 0.631, "num_tokens": 352075313.0, "step": 458 }, { "epoch": 0.16818577382860808, "grad_norm": 0.28038222755779957, "learning_rate": 3.9998832534475426e-05, "loss": 0.6532, "num_tokens": 352736677.0, "step": 459 }, { "epoch": 0.16855219163651353, "grad_norm": 0.31450587166402744, "learning_rate": 3.999878338391979e-05, "loss": 0.6336, "num_tokens": 353600361.0, "step": 460 }, { "epoch": 0.168918609444419, "grad_norm": 0.3001540835214911, "learning_rate": 3.999873321999677e-05, "loss": 0.6221, "num_tokens": 354349412.0, "step": 461 }, { "epoch": 0.16928502725232447, "grad_norm": 0.3451272138141451, "learning_rate": 3.9998682042709195e-05, "loss": 0.6951, "num_tokens": 355043647.0, "step": 462 }, { "epoch": 0.16965144506022992, "grad_norm": 0.32628991916792777, "learning_rate": 3.999862985205993e-05, "loss": 0.6595, "num_tokens": 355808811.0, "step": 463 }, { "epoch": 0.1700178628681354, "grad_norm": 0.3396472186150914, "learning_rate": 3.999857664805192e-05, "loss": 0.6482, "num_tokens": 356486386.0, "step": 464 }, { "epoch": 0.17038428067604086, "grad_norm": 0.3026349461214127, "learning_rate": 3.999852243068817e-05, "loss": 0.6037, "num_tokens": 357163027.0, "step": 465 }, { "epoch": 0.1707506984839463, "grad_norm": 0.3112220810265007, "learning_rate": 3.999846719997172e-05, "loss": 0.6624, "num_tokens": 357943016.0, "step": 466 }, { "epoch": 0.17111711629185178, "grad_norm": 0.2985247140654528, "learning_rate": 3.9998410955905684e-05, "loss": 0.6178, "num_tokens": 358696949.0, "step": 467 }, { "epoch": 0.17148353409975725, "grad_norm": 0.3124937142057119, "learning_rate": 3.9998353698493236e-05, "loss": 0.6393, "num_tokens": 359433305.0, "step": 468 }, { "epoch": 0.17184995190766272, "grad_norm": 0.2872119311625333, "learning_rate": 3.999829542773759e-05, "loss": 0.6355, "num_tokens": 360231080.0, "step": 469 }, { "epoch": 0.17221636971556817, "grad_norm": 0.291771872031955, "learning_rate": 3.9998236143642034e-05, "loss": 0.6525, "num_tokens": 360870838.0, "step": 470 }, { "epoch": 0.17258278752347364, "grad_norm": 0.29432433491111076, "learning_rate": 3.999817584620989e-05, "loss": 0.6389, "num_tokens": 361675753.0, "step": 471 }, { "epoch": 0.1729492053313791, "grad_norm": 0.3198459178324275, "learning_rate": 3.999811453544457e-05, "loss": 0.634, "num_tokens": 362462144.0, "step": 472 }, { "epoch": 0.17331562313928456, "grad_norm": 0.25764899612875464, "learning_rate": 3.999805221134952e-05, "loss": 0.6091, "num_tokens": 363242929.0, "step": 473 }, { "epoch": 0.17368204094719003, "grad_norm": 0.31507988794567926, "learning_rate": 3.9997988873928255e-05, "loss": 0.6452, "num_tokens": 363901523.0, "step": 474 }, { "epoch": 0.1740484587550955, "grad_norm": 0.2809136822592465, "learning_rate": 3.999792452318433e-05, "loss": 0.6315, "num_tokens": 364589258.0, "step": 475 }, { "epoch": 0.17441487656300098, "grad_norm": 0.2919030147391462, "learning_rate": 3.999785915912137e-05, "loss": 0.6265, "num_tokens": 365297565.0, "step": 476 }, { "epoch": 0.17478129437090642, "grad_norm": 0.29088940791155043, "learning_rate": 3.999779278174307e-05, "loss": 0.6052, "num_tokens": 366029787.0, "step": 477 }, { "epoch": 0.1751477121788119, "grad_norm": 0.31218181405634204, "learning_rate": 3.999772539105314e-05, "loss": 0.6404, "num_tokens": 366796584.0, "step": 478 }, { "epoch": 0.17551412998671737, "grad_norm": 0.363296972432682, "learning_rate": 3.9997656987055406e-05, "loss": 0.655, "num_tokens": 367569113.0, "step": 479 }, { "epoch": 0.1758805477946228, "grad_norm": 0.29752705561130055, "learning_rate": 3.99975875697537e-05, "loss": 0.6408, "num_tokens": 368340891.0, "step": 480 }, { "epoch": 0.17624696560252828, "grad_norm": 0.3108003512403313, "learning_rate": 3.999751713915193e-05, "loss": 0.6501, "num_tokens": 369050609.0, "step": 481 }, { "epoch": 0.17661338341043376, "grad_norm": 0.40451194127014556, "learning_rate": 3.999744569525406e-05, "loss": 0.6511, "num_tokens": 369830479.0, "step": 482 }, { "epoch": 0.1769798012183392, "grad_norm": 0.31427889910422874, "learning_rate": 3.999737323806413e-05, "loss": 0.6165, "num_tokens": 370643986.0, "step": 483 }, { "epoch": 0.17734621902624467, "grad_norm": 0.2862729592225759, "learning_rate": 3.99972997675862e-05, "loss": 0.6247, "num_tokens": 371426746.0, "step": 484 }, { "epoch": 0.17771263683415015, "grad_norm": 0.2855326463976992, "learning_rate": 3.999722528382441e-05, "loss": 0.6049, "num_tokens": 372224787.0, "step": 485 }, { "epoch": 0.17807905464205562, "grad_norm": 0.2541488113068871, "learning_rate": 3.999714978678296e-05, "loss": 0.5948, "num_tokens": 373145342.0, "step": 486 }, { "epoch": 0.17844547244996106, "grad_norm": 0.2865128436320339, "learning_rate": 3.99970732764661e-05, "loss": 0.6345, "num_tokens": 373972491.0, "step": 487 }, { "epoch": 0.17881189025786653, "grad_norm": 0.2642680227679984, "learning_rate": 3.999699575287814e-05, "loss": 0.6509, "num_tokens": 374760179.0, "step": 488 }, { "epoch": 0.179178308065772, "grad_norm": 0.2883689065398108, "learning_rate": 3.999691721602343e-05, "loss": 0.6031, "num_tokens": 375611303.0, "step": 489 }, { "epoch": 0.17954472587367745, "grad_norm": 0.2614240510151541, "learning_rate": 3.999683766590641e-05, "loss": 0.6085, "num_tokens": 376440396.0, "step": 490 }, { "epoch": 0.17991114368158292, "grad_norm": 0.2851007070434342, "learning_rate": 3.999675710253154e-05, "loss": 0.603, "num_tokens": 377270033.0, "step": 491 }, { "epoch": 0.1802775614894884, "grad_norm": 0.32006245917806575, "learning_rate": 3.999667552590338e-05, "loss": 0.6265, "num_tokens": 377983690.0, "step": 492 }, { "epoch": 0.18064397929739384, "grad_norm": 0.26000823963859127, "learning_rate": 3.99965929360265e-05, "loss": 0.6153, "num_tokens": 378747731.0, "step": 493 }, { "epoch": 0.18101039710529931, "grad_norm": 0.33095246187475025, "learning_rate": 3.9996509332905565e-05, "loss": 0.665, "num_tokens": 379547837.0, "step": 494 }, { "epoch": 0.1813768149132048, "grad_norm": 0.30475788535011533, "learning_rate": 3.999642471654527e-05, "loss": 0.6363, "num_tokens": 380384841.0, "step": 495 }, { "epoch": 0.18174323272111026, "grad_norm": 0.31383774343946785, "learning_rate": 3.9996339086950394e-05, "loss": 0.6374, "num_tokens": 381168050.0, "step": 496 }, { "epoch": 0.1821096505290157, "grad_norm": 0.3557975594514082, "learning_rate": 3.999625244412574e-05, "loss": 0.6393, "num_tokens": 381891899.0, "step": 497 }, { "epoch": 0.18247606833692118, "grad_norm": 0.3030660262092294, "learning_rate": 3.9996164788076205e-05, "loss": 0.6263, "num_tokens": 382572507.0, "step": 498 }, { "epoch": 0.18284248614482665, "grad_norm": 0.2939483056907811, "learning_rate": 3.999607611880671e-05, "loss": 0.6025, "num_tokens": 383308031.0, "step": 499 }, { "epoch": 0.1832089039527321, "grad_norm": 0.3268418221152537, "learning_rate": 3.999598643632225e-05, "loss": 0.6773, "num_tokens": 384111273.0, "step": 500 }, { "epoch": 0.18357532176063757, "grad_norm": 0.2920983778596361, "learning_rate": 3.999589574062788e-05, "loss": 0.6193, "num_tokens": 384942997.0, "step": 501 }, { "epoch": 0.18394173956854304, "grad_norm": 0.27075036980475337, "learning_rate": 3.999580403172871e-05, "loss": 0.6328, "num_tokens": 385646943.0, "step": 502 }, { "epoch": 0.18430815737644848, "grad_norm": 0.32083020883395785, "learning_rate": 3.999571130962989e-05, "loss": 0.6692, "num_tokens": 386374772.0, "step": 503 }, { "epoch": 0.18467457518435396, "grad_norm": 0.3139011965120839, "learning_rate": 3.9995617574336646e-05, "loss": 0.6365, "num_tokens": 387086725.0, "step": 504 }, { "epoch": 0.18504099299225943, "grad_norm": 0.32640561157482484, "learning_rate": 3.999552282585426e-05, "loss": 0.6189, "num_tokens": 387829620.0, "step": 505 }, { "epoch": 0.1854074108001649, "grad_norm": 0.27565472157488424, "learning_rate": 3.9995427064188056e-05, "loss": 0.6358, "num_tokens": 388678589.0, "step": 506 }, { "epoch": 0.18577382860807035, "grad_norm": 0.28106715181881614, "learning_rate": 3.9995330289343434e-05, "loss": 0.6167, "num_tokens": 389363533.0, "step": 507 }, { "epoch": 0.18614024641597582, "grad_norm": 0.290033270907386, "learning_rate": 3.999523250132585e-05, "loss": 0.6199, "num_tokens": 390215877.0, "step": 508 }, { "epoch": 0.1865066642238813, "grad_norm": 0.2593615756068311, "learning_rate": 3.9995133700140795e-05, "loss": 0.6342, "num_tokens": 391031129.0, "step": 509 }, { "epoch": 0.18687308203178674, "grad_norm": 0.30736324123249786, "learning_rate": 3.9995033885793835e-05, "loss": 0.6035, "num_tokens": 391743251.0, "step": 510 }, { "epoch": 0.1872394998396922, "grad_norm": 0.31026714227501373, "learning_rate": 3.9994933058290594e-05, "loss": 0.6555, "num_tokens": 392512276.0, "step": 511 }, { "epoch": 0.18760591764759768, "grad_norm": 0.3744982723971652, "learning_rate": 3.9994831217636744e-05, "loss": 0.6065, "num_tokens": 393220046.0, "step": 512 }, { "epoch": 0.18797233545550313, "grad_norm": 0.32586306149586974, "learning_rate": 3.9994728363838024e-05, "loss": 0.663, "num_tokens": 393990660.0, "step": 513 }, { "epoch": 0.1883387532634086, "grad_norm": 0.2675821072679454, "learning_rate": 3.999462449690022e-05, "loss": 0.5927, "num_tokens": 394878809.0, "step": 514 }, { "epoch": 0.18870517107131407, "grad_norm": 0.24864095981503048, "learning_rate": 3.999451961682919e-05, "loss": 0.6109, "num_tokens": 395681267.0, "step": 515 }, { "epoch": 0.18907158887921954, "grad_norm": 0.2722778413488842, "learning_rate": 3.999441372363083e-05, "loss": 0.6101, "num_tokens": 396517295.0, "step": 516 }, { "epoch": 0.189438006687125, "grad_norm": 0.24428146458362437, "learning_rate": 3.99943068173111e-05, "loss": 0.639, "num_tokens": 397294533.0, "step": 517 }, { "epoch": 0.18980442449503046, "grad_norm": 0.2792687580136656, "learning_rate": 3.9994198897876024e-05, "loss": 0.6274, "num_tokens": 398019109.0, "step": 518 }, { "epoch": 0.19017084230293593, "grad_norm": 0.2864564404975282, "learning_rate": 3.9994089965331676e-05, "loss": 0.6028, "num_tokens": 398729232.0, "step": 519 }, { "epoch": 0.19053726011084138, "grad_norm": 0.3162984705736095, "learning_rate": 3.999398001968419e-05, "loss": 0.6371, "num_tokens": 399523762.0, "step": 520 }, { "epoch": 0.19090367791874685, "grad_norm": 0.3075069287051526, "learning_rate": 3.999386906093975e-05, "loss": 0.6458, "num_tokens": 400223655.0, "step": 521 }, { "epoch": 0.19127009572665232, "grad_norm": 0.31895755242547635, "learning_rate": 3.9993757089104626e-05, "loss": 0.6791, "num_tokens": 401034837.0, "step": 522 }, { "epoch": 0.19163651353455777, "grad_norm": 0.32743810272802704, "learning_rate": 3.999364410418509e-05, "loss": 0.6385, "num_tokens": 401896919.0, "step": 523 }, { "epoch": 0.19200293134246324, "grad_norm": 0.39444122516960256, "learning_rate": 3.9993530106187524e-05, "loss": 0.6419, "num_tokens": 402544342.0, "step": 524 }, { "epoch": 0.1923693491503687, "grad_norm": 0.38061762662752485, "learning_rate": 3.999341509511834e-05, "loss": 0.6104, "num_tokens": 403318408.0, "step": 525 }, { "epoch": 0.19273576695827419, "grad_norm": 0.3673964153157995, "learning_rate": 3.999329907098402e-05, "loss": 0.7264, "num_tokens": 403939613.0, "step": 526 }, { "epoch": 0.19310218476617963, "grad_norm": 0.36001204424087535, "learning_rate": 3.999318203379108e-05, "loss": 0.6356, "num_tokens": 404637342.0, "step": 527 }, { "epoch": 0.1934686025740851, "grad_norm": 0.32829940895730525, "learning_rate": 3.9993063983546124e-05, "loss": 0.6103, "num_tokens": 405462976.0, "step": 528 }, { "epoch": 0.19383502038199057, "grad_norm": 0.3009106500500984, "learning_rate": 3.9992944920255796e-05, "loss": 0.6142, "num_tokens": 406236335.0, "step": 529 }, { "epoch": 0.19420143818989602, "grad_norm": 0.3188963504120122, "learning_rate": 3.99928248439268e-05, "loss": 0.624, "num_tokens": 406942867.0, "step": 530 }, { "epoch": 0.1945678559978015, "grad_norm": 0.32446603448474437, "learning_rate": 3.999270375456589e-05, "loss": 0.6599, "num_tokens": 407713677.0, "step": 531 }, { "epoch": 0.19493427380570696, "grad_norm": 0.3050707266630733, "learning_rate": 3.999258165217989e-05, "loss": 0.6055, "num_tokens": 408465553.0, "step": 532 }, { "epoch": 0.1953006916136124, "grad_norm": 0.3239391101147322, "learning_rate": 3.999245853677568e-05, "loss": 0.6461, "num_tokens": 409266478.0, "step": 533 }, { "epoch": 0.19566710942151788, "grad_norm": 0.31281664514063445, "learning_rate": 3.9992334408360174e-05, "loss": 0.665, "num_tokens": 410063715.0, "step": 534 }, { "epoch": 0.19603352722942335, "grad_norm": 0.3308375421076045, "learning_rate": 3.999220926694038e-05, "loss": 0.6432, "num_tokens": 410858547.0, "step": 535 }, { "epoch": 0.19639994503732883, "grad_norm": 0.28598150526404853, "learning_rate": 3.9992083112523326e-05, "loss": 0.6234, "num_tokens": 411633735.0, "step": 536 }, { "epoch": 0.19676636284523427, "grad_norm": 0.25925644281921795, "learning_rate": 3.999195594511613e-05, "loss": 0.6175, "num_tokens": 412421881.0, "step": 537 }, { "epoch": 0.19713278065313974, "grad_norm": 0.2928530196007291, "learning_rate": 3.999182776472594e-05, "loss": 0.6393, "num_tokens": 413260053.0, "step": 538 }, { "epoch": 0.19749919846104522, "grad_norm": 0.27233128278728275, "learning_rate": 3.999169857135998e-05, "loss": 0.634, "num_tokens": 413956474.0, "step": 539 }, { "epoch": 0.19786561626895066, "grad_norm": 0.304966864741422, "learning_rate": 3.9991568365025526e-05, "loss": 0.6283, "num_tokens": 414760320.0, "step": 540 }, { "epoch": 0.19823203407685613, "grad_norm": 0.29315858305599324, "learning_rate": 3.9991437145729904e-05, "loss": 0.6179, "num_tokens": 415396185.0, "step": 541 }, { "epoch": 0.1985984518847616, "grad_norm": 0.2820320136763384, "learning_rate": 3.9991304913480495e-05, "loss": 0.6062, "num_tokens": 416196815.0, "step": 542 }, { "epoch": 0.19896486969266705, "grad_norm": 0.29147615603528276, "learning_rate": 3.999117166828476e-05, "loss": 0.6266, "num_tokens": 416995243.0, "step": 543 }, { "epoch": 0.19933128750057252, "grad_norm": 0.3029426283298013, "learning_rate": 3.999103741015019e-05, "loss": 0.6148, "num_tokens": 417748037.0, "step": 544 }, { "epoch": 0.199697705308478, "grad_norm": 0.30375267360444524, "learning_rate": 3.999090213908435e-05, "loss": 0.647, "num_tokens": 418522788.0, "step": 545 }, { "epoch": 0.20006412311638347, "grad_norm": 0.33951695408557303, "learning_rate": 3.9990765855094845e-05, "loss": 0.6298, "num_tokens": 419205027.0, "step": 546 }, { "epoch": 0.2004305409242889, "grad_norm": 0.3658420194372056, "learning_rate": 3.9990628558189365e-05, "loss": 0.6155, "num_tokens": 420047355.0, "step": 547 }, { "epoch": 0.20079695873219439, "grad_norm": 0.254928270511065, "learning_rate": 3.999049024837562e-05, "loss": 0.5856, "num_tokens": 420799065.0, "step": 548 }, { "epoch": 0.20116337654009986, "grad_norm": 0.3851637249339689, "learning_rate": 3.9990350925661416e-05, "loss": 0.6378, "num_tokens": 421556620.0, "step": 549 }, { "epoch": 0.2015297943480053, "grad_norm": 0.3876449480213786, "learning_rate": 3.999021059005459e-05, "loss": 0.6483, "num_tokens": 422321820.0, "step": 550 }, { "epoch": 0.20189621215591078, "grad_norm": 0.28389012995966456, "learning_rate": 3.9990069241563046e-05, "loss": 0.6304, "num_tokens": 423205788.0, "step": 551 }, { "epoch": 0.20226262996381625, "grad_norm": 0.40806487328092433, "learning_rate": 3.9989926880194734e-05, "loss": 0.6433, "num_tokens": 423888541.0, "step": 552 }, { "epoch": 0.2026290477717217, "grad_norm": 0.3078318426289441, "learning_rate": 3.998978350595767e-05, "loss": 0.6358, "num_tokens": 424734079.0, "step": 553 }, { "epoch": 0.20299546557962717, "grad_norm": 0.27899016233933877, "learning_rate": 3.998963911885993e-05, "loss": 0.6101, "num_tokens": 425720705.0, "step": 554 }, { "epoch": 0.20336188338753264, "grad_norm": 0.34568534688743124, "learning_rate": 3.998949371890965e-05, "loss": 0.6037, "num_tokens": 426377222.0, "step": 555 }, { "epoch": 0.2037283011954381, "grad_norm": 0.346151082910662, "learning_rate": 3.9989347306115005e-05, "loss": 0.5952, "num_tokens": 427076069.0, "step": 556 }, { "epoch": 0.20409471900334356, "grad_norm": 0.27761387189720194, "learning_rate": 3.998919988048424e-05, "loss": 0.6353, "num_tokens": 427807483.0, "step": 557 }, { "epoch": 0.20446113681124903, "grad_norm": 0.44347154483692547, "learning_rate": 3.998905144202567e-05, "loss": 0.6603, "num_tokens": 428546267.0, "step": 558 }, { "epoch": 0.2048275546191545, "grad_norm": 0.3487111399058776, "learning_rate": 3.998890199074763e-05, "loss": 0.6533, "num_tokens": 429247424.0, "step": 559 }, { "epoch": 0.20519397242705995, "grad_norm": 0.36036330656538074, "learning_rate": 3.998875152665854e-05, "loss": 0.6076, "num_tokens": 430001549.0, "step": 560 }, { "epoch": 0.20556039023496542, "grad_norm": 0.3124828073341015, "learning_rate": 3.998860004976689e-05, "loss": 0.6032, "num_tokens": 430706589.0, "step": 561 }, { "epoch": 0.2059268080428709, "grad_norm": 0.2862305465062455, "learning_rate": 3.9988447560081183e-05, "loss": 0.6266, "num_tokens": 431408934.0, "step": 562 }, { "epoch": 0.20629322585077634, "grad_norm": 0.29206449369941073, "learning_rate": 3.9988294057610026e-05, "loss": 0.6274, "num_tokens": 432317085.0, "step": 563 }, { "epoch": 0.2066596436586818, "grad_norm": 0.3044196292437427, "learning_rate": 3.9988139542362044e-05, "loss": 0.6645, "num_tokens": 433173969.0, "step": 564 }, { "epoch": 0.20702606146658728, "grad_norm": 0.2503693489585637, "learning_rate": 3.998798401434595e-05, "loss": 0.6081, "num_tokens": 434052121.0, "step": 565 }, { "epoch": 0.20739247927449275, "grad_norm": 0.2833591472653634, "learning_rate": 3.99878274735705e-05, "loss": 0.6058, "num_tokens": 434810832.0, "step": 566 }, { "epoch": 0.2077588970823982, "grad_norm": 0.29976547042801777, "learning_rate": 3.9987669920044496e-05, "loss": 0.6144, "num_tokens": 435561263.0, "step": 567 }, { "epoch": 0.20812531489030367, "grad_norm": 0.2589945878201654, "learning_rate": 3.9987511353776815e-05, "loss": 0.5927, "num_tokens": 436429092.0, "step": 568 }, { "epoch": 0.20849173269820914, "grad_norm": 0.3163965763825031, "learning_rate": 3.9987351774776384e-05, "loss": 0.6107, "num_tokens": 437240399.0, "step": 569 }, { "epoch": 0.2088581505061146, "grad_norm": 0.2271229898799919, "learning_rate": 3.9987191183052186e-05, "loss": 0.6293, "num_tokens": 437990327.0, "step": 570 }, { "epoch": 0.20922456831402006, "grad_norm": 0.27701992214064824, "learning_rate": 3.998702957861327e-05, "loss": 0.6191, "num_tokens": 438692094.0, "step": 571 }, { "epoch": 0.20959098612192553, "grad_norm": 0.22239032775867718, "learning_rate": 3.9986866961468733e-05, "loss": 0.6188, "num_tokens": 439482554.0, "step": 572 }, { "epoch": 0.20995740392983098, "grad_norm": 0.2364868820453565, "learning_rate": 3.998670333162772e-05, "loss": 0.6364, "num_tokens": 440323657.0, "step": 573 }, { "epoch": 0.21032382173773645, "grad_norm": 0.23746769112775373, "learning_rate": 3.998653868909946e-05, "loss": 0.6278, "num_tokens": 441020143.0, "step": 574 }, { "epoch": 0.21069023954564192, "grad_norm": 0.22346156519786506, "learning_rate": 3.9986373033893203e-05, "loss": 0.5841, "num_tokens": 441775768.0, "step": 575 }, { "epoch": 0.2110566573535474, "grad_norm": 0.2905850625843416, "learning_rate": 3.998620636601829e-05, "loss": 0.6206, "num_tokens": 442431917.0, "step": 576 }, { "epoch": 0.21142307516145284, "grad_norm": 0.2743461151478621, "learning_rate": 3.99860386854841e-05, "loss": 0.6338, "num_tokens": 443202645.0, "step": 577 }, { "epoch": 0.2117894929693583, "grad_norm": 0.2888673185371787, "learning_rate": 3.998586999230008e-05, "loss": 0.641, "num_tokens": 444003987.0, "step": 578 }, { "epoch": 0.21215591077726378, "grad_norm": 0.25329934263199344, "learning_rate": 3.998570028647572e-05, "loss": 0.6262, "num_tokens": 444724952.0, "step": 579 }, { "epoch": 0.21252232858516923, "grad_norm": 0.309621721877711, "learning_rate": 3.998552956802057e-05, "loss": 0.5836, "num_tokens": 445458061.0, "step": 580 }, { "epoch": 0.2128887463930747, "grad_norm": 0.30545676256937776, "learning_rate": 3.9985357836944264e-05, "loss": 0.5897, "num_tokens": 446253205.0, "step": 581 }, { "epoch": 0.21325516420098017, "grad_norm": 0.3247958240642702, "learning_rate": 3.998518509325645e-05, "loss": 0.6236, "num_tokens": 446886167.0, "step": 582 }, { "epoch": 0.21362158200888562, "grad_norm": 0.28901166184667526, "learning_rate": 3.998501133696685e-05, "loss": 0.5928, "num_tokens": 447691711.0, "step": 583 }, { "epoch": 0.2139879998167911, "grad_norm": 0.2610533342012023, "learning_rate": 3.998483656808527e-05, "loss": 0.6562, "num_tokens": 448538240.0, "step": 584 }, { "epoch": 0.21435441762469656, "grad_norm": 0.27549715601890434, "learning_rate": 3.998466078662153e-05, "loss": 0.6196, "num_tokens": 449242368.0, "step": 585 }, { "epoch": 0.21472083543260204, "grad_norm": 0.2727474182895639, "learning_rate": 3.998448399258553e-05, "loss": 0.6851, "num_tokens": 449988137.0, "step": 586 }, { "epoch": 0.21508725324050748, "grad_norm": 0.2958551827288529, "learning_rate": 3.998430618598723e-05, "loss": 0.6267, "num_tokens": 450707303.0, "step": 587 }, { "epoch": 0.21545367104841295, "grad_norm": 0.32150004433836205, "learning_rate": 3.998412736683664e-05, "loss": 0.6278, "num_tokens": 451432850.0, "step": 588 }, { "epoch": 0.21582008885631843, "grad_norm": 0.35107418279511166, "learning_rate": 3.998394753514383e-05, "loss": 0.6676, "num_tokens": 452205113.0, "step": 589 }, { "epoch": 0.21618650666422387, "grad_norm": 0.2667722202906286, "learning_rate": 3.9983766690918916e-05, "loss": 0.6171, "num_tokens": 453017047.0, "step": 590 }, { "epoch": 0.21655292447212934, "grad_norm": 0.36000623622259004, "learning_rate": 3.9983584834172084e-05, "loss": 0.5976, "num_tokens": 453831295.0, "step": 591 }, { "epoch": 0.21691934228003482, "grad_norm": 0.2998933108370535, "learning_rate": 3.998340196491358e-05, "loss": 0.6324, "num_tokens": 454588541.0, "step": 592 }, { "epoch": 0.21728576008794026, "grad_norm": 0.3390371981738138, "learning_rate": 3.998321808315369e-05, "loss": 0.6287, "num_tokens": 455398530.0, "step": 593 }, { "epoch": 0.21765217789584573, "grad_norm": 0.308212787414317, "learning_rate": 3.9983033188902764e-05, "loss": 0.6405, "num_tokens": 456172999.0, "step": 594 }, { "epoch": 0.2180185957037512, "grad_norm": 0.26660052859563016, "learning_rate": 3.998284728217122e-05, "loss": 0.5987, "num_tokens": 456935921.0, "step": 595 }, { "epoch": 0.21838501351165668, "grad_norm": 0.3370997012827317, "learning_rate": 3.998266036296953e-05, "loss": 0.639, "num_tokens": 457614224.0, "step": 596 }, { "epoch": 0.21875143131956212, "grad_norm": 0.3395296750980625, "learning_rate": 3.99824724313082e-05, "loss": 0.6074, "num_tokens": 458273540.0, "step": 597 }, { "epoch": 0.2191178491274676, "grad_norm": 0.327011181211077, "learning_rate": 3.998228348719783e-05, "loss": 0.625, "num_tokens": 459151137.0, "step": 598 }, { "epoch": 0.21948426693537307, "grad_norm": 0.30695681008393805, "learning_rate": 3.998209353064905e-05, "loss": 0.5829, "num_tokens": 459836949.0, "step": 599 }, { "epoch": 0.2198506847432785, "grad_norm": 0.3327648362463845, "learning_rate": 3.998190256167256e-05, "loss": 0.6094, "num_tokens": 460548505.0, "step": 600 }, { "epoch": 0.22021710255118399, "grad_norm": 0.3318824126570222, "learning_rate": 3.9981710580279095e-05, "loss": 0.6314, "num_tokens": 461328573.0, "step": 601 }, { "epoch": 0.22058352035908946, "grad_norm": 0.38140453329637863, "learning_rate": 3.998151758647949e-05, "loss": 0.6308, "num_tokens": 462156653.0, "step": 602 }, { "epoch": 0.2209499381669949, "grad_norm": 0.3197548023218152, "learning_rate": 3.9981323580284585e-05, "loss": 0.607, "num_tokens": 462968942.0, "step": 603 }, { "epoch": 0.22131635597490038, "grad_norm": 0.38173415215196166, "learning_rate": 3.998112856170532e-05, "loss": 0.613, "num_tokens": 463745126.0, "step": 604 }, { "epoch": 0.22168277378280585, "grad_norm": 0.27989683111575026, "learning_rate": 3.998093253075267e-05, "loss": 0.5962, "num_tokens": 464545360.0, "step": 605 }, { "epoch": 0.22204919159071132, "grad_norm": 0.2811733346904278, "learning_rate": 3.998073548743766e-05, "loss": 0.6141, "num_tokens": 465279816.0, "step": 606 }, { "epoch": 0.22241560939861676, "grad_norm": 0.28300145452014913, "learning_rate": 3.9980537431771415e-05, "loss": 0.6057, "num_tokens": 466012535.0, "step": 607 }, { "epoch": 0.22278202720652224, "grad_norm": 0.2705592782063543, "learning_rate": 3.998033836376506e-05, "loss": 0.6246, "num_tokens": 466692409.0, "step": 608 }, { "epoch": 0.2231484450144277, "grad_norm": 0.2667641495213636, "learning_rate": 3.99801382834298e-05, "loss": 0.6472, "num_tokens": 467520164.0, "step": 609 }, { "epoch": 0.22351486282233315, "grad_norm": 0.28015459825323047, "learning_rate": 3.997993719077691e-05, "loss": 0.6083, "num_tokens": 468326181.0, "step": 610 }, { "epoch": 0.22388128063023863, "grad_norm": 0.29432325075586674, "learning_rate": 3.997973508581772e-05, "loss": 0.6804, "num_tokens": 468949126.0, "step": 611 }, { "epoch": 0.2242476984381441, "grad_norm": 0.3123551035545475, "learning_rate": 3.9979531968563605e-05, "loss": 0.6673, "num_tokens": 469663798.0, "step": 612 }, { "epoch": 0.22461411624604954, "grad_norm": 0.34896084443364095, "learning_rate": 3.9979327839025985e-05, "loss": 0.6362, "num_tokens": 470421418.0, "step": 613 }, { "epoch": 0.22498053405395502, "grad_norm": 0.27234409942090365, "learning_rate": 3.9979122697216365e-05, "loss": 0.6302, "num_tokens": 471232526.0, "step": 614 }, { "epoch": 0.2253469518618605, "grad_norm": 0.36114011814822333, "learning_rate": 3.997891654314629e-05, "loss": 0.5795, "num_tokens": 472071419.0, "step": 615 }, { "epoch": 0.22571336966976596, "grad_norm": 0.2785554280718878, "learning_rate": 3.997870937682738e-05, "loss": 0.6236, "num_tokens": 472845141.0, "step": 616 }, { "epoch": 0.2260797874776714, "grad_norm": 0.38239075803936307, "learning_rate": 3.997850119827129e-05, "loss": 0.6284, "num_tokens": 473613003.0, "step": 617 }, { "epoch": 0.22644620528557688, "grad_norm": 0.3384303434267532, "learning_rate": 3.997829200748974e-05, "loss": 0.6464, "num_tokens": 474272719.0, "step": 618 }, { "epoch": 0.22681262309348235, "grad_norm": 0.31642838759637115, "learning_rate": 3.99780818044945e-05, "loss": 0.6448, "num_tokens": 475042971.0, "step": 619 }, { "epoch": 0.2271790409013878, "grad_norm": 0.3782059375648129, "learning_rate": 3.997787058929742e-05, "loss": 0.628, "num_tokens": 475824487.0, "step": 620 }, { "epoch": 0.22754545870929327, "grad_norm": 0.3316667405435595, "learning_rate": 3.997765836191038e-05, "loss": 0.623, "num_tokens": 476673360.0, "step": 621 }, { "epoch": 0.22791187651719874, "grad_norm": 0.37548571397784164, "learning_rate": 3.9977445122345335e-05, "loss": 0.604, "num_tokens": 477420430.0, "step": 622 }, { "epoch": 0.2282782943251042, "grad_norm": 0.2979493056950271, "learning_rate": 3.997723087061429e-05, "loss": 0.6271, "num_tokens": 478172676.0, "step": 623 }, { "epoch": 0.22864471213300966, "grad_norm": 0.32313835454751333, "learning_rate": 3.9977015606729315e-05, "loss": 0.5839, "num_tokens": 478989317.0, "step": 624 }, { "epoch": 0.22901112994091513, "grad_norm": 0.30437396784579746, "learning_rate": 3.997679933070252e-05, "loss": 0.6018, "num_tokens": 479625983.0, "step": 625 }, { "epoch": 0.2293775477488206, "grad_norm": 0.2608376916564369, "learning_rate": 3.997658204254607e-05, "loss": 0.6627, "num_tokens": 480421320.0, "step": 626 }, { "epoch": 0.22974396555672605, "grad_norm": 0.3460656642493226, "learning_rate": 3.997636374227222e-05, "loss": 0.6071, "num_tokens": 481223561.0, "step": 627 }, { "epoch": 0.23011038336463152, "grad_norm": 0.25237609081624773, "learning_rate": 3.997614442989326e-05, "loss": 0.6054, "num_tokens": 482052033.0, "step": 628 }, { "epoch": 0.230476801172537, "grad_norm": 0.27818990343264766, "learning_rate": 3.997592410542153e-05, "loss": 0.6615, "num_tokens": 482713528.0, "step": 629 }, { "epoch": 0.23084321898044244, "grad_norm": 0.2660831179064185, "learning_rate": 3.9975702768869434e-05, "loss": 0.644, "num_tokens": 483563029.0, "step": 630 }, { "epoch": 0.2312096367883479, "grad_norm": 0.28308377877825996, "learning_rate": 3.997548042024943e-05, "loss": 0.5767, "num_tokens": 484283598.0, "step": 631 }, { "epoch": 0.23157605459625338, "grad_norm": 0.28991037042903456, "learning_rate": 3.997525705957405e-05, "loss": 0.6414, "num_tokens": 484924033.0, "step": 632 }, { "epoch": 0.23194247240415886, "grad_norm": 0.256696312714915, "learning_rate": 3.9975032686855856e-05, "loss": 0.6204, "num_tokens": 485707675.0, "step": 633 }, { "epoch": 0.2323088902120643, "grad_norm": 0.3051422246909382, "learning_rate": 3.9974807302107495e-05, "loss": 0.622, "num_tokens": 486557520.0, "step": 634 }, { "epoch": 0.23267530801996977, "grad_norm": 0.2481069316318308, "learning_rate": 3.997458090534164e-05, "loss": 0.5768, "num_tokens": 487319090.0, "step": 635 }, { "epoch": 0.23304172582787525, "grad_norm": 0.3109365901716389, "learning_rate": 3.9974353496571045e-05, "loss": 0.6531, "num_tokens": 488103963.0, "step": 636 }, { "epoch": 0.2334081436357807, "grad_norm": 0.3197167138102606, "learning_rate": 3.997412507580852e-05, "loss": 0.6063, "num_tokens": 488736501.0, "step": 637 }, { "epoch": 0.23377456144368616, "grad_norm": 0.2746049063837255, "learning_rate": 3.997389564306692e-05, "loss": 0.5869, "num_tokens": 489536383.0, "step": 638 }, { "epoch": 0.23414097925159164, "grad_norm": 0.2695147244618516, "learning_rate": 3.997366519835916e-05, "loss": 0.6388, "num_tokens": 490211766.0, "step": 639 }, { "epoch": 0.23450739705949708, "grad_norm": 0.3075216463197993, "learning_rate": 3.9973433741698216e-05, "loss": 0.6401, "num_tokens": 490962269.0, "step": 640 }, { "epoch": 0.23487381486740255, "grad_norm": 0.26926209114147803, "learning_rate": 3.997320127309712e-05, "loss": 0.6592, "num_tokens": 491743903.0, "step": 641 }, { "epoch": 0.23524023267530803, "grad_norm": 0.26161577267589015, "learning_rate": 3.997296779256896e-05, "loss": 0.6303, "num_tokens": 492503602.0, "step": 642 }, { "epoch": 0.2356066504832135, "grad_norm": 0.26056850985890034, "learning_rate": 3.997273330012689e-05, "loss": 0.6107, "num_tokens": 493207094.0, "step": 643 }, { "epoch": 0.23597306829111894, "grad_norm": 0.2543191364255079, "learning_rate": 3.9972497795784095e-05, "loss": 0.6029, "num_tokens": 493942700.0, "step": 644 }, { "epoch": 0.23633948609902442, "grad_norm": 0.2504025567035444, "learning_rate": 3.997226127955385e-05, "loss": 0.6137, "num_tokens": 494797497.0, "step": 645 }, { "epoch": 0.2367059039069299, "grad_norm": 0.2664527709609173, "learning_rate": 3.997202375144946e-05, "loss": 0.6083, "num_tokens": 495518837.0, "step": 646 }, { "epoch": 0.23707232171483533, "grad_norm": 0.2966337842886008, "learning_rate": 3.997178521148431e-05, "loss": 0.6379, "num_tokens": 496380977.0, "step": 647 }, { "epoch": 0.2374387395227408, "grad_norm": 0.2854163990185708, "learning_rate": 3.997154565967182e-05, "loss": 0.596, "num_tokens": 497188668.0, "step": 648 }, { "epoch": 0.23780515733064628, "grad_norm": 0.26624353597091943, "learning_rate": 3.997130509602548e-05, "loss": 0.6593, "num_tokens": 498009053.0, "step": 649 }, { "epoch": 0.23817157513855172, "grad_norm": 0.3174216804987258, "learning_rate": 3.997106352055884e-05, "loss": 0.6376, "num_tokens": 498652630.0, "step": 650 }, { "epoch": 0.2385379929464572, "grad_norm": 0.2734376983561695, "learning_rate": 3.9970820933285494e-05, "loss": 0.6085, "num_tokens": 499400687.0, "step": 651 }, { "epoch": 0.23890441075436267, "grad_norm": 0.33826838281855465, "learning_rate": 3.99705773342191e-05, "loss": 0.6148, "num_tokens": 500121628.0, "step": 652 }, { "epoch": 0.23927082856226814, "grad_norm": 0.2624947845898707, "learning_rate": 3.997033272337338e-05, "loss": 0.6265, "num_tokens": 500912990.0, "step": 653 }, { "epoch": 0.23963724637017358, "grad_norm": 0.2727472723746739, "learning_rate": 3.99700871007621e-05, "loss": 0.5844, "num_tokens": 501653806.0, "step": 654 }, { "epoch": 0.24000366417807906, "grad_norm": 0.2748166192817892, "learning_rate": 3.996984046639909e-05, "loss": 0.607, "num_tokens": 502559223.0, "step": 655 }, { "epoch": 0.24037008198598453, "grad_norm": 0.2864286213204215, "learning_rate": 3.996959282029823e-05, "loss": 0.6204, "num_tokens": 503506392.0, "step": 656 }, { "epoch": 0.24073649979388997, "grad_norm": 0.24983713201291943, "learning_rate": 3.9969344162473487e-05, "loss": 0.6106, "num_tokens": 504439364.0, "step": 657 }, { "epoch": 0.24110291760179545, "grad_norm": 0.29234673428769886, "learning_rate": 3.996909449293883e-05, "loss": 0.595, "num_tokens": 505331456.0, "step": 658 }, { "epoch": 0.24146933540970092, "grad_norm": 0.25820213061379854, "learning_rate": 3.996884381170833e-05, "loss": 0.5931, "num_tokens": 506017132.0, "step": 659 }, { "epoch": 0.24183575321760636, "grad_norm": 0.31219047709201847, "learning_rate": 3.996859211879611e-05, "loss": 0.6435, "num_tokens": 506747102.0, "step": 660 }, { "epoch": 0.24220217102551184, "grad_norm": 0.2895971887663432, "learning_rate": 3.9968339414216324e-05, "loss": 0.6362, "num_tokens": 507535968.0, "step": 661 }, { "epoch": 0.2425685888334173, "grad_norm": 0.28407999546705825, "learning_rate": 3.9968085697983216e-05, "loss": 0.5772, "num_tokens": 508376994.0, "step": 662 }, { "epoch": 0.24293500664132278, "grad_norm": 0.2414154800820013, "learning_rate": 3.996783097011105e-05, "loss": 0.6174, "num_tokens": 509136713.0, "step": 663 }, { "epoch": 0.24330142444922823, "grad_norm": 0.2823483342799755, "learning_rate": 3.9967575230614185e-05, "loss": 0.6132, "num_tokens": 509877231.0, "step": 664 }, { "epoch": 0.2436678422571337, "grad_norm": 0.2673492041742662, "learning_rate": 3.9967318479507014e-05, "loss": 0.613, "num_tokens": 510584957.0, "step": 665 }, { "epoch": 0.24403426006503917, "grad_norm": 0.25274111903904783, "learning_rate": 3.996706071680399e-05, "loss": 0.6206, "num_tokens": 511289173.0, "step": 666 }, { "epoch": 0.24440067787294462, "grad_norm": 0.31808284599450837, "learning_rate": 3.996680194251963e-05, "loss": 0.6648, "num_tokens": 511931919.0, "step": 667 }, { "epoch": 0.2447670956808501, "grad_norm": 0.30097310255790455, "learning_rate": 3.996654215666851e-05, "loss": 0.6056, "num_tokens": 512749077.0, "step": 668 }, { "epoch": 0.24513351348875556, "grad_norm": 0.36225171402877643, "learning_rate": 3.996628135926524e-05, "loss": 0.6893, "num_tokens": 513459865.0, "step": 669 }, { "epoch": 0.245499931296661, "grad_norm": 0.2612945008023349, "learning_rate": 3.996601955032451e-05, "loss": 0.6401, "num_tokens": 514173802.0, "step": 670 }, { "epoch": 0.24586634910456648, "grad_norm": 0.2801398997117899, "learning_rate": 3.9965756729861066e-05, "loss": 0.6283, "num_tokens": 514918084.0, "step": 671 }, { "epoch": 0.24623276691247195, "grad_norm": 0.2512848461082436, "learning_rate": 3.99654928978897e-05, "loss": 0.6296, "num_tokens": 515660539.0, "step": 672 }, { "epoch": 0.24659918472037742, "grad_norm": 0.28152582872843407, "learning_rate": 3.9965228054425267e-05, "loss": 0.6163, "num_tokens": 516409269.0, "step": 673 }, { "epoch": 0.24696560252828287, "grad_norm": 0.23445388741817416, "learning_rate": 3.996496219948268e-05, "loss": 0.5988, "num_tokens": 517312512.0, "step": 674 }, { "epoch": 0.24733202033618834, "grad_norm": 0.29994649442147275, "learning_rate": 3.996469533307691e-05, "loss": 0.6531, "num_tokens": 518058831.0, "step": 675 }, { "epoch": 0.2476984381440938, "grad_norm": 0.2833829432785999, "learning_rate": 3.996442745522298e-05, "loss": 0.6048, "num_tokens": 518799903.0, "step": 676 }, { "epoch": 0.24806485595199926, "grad_norm": 0.27881776687277754, "learning_rate": 3.996415856593596e-05, "loss": 0.6466, "num_tokens": 519641580.0, "step": 677 }, { "epoch": 0.24843127375990473, "grad_norm": 0.32563644374478473, "learning_rate": 3.9963888665231006e-05, "loss": 0.6106, "num_tokens": 520307433.0, "step": 678 }, { "epoch": 0.2487976915678102, "grad_norm": 0.29659568345528314, "learning_rate": 3.9963617753123303e-05, "loss": 0.6353, "num_tokens": 521162947.0, "step": 679 }, { "epoch": 0.24916410937571565, "grad_norm": 0.3271574848370207, "learning_rate": 3.9963345829628115e-05, "loss": 0.6087, "num_tokens": 521836119.0, "step": 680 }, { "epoch": 0.24953052718362112, "grad_norm": 0.2823071752073067, "learning_rate": 3.9963072894760733e-05, "loss": 0.6366, "num_tokens": 522617380.0, "step": 681 }, { "epoch": 0.2498969449915266, "grad_norm": 0.3665272382081492, "learning_rate": 3.996279894853655e-05, "loss": 0.6063, "num_tokens": 523432387.0, "step": 682 }, { "epoch": 0.25026336279943207, "grad_norm": 0.31043431712971303, "learning_rate": 3.996252399097096e-05, "loss": 0.6015, "num_tokens": 524194389.0, "step": 683 }, { "epoch": 0.25062978060733754, "grad_norm": 0.23179200617158024, "learning_rate": 3.9962248022079465e-05, "loss": 0.5897, "num_tokens": 525013640.0, "step": 684 }, { "epoch": 0.25099619841524295, "grad_norm": 0.34145999039902, "learning_rate": 3.99619710418776e-05, "loss": 0.6377, "num_tokens": 525674585.0, "step": 685 }, { "epoch": 0.2513626162231484, "grad_norm": 0.24899977481352054, "learning_rate": 3.996169305038096e-05, "loss": 0.6157, "num_tokens": 526389114.0, "step": 686 }, { "epoch": 0.2517290340310539, "grad_norm": 0.25844636587000686, "learning_rate": 3.996141404760519e-05, "loss": 0.6496, "num_tokens": 526967552.0, "step": 687 }, { "epoch": 0.2520954518389594, "grad_norm": 0.24901366603913572, "learning_rate": 3.9961134033566e-05, "loss": 0.614, "num_tokens": 527788106.0, "step": 688 }, { "epoch": 0.25246186964686484, "grad_norm": 0.2455900389702711, "learning_rate": 3.9960853008279154e-05, "loss": 0.5961, "num_tokens": 528551719.0, "step": 689 }, { "epoch": 0.2528282874547703, "grad_norm": 0.24759404302909618, "learning_rate": 3.9960570971760476e-05, "loss": 0.5965, "num_tokens": 529153756.0, "step": 690 }, { "epoch": 0.2531947052626758, "grad_norm": 0.2958686075950065, "learning_rate": 3.9960287924025845e-05, "loss": 0.6224, "num_tokens": 529954988.0, "step": 691 }, { "epoch": 0.2535611230705812, "grad_norm": 0.24844279085853094, "learning_rate": 3.996000386509121e-05, "loss": 0.6226, "num_tokens": 530772281.0, "step": 692 }, { "epoch": 0.2539275408784867, "grad_norm": 0.25335278499897934, "learning_rate": 3.995971879497253e-05, "loss": 0.607, "num_tokens": 531688870.0, "step": 693 }, { "epoch": 0.25429395868639215, "grad_norm": 0.2871729818557321, "learning_rate": 3.9959432713685895e-05, "loss": 0.6148, "num_tokens": 532521878.0, "step": 694 }, { "epoch": 0.2546603764942976, "grad_norm": 0.23685089236000895, "learning_rate": 3.995914562124739e-05, "loss": 0.6286, "num_tokens": 533243639.0, "step": 695 }, { "epoch": 0.2550267943022031, "grad_norm": 0.3134948728090868, "learning_rate": 3.995885751767318e-05, "loss": 0.6007, "num_tokens": 534057017.0, "step": 696 }, { "epoch": 0.25539321211010857, "grad_norm": 0.27825990774110565, "learning_rate": 3.9958568402979495e-05, "loss": 0.6156, "num_tokens": 534905498.0, "step": 697 }, { "epoch": 0.25575962991801404, "grad_norm": 0.3062090958904066, "learning_rate": 3.99582782771826e-05, "loss": 0.6071, "num_tokens": 535642284.0, "step": 698 }, { "epoch": 0.25612604772591946, "grad_norm": 0.29676621154313854, "learning_rate": 3.995798714029884e-05, "loss": 0.5932, "num_tokens": 536467297.0, "step": 699 }, { "epoch": 0.25649246553382493, "grad_norm": 0.2828937331832028, "learning_rate": 3.99576949923446e-05, "loss": 0.6085, "num_tokens": 537218180.0, "step": 700 }, { "epoch": 0.2568588833417304, "grad_norm": 0.30224541153375867, "learning_rate": 3.995740183333634e-05, "loss": 0.613, "num_tokens": 537958476.0, "step": 701 }, { "epoch": 0.2572253011496359, "grad_norm": 0.28975363347706695, "learning_rate": 3.9957107663290545e-05, "loss": 0.66, "num_tokens": 538716268.0, "step": 702 }, { "epoch": 0.25759171895754135, "grad_norm": 0.2774797282072996, "learning_rate": 3.9956812482223795e-05, "loss": 0.6457, "num_tokens": 539443638.0, "step": 703 }, { "epoch": 0.2579581367654468, "grad_norm": 0.32092488109744, "learning_rate": 3.995651629015271e-05, "loss": 0.6424, "num_tokens": 540085128.0, "step": 704 }, { "epoch": 0.25832455457335224, "grad_norm": 0.31913592332530344, "learning_rate": 3.995621908709395e-05, "loss": 0.5847, "num_tokens": 540789646.0, "step": 705 }, { "epoch": 0.2586909723812577, "grad_norm": 0.2394056784189533, "learning_rate": 3.995592087306425e-05, "loss": 0.6368, "num_tokens": 541627355.0, "step": 706 }, { "epoch": 0.2590573901891632, "grad_norm": 0.321696976955009, "learning_rate": 3.9955621648080424e-05, "loss": 0.6218, "num_tokens": 542299004.0, "step": 707 }, { "epoch": 0.25942380799706866, "grad_norm": 0.31862017235026646, "learning_rate": 3.99553214121593e-05, "loss": 0.6252, "num_tokens": 543006257.0, "step": 708 }, { "epoch": 0.25979022580497413, "grad_norm": 0.26244732283895333, "learning_rate": 3.995502016531778e-05, "loss": 0.6273, "num_tokens": 543761092.0, "step": 709 }, { "epoch": 0.2601566436128796, "grad_norm": 0.24576814776563166, "learning_rate": 3.995471790757283e-05, "loss": 0.6161, "num_tokens": 544463491.0, "step": 710 }, { "epoch": 0.2605230614207851, "grad_norm": 0.2736235019443271, "learning_rate": 3.9954414638941465e-05, "loss": 0.6134, "num_tokens": 545199817.0, "step": 711 }, { "epoch": 0.2608894792286905, "grad_norm": 0.25572292510241057, "learning_rate": 3.995411035944077e-05, "loss": 0.6245, "num_tokens": 545952703.0, "step": 712 }, { "epoch": 0.26125589703659596, "grad_norm": 0.24696919972423167, "learning_rate": 3.995380506908786e-05, "loss": 0.6203, "num_tokens": 546809697.0, "step": 713 }, { "epoch": 0.26162231484450144, "grad_norm": 0.26356201433680404, "learning_rate": 3.9953498767899935e-05, "loss": 0.6204, "num_tokens": 547551524.0, "step": 714 }, { "epoch": 0.2619887326524069, "grad_norm": 0.24530313591220798, "learning_rate": 3.995319145589423e-05, "loss": 0.5983, "num_tokens": 548342759.0, "step": 715 }, { "epoch": 0.2623551504603124, "grad_norm": 0.24818999954253507, "learning_rate": 3.995288313308807e-05, "loss": 0.6086, "num_tokens": 549161721.0, "step": 716 }, { "epoch": 0.26272156826821785, "grad_norm": 0.2748821059922899, "learning_rate": 3.995257379949879e-05, "loss": 0.6342, "num_tokens": 549840223.0, "step": 717 }, { "epoch": 0.2630879860761233, "grad_norm": 0.26995559152057347, "learning_rate": 3.995226345514381e-05, "loss": 0.6352, "num_tokens": 550583228.0, "step": 718 }, { "epoch": 0.26345440388402874, "grad_norm": 0.29744655389403357, "learning_rate": 3.995195210004061e-05, "loss": 0.6509, "num_tokens": 551242515.0, "step": 719 }, { "epoch": 0.2638208216919342, "grad_norm": 0.2656479990615421, "learning_rate": 3.995163973420672e-05, "loss": 0.6043, "num_tokens": 551937410.0, "step": 720 }, { "epoch": 0.2641872394998397, "grad_norm": 0.2627664779177708, "learning_rate": 3.995132635765972e-05, "loss": 0.6232, "num_tokens": 552662397.0, "step": 721 }, { "epoch": 0.26455365730774516, "grad_norm": 0.2962786044599794, "learning_rate": 3.995101197041727e-05, "loss": 0.6415, "num_tokens": 553358967.0, "step": 722 }, { "epoch": 0.26492007511565063, "grad_norm": 0.3221791686782898, "learning_rate": 3.9950696572497046e-05, "loss": 0.6218, "num_tokens": 554132798.0, "step": 723 }, { "epoch": 0.2652864929235561, "grad_norm": 0.27449695884254105, "learning_rate": 3.995038016391682e-05, "loss": 0.611, "num_tokens": 555041745.0, "step": 724 }, { "epoch": 0.2656529107314615, "grad_norm": 0.27453247478862514, "learning_rate": 3.995006274469441e-05, "loss": 0.5723, "num_tokens": 555833841.0, "step": 725 }, { "epoch": 0.266019328539367, "grad_norm": 0.30203404903142045, "learning_rate": 3.9949744314847684e-05, "loss": 0.5944, "num_tokens": 556518297.0, "step": 726 }, { "epoch": 0.26638574634727247, "grad_norm": 0.27871388334846, "learning_rate": 3.9949424874394564e-05, "loss": 0.6421, "num_tokens": 557222764.0, "step": 727 }, { "epoch": 0.26675216415517794, "grad_norm": 0.2771144921606604, "learning_rate": 3.9949104423353035e-05, "loss": 0.6179, "num_tokens": 557970245.0, "step": 728 }, { "epoch": 0.2671185819630834, "grad_norm": 0.28808121928173824, "learning_rate": 3.9948782961741154e-05, "loss": 0.5955, "num_tokens": 558818518.0, "step": 729 }, { "epoch": 0.2674849997709889, "grad_norm": 0.25327637852571194, "learning_rate": 3.9948460489576996e-05, "loss": 0.599, "num_tokens": 559564626.0, "step": 730 }, { "epoch": 0.26785141757889436, "grad_norm": 0.28122295785635526, "learning_rate": 3.994813700687875e-05, "loss": 0.5798, "num_tokens": 560372756.0, "step": 731 }, { "epoch": 0.2682178353867998, "grad_norm": 0.35232373083575985, "learning_rate": 3.994781251366459e-05, "loss": 0.5753, "num_tokens": 561277271.0, "step": 732 }, { "epoch": 0.26858425319470525, "grad_norm": 0.36914295109646444, "learning_rate": 3.9947487009952814e-05, "loss": 0.6178, "num_tokens": 561973420.0, "step": 733 }, { "epoch": 0.2689506710026107, "grad_norm": 0.24849965694685952, "learning_rate": 3.9947160495761736e-05, "loss": 0.6421, "num_tokens": 562733401.0, "step": 734 }, { "epoch": 0.2693170888105162, "grad_norm": 0.44178694255012707, "learning_rate": 3.9946832971109745e-05, "loss": 0.5843, "num_tokens": 563498476.0, "step": 735 }, { "epoch": 0.26968350661842166, "grad_norm": 0.37627714706838183, "learning_rate": 3.994650443601527e-05, "loss": 0.6104, "num_tokens": 564306955.0, "step": 736 }, { "epoch": 0.27004992442632714, "grad_norm": 0.2670072408681829, "learning_rate": 3.9946174890496826e-05, "loss": 0.6019, "num_tokens": 565267654.0, "step": 737 }, { "epoch": 0.2704163422342326, "grad_norm": 0.3291814916687825, "learning_rate": 3.994584433457296e-05, "loss": 0.597, "num_tokens": 566032867.0, "step": 738 }, { "epoch": 0.270782760042138, "grad_norm": 0.31284073533318424, "learning_rate": 3.994551276826227e-05, "loss": 0.5955, "num_tokens": 566782275.0, "step": 739 }, { "epoch": 0.2711491778500435, "grad_norm": 0.29844242615423355, "learning_rate": 3.994518019158344e-05, "loss": 0.627, "num_tokens": 567587309.0, "step": 740 }, { "epoch": 0.27151559565794897, "grad_norm": 0.2521699968574945, "learning_rate": 3.9944846604555194e-05, "loss": 0.5962, "num_tokens": 568391803.0, "step": 741 }, { "epoch": 0.27188201346585444, "grad_norm": 0.33073737738582837, "learning_rate": 3.994451200719631e-05, "loss": 0.6154, "num_tokens": 569059993.0, "step": 742 }, { "epoch": 0.2722484312737599, "grad_norm": 0.25661645778457415, "learning_rate": 3.994417639952562e-05, "loss": 0.585, "num_tokens": 569914930.0, "step": 743 }, { "epoch": 0.2726148490816654, "grad_norm": 0.33326068889284605, "learning_rate": 3.994383978156202e-05, "loss": 0.6122, "num_tokens": 570621054.0, "step": 744 }, { "epoch": 0.2729812668895708, "grad_norm": 0.27370002206844785, "learning_rate": 3.994350215332448e-05, "loss": 0.6241, "num_tokens": 571395644.0, "step": 745 }, { "epoch": 0.2733476846974763, "grad_norm": 0.2769887092218055, "learning_rate": 3.994316351483199e-05, "loss": 0.5933, "num_tokens": 572032692.0, "step": 746 }, { "epoch": 0.27371410250538175, "grad_norm": 0.30433368661931653, "learning_rate": 3.9942823866103626e-05, "loss": 0.6057, "num_tokens": 572796920.0, "step": 747 }, { "epoch": 0.2740805203132872, "grad_norm": 0.2514078762764806, "learning_rate": 3.99424832071585e-05, "loss": 0.6273, "num_tokens": 573566978.0, "step": 748 }, { "epoch": 0.2744469381211927, "grad_norm": 0.2538191603695708, "learning_rate": 3.99421415380158e-05, "loss": 0.5861, "num_tokens": 574308058.0, "step": 749 }, { "epoch": 0.27481335592909817, "grad_norm": 0.2503307626488844, "learning_rate": 3.9941798858694774e-05, "loss": 0.6261, "num_tokens": 574955281.0, "step": 750 }, { "epoch": 0.27517977373700364, "grad_norm": 0.25846203603744033, "learning_rate": 3.994145516921469e-05, "loss": 0.5989, "num_tokens": 575647160.0, "step": 751 }, { "epoch": 0.27554619154490906, "grad_norm": 0.27783198498354755, "learning_rate": 3.994111046959492e-05, "loss": 0.6111, "num_tokens": 576459137.0, "step": 752 }, { "epoch": 0.27591260935281453, "grad_norm": 0.23310580958956073, "learning_rate": 3.9940764759854856e-05, "loss": 0.6091, "num_tokens": 577371141.0, "step": 753 }, { "epoch": 0.27627902716072, "grad_norm": 0.2836329288860321, "learning_rate": 3.9940418040013976e-05, "loss": 0.6211, "num_tokens": 578097753.0, "step": 754 }, { "epoch": 0.2766454449686255, "grad_norm": 0.2535580021682139, "learning_rate": 3.994007031009179e-05, "loss": 0.6055, "num_tokens": 578985646.0, "step": 755 }, { "epoch": 0.27701186277653095, "grad_norm": 0.3044313020724499, "learning_rate": 3.993972157010788e-05, "loss": 0.6183, "num_tokens": 579831375.0, "step": 756 }, { "epoch": 0.2773782805844364, "grad_norm": 0.2899935196703482, "learning_rate": 3.993937182008188e-05, "loss": 0.6161, "num_tokens": 580628380.0, "step": 757 }, { "epoch": 0.2777446983923419, "grad_norm": 0.3022504548643783, "learning_rate": 3.993902106003349e-05, "loss": 0.6226, "num_tokens": 581410834.0, "step": 758 }, { "epoch": 0.2781111162002473, "grad_norm": 0.26667555834879586, "learning_rate": 3.993866928998244e-05, "loss": 0.5827, "num_tokens": 582164542.0, "step": 759 }, { "epoch": 0.2784775340081528, "grad_norm": 0.21461617419392504, "learning_rate": 3.993831650994855e-05, "loss": 0.6206, "num_tokens": 582954281.0, "step": 760 }, { "epoch": 0.27884395181605826, "grad_norm": 0.2779114439032474, "learning_rate": 3.9937962719951685e-05, "loss": 0.6045, "num_tokens": 583665146.0, "step": 761 }, { "epoch": 0.27921036962396373, "grad_norm": 0.2743939430282114, "learning_rate": 3.993760792001175e-05, "loss": 0.6343, "num_tokens": 584476927.0, "step": 762 }, { "epoch": 0.2795767874318692, "grad_norm": 0.24928684326784548, "learning_rate": 3.993725211014873e-05, "loss": 0.6407, "num_tokens": 585307784.0, "step": 763 }, { "epoch": 0.2799432052397747, "grad_norm": 0.2510205711257017, "learning_rate": 3.993689529038266e-05, "loss": 0.6257, "num_tokens": 586068440.0, "step": 764 }, { "epoch": 0.2803096230476801, "grad_norm": 0.2683465842596722, "learning_rate": 3.9936537460733625e-05, "loss": 0.5994, "num_tokens": 586881923.0, "step": 765 }, { "epoch": 0.28067604085558556, "grad_norm": 0.24217368322525681, "learning_rate": 3.993617862122177e-05, "loss": 0.6011, "num_tokens": 587649967.0, "step": 766 }, { "epoch": 0.28104245866349103, "grad_norm": 0.21268353193371523, "learning_rate": 3.99358187718673e-05, "loss": 0.5471, "num_tokens": 588447825.0, "step": 767 }, { "epoch": 0.2814088764713965, "grad_norm": 0.23419535028772218, "learning_rate": 3.993545791269048e-05, "loss": 0.5954, "num_tokens": 589239765.0, "step": 768 }, { "epoch": 0.281775294279302, "grad_norm": 0.21854747463133933, "learning_rate": 3.9935096043711615e-05, "loss": 0.5632, "num_tokens": 590033179.0, "step": 769 }, { "epoch": 0.28214171208720745, "grad_norm": 0.23851599541393428, "learning_rate": 3.99347331649511e-05, "loss": 0.6037, "num_tokens": 590838000.0, "step": 770 }, { "epoch": 0.2825081298951129, "grad_norm": 0.25530354886048146, "learning_rate": 3.993436927642934e-05, "loss": 0.6002, "num_tokens": 591549424.0, "step": 771 }, { "epoch": 0.28287454770301834, "grad_norm": 0.24878524161588067, "learning_rate": 3.993400437816685e-05, "loss": 0.5938, "num_tokens": 592238379.0, "step": 772 }, { "epoch": 0.2832409655109238, "grad_norm": 0.23014381136551784, "learning_rate": 3.993363847018415e-05, "loss": 0.586, "num_tokens": 593070955.0, "step": 773 }, { "epoch": 0.2836073833188293, "grad_norm": 0.22872319518948847, "learning_rate": 3.993327155250185e-05, "loss": 0.6285, "num_tokens": 593785721.0, "step": 774 }, { "epoch": 0.28397380112673476, "grad_norm": 0.21588454129963253, "learning_rate": 3.993290362514061e-05, "loss": 0.592, "num_tokens": 594583834.0, "step": 775 }, { "epoch": 0.28434021893464023, "grad_norm": 0.23687230634820367, "learning_rate": 3.9932534688121154e-05, "loss": 0.623, "num_tokens": 595389664.0, "step": 776 }, { "epoch": 0.2847066367425457, "grad_norm": 0.24888526048567639, "learning_rate": 3.9932164741464234e-05, "loss": 0.6914, "num_tokens": 596006843.0, "step": 777 }, { "epoch": 0.2850730545504512, "grad_norm": 0.23702309584303544, "learning_rate": 3.9931793785190696e-05, "loss": 0.56, "num_tokens": 596797980.0, "step": 778 }, { "epoch": 0.2854394723583566, "grad_norm": 0.2320642492247544, "learning_rate": 3.993142181932142e-05, "loss": 0.6253, "num_tokens": 597641418.0, "step": 779 }, { "epoch": 0.28580589016626207, "grad_norm": 0.2733496525174336, "learning_rate": 3.993104884387734e-05, "loss": 0.6434, "num_tokens": 598457411.0, "step": 780 }, { "epoch": 0.28617230797416754, "grad_norm": 0.2702098564758518, "learning_rate": 3.9930674858879465e-05, "loss": 0.6155, "num_tokens": 599183264.0, "step": 781 }, { "epoch": 0.286538725782073, "grad_norm": 0.2920843455990547, "learning_rate": 3.993029986434886e-05, "loss": 0.6219, "num_tokens": 599980410.0, "step": 782 }, { "epoch": 0.2869051435899785, "grad_norm": 0.2654776371219815, "learning_rate": 3.992992386030661e-05, "loss": 0.621, "num_tokens": 600695551.0, "step": 783 }, { "epoch": 0.28727156139788396, "grad_norm": 0.28152230683900875, "learning_rate": 3.992954684677391e-05, "loss": 0.6367, "num_tokens": 601475055.0, "step": 784 }, { "epoch": 0.2876379792057894, "grad_norm": 0.22858967079139142, "learning_rate": 3.9929168823771975e-05, "loss": 0.5701, "num_tokens": 602128015.0, "step": 785 }, { "epoch": 0.28800439701369485, "grad_norm": 0.2983023181596343, "learning_rate": 3.992878979132209e-05, "loss": 0.6275, "num_tokens": 602831703.0, "step": 786 }, { "epoch": 0.2883708148216003, "grad_norm": 0.26742759589135723, "learning_rate": 3.9928409749445604e-05, "loss": 0.6423, "num_tokens": 603651415.0, "step": 787 }, { "epoch": 0.2887372326295058, "grad_norm": 0.25963186992964227, "learning_rate": 3.99280286981639e-05, "loss": 0.6482, "num_tokens": 604484979.0, "step": 788 }, { "epoch": 0.28910365043741126, "grad_norm": 0.30430657859027904, "learning_rate": 3.992764663749844e-05, "loss": 0.5789, "num_tokens": 605366552.0, "step": 789 }, { "epoch": 0.28947006824531674, "grad_norm": 0.2374891129006755, "learning_rate": 3.9927263567470745e-05, "loss": 0.6029, "num_tokens": 606275846.0, "step": 790 }, { "epoch": 0.2898364860532222, "grad_norm": 0.22071666249407554, "learning_rate": 3.992687948810236e-05, "loss": 0.5962, "num_tokens": 607121600.0, "step": 791 }, { "epoch": 0.2902029038611276, "grad_norm": 0.2800972911271558, "learning_rate": 3.992649439941492e-05, "loss": 0.5977, "num_tokens": 607902990.0, "step": 792 }, { "epoch": 0.2905693216690331, "grad_norm": 0.2515030528599922, "learning_rate": 3.9926108301430104e-05, "loss": 0.6207, "num_tokens": 608618765.0, "step": 793 }, { "epoch": 0.29093573947693857, "grad_norm": 0.2211015566949198, "learning_rate": 3.9925721194169666e-05, "loss": 0.5894, "num_tokens": 609331531.0, "step": 794 }, { "epoch": 0.29130215728484404, "grad_norm": 0.2547970612909575, "learning_rate": 3.9925333077655377e-05, "loss": 0.608, "num_tokens": 610018508.0, "step": 795 }, { "epoch": 0.2916685750927495, "grad_norm": 0.2607027860721211, "learning_rate": 3.9924943951909096e-05, "loss": 0.6073, "num_tokens": 610844909.0, "step": 796 }, { "epoch": 0.292034992900655, "grad_norm": 0.2377816103131733, "learning_rate": 3.9924553816952746e-05, "loss": 0.6208, "num_tokens": 611618040.0, "step": 797 }, { "epoch": 0.29240141070856046, "grad_norm": 0.2296745529780875, "learning_rate": 3.9924162672808274e-05, "loss": 0.6244, "num_tokens": 612355922.0, "step": 798 }, { "epoch": 0.2927678285164659, "grad_norm": 0.22019699200737416, "learning_rate": 3.992377051949771e-05, "loss": 0.5848, "num_tokens": 613083065.0, "step": 799 }, { "epoch": 0.29313424632437135, "grad_norm": 0.27416947520434565, "learning_rate": 3.9923377357043146e-05, "loss": 0.5716, "num_tokens": 613933311.0, "step": 800 }, { "epoch": 0.2935006641322768, "grad_norm": 0.2455089767217761, "learning_rate": 3.992298318546669e-05, "loss": 0.6342, "num_tokens": 614657233.0, "step": 801 }, { "epoch": 0.2938670819401823, "grad_norm": 0.21390858644557972, "learning_rate": 3.992258800479055e-05, "loss": 0.5831, "num_tokens": 615379963.0, "step": 802 }, { "epoch": 0.29423349974808777, "grad_norm": 0.21998188612954028, "learning_rate": 3.992219181503698e-05, "loss": 0.6186, "num_tokens": 616190062.0, "step": 803 }, { "epoch": 0.29459991755599324, "grad_norm": 0.23169408734227034, "learning_rate": 3.992179461622828e-05, "loss": 0.6191, "num_tokens": 616849948.0, "step": 804 }, { "epoch": 0.2949663353638987, "grad_norm": 0.22375072452801503, "learning_rate": 3.9921396408386815e-05, "loss": 0.5837, "num_tokens": 617851259.0, "step": 805 }, { "epoch": 0.29533275317180413, "grad_norm": 0.26132841263696877, "learning_rate": 3.9920997191535005e-05, "loss": 0.6202, "num_tokens": 618755119.0, "step": 806 }, { "epoch": 0.2956991709797096, "grad_norm": 0.2443614096878844, "learning_rate": 3.992059696569532e-05, "loss": 0.5767, "num_tokens": 619614925.0, "step": 807 }, { "epoch": 0.2960655887876151, "grad_norm": 0.2389209735546082, "learning_rate": 3.99201957308903e-05, "loss": 0.6255, "num_tokens": 620389869.0, "step": 808 }, { "epoch": 0.29643200659552055, "grad_norm": 0.27695351941001595, "learning_rate": 3.991979348714254e-05, "loss": 0.6166, "num_tokens": 621224914.0, "step": 809 }, { "epoch": 0.296798424403426, "grad_norm": 0.2624795843919982, "learning_rate": 3.991939023447467e-05, "loss": 0.6158, "num_tokens": 621960499.0, "step": 810 }, { "epoch": 0.2971648422113315, "grad_norm": 0.36630002852853005, "learning_rate": 3.9918985972909417e-05, "loss": 0.5908, "num_tokens": 622750279.0, "step": 811 }, { "epoch": 0.2975312600192369, "grad_norm": 0.32295956550227284, "learning_rate": 3.991858070246953e-05, "loss": 0.6216, "num_tokens": 623482086.0, "step": 812 }, { "epoch": 0.2978976778271424, "grad_norm": 0.28010984441212505, "learning_rate": 3.9918174423177825e-05, "loss": 0.6129, "num_tokens": 624263859.0, "step": 813 }, { "epoch": 0.29826409563504785, "grad_norm": 0.297722732638583, "learning_rate": 3.9917767135057176e-05, "loss": 0.5913, "num_tokens": 625054636.0, "step": 814 }, { "epoch": 0.2986305134429533, "grad_norm": 0.3449992228733057, "learning_rate": 3.991735883813052e-05, "loss": 0.568, "num_tokens": 625824745.0, "step": 815 }, { "epoch": 0.2989969312508588, "grad_norm": 0.35914019608616904, "learning_rate": 3.991694953242084e-05, "loss": 0.6326, "num_tokens": 626558629.0, "step": 816 }, { "epoch": 0.29936334905876427, "grad_norm": 0.2463273698141283, "learning_rate": 3.9916539217951176e-05, "loss": 0.6231, "num_tokens": 627359130.0, "step": 817 }, { "epoch": 0.29972976686666974, "grad_norm": 0.3863774730832072, "learning_rate": 3.9916127894744635e-05, "loss": 0.6181, "num_tokens": 628084897.0, "step": 818 }, { "epoch": 0.30009618467457516, "grad_norm": 0.35862862865607803, "learning_rate": 3.991571556282438e-05, "loss": 0.595, "num_tokens": 629011757.0, "step": 819 }, { "epoch": 0.30046260248248063, "grad_norm": 0.31411413889630513, "learning_rate": 3.991530222221363e-05, "loss": 0.6372, "num_tokens": 629727181.0, "step": 820 }, { "epoch": 0.3008290202903861, "grad_norm": 0.3215404480995367, "learning_rate": 3.991488787293563e-05, "loss": 0.5742, "num_tokens": 630499929.0, "step": 821 }, { "epoch": 0.3011954380982916, "grad_norm": 0.2120265447207636, "learning_rate": 3.991447251501375e-05, "loss": 0.62, "num_tokens": 631216617.0, "step": 822 }, { "epoch": 0.30156185590619705, "grad_norm": 0.38955718423112917, "learning_rate": 3.9914056148471334e-05, "loss": 0.5933, "num_tokens": 632073656.0, "step": 823 }, { "epoch": 0.3019282737141025, "grad_norm": 0.2873205806938142, "learning_rate": 3.991363877333185e-05, "loss": 0.6047, "num_tokens": 632792114.0, "step": 824 }, { "epoch": 0.302294691522008, "grad_norm": 0.39223250560536066, "learning_rate": 3.9913220389618795e-05, "loss": 0.6102, "num_tokens": 633510995.0, "step": 825 }, { "epoch": 0.3026611093299134, "grad_norm": 0.375621842964365, "learning_rate": 3.991280099735572e-05, "loss": 0.6406, "num_tokens": 634308866.0, "step": 826 }, { "epoch": 0.3030275271378189, "grad_norm": 0.31208252719953694, "learning_rate": 3.991238059656623e-05, "loss": 0.6725, "num_tokens": 635025557.0, "step": 827 }, { "epoch": 0.30339394494572436, "grad_norm": 0.2540094721786809, "learning_rate": 3.991195918727401e-05, "loss": 0.6105, "num_tokens": 635718760.0, "step": 828 }, { "epoch": 0.30376036275362983, "grad_norm": 0.2599170108270276, "learning_rate": 3.991153676950277e-05, "loss": 0.5921, "num_tokens": 636462793.0, "step": 829 }, { "epoch": 0.3041267805615353, "grad_norm": 0.2720125505932028, "learning_rate": 3.991111334327631e-05, "loss": 0.6187, "num_tokens": 637241671.0, "step": 830 }, { "epoch": 0.3044931983694408, "grad_norm": 0.2978812529027591, "learning_rate": 3.991068890861846e-05, "loss": 0.6191, "num_tokens": 637936347.0, "step": 831 }, { "epoch": 0.3048596161773462, "grad_norm": 0.29833433496952233, "learning_rate": 3.9910263465553115e-05, "loss": 0.6072, "num_tokens": 638747617.0, "step": 832 }, { "epoch": 0.30522603398525167, "grad_norm": 0.35822850231458386, "learning_rate": 3.990983701410423e-05, "loss": 0.6237, "num_tokens": 639483293.0, "step": 833 }, { "epoch": 0.30559245179315714, "grad_norm": 0.2604348124281697, "learning_rate": 3.990940955429581e-05, "loss": 0.5987, "num_tokens": 640313094.0, "step": 834 }, { "epoch": 0.3059588696010626, "grad_norm": 0.3792108915270631, "learning_rate": 3.990898108615194e-05, "loss": 0.5815, "num_tokens": 641220841.0, "step": 835 }, { "epoch": 0.3063252874089681, "grad_norm": 0.3830848388237622, "learning_rate": 3.990855160969672e-05, "loss": 0.5945, "num_tokens": 641905740.0, "step": 836 }, { "epoch": 0.30669170521687356, "grad_norm": 0.2505715185828192, "learning_rate": 3.9908121124954343e-05, "loss": 0.5959, "num_tokens": 642777414.0, "step": 837 }, { "epoch": 0.30705812302477903, "grad_norm": 0.30857216030007684, "learning_rate": 3.990768963194905e-05, "loss": 0.6305, "num_tokens": 643539372.0, "step": 838 }, { "epoch": 0.30742454083268445, "grad_norm": 0.21055234936717376, "learning_rate": 3.990725713070514e-05, "loss": 0.5996, "num_tokens": 644347259.0, "step": 839 }, { "epoch": 0.3077909586405899, "grad_norm": 0.2671849855122715, "learning_rate": 3.9906823621246935e-05, "loss": 0.6423, "num_tokens": 645113807.0, "step": 840 }, { "epoch": 0.3081573764484954, "grad_norm": 0.23939901549729395, "learning_rate": 3.990638910359886e-05, "loss": 0.6164, "num_tokens": 645866914.0, "step": 841 }, { "epoch": 0.30852379425640086, "grad_norm": 0.23365024542764323, "learning_rate": 3.99059535777854e-05, "loss": 0.6071, "num_tokens": 646627530.0, "step": 842 }, { "epoch": 0.30889021206430634, "grad_norm": 0.22868856681312397, "learning_rate": 3.990551704383104e-05, "loss": 0.6006, "num_tokens": 647336730.0, "step": 843 }, { "epoch": 0.3092566298722118, "grad_norm": 0.2685715408385277, "learning_rate": 3.990507950176038e-05, "loss": 0.5932, "num_tokens": 648134790.0, "step": 844 }, { "epoch": 0.3096230476801173, "grad_norm": 0.28737546613607445, "learning_rate": 3.990464095159805e-05, "loss": 0.6137, "num_tokens": 648836159.0, "step": 845 }, { "epoch": 0.3099894654880227, "grad_norm": 0.22418709812781776, "learning_rate": 3.9904201393368734e-05, "loss": 0.5856, "num_tokens": 649597064.0, "step": 846 }, { "epoch": 0.31035588329592817, "grad_norm": 0.2815873139669872, "learning_rate": 3.9903760827097186e-05, "loss": 0.6007, "num_tokens": 650405596.0, "step": 847 }, { "epoch": 0.31072230110383364, "grad_norm": 0.21590229110742387, "learning_rate": 3.990331925280822e-05, "loss": 0.6179, "num_tokens": 651080883.0, "step": 848 }, { "epoch": 0.3110887189117391, "grad_norm": 0.2008692865218517, "learning_rate": 3.990287667052667e-05, "loss": 0.5817, "num_tokens": 651773990.0, "step": 849 }, { "epoch": 0.3114551367196446, "grad_norm": 0.28870331120884385, "learning_rate": 3.990243308027748e-05, "loss": 0.5924, "num_tokens": 652407655.0, "step": 850 }, { "epoch": 0.31182155452755006, "grad_norm": 0.23657159563227503, "learning_rate": 3.990198848208562e-05, "loss": 0.6191, "num_tokens": 653153456.0, "step": 851 }, { "epoch": 0.3121879723354555, "grad_norm": 0.20309976659238374, "learning_rate": 3.9901542875976116e-05, "loss": 0.6274, "num_tokens": 653858149.0, "step": 852 }, { "epoch": 0.31255439014336095, "grad_norm": 0.2436318064939398, "learning_rate": 3.990109626197407e-05, "loss": 0.5814, "num_tokens": 654646828.0, "step": 853 }, { "epoch": 0.3129208079512664, "grad_norm": 0.26279899095968867, "learning_rate": 3.99006486401046e-05, "loss": 0.6251, "num_tokens": 655477744.0, "step": 854 }, { "epoch": 0.3132872257591719, "grad_norm": 0.26006747452745294, "learning_rate": 3.990020001039294e-05, "loss": 0.6274, "num_tokens": 656128094.0, "step": 855 }, { "epoch": 0.31365364356707737, "grad_norm": 0.23739822962273127, "learning_rate": 3.989975037286432e-05, "loss": 0.6095, "num_tokens": 656730681.0, "step": 856 }, { "epoch": 0.31402006137498284, "grad_norm": 0.24770106514130436, "learning_rate": 3.989929972754408e-05, "loss": 0.6099, "num_tokens": 657514039.0, "step": 857 }, { "epoch": 0.3143864791828883, "grad_norm": 0.3014183276986585, "learning_rate": 3.9898848074457574e-05, "loss": 0.6118, "num_tokens": 658372862.0, "step": 858 }, { "epoch": 0.31475289699079373, "grad_norm": 0.27445449262727445, "learning_rate": 3.989839541363024e-05, "loss": 0.6633, "num_tokens": 659053346.0, "step": 859 }, { "epoch": 0.3151193147986992, "grad_norm": 0.2414069438481127, "learning_rate": 3.989794174508756e-05, "loss": 0.6058, "num_tokens": 659854144.0, "step": 860 }, { "epoch": 0.3154857326066047, "grad_norm": 0.2926590200602944, "learning_rate": 3.9897487068855085e-05, "loss": 0.6482, "num_tokens": 660628589.0, "step": 861 }, { "epoch": 0.31585215041451015, "grad_norm": 0.3095173938876417, "learning_rate": 3.98970313849584e-05, "loss": 0.582, "num_tokens": 661382216.0, "step": 862 }, { "epoch": 0.3162185682224156, "grad_norm": 0.2706616124462374, "learning_rate": 3.989657469342317e-05, "loss": 0.6149, "num_tokens": 662133005.0, "step": 863 }, { "epoch": 0.3165849860303211, "grad_norm": 0.3147496651651748, "learning_rate": 3.9896116994275106e-05, "loss": 0.6062, "num_tokens": 662908187.0, "step": 864 }, { "epoch": 0.31695140383822656, "grad_norm": 0.2854076236681606, "learning_rate": 3.989565828753999e-05, "loss": 0.6056, "num_tokens": 663597723.0, "step": 865 }, { "epoch": 0.317317821646132, "grad_norm": 0.24482119789080956, "learning_rate": 3.989519857324362e-05, "loss": 0.6045, "num_tokens": 664371543.0, "step": 866 }, { "epoch": 0.31768423945403745, "grad_norm": 0.29827537814857685, "learning_rate": 3.9894737851411895e-05, "loss": 0.6285, "num_tokens": 665043079.0, "step": 867 }, { "epoch": 0.3180506572619429, "grad_norm": 0.2585867189847413, "learning_rate": 3.989427612207076e-05, "loss": 0.6036, "num_tokens": 665776523.0, "step": 868 }, { "epoch": 0.3184170750698484, "grad_norm": 0.24130487944494078, "learning_rate": 3.98938133852462e-05, "loss": 0.5571, "num_tokens": 666598985.0, "step": 869 }, { "epoch": 0.31878349287775387, "grad_norm": 0.23710749852007346, "learning_rate": 3.989334964096427e-05, "loss": 0.5924, "num_tokens": 667365690.0, "step": 870 }, { "epoch": 0.31914991068565934, "grad_norm": 0.2797653959935169, "learning_rate": 3.98928848892511e-05, "loss": 0.6282, "num_tokens": 668057483.0, "step": 871 }, { "epoch": 0.31951632849356476, "grad_norm": 0.23434530919163507, "learning_rate": 3.9892419130132826e-05, "loss": 0.5957, "num_tokens": 668636898.0, "step": 872 }, { "epoch": 0.31988274630147023, "grad_norm": 0.24328265125686782, "learning_rate": 3.989195236363568e-05, "loss": 0.5932, "num_tokens": 669301940.0, "step": 873 }, { "epoch": 0.3202491641093757, "grad_norm": 0.32336354071126455, "learning_rate": 3.989148458978596e-05, "loss": 0.6126, "num_tokens": 670136695.0, "step": 874 }, { "epoch": 0.3206155819172812, "grad_norm": 0.25858482486082335, "learning_rate": 3.989101580860998e-05, "loss": 0.608, "num_tokens": 670815260.0, "step": 875 }, { "epoch": 0.32098199972518665, "grad_norm": 0.22625610755334463, "learning_rate": 3.989054602013414e-05, "loss": 0.5624, "num_tokens": 671467215.0, "step": 876 }, { "epoch": 0.3213484175330921, "grad_norm": 0.3050427529207764, "learning_rate": 3.98900752243849e-05, "loss": 0.6276, "num_tokens": 672141490.0, "step": 877 }, { "epoch": 0.3217148353409976, "grad_norm": 0.27696781514070234, "learning_rate": 3.9889603421388754e-05, "loss": 0.573, "num_tokens": 672988124.0, "step": 878 }, { "epoch": 0.322081253148903, "grad_norm": 0.3032885424419623, "learning_rate": 3.988913061117227e-05, "loss": 0.5672, "num_tokens": 673777177.0, "step": 879 }, { "epoch": 0.3224476709568085, "grad_norm": 0.246398976100194, "learning_rate": 3.9888656793762065e-05, "loss": 0.6055, "num_tokens": 674585502.0, "step": 880 }, { "epoch": 0.32281408876471396, "grad_norm": 0.24215536546740732, "learning_rate": 3.988818196918483e-05, "loss": 0.5928, "num_tokens": 675470699.0, "step": 881 }, { "epoch": 0.32318050657261943, "grad_norm": 0.20774890154135625, "learning_rate": 3.9887706137467285e-05, "loss": 0.5584, "num_tokens": 676298465.0, "step": 882 }, { "epoch": 0.3235469243805249, "grad_norm": 0.226967106009084, "learning_rate": 3.9887229298636223e-05, "loss": 0.614, "num_tokens": 677058185.0, "step": 883 }, { "epoch": 0.3239133421884304, "grad_norm": 0.22043261437728695, "learning_rate": 3.988675145271849e-05, "loss": 0.5868, "num_tokens": 677964814.0, "step": 884 }, { "epoch": 0.32427975999633585, "grad_norm": 0.24618275928758115, "learning_rate": 3.988627259974099e-05, "loss": 0.6273, "num_tokens": 678676012.0, "step": 885 }, { "epoch": 0.32464617780424126, "grad_norm": 0.22027226224250038, "learning_rate": 3.9885792739730686e-05, "loss": 0.5964, "num_tokens": 679406353.0, "step": 886 }, { "epoch": 0.32501259561214674, "grad_norm": 0.20878831262620548, "learning_rate": 3.988531187271459e-05, "loss": 0.5826, "num_tokens": 680246577.0, "step": 887 }, { "epoch": 0.3253790134200522, "grad_norm": 0.23414038479704655, "learning_rate": 3.988482999871978e-05, "loss": 0.5721, "num_tokens": 680903963.0, "step": 888 }, { "epoch": 0.3257454312279577, "grad_norm": 0.2217159150811653, "learning_rate": 3.988434711777339e-05, "loss": 0.6093, "num_tokens": 681670833.0, "step": 889 }, { "epoch": 0.32611184903586315, "grad_norm": 0.22519858620093783, "learning_rate": 3.98838632299026e-05, "loss": 0.6062, "num_tokens": 682426476.0, "step": 890 }, { "epoch": 0.3264782668437686, "grad_norm": 0.2661631078294844, "learning_rate": 3.9883378335134655e-05, "loss": 0.5816, "num_tokens": 683120168.0, "step": 891 }, { "epoch": 0.32684468465167404, "grad_norm": 0.2683568000563921, "learning_rate": 3.988289243349686e-05, "loss": 0.6198, "num_tokens": 683854230.0, "step": 892 }, { "epoch": 0.3272111024595795, "grad_norm": 0.2621661647147486, "learning_rate": 3.988240552501658e-05, "loss": 0.5842, "num_tokens": 684549910.0, "step": 893 }, { "epoch": 0.327577520267485, "grad_norm": 0.33719453276854905, "learning_rate": 3.98819176097212e-05, "loss": 0.5996, "num_tokens": 685321073.0, "step": 894 }, { "epoch": 0.32794393807539046, "grad_norm": 0.29160007892641104, "learning_rate": 3.988142868763822e-05, "loss": 0.6431, "num_tokens": 686054003.0, "step": 895 }, { "epoch": 0.32831035588329593, "grad_norm": 0.2394901949472334, "learning_rate": 3.9880938758795144e-05, "loss": 0.5881, "num_tokens": 686760067.0, "step": 896 }, { "epoch": 0.3286767736912014, "grad_norm": 0.23832987908208134, "learning_rate": 3.988044782321958e-05, "loss": 0.619, "num_tokens": 687610382.0, "step": 897 }, { "epoch": 0.3290431914991069, "grad_norm": 0.2606994107651509, "learning_rate": 3.9879955880939156e-05, "loss": 0.6109, "num_tokens": 688377555.0, "step": 898 }, { "epoch": 0.3294096093070123, "grad_norm": 0.24626891466671585, "learning_rate": 3.987946293198157e-05, "loss": 0.5724, "num_tokens": 689300764.0, "step": 899 }, { "epoch": 0.32977602711491777, "grad_norm": 0.2368155233058307, "learning_rate": 3.9878968976374575e-05, "loss": 0.6205, "num_tokens": 690069055.0, "step": 900 }, { "epoch": 0.33014244492282324, "grad_norm": 0.21681306231418496, "learning_rate": 3.987847401414598e-05, "loss": 0.598, "num_tokens": 690741076.0, "step": 901 }, { "epoch": 0.3305088627307287, "grad_norm": 0.23849117517430715, "learning_rate": 3.987797804532366e-05, "loss": 0.5938, "num_tokens": 691519989.0, "step": 902 }, { "epoch": 0.3308752805386342, "grad_norm": 0.2344954909544746, "learning_rate": 3.987748106993553e-05, "loss": 0.6275, "num_tokens": 692199817.0, "step": 903 }, { "epoch": 0.33124169834653966, "grad_norm": 0.23055475402891637, "learning_rate": 3.987698308800958e-05, "loss": 0.6365, "num_tokens": 692797383.0, "step": 904 }, { "epoch": 0.33160811615444513, "grad_norm": 0.2464025558009832, "learning_rate": 3.987648409957384e-05, "loss": 0.6141, "num_tokens": 693564697.0, "step": 905 }, { "epoch": 0.33197453396235055, "grad_norm": 0.23672569042779562, "learning_rate": 3.987598410465641e-05, "loss": 0.5888, "num_tokens": 694313470.0, "step": 906 }, { "epoch": 0.332340951770256, "grad_norm": 0.24733481878207736, "learning_rate": 3.987548310328543e-05, "loss": 0.6037, "num_tokens": 695142442.0, "step": 907 }, { "epoch": 0.3327073695781615, "grad_norm": 0.2342380180997685, "learning_rate": 3.987498109548912e-05, "loss": 0.5875, "num_tokens": 696073431.0, "step": 908 }, { "epoch": 0.33307378738606697, "grad_norm": 0.2598504608597241, "learning_rate": 3.9874478081295734e-05, "loss": 0.6054, "num_tokens": 696878317.0, "step": 909 }, { "epoch": 0.33344020519397244, "grad_norm": 0.23868850319105844, "learning_rate": 3.9873974060733604e-05, "loss": 0.5907, "num_tokens": 697599745.0, "step": 910 }, { "epoch": 0.3338066230018779, "grad_norm": 0.2489344426830404, "learning_rate": 3.98734690338311e-05, "loss": 0.5815, "num_tokens": 698478195.0, "step": 911 }, { "epoch": 0.33417304080978333, "grad_norm": 0.2226314569140442, "learning_rate": 3.987296300061664e-05, "loss": 0.5944, "num_tokens": 699182669.0, "step": 912 }, { "epoch": 0.3345394586176888, "grad_norm": 0.26396594647595684, "learning_rate": 3.9872455961118756e-05, "loss": 0.6164, "num_tokens": 699950798.0, "step": 913 }, { "epoch": 0.3349058764255943, "grad_norm": 0.2680229914740603, "learning_rate": 3.987194791536596e-05, "loss": 0.6182, "num_tokens": 700723129.0, "step": 914 }, { "epoch": 0.33527229423349975, "grad_norm": 0.28132056251511, "learning_rate": 3.987143886338686e-05, "loss": 0.6208, "num_tokens": 701412973.0, "step": 915 }, { "epoch": 0.3356387120414052, "grad_norm": 0.2678219321556763, "learning_rate": 3.9870928805210134e-05, "loss": 0.6211, "num_tokens": 702244196.0, "step": 916 }, { "epoch": 0.3360051298493107, "grad_norm": 0.2968967828513086, "learning_rate": 3.987041774086448e-05, "loss": 0.6553, "num_tokens": 702934012.0, "step": 917 }, { "epoch": 0.33637154765721616, "grad_norm": 0.2556354892867192, "learning_rate": 3.9869905670378686e-05, "loss": 0.5916, "num_tokens": 703731801.0, "step": 918 }, { "epoch": 0.3367379654651216, "grad_norm": 0.24437233396314292, "learning_rate": 3.9869392593781575e-05, "loss": 0.6308, "num_tokens": 704520696.0, "step": 919 }, { "epoch": 0.33710438327302705, "grad_norm": 0.24752175825723127, "learning_rate": 3.986887851110204e-05, "loss": 0.6313, "num_tokens": 705242714.0, "step": 920 }, { "epoch": 0.3374708010809325, "grad_norm": 0.25911695711941557, "learning_rate": 3.986836342236901e-05, "loss": 0.5943, "num_tokens": 706055017.0, "step": 921 }, { "epoch": 0.337837218888838, "grad_norm": 0.27221210704204807, "learning_rate": 3.9867847327611514e-05, "loss": 0.5888, "num_tokens": 706763379.0, "step": 922 }, { "epoch": 0.33820363669674347, "grad_norm": 0.2351574173208253, "learning_rate": 3.9867330226858584e-05, "loss": 0.6044, "num_tokens": 707463672.0, "step": 923 }, { "epoch": 0.33857005450464894, "grad_norm": 0.3024010455470893, "learning_rate": 3.986681212013934e-05, "loss": 0.6014, "num_tokens": 708260334.0, "step": 924 }, { "epoch": 0.3389364723125544, "grad_norm": 0.2776296171348024, "learning_rate": 3.9866293007482956e-05, "loss": 0.6109, "num_tokens": 709063422.0, "step": 925 }, { "epoch": 0.33930289012045983, "grad_norm": 0.2452512224306409, "learning_rate": 3.9865772888918656e-05, "loss": 0.6676, "num_tokens": 709887723.0, "step": 926 }, { "epoch": 0.3396693079283653, "grad_norm": 0.23995960446936693, "learning_rate": 3.986525176447572e-05, "loss": 0.594, "num_tokens": 710668184.0, "step": 927 }, { "epoch": 0.3400357257362708, "grad_norm": 0.2297676230722394, "learning_rate": 3.9864729634183506e-05, "loss": 0.601, "num_tokens": 711373310.0, "step": 928 }, { "epoch": 0.34040214354417625, "grad_norm": 0.19503332989956837, "learning_rate": 3.986420649807139e-05, "loss": 0.5804, "num_tokens": 712106512.0, "step": 929 }, { "epoch": 0.3407685613520817, "grad_norm": 0.21840844129727266, "learning_rate": 3.9863682356168837e-05, "loss": 0.5873, "num_tokens": 712994704.0, "step": 930 }, { "epoch": 0.3411349791599872, "grad_norm": 0.24521854286880151, "learning_rate": 3.986315720850535e-05, "loss": 0.5908, "num_tokens": 713731890.0, "step": 931 }, { "epoch": 0.3415013969678926, "grad_norm": 0.23046441169866216, "learning_rate": 3.98626310551105e-05, "loss": 0.6351, "num_tokens": 714478374.0, "step": 932 }, { "epoch": 0.3418678147757981, "grad_norm": 0.22547589096500564, "learning_rate": 3.9862103896013916e-05, "loss": 0.5788, "num_tokens": 715226126.0, "step": 933 }, { "epoch": 0.34223423258370356, "grad_norm": 0.23519303558181076, "learning_rate": 3.986157573124527e-05, "loss": 0.5806, "num_tokens": 715981063.0, "step": 934 }, { "epoch": 0.34260065039160903, "grad_norm": 0.2670603236911752, "learning_rate": 3.98610465608343e-05, "loss": 0.6449, "num_tokens": 716672135.0, "step": 935 }, { "epoch": 0.3429670681995145, "grad_norm": 0.2518253155731396, "learning_rate": 3.986051638481081e-05, "loss": 0.5975, "num_tokens": 717416200.0, "step": 936 }, { "epoch": 0.34333348600742, "grad_norm": 0.2427717829984888, "learning_rate": 3.985998520320463e-05, "loss": 0.5842, "num_tokens": 718229350.0, "step": 937 }, { "epoch": 0.34369990381532545, "grad_norm": 0.2318235849959392, "learning_rate": 3.9859453016045675e-05, "loss": 0.5866, "num_tokens": 718948659.0, "step": 938 }, { "epoch": 0.34406632162323086, "grad_norm": 0.2611318233838514, "learning_rate": 3.985891982336392e-05, "loss": 0.6285, "num_tokens": 719726111.0, "step": 939 }, { "epoch": 0.34443273943113634, "grad_norm": 0.22579253816446093, "learning_rate": 3.9858385625189364e-05, "loss": 0.6107, "num_tokens": 720533641.0, "step": 940 }, { "epoch": 0.3447991572390418, "grad_norm": 0.25963284128010505, "learning_rate": 3.98578504215521e-05, "loss": 0.5962, "num_tokens": 721317195.0, "step": 941 }, { "epoch": 0.3451655750469473, "grad_norm": 0.23636680125147147, "learning_rate": 3.985731421248226e-05, "loss": 0.6237, "num_tokens": 722040028.0, "step": 942 }, { "epoch": 0.34553199285485275, "grad_norm": 0.24191065133225928, "learning_rate": 3.985677699801002e-05, "loss": 0.6008, "num_tokens": 722793736.0, "step": 943 }, { "epoch": 0.3458984106627582, "grad_norm": 0.22591618167156383, "learning_rate": 3.985623877816564e-05, "loss": 0.5996, "num_tokens": 723495128.0, "step": 944 }, { "epoch": 0.3462648284706637, "grad_norm": 0.2486674260786215, "learning_rate": 3.9855699552979425e-05, "loss": 0.5641, "num_tokens": 724306514.0, "step": 945 }, { "epoch": 0.3466312462785691, "grad_norm": 0.22679121484738757, "learning_rate": 3.9855159322481725e-05, "loss": 0.6212, "num_tokens": 724909460.0, "step": 946 }, { "epoch": 0.3469976640864746, "grad_norm": 0.27048052473424206, "learning_rate": 3.9854618086702955e-05, "loss": 0.622, "num_tokens": 725570604.0, "step": 947 }, { "epoch": 0.34736408189438006, "grad_norm": 0.22497520865620524, "learning_rate": 3.9854075845673595e-05, "loss": 0.6057, "num_tokens": 726295257.0, "step": 948 }, { "epoch": 0.34773049970228553, "grad_norm": 0.25464819284987683, "learning_rate": 3.9853532599424164e-05, "loss": 0.6094, "num_tokens": 727090951.0, "step": 949 }, { "epoch": 0.348096917510191, "grad_norm": 0.2345242338782417, "learning_rate": 3.9852988347985265e-05, "loss": 0.603, "num_tokens": 727949749.0, "step": 950 }, { "epoch": 0.3484633353180965, "grad_norm": 0.35939477564926353, "learning_rate": 3.985244309138752e-05, "loss": 0.6638, "num_tokens": 728620111.0, "step": 951 }, { "epoch": 0.34882975312600195, "grad_norm": 0.2680922118183737, "learning_rate": 3.985189682966165e-05, "loss": 0.6116, "num_tokens": 729346114.0, "step": 952 }, { "epoch": 0.34919617093390737, "grad_norm": 0.24081130051623217, "learning_rate": 3.9851349562838394e-05, "loss": 0.614, "num_tokens": 730139522.0, "step": 953 }, { "epoch": 0.34956258874181284, "grad_norm": 0.24614782074088598, "learning_rate": 3.985080129094856e-05, "loss": 0.5952, "num_tokens": 730934730.0, "step": 954 }, { "epoch": 0.3499290065497183, "grad_norm": 0.2503939764227545, "learning_rate": 3.985025201402303e-05, "loss": 0.6125, "num_tokens": 731630728.0, "step": 955 }, { "epoch": 0.3502954243576238, "grad_norm": 0.26349913369292494, "learning_rate": 3.984970173209273e-05, "loss": 0.5997, "num_tokens": 732474565.0, "step": 956 }, { "epoch": 0.35066184216552926, "grad_norm": 0.2526514901459037, "learning_rate": 3.984915044518863e-05, "loss": 0.6244, "num_tokens": 733177505.0, "step": 957 }, { "epoch": 0.35102825997343473, "grad_norm": 0.2516033818555026, "learning_rate": 3.984859815334179e-05, "loss": 0.5789, "num_tokens": 733951861.0, "step": 958 }, { "epoch": 0.35139467778134015, "grad_norm": 0.23127747830316597, "learning_rate": 3.984804485658328e-05, "loss": 0.5857, "num_tokens": 734609301.0, "step": 959 }, { "epoch": 0.3517610955892456, "grad_norm": 0.22538276081204467, "learning_rate": 3.984749055494427e-05, "loss": 0.5602, "num_tokens": 735308943.0, "step": 960 }, { "epoch": 0.3521275133971511, "grad_norm": 0.2419607631618357, "learning_rate": 3.984693524845595e-05, "loss": 0.5687, "num_tokens": 735983653.0, "step": 961 }, { "epoch": 0.35249393120505657, "grad_norm": 0.2365870762362527, "learning_rate": 3.98463789371496e-05, "loss": 0.6041, "num_tokens": 736677012.0, "step": 962 }, { "epoch": 0.35286034901296204, "grad_norm": 0.2500405406661335, "learning_rate": 3.984582162105654e-05, "loss": 0.5747, "num_tokens": 737485632.0, "step": 963 }, { "epoch": 0.3532267668208675, "grad_norm": 0.23642891140937372, "learning_rate": 3.984526330020814e-05, "loss": 0.623, "num_tokens": 738375318.0, "step": 964 }, { "epoch": 0.353593184628773, "grad_norm": 0.211352931676921, "learning_rate": 3.984470397463585e-05, "loss": 0.5696, "num_tokens": 739110485.0, "step": 965 }, { "epoch": 0.3539596024366784, "grad_norm": 0.23257382767792842, "learning_rate": 3.984414364437114e-05, "loss": 0.6126, "num_tokens": 739846736.0, "step": 966 }, { "epoch": 0.3543260202445839, "grad_norm": 0.22343232995913667, "learning_rate": 3.9843582309445574e-05, "loss": 0.5598, "num_tokens": 740614687.0, "step": 967 }, { "epoch": 0.35469243805248934, "grad_norm": 0.22392847797444776, "learning_rate": 3.984301996989074e-05, "loss": 0.6231, "num_tokens": 741367282.0, "step": 968 }, { "epoch": 0.3550588558603948, "grad_norm": 0.22046029679980603, "learning_rate": 3.984245662573833e-05, "loss": 0.5825, "num_tokens": 742124174.0, "step": 969 }, { "epoch": 0.3554252736683003, "grad_norm": 0.2039421900490097, "learning_rate": 3.9841892277020025e-05, "loss": 0.5741, "num_tokens": 742926549.0, "step": 970 }, { "epoch": 0.35579169147620576, "grad_norm": 0.23123097901097261, "learning_rate": 3.984132692376762e-05, "loss": 0.5846, "num_tokens": 743736802.0, "step": 971 }, { "epoch": 0.35615810928411124, "grad_norm": 0.22557326428659596, "learning_rate": 3.984076056601294e-05, "loss": 0.6051, "num_tokens": 744522219.0, "step": 972 }, { "epoch": 0.35652452709201665, "grad_norm": 0.23468128023513746, "learning_rate": 3.984019320378787e-05, "loss": 0.6037, "num_tokens": 745241926.0, "step": 973 }, { "epoch": 0.3568909448999221, "grad_norm": 0.23210473591505093, "learning_rate": 3.9839624837124366e-05, "loss": 0.6152, "num_tokens": 745952663.0, "step": 974 }, { "epoch": 0.3572573627078276, "grad_norm": 0.26631679297251026, "learning_rate": 3.983905546605441e-05, "loss": 0.582, "num_tokens": 746809444.0, "step": 975 }, { "epoch": 0.35762378051573307, "grad_norm": 0.23056589347616827, "learning_rate": 3.983848509061007e-05, "loss": 0.6547, "num_tokens": 747582570.0, "step": 976 }, { "epoch": 0.35799019832363854, "grad_norm": 0.30350249547205094, "learning_rate": 3.983791371082345e-05, "loss": 0.5796, "num_tokens": 748363258.0, "step": 977 }, { "epoch": 0.358356616131544, "grad_norm": 0.2541677115377592, "learning_rate": 3.9837341326726735e-05, "loss": 0.6101, "num_tokens": 749032350.0, "step": 978 }, { "epoch": 0.35872303393944943, "grad_norm": 0.3107307576565951, "learning_rate": 3.983676793835214e-05, "loss": 0.6302, "num_tokens": 749797248.0, "step": 979 }, { "epoch": 0.3590894517473549, "grad_norm": 0.25882293219075364, "learning_rate": 3.983619354573195e-05, "loss": 0.5695, "num_tokens": 750428574.0, "step": 980 }, { "epoch": 0.3594558695552604, "grad_norm": 0.25181563417874375, "learning_rate": 3.98356181488985e-05, "loss": 0.5591, "num_tokens": 751282285.0, "step": 981 }, { "epoch": 0.35982228736316585, "grad_norm": 0.27172208572907197, "learning_rate": 3.98350417478842e-05, "loss": 0.5933, "num_tokens": 752042412.0, "step": 982 }, { "epoch": 0.3601887051710713, "grad_norm": 0.23856217914165864, "learning_rate": 3.983446434272149e-05, "loss": 0.5966, "num_tokens": 752752064.0, "step": 983 }, { "epoch": 0.3605551229789768, "grad_norm": 0.2989554028389679, "learning_rate": 3.983388593344288e-05, "loss": 0.5748, "num_tokens": 753507072.0, "step": 984 }, { "epoch": 0.36092154078688227, "grad_norm": 0.2389991367587711, "learning_rate": 3.983330652008095e-05, "loss": 0.5775, "num_tokens": 754359157.0, "step": 985 }, { "epoch": 0.3612879585947877, "grad_norm": 0.27446801533198073, "learning_rate": 3.9832726102668294e-05, "loss": 0.6171, "num_tokens": 755080215.0, "step": 986 }, { "epoch": 0.36165437640269316, "grad_norm": 0.24427871843286794, "learning_rate": 3.983214468123761e-05, "loss": 0.5608, "num_tokens": 755982637.0, "step": 987 }, { "epoch": 0.36202079421059863, "grad_norm": 0.25852149316947304, "learning_rate": 3.983156225582164e-05, "loss": 0.6132, "num_tokens": 756711836.0, "step": 988 }, { "epoch": 0.3623872120185041, "grad_norm": 0.24595930720866113, "learning_rate": 3.9830978826453155e-05, "loss": 0.5724, "num_tokens": 757525176.0, "step": 989 }, { "epoch": 0.3627536298264096, "grad_norm": 0.2745819977527985, "learning_rate": 3.983039439316502e-05, "loss": 0.5865, "num_tokens": 758292553.0, "step": 990 }, { "epoch": 0.36312004763431505, "grad_norm": 0.22977894620504255, "learning_rate": 3.9829808955990134e-05, "loss": 0.555, "num_tokens": 759028717.0, "step": 991 }, { "epoch": 0.3634864654422205, "grad_norm": 0.3086240766772941, "learning_rate": 3.9829222514961454e-05, "loss": 0.5888, "num_tokens": 759813120.0, "step": 992 }, { "epoch": 0.36385288325012594, "grad_norm": 0.31745693677329534, "learning_rate": 3.9828635070112e-05, "loss": 0.6048, "num_tokens": 760604716.0, "step": 993 }, { "epoch": 0.3642193010580314, "grad_norm": 0.21401377210330538, "learning_rate": 3.982804662147486e-05, "loss": 0.5848, "num_tokens": 761430418.0, "step": 994 }, { "epoch": 0.3645857188659369, "grad_norm": 0.2341870067986135, "learning_rate": 3.9827457169083145e-05, "loss": 0.5873, "num_tokens": 762194788.0, "step": 995 }, { "epoch": 0.36495213667384235, "grad_norm": 0.21322414385790542, "learning_rate": 3.9826866712970044e-05, "loss": 0.596, "num_tokens": 762826937.0, "step": 996 }, { "epoch": 0.3653185544817478, "grad_norm": 0.2630423509092928, "learning_rate": 3.982627525316882e-05, "loss": 0.6313, "num_tokens": 763583776.0, "step": 997 }, { "epoch": 0.3656849722896533, "grad_norm": 0.22350815619999542, "learning_rate": 3.9825682789712755e-05, "loss": 0.5902, "num_tokens": 764336927.0, "step": 998 }, { "epoch": 0.3660513900975587, "grad_norm": 0.23874566585057513, "learning_rate": 3.9825089322635205e-05, "loss": 0.5654, "num_tokens": 765092506.0, "step": 999 }, { "epoch": 0.3664178079054642, "grad_norm": 0.2301139169540737, "learning_rate": 3.98244948519696e-05, "loss": 0.5751, "num_tokens": 765745155.0, "step": 1000 }, { "epoch": 0.36678422571336966, "grad_norm": 0.24442523075849737, "learning_rate": 3.98238993777494e-05, "loss": 0.5889, "num_tokens": 766452510.0, "step": 1001 }, { "epoch": 0.36715064352127513, "grad_norm": 0.2471979212182133, "learning_rate": 3.982330290000813e-05, "loss": 0.5664, "num_tokens": 767183538.0, "step": 1002 }, { "epoch": 0.3675170613291806, "grad_norm": 0.2439652400983779, "learning_rate": 3.9822705418779365e-05, "loss": 0.6297, "num_tokens": 767945767.0, "step": 1003 }, { "epoch": 0.3678834791370861, "grad_norm": 0.23673240346180438, "learning_rate": 3.982210693409676e-05, "loss": 0.6468, "num_tokens": 768623800.0, "step": 1004 }, { "epoch": 0.36824989694499155, "grad_norm": 0.2337879060616789, "learning_rate": 3.982150744599401e-05, "loss": 0.621, "num_tokens": 769327150.0, "step": 1005 }, { "epoch": 0.36861631475289697, "grad_norm": 0.21669487768521617, "learning_rate": 3.982090695450485e-05, "loss": 0.5732, "num_tokens": 770019499.0, "step": 1006 }, { "epoch": 0.36898273256080244, "grad_norm": 0.24029024895302353, "learning_rate": 3.9820305459663106e-05, "loss": 0.6112, "num_tokens": 770831908.0, "step": 1007 }, { "epoch": 0.3693491503687079, "grad_norm": 0.24243490433793108, "learning_rate": 3.981970296150264e-05, "loss": 0.6113, "num_tokens": 771509268.0, "step": 1008 }, { "epoch": 0.3697155681766134, "grad_norm": 0.2738837168976193, "learning_rate": 3.981909946005737e-05, "loss": 0.6243, "num_tokens": 772111791.0, "step": 1009 }, { "epoch": 0.37008198598451886, "grad_norm": 0.23161817100954307, "learning_rate": 3.9818494955361274e-05, "loss": 0.5922, "num_tokens": 772941909.0, "step": 1010 }, { "epoch": 0.37044840379242433, "grad_norm": 0.21487162235608778, "learning_rate": 3.9817889447448394e-05, "loss": 0.5832, "num_tokens": 773666512.0, "step": 1011 }, { "epoch": 0.3708148216003298, "grad_norm": 0.22910574223485397, "learning_rate": 3.9817282936352815e-05, "loss": 0.5715, "num_tokens": 774388398.0, "step": 1012 }, { "epoch": 0.3711812394082352, "grad_norm": 0.25742807124578077, "learning_rate": 3.981667542210868e-05, "loss": 0.6017, "num_tokens": 775144672.0, "step": 1013 }, { "epoch": 0.3715476572161407, "grad_norm": 0.23552109949762626, "learning_rate": 3.9816066904750206e-05, "loss": 0.594, "num_tokens": 775828314.0, "step": 1014 }, { "epoch": 0.37191407502404616, "grad_norm": 0.20798982610514072, "learning_rate": 3.981545738431164e-05, "loss": 0.6061, "num_tokens": 776625487.0, "step": 1015 }, { "epoch": 0.37228049283195164, "grad_norm": 0.24172293473647363, "learning_rate": 3.9814846860827315e-05, "loss": 0.597, "num_tokens": 777293627.0, "step": 1016 }, { "epoch": 0.3726469106398571, "grad_norm": 0.24156054711675387, "learning_rate": 3.981423533433159e-05, "loss": 0.6131, "num_tokens": 778103422.0, "step": 1017 }, { "epoch": 0.3730133284477626, "grad_norm": 0.2357356570471783, "learning_rate": 3.98136228048589e-05, "loss": 0.617, "num_tokens": 778778862.0, "step": 1018 }, { "epoch": 0.373379746255668, "grad_norm": 0.2613597316372281, "learning_rate": 3.981300927244374e-05, "loss": 0.5891, "num_tokens": 779449205.0, "step": 1019 }, { "epoch": 0.37374616406357347, "grad_norm": 0.21049753074744343, "learning_rate": 3.981239473712064e-05, "loss": 0.5781, "num_tokens": 780148879.0, "step": 1020 }, { "epoch": 0.37411258187147894, "grad_norm": 0.25720388224154983, "learning_rate": 3.9811779198924203e-05, "loss": 0.6022, "num_tokens": 780873263.0, "step": 1021 }, { "epoch": 0.3744789996793844, "grad_norm": 0.2086017131255923, "learning_rate": 3.98111626578891e-05, "loss": 0.5695, "num_tokens": 781793656.0, "step": 1022 }, { "epoch": 0.3748454174872899, "grad_norm": 0.22226092188380173, "learning_rate": 3.9810545114050016e-05, "loss": 0.5812, "num_tokens": 782533232.0, "step": 1023 }, { "epoch": 0.37521183529519536, "grad_norm": 0.23317090419549624, "learning_rate": 3.980992656744174e-05, "loss": 0.5772, "num_tokens": 783248474.0, "step": 1024 }, { "epoch": 0.37557825310310083, "grad_norm": 0.1958575828948748, "learning_rate": 3.980930701809909e-05, "loss": 0.5902, "num_tokens": 783989405.0, "step": 1025 }, { "epoch": 0.37594467091100625, "grad_norm": 0.23778451871450598, "learning_rate": 3.980868646605696e-05, "loss": 0.5931, "num_tokens": 784758506.0, "step": 1026 }, { "epoch": 0.3763110887189117, "grad_norm": 0.21251927166655804, "learning_rate": 3.980806491135027e-05, "loss": 0.5931, "num_tokens": 785599444.0, "step": 1027 }, { "epoch": 0.3766775065268172, "grad_norm": 0.2310286182819022, "learning_rate": 3.980744235401402e-05, "loss": 0.5798, "num_tokens": 786389758.0, "step": 1028 }, { "epoch": 0.37704392433472267, "grad_norm": 0.21131177793739747, "learning_rate": 3.9806818794083273e-05, "loss": 0.5711, "num_tokens": 787181743.0, "step": 1029 }, { "epoch": 0.37741034214262814, "grad_norm": 0.22508710141658306, "learning_rate": 3.980619423159312e-05, "loss": 0.6002, "num_tokens": 787926989.0, "step": 1030 }, { "epoch": 0.3777767599505336, "grad_norm": 0.21505938599305505, "learning_rate": 3.980556866657874e-05, "loss": 0.603, "num_tokens": 788747016.0, "step": 1031 }, { "epoch": 0.3781431777584391, "grad_norm": 0.2793602712911407, "learning_rate": 3.980494209907534e-05, "loss": 0.616, "num_tokens": 789388121.0, "step": 1032 }, { "epoch": 0.3785095955663445, "grad_norm": 0.269223066821486, "learning_rate": 3.980431452911821e-05, "loss": 0.5999, "num_tokens": 790156538.0, "step": 1033 }, { "epoch": 0.37887601337425, "grad_norm": 0.23956475523366216, "learning_rate": 3.980368595674268e-05, "loss": 0.574, "num_tokens": 790921335.0, "step": 1034 }, { "epoch": 0.37924243118215545, "grad_norm": 0.3384286736315524, "learning_rate": 3.980305638198413e-05, "loss": 0.5842, "num_tokens": 791702386.0, "step": 1035 }, { "epoch": 0.3796088489900609, "grad_norm": 0.20773763603885628, "learning_rate": 3.9802425804878013e-05, "loss": 0.5572, "num_tokens": 792495236.0, "step": 1036 }, { "epoch": 0.3799752667979664, "grad_norm": 0.3217462542616783, "learning_rate": 3.9801794225459834e-05, "loss": 0.6086, "num_tokens": 793289129.0, "step": 1037 }, { "epoch": 0.38034168460587187, "grad_norm": 0.32179098526406097, "learning_rate": 3.980116164376515e-05, "loss": 0.6032, "num_tokens": 794089469.0, "step": 1038 }, { "epoch": 0.3807081024137773, "grad_norm": 0.22983293596602478, "learning_rate": 3.980052805982958e-05, "loss": 0.5836, "num_tokens": 794858566.0, "step": 1039 }, { "epoch": 0.38107452022168276, "grad_norm": 0.22423849474643034, "learning_rate": 3.979989347368879e-05, "loss": 0.5615, "num_tokens": 795720736.0, "step": 1040 }, { "epoch": 0.38144093802958823, "grad_norm": 0.23721686530219058, "learning_rate": 3.9799257885378514e-05, "loss": 0.5742, "num_tokens": 796502986.0, "step": 1041 }, { "epoch": 0.3818073558374937, "grad_norm": 0.20097884525062848, "learning_rate": 3.979862129493453e-05, "loss": 0.5781, "num_tokens": 797353811.0, "step": 1042 }, { "epoch": 0.3821737736453992, "grad_norm": 0.2271876840794155, "learning_rate": 3.979798370239269e-05, "loss": 0.6277, "num_tokens": 798060427.0, "step": 1043 }, { "epoch": 0.38254019145330465, "grad_norm": 0.23998499416913, "learning_rate": 3.9797345107788875e-05, "loss": 0.5843, "num_tokens": 798748125.0, "step": 1044 }, { "epoch": 0.3829066092612101, "grad_norm": 0.21120557695185804, "learning_rate": 3.979670551115906e-05, "loss": 0.6149, "num_tokens": 799465759.0, "step": 1045 }, { "epoch": 0.38327302706911553, "grad_norm": 0.2305661559468616, "learning_rate": 3.979606491253924e-05, "loss": 0.5481, "num_tokens": 800144888.0, "step": 1046 }, { "epoch": 0.383639444877021, "grad_norm": 0.23708813536586293, "learning_rate": 3.9795423311965485e-05, "loss": 0.5816, "num_tokens": 800857344.0, "step": 1047 }, { "epoch": 0.3840058626849265, "grad_norm": 0.22794757188272544, "learning_rate": 3.979478070947393e-05, "loss": 0.654, "num_tokens": 801615700.0, "step": 1048 }, { "epoch": 0.38437228049283195, "grad_norm": 0.25386330296698684, "learning_rate": 3.9794137105100736e-05, "loss": 0.5959, "num_tokens": 802464625.0, "step": 1049 }, { "epoch": 0.3847386983007374, "grad_norm": 0.2501834804477957, "learning_rate": 3.979349249888215e-05, "loss": 0.6187, "num_tokens": 803181414.0, "step": 1050 }, { "epoch": 0.3851051161086429, "grad_norm": 0.22790498083554744, "learning_rate": 3.979284689085446e-05, "loss": 0.5908, "num_tokens": 803959305.0, "step": 1051 }, { "epoch": 0.38547153391654837, "grad_norm": 0.2482709802819294, "learning_rate": 3.9792200281054024e-05, "loss": 0.6016, "num_tokens": 804757623.0, "step": 1052 }, { "epoch": 0.3858379517244538, "grad_norm": 0.22348910972783298, "learning_rate": 3.979155266951724e-05, "loss": 0.6, "num_tokens": 805541157.0, "step": 1053 }, { "epoch": 0.38620436953235926, "grad_norm": 0.23701981829722982, "learning_rate": 3.9790904056280565e-05, "loss": 0.5984, "num_tokens": 806285735.0, "step": 1054 }, { "epoch": 0.38657078734026473, "grad_norm": 0.22014764442614146, "learning_rate": 3.979025444138053e-05, "loss": 0.6113, "num_tokens": 807013983.0, "step": 1055 }, { "epoch": 0.3869372051481702, "grad_norm": 0.22781520495954455, "learning_rate": 3.9789603824853706e-05, "loss": 0.5979, "num_tokens": 807820075.0, "step": 1056 }, { "epoch": 0.3873036229560757, "grad_norm": 0.22924235743152746, "learning_rate": 3.9788952206736724e-05, "loss": 0.5581, "num_tokens": 808620565.0, "step": 1057 }, { "epoch": 0.38767004076398115, "grad_norm": 0.2570981247577598, "learning_rate": 3.9788299587066254e-05, "loss": 0.6077, "num_tokens": 809278432.0, "step": 1058 }, { "epoch": 0.38803645857188657, "grad_norm": 0.2643023028837367, "learning_rate": 3.9787645965879064e-05, "loss": 0.5947, "num_tokens": 809903156.0, "step": 1059 }, { "epoch": 0.38840287637979204, "grad_norm": 0.2162927079050794, "learning_rate": 3.978699134321194e-05, "loss": 0.5964, "num_tokens": 810583492.0, "step": 1060 }, { "epoch": 0.3887692941876975, "grad_norm": 0.22566700889869945, "learning_rate": 3.978633571910175e-05, "loss": 0.5861, "num_tokens": 811282564.0, "step": 1061 }, { "epoch": 0.389135711995603, "grad_norm": 0.23988209048505957, "learning_rate": 3.97856790935854e-05, "loss": 0.6156, "num_tokens": 812016412.0, "step": 1062 }, { "epoch": 0.38950212980350846, "grad_norm": 0.2272281553420197, "learning_rate": 3.978502146669985e-05, "loss": 0.6044, "num_tokens": 812758017.0, "step": 1063 }, { "epoch": 0.38986854761141393, "grad_norm": 0.22115466613811358, "learning_rate": 3.9784362838482144e-05, "loss": 0.5848, "num_tokens": 813463530.0, "step": 1064 }, { "epoch": 0.3902349654193194, "grad_norm": 0.19885787160913862, "learning_rate": 3.978370320896935e-05, "loss": 0.5651, "num_tokens": 814268414.0, "step": 1065 }, { "epoch": 0.3906013832272248, "grad_norm": 0.3208603427015829, "learning_rate": 3.978304257819861e-05, "loss": 0.5864, "num_tokens": 815126021.0, "step": 1066 }, { "epoch": 0.3909678010351303, "grad_norm": 0.23242361886035137, "learning_rate": 3.978238094620713e-05, "loss": 0.5915, "num_tokens": 815924771.0, "step": 1067 }, { "epoch": 0.39133421884303576, "grad_norm": 0.20380932058652546, "learning_rate": 3.978171831303214e-05, "loss": 0.6078, "num_tokens": 816641029.0, "step": 1068 }, { "epoch": 0.39170063665094124, "grad_norm": 0.230187254776345, "learning_rate": 3.9781054678710965e-05, "loss": 0.5972, "num_tokens": 817427968.0, "step": 1069 }, { "epoch": 0.3920670544588467, "grad_norm": 0.1923459161580615, "learning_rate": 3.978039004328097e-05, "loss": 0.6409, "num_tokens": 818163411.0, "step": 1070 }, { "epoch": 0.3924334722667522, "grad_norm": 0.2172338926880452, "learning_rate": 3.977972440677956e-05, "loss": 0.581, "num_tokens": 818906841.0, "step": 1071 }, { "epoch": 0.39279989007465765, "grad_norm": 0.21026779106326832, "learning_rate": 3.9779057769244215e-05, "loss": 0.574, "num_tokens": 819702863.0, "step": 1072 }, { "epoch": 0.39316630788256307, "grad_norm": 0.23933681179144922, "learning_rate": 3.977839013071248e-05, "loss": 0.6527, "num_tokens": 820478145.0, "step": 1073 }, { "epoch": 0.39353272569046854, "grad_norm": 0.2284534458897897, "learning_rate": 3.977772149122194e-05, "loss": 0.5723, "num_tokens": 821253576.0, "step": 1074 }, { "epoch": 0.393899143498374, "grad_norm": 0.2067083331985065, "learning_rate": 3.9777051850810235e-05, "loss": 0.5839, "num_tokens": 822087001.0, "step": 1075 }, { "epoch": 0.3942655613062795, "grad_norm": 0.25140251629119775, "learning_rate": 3.9776381209515065e-05, "loss": 0.5923, "num_tokens": 822892340.0, "step": 1076 }, { "epoch": 0.39463197911418496, "grad_norm": 0.20788885363364518, "learning_rate": 3.97757095673742e-05, "loss": 0.599, "num_tokens": 823581961.0, "step": 1077 }, { "epoch": 0.39499839692209043, "grad_norm": 0.27903003905149437, "learning_rate": 3.9775036924425456e-05, "loss": 0.6086, "num_tokens": 824372125.0, "step": 1078 }, { "epoch": 0.39536481472999585, "grad_norm": 0.22804498591351846, "learning_rate": 3.977436328070669e-05, "loss": 0.5955, "num_tokens": 825101960.0, "step": 1079 }, { "epoch": 0.3957312325379013, "grad_norm": 0.22217233396715674, "learning_rate": 3.9773688636255834e-05, "loss": 0.5785, "num_tokens": 825823846.0, "step": 1080 }, { "epoch": 0.3960976503458068, "grad_norm": 0.23139320283984904, "learning_rate": 3.977301299111087e-05, "loss": 0.5968, "num_tokens": 826549043.0, "step": 1081 }, { "epoch": 0.39646406815371227, "grad_norm": 0.20106006041651184, "learning_rate": 3.977233634530985e-05, "loss": 0.5716, "num_tokens": 827331264.0, "step": 1082 }, { "epoch": 0.39683048596161774, "grad_norm": 0.2205291095745299, "learning_rate": 3.977165869889085e-05, "loss": 0.571, "num_tokens": 828048844.0, "step": 1083 }, { "epoch": 0.3971969037695232, "grad_norm": 0.23043001885571376, "learning_rate": 3.9770980051892045e-05, "loss": 0.6159, "num_tokens": 828770665.0, "step": 1084 }, { "epoch": 0.3975633215774287, "grad_norm": 0.22774395175909481, "learning_rate": 3.977030040435164e-05, "loss": 0.5922, "num_tokens": 829552775.0, "step": 1085 }, { "epoch": 0.3979297393853341, "grad_norm": 0.23352769868013482, "learning_rate": 3.976961975630789e-05, "loss": 0.5867, "num_tokens": 830428199.0, "step": 1086 }, { "epoch": 0.3982961571932396, "grad_norm": 0.2913698704998876, "learning_rate": 3.976893810779912e-05, "loss": 0.5983, "num_tokens": 831145064.0, "step": 1087 }, { "epoch": 0.39866257500114505, "grad_norm": 0.25227013808974147, "learning_rate": 3.9768255458863714e-05, "loss": 0.5936, "num_tokens": 831779031.0, "step": 1088 }, { "epoch": 0.3990289928090505, "grad_norm": 0.23649131030157822, "learning_rate": 3.9767571809540094e-05, "loss": 0.5697, "num_tokens": 832526773.0, "step": 1089 }, { "epoch": 0.399395410616956, "grad_norm": 0.249805803295602, "learning_rate": 3.9766887159866774e-05, "loss": 0.6037, "num_tokens": 833315453.0, "step": 1090 }, { "epoch": 0.39976182842486147, "grad_norm": 0.23746136418815708, "learning_rate": 3.976620150988228e-05, "loss": 0.545, "num_tokens": 834123697.0, "step": 1091 }, { "epoch": 0.40012824623276694, "grad_norm": 0.20125865029023426, "learning_rate": 3.976551485962522e-05, "loss": 0.579, "num_tokens": 834941333.0, "step": 1092 }, { "epoch": 0.40049466404067235, "grad_norm": 0.21649107594339903, "learning_rate": 3.9764827209134254e-05, "loss": 0.6098, "num_tokens": 835708888.0, "step": 1093 }, { "epoch": 0.4008610818485778, "grad_norm": 0.2045903437156318, "learning_rate": 3.976413855844811e-05, "loss": 0.5665, "num_tokens": 836423056.0, "step": 1094 }, { "epoch": 0.4012274996564833, "grad_norm": 0.23383781338585083, "learning_rate": 3.976344890760554e-05, "loss": 0.614, "num_tokens": 837195727.0, "step": 1095 }, { "epoch": 0.40159391746438877, "grad_norm": 0.20393418246109238, "learning_rate": 3.976275825664539e-05, "loss": 0.5665, "num_tokens": 838013896.0, "step": 1096 }, { "epoch": 0.40196033527229424, "grad_norm": 0.22865418462661383, "learning_rate": 3.976206660560653e-05, "loss": 0.5998, "num_tokens": 838807976.0, "step": 1097 }, { "epoch": 0.4023267530801997, "grad_norm": 0.25122643853152593, "learning_rate": 3.9761373954527914e-05, "loss": 0.589, "num_tokens": 839511190.0, "step": 1098 }, { "epoch": 0.40269317088810513, "grad_norm": 0.2246870495170528, "learning_rate": 3.9760680303448536e-05, "loss": 0.5962, "num_tokens": 840263658.0, "step": 1099 }, { "epoch": 0.4030595886960106, "grad_norm": 0.23783886087773384, "learning_rate": 3.975998565240745e-05, "loss": 0.5942, "num_tokens": 841058012.0, "step": 1100 }, { "epoch": 0.4034260065039161, "grad_norm": 0.26109202561197686, "learning_rate": 3.975929000144376e-05, "loss": 0.6024, "num_tokens": 841879639.0, "step": 1101 }, { "epoch": 0.40379242431182155, "grad_norm": 0.19436197521173315, "learning_rate": 3.9758593350596644e-05, "loss": 0.5897, "num_tokens": 842668715.0, "step": 1102 }, { "epoch": 0.404158842119727, "grad_norm": 0.2936150523602037, "learning_rate": 3.975789569990531e-05, "loss": 0.6359, "num_tokens": 843436632.0, "step": 1103 }, { "epoch": 0.4045252599276325, "grad_norm": 0.25029370551731056, "learning_rate": 3.9757197049409056e-05, "loss": 0.604, "num_tokens": 844149201.0, "step": 1104 }, { "epoch": 0.40489167773553797, "grad_norm": 0.20507135406606303, "learning_rate": 3.975649739914721e-05, "loss": 0.5991, "num_tokens": 844970460.0, "step": 1105 }, { "epoch": 0.4052580955434434, "grad_norm": 0.2898955281088899, "learning_rate": 3.975579674915915e-05, "loss": 0.6008, "num_tokens": 845546029.0, "step": 1106 }, { "epoch": 0.40562451335134886, "grad_norm": 0.24827470557856135, "learning_rate": 3.975509509948434e-05, "loss": 0.6097, "num_tokens": 846347758.0, "step": 1107 }, { "epoch": 0.40599093115925433, "grad_norm": 0.20198844259534712, "learning_rate": 3.975439245016228e-05, "loss": 0.5784, "num_tokens": 847052993.0, "step": 1108 }, { "epoch": 0.4063573489671598, "grad_norm": 0.21935262042142753, "learning_rate": 3.9753688801232526e-05, "loss": 0.6087, "num_tokens": 847818334.0, "step": 1109 }, { "epoch": 0.4067237667750653, "grad_norm": 0.20083644913675275, "learning_rate": 3.9752984152734704e-05, "loss": 0.5985, "num_tokens": 848600849.0, "step": 1110 }, { "epoch": 0.40709018458297075, "grad_norm": 0.24359387630805077, "learning_rate": 3.975227850470848e-05, "loss": 0.574, "num_tokens": 849445493.0, "step": 1111 }, { "epoch": 0.4074566023908762, "grad_norm": 0.20647537199330027, "learning_rate": 3.9751571857193576e-05, "loss": 0.5536, "num_tokens": 850325283.0, "step": 1112 }, { "epoch": 0.40782302019878164, "grad_norm": 0.23313865808724188, "learning_rate": 3.97508642102298e-05, "loss": 0.5723, "num_tokens": 851062965.0, "step": 1113 }, { "epoch": 0.4081894380066871, "grad_norm": 0.2142354661863076, "learning_rate": 3.975015556385697e-05, "loss": 0.5523, "num_tokens": 851869633.0, "step": 1114 }, { "epoch": 0.4085558558145926, "grad_norm": 0.2130381985679391, "learning_rate": 3.9749445918115e-05, "loss": 0.6085, "num_tokens": 852622058.0, "step": 1115 }, { "epoch": 0.40892227362249806, "grad_norm": 0.26848524852421685, "learning_rate": 3.974873527304384e-05, "loss": 0.5961, "num_tokens": 853293940.0, "step": 1116 }, { "epoch": 0.40928869143040353, "grad_norm": 0.2129175675847541, "learning_rate": 3.9748023628683496e-05, "loss": 0.5894, "num_tokens": 853953151.0, "step": 1117 }, { "epoch": 0.409655109238309, "grad_norm": 0.23125790412775138, "learning_rate": 3.974731098507405e-05, "loss": 0.5756, "num_tokens": 854713480.0, "step": 1118 }, { "epoch": 0.4100215270462145, "grad_norm": 0.27220914951063585, "learning_rate": 3.9746597342255604e-05, "loss": 0.6332, "num_tokens": 855497140.0, "step": 1119 }, { "epoch": 0.4103879448541199, "grad_norm": 0.21796696637206586, "learning_rate": 3.9745882700268354e-05, "loss": 0.5594, "num_tokens": 856273288.0, "step": 1120 }, { "epoch": 0.41075436266202536, "grad_norm": 0.24016107719600008, "learning_rate": 3.974516705915252e-05, "loss": 0.6151, "num_tokens": 857033142.0, "step": 1121 }, { "epoch": 0.41112078046993084, "grad_norm": 0.23033161963882304, "learning_rate": 3.974445041894841e-05, "loss": 0.589, "num_tokens": 857782844.0, "step": 1122 }, { "epoch": 0.4114871982778363, "grad_norm": 0.20303800492044746, "learning_rate": 3.974373277969637e-05, "loss": 0.5964, "num_tokens": 858525856.0, "step": 1123 }, { "epoch": 0.4118536160857418, "grad_norm": 0.21635713878550597, "learning_rate": 3.9743014141436796e-05, "loss": 0.5699, "num_tokens": 859346637.0, "step": 1124 }, { "epoch": 0.41222003389364725, "grad_norm": 0.22988621137626633, "learning_rate": 3.9742294504210156e-05, "loss": 0.5924, "num_tokens": 860106084.0, "step": 1125 }, { "epoch": 0.41258645170155267, "grad_norm": 0.23950849544162278, "learning_rate": 3.9741573868056954e-05, "loss": 0.6066, "num_tokens": 860750356.0, "step": 1126 }, { "epoch": 0.41295286950945814, "grad_norm": 0.26557681808181266, "learning_rate": 3.974085223301779e-05, "loss": 0.5662, "num_tokens": 861370835.0, "step": 1127 }, { "epoch": 0.4133192873173636, "grad_norm": 0.2352371041880119, "learning_rate": 3.974012959913326e-05, "loss": 0.616, "num_tokens": 862136325.0, "step": 1128 }, { "epoch": 0.4136857051252691, "grad_norm": 0.24469737944131376, "learning_rate": 3.973940596644408e-05, "loss": 0.5722, "num_tokens": 862954180.0, "step": 1129 }, { "epoch": 0.41405212293317456, "grad_norm": 0.24770965166165465, "learning_rate": 3.9738681334990974e-05, "loss": 0.577, "num_tokens": 863665795.0, "step": 1130 }, { "epoch": 0.41441854074108003, "grad_norm": 0.24158375885748248, "learning_rate": 3.973795570481474e-05, "loss": 0.5776, "num_tokens": 864436682.0, "step": 1131 }, { "epoch": 0.4147849585489855, "grad_norm": 0.27266045800217653, "learning_rate": 3.9737229075956245e-05, "loss": 0.5717, "num_tokens": 865141791.0, "step": 1132 }, { "epoch": 0.4151513763568909, "grad_norm": 0.2332977266147024, "learning_rate": 3.973650144845639e-05, "loss": 0.5881, "num_tokens": 865982651.0, "step": 1133 }, { "epoch": 0.4155177941647964, "grad_norm": 0.22573917738288624, "learning_rate": 3.973577282235614e-05, "loss": 0.5964, "num_tokens": 866762560.0, "step": 1134 }, { "epoch": 0.41588421197270187, "grad_norm": 0.20839638322895868, "learning_rate": 3.9735043197696523e-05, "loss": 0.5836, "num_tokens": 867481476.0, "step": 1135 }, { "epoch": 0.41625062978060734, "grad_norm": 0.23711325451428938, "learning_rate": 3.9734312574518626e-05, "loss": 0.5747, "num_tokens": 868190771.0, "step": 1136 }, { "epoch": 0.4166170475885128, "grad_norm": 0.21560157466186008, "learning_rate": 3.973358095286357e-05, "loss": 0.6137, "num_tokens": 869036625.0, "step": 1137 }, { "epoch": 0.4169834653964183, "grad_norm": 0.19985931737008772, "learning_rate": 3.9732848332772545e-05, "loss": 0.6467, "num_tokens": 869739276.0, "step": 1138 }, { "epoch": 0.41734988320432376, "grad_norm": 0.22871955930985846, "learning_rate": 3.9732114714286815e-05, "loss": 0.5873, "num_tokens": 870437277.0, "step": 1139 }, { "epoch": 0.4177163010122292, "grad_norm": 0.20538262716424532, "learning_rate": 3.973138009744767e-05, "loss": 0.5812, "num_tokens": 871248489.0, "step": 1140 }, { "epoch": 0.41808271882013465, "grad_norm": 0.1990580352903205, "learning_rate": 3.9730644482296477e-05, "loss": 0.5543, "num_tokens": 871990909.0, "step": 1141 }, { "epoch": 0.4184491366280401, "grad_norm": 0.20111292713571205, "learning_rate": 3.9729907868874654e-05, "loss": 0.5394, "num_tokens": 872627326.0, "step": 1142 }, { "epoch": 0.4188155544359456, "grad_norm": 0.20013815299025894, "learning_rate": 3.9729170257223676e-05, "loss": 0.5654, "num_tokens": 873483381.0, "step": 1143 }, { "epoch": 0.41918197224385106, "grad_norm": 0.21271669940593896, "learning_rate": 3.972843164738506e-05, "loss": 0.6083, "num_tokens": 874279829.0, "step": 1144 }, { "epoch": 0.41954839005175654, "grad_norm": 0.2107170996044029, "learning_rate": 3.97276920394004e-05, "loss": 0.5616, "num_tokens": 875063067.0, "step": 1145 }, { "epoch": 0.41991480785966195, "grad_norm": 0.19274553571817088, "learning_rate": 3.972695143331134e-05, "loss": 0.5839, "num_tokens": 875892821.0, "step": 1146 }, { "epoch": 0.4202812256675674, "grad_norm": 0.20440956541055008, "learning_rate": 3.972620982915957e-05, "loss": 0.5877, "num_tokens": 876679233.0, "step": 1147 }, { "epoch": 0.4206476434754729, "grad_norm": 0.2298187324105921, "learning_rate": 3.972546722698685e-05, "loss": 0.6076, "num_tokens": 877317763.0, "step": 1148 }, { "epoch": 0.42101406128337837, "grad_norm": 0.22435337052217075, "learning_rate": 3.972472362683498e-05, "loss": 0.6157, "num_tokens": 877982626.0, "step": 1149 }, { "epoch": 0.42138047909128384, "grad_norm": 0.19562450094569922, "learning_rate": 3.972397902874584e-05, "loss": 0.6015, "num_tokens": 878674879.0, "step": 1150 }, { "epoch": 0.4217468968991893, "grad_norm": 0.19398366120976102, "learning_rate": 3.9723233432761353e-05, "loss": 0.5722, "num_tokens": 879529409.0, "step": 1151 }, { "epoch": 0.4221133147070948, "grad_norm": 0.2066509652877605, "learning_rate": 3.972248683892349e-05, "loss": 0.6034, "num_tokens": 880304291.0, "step": 1152 }, { "epoch": 0.4224797325150002, "grad_norm": 0.2324662901957373, "learning_rate": 3.972173924727427e-05, "loss": 0.6208, "num_tokens": 881109286.0, "step": 1153 }, { "epoch": 0.4228461503229057, "grad_norm": 0.19908487520018955, "learning_rate": 3.972099065785582e-05, "loss": 0.5592, "num_tokens": 881960610.0, "step": 1154 }, { "epoch": 0.42321256813081115, "grad_norm": 0.22005906978006812, "learning_rate": 3.972024107071025e-05, "loss": 0.6266, "num_tokens": 882714373.0, "step": 1155 }, { "epoch": 0.4235789859387166, "grad_norm": 0.25501775472688726, "learning_rate": 3.971949048587979e-05, "loss": 0.6292, "num_tokens": 883422222.0, "step": 1156 }, { "epoch": 0.4239454037466221, "grad_norm": 0.21090071849593964, "learning_rate": 3.971873890340668e-05, "loss": 0.5867, "num_tokens": 884177446.0, "step": 1157 }, { "epoch": 0.42431182155452757, "grad_norm": 0.21118579572388166, "learning_rate": 3.9717986323333255e-05, "loss": 0.5828, "num_tokens": 885116211.0, "step": 1158 }, { "epoch": 0.42467823936243304, "grad_norm": 0.20998112849415768, "learning_rate": 3.9717232745701875e-05, "loss": 0.6035, "num_tokens": 886012520.0, "step": 1159 }, { "epoch": 0.42504465717033846, "grad_norm": 0.21173161702535642, "learning_rate": 3.9716478170554974e-05, "loss": 0.6246, "num_tokens": 886792769.0, "step": 1160 }, { "epoch": 0.42541107497824393, "grad_norm": 0.2690918127989715, "learning_rate": 3.971572259793503e-05, "loss": 0.5922, "num_tokens": 887468309.0, "step": 1161 }, { "epoch": 0.4257774927861494, "grad_norm": 0.20588111560608785, "learning_rate": 3.9714966027884584e-05, "loss": 0.6016, "num_tokens": 888166882.0, "step": 1162 }, { "epoch": 0.4261439105940549, "grad_norm": 0.23318211906273867, "learning_rate": 3.971420846044623e-05, "loss": 0.61, "num_tokens": 888944184.0, "step": 1163 }, { "epoch": 0.42651032840196035, "grad_norm": 0.20751109062776496, "learning_rate": 3.971344989566263e-05, "loss": 0.6137, "num_tokens": 889686874.0, "step": 1164 }, { "epoch": 0.4268767462098658, "grad_norm": 0.2143089105466022, "learning_rate": 3.971269033357649e-05, "loss": 0.6141, "num_tokens": 890534168.0, "step": 1165 }, { "epoch": 0.42724316401777124, "grad_norm": 0.21265840660923577, "learning_rate": 3.971192977423057e-05, "loss": 0.6063, "num_tokens": 891317345.0, "step": 1166 }, { "epoch": 0.4276095818256767, "grad_norm": 0.2016644183388641, "learning_rate": 3.971116821766769e-05, "loss": 0.579, "num_tokens": 892160687.0, "step": 1167 }, { "epoch": 0.4279759996335822, "grad_norm": 0.20341383740498395, "learning_rate": 3.971040566393073e-05, "loss": 0.5687, "num_tokens": 893055113.0, "step": 1168 }, { "epoch": 0.42834241744148766, "grad_norm": 0.2218418376871593, "learning_rate": 3.970964211306263e-05, "loss": 0.5664, "num_tokens": 893826174.0, "step": 1169 }, { "epoch": 0.4287088352493931, "grad_norm": 0.19328278327924966, "learning_rate": 3.970887756510636e-05, "loss": 0.6046, "num_tokens": 894483188.0, "step": 1170 }, { "epoch": 0.4290752530572986, "grad_norm": 0.20012415312508935, "learning_rate": 3.970811202010499e-05, "loss": 0.5937, "num_tokens": 895154133.0, "step": 1171 }, { "epoch": 0.4294416708652041, "grad_norm": 0.2200530779647476, "learning_rate": 3.97073454781016e-05, "loss": 0.5616, "num_tokens": 895953434.0, "step": 1172 }, { "epoch": 0.4298080886731095, "grad_norm": 0.19964496877830154, "learning_rate": 3.9706577939139365e-05, "loss": 0.5732, "num_tokens": 896809523.0, "step": 1173 }, { "epoch": 0.43017450648101496, "grad_norm": 0.18715054526591637, "learning_rate": 3.97058094032615e-05, "loss": 0.5729, "num_tokens": 897579828.0, "step": 1174 }, { "epoch": 0.43054092428892043, "grad_norm": 0.2205226089135636, "learning_rate": 3.970503987051126e-05, "loss": 0.5816, "num_tokens": 898322860.0, "step": 1175 }, { "epoch": 0.4309073420968259, "grad_norm": 0.21285049334066242, "learning_rate": 3.970426934093197e-05, "loss": 0.5847, "num_tokens": 899056930.0, "step": 1176 }, { "epoch": 0.4312737599047314, "grad_norm": 0.2578373888765662, "learning_rate": 3.970349781456703e-05, "loss": 0.5825, "num_tokens": 899831815.0, "step": 1177 }, { "epoch": 0.43164017771263685, "grad_norm": 0.2586049232141798, "learning_rate": 3.970272529145987e-05, "loss": 0.6239, "num_tokens": 900628149.0, "step": 1178 }, { "epoch": 0.4320065955205423, "grad_norm": 0.25161798093968246, "learning_rate": 3.970195177165398e-05, "loss": 0.6029, "num_tokens": 901443072.0, "step": 1179 }, { "epoch": 0.43237301332844774, "grad_norm": 0.22685816665153952, "learning_rate": 3.9701177255192916e-05, "loss": 0.5922, "num_tokens": 902285624.0, "step": 1180 }, { "epoch": 0.4327394311363532, "grad_norm": 0.2171914856536397, "learning_rate": 3.970040174212028e-05, "loss": 0.6451, "num_tokens": 902915965.0, "step": 1181 }, { "epoch": 0.4331058489442587, "grad_norm": 0.2459487561016418, "learning_rate": 3.9699625232479736e-05, "loss": 0.6316, "num_tokens": 903706271.0, "step": 1182 }, { "epoch": 0.43347226675216416, "grad_norm": 0.22022021845865553, "learning_rate": 3.969884772631502e-05, "loss": 0.64, "num_tokens": 904540422.0, "step": 1183 }, { "epoch": 0.43383868456006963, "grad_norm": 0.21594156088313238, "learning_rate": 3.969806922366988e-05, "loss": 0.6264, "num_tokens": 905228183.0, "step": 1184 }, { "epoch": 0.4342051023679751, "grad_norm": 0.22895753784776476, "learning_rate": 3.969728972458816e-05, "loss": 0.5528, "num_tokens": 905976966.0, "step": 1185 }, { "epoch": 0.4345715201758805, "grad_norm": 0.1986458391335436, "learning_rate": 3.969650922911375e-05, "loss": 0.5664, "num_tokens": 906752128.0, "step": 1186 }, { "epoch": 0.434937937983786, "grad_norm": 0.21554001885298052, "learning_rate": 3.9695727737290594e-05, "loss": 0.6062, "num_tokens": 907501275.0, "step": 1187 }, { "epoch": 0.43530435579169147, "grad_norm": 0.2153001045980652, "learning_rate": 3.969494524916268e-05, "loss": 0.6193, "num_tokens": 908287144.0, "step": 1188 }, { "epoch": 0.43567077359959694, "grad_norm": 0.20856466072888274, "learning_rate": 3.969416176477408e-05, "loss": 0.5699, "num_tokens": 909104755.0, "step": 1189 }, { "epoch": 0.4360371914075024, "grad_norm": 0.23719588098835112, "learning_rate": 3.969337728416889e-05, "loss": 0.5453, "num_tokens": 909874854.0, "step": 1190 }, { "epoch": 0.4364036092154079, "grad_norm": 0.21728743330760045, "learning_rate": 3.9692591807391286e-05, "loss": 0.5595, "num_tokens": 910620271.0, "step": 1191 }, { "epoch": 0.43677002702331336, "grad_norm": 0.22324455643691735, "learning_rate": 3.9691805334485497e-05, "loss": 0.5442, "num_tokens": 911329678.0, "step": 1192 }, { "epoch": 0.4371364448312188, "grad_norm": 0.24630931847747273, "learning_rate": 3.969101786549579e-05, "loss": 0.588, "num_tokens": 912073423.0, "step": 1193 }, { "epoch": 0.43750286263912425, "grad_norm": 0.2725224200577471, "learning_rate": 3.969022940046651e-05, "loss": 0.5596, "num_tokens": 912903538.0, "step": 1194 }, { "epoch": 0.4378692804470297, "grad_norm": 0.21612676173228362, "learning_rate": 3.968943993944205e-05, "loss": 0.5963, "num_tokens": 913732188.0, "step": 1195 }, { "epoch": 0.4382356982549352, "grad_norm": 0.20782371751709264, "learning_rate": 3.9688649482466855e-05, "loss": 0.6143, "num_tokens": 914494847.0, "step": 1196 }, { "epoch": 0.43860211606284066, "grad_norm": 0.22976490083743492, "learning_rate": 3.968785802958543e-05, "loss": 0.5804, "num_tokens": 915231428.0, "step": 1197 }, { "epoch": 0.43896853387074614, "grad_norm": 0.20709916720864957, "learning_rate": 3.968706558084233e-05, "loss": 0.5757, "num_tokens": 915933797.0, "step": 1198 }, { "epoch": 0.4393349516786516, "grad_norm": 0.21595373232033832, "learning_rate": 3.968627213628219e-05, "loss": 0.5853, "num_tokens": 916769015.0, "step": 1199 }, { "epoch": 0.439701369486557, "grad_norm": 0.21884122674384976, "learning_rate": 3.968547769594966e-05, "loss": 0.6101, "num_tokens": 917543890.0, "step": 1200 }, { "epoch": 0.4400677872944625, "grad_norm": 0.2367315741160416, "learning_rate": 3.9684682259889475e-05, "loss": 0.5708, "num_tokens": 918195259.0, "step": 1201 }, { "epoch": 0.44043420510236797, "grad_norm": 0.2683263207552893, "learning_rate": 3.9683885828146424e-05, "loss": 0.6034, "num_tokens": 918990357.0, "step": 1202 }, { "epoch": 0.44080062291027344, "grad_norm": 0.23442498173688536, "learning_rate": 3.968308840076535e-05, "loss": 0.6026, "num_tokens": 919710738.0, "step": 1203 }, { "epoch": 0.4411670407181789, "grad_norm": 0.22045578331313473, "learning_rate": 3.9682289977791146e-05, "loss": 0.5938, "num_tokens": 920476141.0, "step": 1204 }, { "epoch": 0.4415334585260844, "grad_norm": 0.21571976750032362, "learning_rate": 3.9681490559268766e-05, "loss": 0.6128, "num_tokens": 921245203.0, "step": 1205 }, { "epoch": 0.4418998763339898, "grad_norm": 0.1913394791862274, "learning_rate": 3.968069014524321e-05, "loss": 0.5594, "num_tokens": 922068139.0, "step": 1206 }, { "epoch": 0.4422662941418953, "grad_norm": 0.22433727620765223, "learning_rate": 3.967988873575956e-05, "loss": 0.5947, "num_tokens": 922907798.0, "step": 1207 }, { "epoch": 0.44263271194980075, "grad_norm": 0.19795193820299198, "learning_rate": 3.967908633086292e-05, "loss": 0.5517, "num_tokens": 923708771.0, "step": 1208 }, { "epoch": 0.4429991297577062, "grad_norm": 0.2170615180555752, "learning_rate": 3.9678282930598473e-05, "loss": 0.5864, "num_tokens": 924497994.0, "step": 1209 }, { "epoch": 0.4433655475656117, "grad_norm": 0.27338438300393625, "learning_rate": 3.9677478535011455e-05, "loss": 0.6054, "num_tokens": 925252633.0, "step": 1210 }, { "epoch": 0.44373196537351717, "grad_norm": 0.2126743681255858, "learning_rate": 3.967667314414716e-05, "loss": 0.5688, "num_tokens": 925958119.0, "step": 1211 }, { "epoch": 0.44409838318142264, "grad_norm": 0.2394393766115229, "learning_rate": 3.967586675805092e-05, "loss": 0.5576, "num_tokens": 926648676.0, "step": 1212 }, { "epoch": 0.44446480098932806, "grad_norm": 0.28440592343923526, "learning_rate": 3.9675059376768146e-05, "loss": 0.5506, "num_tokens": 927454325.0, "step": 1213 }, { "epoch": 0.44483121879723353, "grad_norm": 0.21070998261189083, "learning_rate": 3.967425100034429e-05, "loss": 0.5999, "num_tokens": 928342560.0, "step": 1214 }, { "epoch": 0.445197636605139, "grad_norm": 0.23628379997441484, "learning_rate": 3.9673441628824864e-05, "loss": 0.5669, "num_tokens": 929156291.0, "step": 1215 }, { "epoch": 0.4455640544130445, "grad_norm": 0.25408241704558143, "learning_rate": 3.967263126225545e-05, "loss": 0.5598, "num_tokens": 929970614.0, "step": 1216 }, { "epoch": 0.44593047222094995, "grad_norm": 0.20636182706427458, "learning_rate": 3.9671819900681657e-05, "loss": 0.589, "num_tokens": 930753548.0, "step": 1217 }, { "epoch": 0.4462968900288554, "grad_norm": 0.23102476703712466, "learning_rate": 3.967100754414917e-05, "loss": 0.5886, "num_tokens": 931401890.0, "step": 1218 }, { "epoch": 0.4466633078367609, "grad_norm": 0.24812527885797514, "learning_rate": 3.967019419270374e-05, "loss": 0.5761, "num_tokens": 932117737.0, "step": 1219 }, { "epoch": 0.4470297256446663, "grad_norm": 0.22816352555360092, "learning_rate": 3.966937984639114e-05, "loss": 0.6034, "num_tokens": 932846004.0, "step": 1220 }, { "epoch": 0.4473961434525718, "grad_norm": 0.2763008767135071, "learning_rate": 3.966856450525723e-05, "loss": 0.5646, "num_tokens": 933682375.0, "step": 1221 }, { "epoch": 0.44776256126047725, "grad_norm": 0.24003687327916978, "learning_rate": 3.966774816934791e-05, "loss": 0.5999, "num_tokens": 934459875.0, "step": 1222 }, { "epoch": 0.4481289790683827, "grad_norm": 0.19967567691057211, "learning_rate": 3.9666930838709155e-05, "loss": 0.5789, "num_tokens": 935214716.0, "step": 1223 }, { "epoch": 0.4484953968762882, "grad_norm": 0.24771469712352134, "learning_rate": 3.966611251338697e-05, "loss": 0.5962, "num_tokens": 935990567.0, "step": 1224 }, { "epoch": 0.44886181468419367, "grad_norm": 0.19321411021656984, "learning_rate": 3.966529319342743e-05, "loss": 0.569, "num_tokens": 936903677.0, "step": 1225 }, { "epoch": 0.4492282324920991, "grad_norm": 0.2718017038460117, "learning_rate": 3.9664472878876666e-05, "loss": 0.618, "num_tokens": 937684798.0, "step": 1226 }, { "epoch": 0.44959465030000456, "grad_norm": 0.2307116941209027, "learning_rate": 3.966365156978086e-05, "loss": 0.5972, "num_tokens": 938445496.0, "step": 1227 }, { "epoch": 0.44996106810791003, "grad_norm": 0.22085685463145308, "learning_rate": 3.966282926618626e-05, "loss": 0.6099, "num_tokens": 939153512.0, "step": 1228 }, { "epoch": 0.4503274859158155, "grad_norm": 0.24448666569068267, "learning_rate": 3.966200596813916e-05, "loss": 0.5855, "num_tokens": 939968718.0, "step": 1229 }, { "epoch": 0.450693903723721, "grad_norm": 0.2551518207892501, "learning_rate": 3.966118167568591e-05, "loss": 0.601, "num_tokens": 940717803.0, "step": 1230 }, { "epoch": 0.45106032153162645, "grad_norm": 0.19694486692921737, "learning_rate": 3.9660356388872925e-05, "loss": 0.6103, "num_tokens": 941553993.0, "step": 1231 }, { "epoch": 0.4514267393395319, "grad_norm": 0.22643773364397024, "learning_rate": 3.965953010774667e-05, "loss": 0.6039, "num_tokens": 942370755.0, "step": 1232 }, { "epoch": 0.45179315714743734, "grad_norm": 0.26635675673735604, "learning_rate": 3.9658702832353655e-05, "loss": 0.5827, "num_tokens": 943209878.0, "step": 1233 }, { "epoch": 0.4521595749553428, "grad_norm": 0.23241727991587255, "learning_rate": 3.965787456274047e-05, "loss": 0.5932, "num_tokens": 943950335.0, "step": 1234 }, { "epoch": 0.4525259927632483, "grad_norm": 0.23376276556074294, "learning_rate": 3.965704529895374e-05, "loss": 0.5484, "num_tokens": 944659859.0, "step": 1235 }, { "epoch": 0.45289241057115376, "grad_norm": 0.24948450129931002, "learning_rate": 3.9656215041040165e-05, "loss": 0.5962, "num_tokens": 945546682.0, "step": 1236 }, { "epoch": 0.45325882837905923, "grad_norm": 0.23110849224617364, "learning_rate": 3.965538378904648e-05, "loss": 0.5659, "num_tokens": 946389905.0, "step": 1237 }, { "epoch": 0.4536252461869647, "grad_norm": 0.20642481578138838, "learning_rate": 3.965455154301949e-05, "loss": 0.5598, "num_tokens": 947155530.0, "step": 1238 }, { "epoch": 0.4539916639948702, "grad_norm": 0.24771190279352642, "learning_rate": 3.965371830300606e-05, "loss": 0.5863, "num_tokens": 947977771.0, "step": 1239 }, { "epoch": 0.4543580818027756, "grad_norm": 0.24584285454671453, "learning_rate": 3.9652884069053084e-05, "loss": 0.6127, "num_tokens": 948642119.0, "step": 1240 }, { "epoch": 0.45472449961068107, "grad_norm": 0.22161291184365733, "learning_rate": 3.9652048841207546e-05, "loss": 0.5566, "num_tokens": 949424297.0, "step": 1241 }, { "epoch": 0.45509091741858654, "grad_norm": 0.21127334952769675, "learning_rate": 3.9651212619516466e-05, "loss": 0.5883, "num_tokens": 950257744.0, "step": 1242 }, { "epoch": 0.455457335226492, "grad_norm": 0.19695143338086893, "learning_rate": 3.965037540402693e-05, "loss": 0.5571, "num_tokens": 950987075.0, "step": 1243 }, { "epoch": 0.4558237530343975, "grad_norm": 0.2351856464868659, "learning_rate": 3.964953719478607e-05, "loss": 0.6058, "num_tokens": 951869560.0, "step": 1244 }, { "epoch": 0.45619017084230296, "grad_norm": 0.19503375662863337, "learning_rate": 3.964869799184108e-05, "loss": 0.5955, "num_tokens": 952569578.0, "step": 1245 }, { "epoch": 0.4565565886502084, "grad_norm": 0.24225623992003154, "learning_rate": 3.96478577952392e-05, "loss": 0.6, "num_tokens": 953306646.0, "step": 1246 }, { "epoch": 0.45692300645811385, "grad_norm": 0.233127505542049, "learning_rate": 3.964701660502775e-05, "loss": 0.5848, "num_tokens": 953985536.0, "step": 1247 }, { "epoch": 0.4572894242660193, "grad_norm": 0.2023089147267703, "learning_rate": 3.9646174421254085e-05, "loss": 0.5616, "num_tokens": 954741669.0, "step": 1248 }, { "epoch": 0.4576558420739248, "grad_norm": 0.20522168294322296, "learning_rate": 3.9645331243965624e-05, "loss": 0.5541, "num_tokens": 955456736.0, "step": 1249 }, { "epoch": 0.45802225988183026, "grad_norm": 0.18813721336224798, "learning_rate": 3.9644487073209824e-05, "loss": 0.5662, "num_tokens": 956248637.0, "step": 1250 }, { "epoch": 0.45838867768973574, "grad_norm": 0.22442107625312047, "learning_rate": 3.9643641909034234e-05, "loss": 0.5918, "num_tokens": 957013921.0, "step": 1251 }, { "epoch": 0.4587550954976412, "grad_norm": 0.2190928013882131, "learning_rate": 3.964279575148643e-05, "loss": 0.5857, "num_tokens": 957749310.0, "step": 1252 }, { "epoch": 0.4591215133055466, "grad_norm": 0.20868751702177402, "learning_rate": 3.964194860061405e-05, "loss": 0.5977, "num_tokens": 958437316.0, "step": 1253 }, { "epoch": 0.4594879311134521, "grad_norm": 0.17401730873333138, "learning_rate": 3.964110045646479e-05, "loss": 0.5523, "num_tokens": 959231153.0, "step": 1254 }, { "epoch": 0.45985434892135757, "grad_norm": 0.21563540004491044, "learning_rate": 3.9640251319086405e-05, "loss": 0.6056, "num_tokens": 959990745.0, "step": 1255 }, { "epoch": 0.46022076672926304, "grad_norm": 0.2030476353902713, "learning_rate": 3.963940118852671e-05, "loss": 0.5636, "num_tokens": 960778946.0, "step": 1256 }, { "epoch": 0.4605871845371685, "grad_norm": 0.2346381349076874, "learning_rate": 3.963855006483356e-05, "loss": 0.584, "num_tokens": 961544951.0, "step": 1257 }, { "epoch": 0.460953602345074, "grad_norm": 0.2219269739344504, "learning_rate": 3.9637697948054874e-05, "loss": 0.6027, "num_tokens": 962260829.0, "step": 1258 }, { "epoch": 0.46132002015297946, "grad_norm": 0.204230119660192, "learning_rate": 3.963684483823863e-05, "loss": 0.5713, "num_tokens": 962967756.0, "step": 1259 }, { "epoch": 0.4616864379608849, "grad_norm": 0.2524723814984392, "learning_rate": 3.9635990735432864e-05, "loss": 0.6049, "num_tokens": 963771296.0, "step": 1260 }, { "epoch": 0.46205285576879035, "grad_norm": 0.23546022332225378, "learning_rate": 3.9635135639685663e-05, "loss": 0.584, "num_tokens": 964607022.0, "step": 1261 }, { "epoch": 0.4624192735766958, "grad_norm": 0.23141731389190032, "learning_rate": 3.963427955104517e-05, "loss": 0.568, "num_tokens": 965518840.0, "step": 1262 }, { "epoch": 0.4627856913846013, "grad_norm": 0.20690695886131064, "learning_rate": 3.963342246955957e-05, "loss": 0.572, "num_tokens": 966344363.0, "step": 1263 }, { "epoch": 0.46315210919250677, "grad_norm": 0.19256051379123443, "learning_rate": 3.963256439527713e-05, "loss": 0.6011, "num_tokens": 967031306.0, "step": 1264 }, { "epoch": 0.46351852700041224, "grad_norm": 0.2452525917506449, "learning_rate": 3.9631705328246174e-05, "loss": 0.602, "num_tokens": 967765300.0, "step": 1265 }, { "epoch": 0.4638849448083177, "grad_norm": 0.23331259937037757, "learning_rate": 3.963084526851506e-05, "loss": 0.5478, "num_tokens": 968462917.0, "step": 1266 }, { "epoch": 0.46425136261622313, "grad_norm": 0.2336279329134558, "learning_rate": 3.9629984216132194e-05, "loss": 0.5881, "num_tokens": 969173040.0, "step": 1267 }, { "epoch": 0.4646177804241286, "grad_norm": 0.2665073550752302, "learning_rate": 3.962912217114607e-05, "loss": 0.6126, "num_tokens": 969871341.0, "step": 1268 }, { "epoch": 0.4649841982320341, "grad_norm": 0.1968993456257183, "learning_rate": 3.9628259133605233e-05, "loss": 0.5672, "num_tokens": 970720986.0, "step": 1269 }, { "epoch": 0.46535061603993955, "grad_norm": 0.2226318952769781, "learning_rate": 3.962739510355826e-05, "loss": 0.5772, "num_tokens": 971486865.0, "step": 1270 }, { "epoch": 0.465717033847845, "grad_norm": 0.18691587845400454, "learning_rate": 3.96265300810538e-05, "loss": 0.6029, "num_tokens": 972226571.0, "step": 1271 }, { "epoch": 0.4660834516557505, "grad_norm": 0.24662110440639542, "learning_rate": 3.962566406614056e-05, "loss": 0.5546, "num_tokens": 972988587.0, "step": 1272 }, { "epoch": 0.4664498694636559, "grad_norm": 0.22247151485077352, "learning_rate": 3.9624797058867284e-05, "loss": 0.5778, "num_tokens": 973848987.0, "step": 1273 }, { "epoch": 0.4668162872715614, "grad_norm": 0.21463930960402755, "learning_rate": 3.9623929059282804e-05, "loss": 0.58, "num_tokens": 974659485.0, "step": 1274 }, { "epoch": 0.46718270507946685, "grad_norm": 0.22110527883027706, "learning_rate": 3.962306006743598e-05, "loss": 0.5683, "num_tokens": 975388331.0, "step": 1275 }, { "epoch": 0.4675491228873723, "grad_norm": 0.17225854905342716, "learning_rate": 3.9622190083375744e-05, "loss": 0.5626, "num_tokens": 976154121.0, "step": 1276 }, { "epoch": 0.4679155406952778, "grad_norm": 0.22471612512060637, "learning_rate": 3.962131910715107e-05, "loss": 0.5853, "num_tokens": 976897150.0, "step": 1277 }, { "epoch": 0.46828195850318327, "grad_norm": 0.2238030510332919, "learning_rate": 3.9620447138811e-05, "loss": 0.5896, "num_tokens": 977708956.0, "step": 1278 }, { "epoch": 0.46864837631108874, "grad_norm": 0.21856557021536913, "learning_rate": 3.961957417840463e-05, "loss": 0.5914, "num_tokens": 978600495.0, "step": 1279 }, { "epoch": 0.46901479411899416, "grad_norm": 0.23066200394675931, "learning_rate": 3.961870022598111e-05, "loss": 0.5989, "num_tokens": 979379893.0, "step": 1280 }, { "epoch": 0.46938121192689963, "grad_norm": 0.2012409956932694, "learning_rate": 3.961782528158964e-05, "loss": 0.5922, "num_tokens": 980282592.0, "step": 1281 }, { "epoch": 0.4697476297348051, "grad_norm": 0.19883332214296187, "learning_rate": 3.961694934527948e-05, "loss": 0.6016, "num_tokens": 980976467.0, "step": 1282 }, { "epoch": 0.4701140475427106, "grad_norm": 0.18494990303343595, "learning_rate": 3.9616072417099954e-05, "loss": 0.585, "num_tokens": 981736684.0, "step": 1283 }, { "epoch": 0.47048046535061605, "grad_norm": 0.16645815613812281, "learning_rate": 3.961519449710043e-05, "loss": 0.565, "num_tokens": 982681053.0, "step": 1284 }, { "epoch": 0.4708468831585215, "grad_norm": 0.2026344125186022, "learning_rate": 3.961431558533034e-05, "loss": 0.5924, "num_tokens": 983487598.0, "step": 1285 }, { "epoch": 0.471213300966427, "grad_norm": 0.17824533609997073, "learning_rate": 3.961343568183917e-05, "loss": 0.5879, "num_tokens": 984278206.0, "step": 1286 }, { "epoch": 0.4715797187743324, "grad_norm": 0.1792865975193791, "learning_rate": 3.961255478667645e-05, "loss": 0.5737, "num_tokens": 985036772.0, "step": 1287 }, { "epoch": 0.4719461365822379, "grad_norm": 0.2058864174044726, "learning_rate": 3.961167289989179e-05, "loss": 0.5947, "num_tokens": 985812046.0, "step": 1288 }, { "epoch": 0.47231255439014336, "grad_norm": 0.1974736125820847, "learning_rate": 3.9610790021534826e-05, "loss": 0.572, "num_tokens": 986556372.0, "step": 1289 }, { "epoch": 0.47267897219804883, "grad_norm": 0.19633537910379625, "learning_rate": 3.9609906151655284e-05, "loss": 0.5597, "num_tokens": 987308524.0, "step": 1290 }, { "epoch": 0.4730453900059543, "grad_norm": 0.20051646154741254, "learning_rate": 3.9609021290302915e-05, "loss": 0.5722, "num_tokens": 987985433.0, "step": 1291 }, { "epoch": 0.4734118078138598, "grad_norm": 0.2030012183852099, "learning_rate": 3.960813543752754e-05, "loss": 0.5952, "num_tokens": 988760698.0, "step": 1292 }, { "epoch": 0.4737782256217652, "grad_norm": 0.21142347822797025, "learning_rate": 3.960724859337904e-05, "loss": 0.5521, "num_tokens": 989506412.0, "step": 1293 }, { "epoch": 0.47414464342967066, "grad_norm": 0.20718742201288373, "learning_rate": 3.960636075790734e-05, "loss": 0.6053, "num_tokens": 990333013.0, "step": 1294 }, { "epoch": 0.47451106123757614, "grad_norm": 0.21951668431227742, "learning_rate": 3.960547193116242e-05, "loss": 0.5975, "num_tokens": 991134821.0, "step": 1295 }, { "epoch": 0.4748774790454816, "grad_norm": 0.22759477027215225, "learning_rate": 3.960458211319434e-05, "loss": 0.5634, "num_tokens": 991809148.0, "step": 1296 }, { "epoch": 0.4752438968533871, "grad_norm": 0.2194209051297733, "learning_rate": 3.9603691304053193e-05, "loss": 0.5905, "num_tokens": 992612251.0, "step": 1297 }, { "epoch": 0.47561031466129255, "grad_norm": 0.20450698489898153, "learning_rate": 3.9602799503789126e-05, "loss": 0.6082, "num_tokens": 993483791.0, "step": 1298 }, { "epoch": 0.475976732469198, "grad_norm": 0.23948461923099104, "learning_rate": 3.9601906712452355e-05, "loss": 0.5777, "num_tokens": 994172019.0, "step": 1299 }, { "epoch": 0.47634315027710344, "grad_norm": 0.20929277356930745, "learning_rate": 3.960101293009315e-05, "loss": 0.5693, "num_tokens": 994967961.0, "step": 1300 }, { "epoch": 0.4767095680850089, "grad_norm": 0.19131202248365975, "learning_rate": 3.960011815676182e-05, "loss": 0.5502, "num_tokens": 995738570.0, "step": 1301 }, { "epoch": 0.4770759858929144, "grad_norm": 0.18785897091834028, "learning_rate": 3.959922239250876e-05, "loss": 0.5415, "num_tokens": 996564617.0, "step": 1302 }, { "epoch": 0.47744240370081986, "grad_norm": 0.20076377570302795, "learning_rate": 3.959832563738438e-05, "loss": 0.5594, "num_tokens": 997271386.0, "step": 1303 }, { "epoch": 0.47780882150872533, "grad_norm": 0.2206153451314073, "learning_rate": 3.9597427891439194e-05, "loss": 0.6019, "num_tokens": 998046142.0, "step": 1304 }, { "epoch": 0.4781752393166308, "grad_norm": 0.23376834327552642, "learning_rate": 3.959652915472373e-05, "loss": 0.5707, "num_tokens": 998857898.0, "step": 1305 }, { "epoch": 0.4785416571245363, "grad_norm": 0.24335182232050134, "learning_rate": 3.9595629427288586e-05, "loss": 0.5928, "num_tokens": 999591267.0, "step": 1306 }, { "epoch": 0.4789080749324417, "grad_norm": 0.2211290464248457, "learning_rate": 3.959472870918444e-05, "loss": 0.5819, "num_tokens": 1000342857.0, "step": 1307 }, { "epoch": 0.47927449274034717, "grad_norm": 0.17590622728496752, "learning_rate": 3.9593827000461986e-05, "loss": 0.5415, "num_tokens": 1001058979.0, "step": 1308 }, { "epoch": 0.47964091054825264, "grad_norm": 0.19331083972325808, "learning_rate": 3.9592924301171994e-05, "loss": 0.5641, "num_tokens": 1001818597.0, "step": 1309 }, { "epoch": 0.4800073283561581, "grad_norm": 0.2000413206531454, "learning_rate": 3.959202061136529e-05, "loss": 0.6171, "num_tokens": 1002571540.0, "step": 1310 }, { "epoch": 0.4803737461640636, "grad_norm": 0.18156442133641623, "learning_rate": 3.959111593109276e-05, "loss": 0.5454, "num_tokens": 1003381993.0, "step": 1311 }, { "epoch": 0.48074016397196906, "grad_norm": 0.20624636025911336, "learning_rate": 3.959021026040533e-05, "loss": 0.551, "num_tokens": 1004108949.0, "step": 1312 }, { "epoch": 0.4811065817798745, "grad_norm": 0.20613652702427013, "learning_rate": 3.9589303599353993e-05, "loss": 0.617, "num_tokens": 1004808316.0, "step": 1313 }, { "epoch": 0.48147299958777995, "grad_norm": 0.19114308990396717, "learning_rate": 3.9588395947989796e-05, "loss": 0.5864, "num_tokens": 1005640999.0, "step": 1314 }, { "epoch": 0.4818394173956854, "grad_norm": 0.19417531004509314, "learning_rate": 3.958748730636385e-05, "loss": 0.5599, "num_tokens": 1006382131.0, "step": 1315 }, { "epoch": 0.4822058352035909, "grad_norm": 0.2335860539944986, "learning_rate": 3.95865776745273e-05, "loss": 0.5666, "num_tokens": 1007233240.0, "step": 1316 }, { "epoch": 0.48257225301149637, "grad_norm": 0.23031645813446505, "learning_rate": 3.958566705253137e-05, "loss": 0.5927, "num_tokens": 1007943738.0, "step": 1317 }, { "epoch": 0.48293867081940184, "grad_norm": 0.2139342198598339, "learning_rate": 3.958475544042732e-05, "loss": 0.6004, "num_tokens": 1008803267.0, "step": 1318 }, { "epoch": 0.4833050886273073, "grad_norm": 0.20085644593878968, "learning_rate": 3.958384283826649e-05, "loss": 0.5797, "num_tokens": 1009628591.0, "step": 1319 }, { "epoch": 0.48367150643521273, "grad_norm": 0.1951172698109253, "learning_rate": 3.958292924610025e-05, "loss": 0.5633, "num_tokens": 1010429400.0, "step": 1320 }, { "epoch": 0.4840379242431182, "grad_norm": 0.2119772709992466, "learning_rate": 3.9582014663980033e-05, "loss": 0.6224, "num_tokens": 1011161125.0, "step": 1321 }, { "epoch": 0.4844043420510237, "grad_norm": 0.19930995434049964, "learning_rate": 3.958109909195734e-05, "loss": 0.5709, "num_tokens": 1011948259.0, "step": 1322 }, { "epoch": 0.48477075985892915, "grad_norm": 0.277149577675329, "learning_rate": 3.958018253008373e-05, "loss": 0.5911, "num_tokens": 1012642409.0, "step": 1323 }, { "epoch": 0.4851371776668346, "grad_norm": 0.20120003203157438, "learning_rate": 3.9579264978410784e-05, "loss": 0.5731, "num_tokens": 1013454462.0, "step": 1324 }, { "epoch": 0.4855035954747401, "grad_norm": 0.21586941387869732, "learning_rate": 3.957834643699018e-05, "loss": 0.5689, "num_tokens": 1014348577.0, "step": 1325 }, { "epoch": 0.48587001328264556, "grad_norm": 0.22703744859222075, "learning_rate": 3.9577426905873625e-05, "loss": 0.5792, "num_tokens": 1015244710.0, "step": 1326 }, { "epoch": 0.486236431090551, "grad_norm": 0.19409573363806423, "learning_rate": 3.957650638511289e-05, "loss": 0.5622, "num_tokens": 1015973700.0, "step": 1327 }, { "epoch": 0.48660284889845645, "grad_norm": 0.2726329856270817, "learning_rate": 3.957558487475981e-05, "loss": 0.6215, "num_tokens": 1016579005.0, "step": 1328 }, { "epoch": 0.4869692667063619, "grad_norm": 0.21791113969778766, "learning_rate": 3.957466237486626e-05, "loss": 0.5907, "num_tokens": 1017338754.0, "step": 1329 }, { "epoch": 0.4873356845142674, "grad_norm": 0.2880404298915582, "learning_rate": 3.957373888548418e-05, "loss": 0.5677, "num_tokens": 1018085954.0, "step": 1330 }, { "epoch": 0.48770210232217287, "grad_norm": 0.2592933557148272, "learning_rate": 3.957281440666556e-05, "loss": 0.5792, "num_tokens": 1018827702.0, "step": 1331 }, { "epoch": 0.48806852013007834, "grad_norm": 0.19966472878905103, "learning_rate": 3.957188893846246e-05, "loss": 0.5656, "num_tokens": 1019547015.0, "step": 1332 }, { "epoch": 0.48843493793798376, "grad_norm": 0.20884888485753006, "learning_rate": 3.957096248092699e-05, "loss": 0.6042, "num_tokens": 1020295223.0, "step": 1333 }, { "epoch": 0.48880135574588923, "grad_norm": 0.19120365389705515, "learning_rate": 3.9570035034111295e-05, "loss": 0.5418, "num_tokens": 1021023931.0, "step": 1334 }, { "epoch": 0.4891677735537947, "grad_norm": 0.1938368184581482, "learning_rate": 3.9569106598067606e-05, "loss": 0.5665, "num_tokens": 1021867618.0, "step": 1335 }, { "epoch": 0.4895341913617002, "grad_norm": 0.18697129150464442, "learning_rate": 3.956817717284818e-05, "loss": 0.5606, "num_tokens": 1022595769.0, "step": 1336 }, { "epoch": 0.48990060916960565, "grad_norm": 0.1783690016414821, "learning_rate": 3.956724675850536e-05, "loss": 0.5337, "num_tokens": 1023392569.0, "step": 1337 }, { "epoch": 0.4902670269775111, "grad_norm": 0.21748917053290223, "learning_rate": 3.956631535509153e-05, "loss": 0.5782, "num_tokens": 1024071232.0, "step": 1338 }, { "epoch": 0.4906334447854166, "grad_norm": 0.20843274782777743, "learning_rate": 3.9565382962659114e-05, "loss": 0.6016, "num_tokens": 1024783258.0, "step": 1339 }, { "epoch": 0.490999862593322, "grad_norm": 0.18470421507282817, "learning_rate": 3.956444958126063e-05, "loss": 0.607, "num_tokens": 1025543130.0, "step": 1340 }, { "epoch": 0.4913662804012275, "grad_norm": 0.22645398269473754, "learning_rate": 3.956351521094861e-05, "loss": 0.5551, "num_tokens": 1026312011.0, "step": 1341 }, { "epoch": 0.49173269820913296, "grad_norm": 0.20635476916199907, "learning_rate": 3.956257985177567e-05, "loss": 0.5793, "num_tokens": 1027098832.0, "step": 1342 }, { "epoch": 0.49209911601703843, "grad_norm": 0.20054674644850687, "learning_rate": 3.956164350379447e-05, "loss": 0.5917, "num_tokens": 1027837593.0, "step": 1343 }, { "epoch": 0.4924655338249439, "grad_norm": 0.20022805912550426, "learning_rate": 3.956070616705773e-05, "loss": 0.5605, "num_tokens": 1028754901.0, "step": 1344 }, { "epoch": 0.4928319516328494, "grad_norm": 0.18278713325726512, "learning_rate": 3.955976784161823e-05, "loss": 0.5881, "num_tokens": 1029484702.0, "step": 1345 }, { "epoch": 0.49319836944075485, "grad_norm": 0.25202326847180556, "learning_rate": 3.9558828527528794e-05, "loss": 0.5949, "num_tokens": 1030144565.0, "step": 1346 }, { "epoch": 0.49356478724866026, "grad_norm": 0.24852327601699245, "learning_rate": 3.95578882248423e-05, "loss": 0.5935, "num_tokens": 1030859182.0, "step": 1347 }, { "epoch": 0.49393120505656574, "grad_norm": 0.22604527597053942, "learning_rate": 3.9556946933611695e-05, "loss": 0.6227, "num_tokens": 1031638357.0, "step": 1348 }, { "epoch": 0.4942976228644712, "grad_norm": 0.29023300476796465, "learning_rate": 3.9556004653889984e-05, "loss": 0.5844, "num_tokens": 1032340776.0, "step": 1349 }, { "epoch": 0.4946640406723767, "grad_norm": 0.22063401545901917, "learning_rate": 3.95550613857302e-05, "loss": 0.5675, "num_tokens": 1033120615.0, "step": 1350 }, { "epoch": 0.49503045848028215, "grad_norm": 0.2137979238888776, "learning_rate": 3.955411712918547e-05, "loss": 0.5962, "num_tokens": 1033939983.0, "step": 1351 }, { "epoch": 0.4953968762881876, "grad_norm": 0.24614484236594206, "learning_rate": 3.955317188430895e-05, "loss": 0.6116, "num_tokens": 1034730590.0, "step": 1352 }, { "epoch": 0.49576329409609304, "grad_norm": 0.22758111462784145, "learning_rate": 3.955222565115385e-05, "loss": 0.6249, "num_tokens": 1035451151.0, "step": 1353 }, { "epoch": 0.4961297119039985, "grad_norm": 0.22073054859588942, "learning_rate": 3.955127842977347e-05, "loss": 0.58, "num_tokens": 1036195070.0, "step": 1354 }, { "epoch": 0.496496129711904, "grad_norm": 0.21603290038171974, "learning_rate": 3.955033022022111e-05, "loss": 0.5876, "num_tokens": 1036952918.0, "step": 1355 }, { "epoch": 0.49686254751980946, "grad_norm": 0.17719725002072184, "learning_rate": 3.954938102255017e-05, "loss": 0.5545, "num_tokens": 1037931909.0, "step": 1356 }, { "epoch": 0.49722896532771493, "grad_norm": 0.2141430062020357, "learning_rate": 3.9548430836814096e-05, "loss": 0.5619, "num_tokens": 1038703646.0, "step": 1357 }, { "epoch": 0.4975953831356204, "grad_norm": 0.16579699430573777, "learning_rate": 3.954747966306638e-05, "loss": 0.5745, "num_tokens": 1039576610.0, "step": 1358 }, { "epoch": 0.4979618009435259, "grad_norm": 0.1895640175691914, "learning_rate": 3.9546527501360576e-05, "loss": 0.5892, "num_tokens": 1040318216.0, "step": 1359 }, { "epoch": 0.4983282187514313, "grad_norm": 0.18040862789949946, "learning_rate": 3.954557435175029e-05, "loss": 0.5616, "num_tokens": 1041206484.0, "step": 1360 }, { "epoch": 0.49869463655933677, "grad_norm": 0.18423887196987965, "learning_rate": 3.95446202142892e-05, "loss": 0.5546, "num_tokens": 1041965562.0, "step": 1361 }, { "epoch": 0.49906105436724224, "grad_norm": 0.20109991289189325, "learning_rate": 3.9543665089031e-05, "loss": 0.6166, "num_tokens": 1042740620.0, "step": 1362 }, { "epoch": 0.4994274721751477, "grad_norm": 0.21408773758517818, "learning_rate": 3.954270897602949e-05, "loss": 0.6329, "num_tokens": 1043415653.0, "step": 1363 }, { "epoch": 0.4997938899830532, "grad_norm": 0.18704107664075031, "learning_rate": 3.954175187533849e-05, "loss": 0.5856, "num_tokens": 1044302278.0, "step": 1364 }, { "epoch": 0.5001603077909587, "grad_norm": 0.20719923853879166, "learning_rate": 3.954079378701189e-05, "loss": 0.5495, "num_tokens": 1044961756.0, "step": 1365 }, { "epoch": 0.5005267255988641, "grad_norm": 0.1963417718497543, "learning_rate": 3.9539834711103624e-05, "loss": 0.541, "num_tokens": 1045684408.0, "step": 1366 }, { "epoch": 0.5008931434067696, "grad_norm": 0.18891538211307196, "learning_rate": 3.95388746476677e-05, "loss": 0.5589, "num_tokens": 1046487417.0, "step": 1367 }, { "epoch": 0.5012595612146751, "grad_norm": 0.19826197684708202, "learning_rate": 3.9537913596758166e-05, "loss": 0.6021, "num_tokens": 1047320826.0, "step": 1368 }, { "epoch": 0.5016259790225805, "grad_norm": 0.19714899325452054, "learning_rate": 3.9536951558429134e-05, "loss": 0.6024, "num_tokens": 1048020313.0, "step": 1369 }, { "epoch": 0.5019923968304859, "grad_norm": 0.22025252252178437, "learning_rate": 3.953598853273477e-05, "loss": 0.6041, "num_tokens": 1048767606.0, "step": 1370 }, { "epoch": 0.5023588146383914, "grad_norm": 0.20157088737796455, "learning_rate": 3.953502451972929e-05, "loss": 0.5724, "num_tokens": 1049520318.0, "step": 1371 }, { "epoch": 0.5027252324462969, "grad_norm": 0.19802017288794796, "learning_rate": 3.953405951946697e-05, "loss": 0.5949, "num_tokens": 1050301084.0, "step": 1372 }, { "epoch": 0.5030916502542023, "grad_norm": 0.21124504983082426, "learning_rate": 3.953309353200214e-05, "loss": 0.6328, "num_tokens": 1051071547.0, "step": 1373 }, { "epoch": 0.5034580680621078, "grad_norm": 0.2056458467475723, "learning_rate": 3.95321265573892e-05, "loss": 0.6195, "num_tokens": 1051729617.0, "step": 1374 }, { "epoch": 0.5038244858700133, "grad_norm": 0.18471452148651743, "learning_rate": 3.953115859568257e-05, "loss": 0.5985, "num_tokens": 1052489345.0, "step": 1375 }, { "epoch": 0.5041909036779187, "grad_norm": 0.20137731167566855, "learning_rate": 3.9530189646936775e-05, "loss": 0.6201, "num_tokens": 1053271440.0, "step": 1376 }, { "epoch": 0.5045573214858242, "grad_norm": 0.22111764164233744, "learning_rate": 3.952921971120634e-05, "loss": 0.5667, "num_tokens": 1054061864.0, "step": 1377 }, { "epoch": 0.5049237392937297, "grad_norm": 0.21433677017673336, "learning_rate": 3.9528248788545895e-05, "loss": 0.566, "num_tokens": 1054870675.0, "step": 1378 }, { "epoch": 0.5052901571016352, "grad_norm": 0.18999439503935348, "learning_rate": 3.95272768790101e-05, "loss": 0.5808, "num_tokens": 1055667176.0, "step": 1379 }, { "epoch": 0.5056565749095406, "grad_norm": 0.20915734363013397, "learning_rate": 3.952630398265368e-05, "loss": 0.5839, "num_tokens": 1056372665.0, "step": 1380 }, { "epoch": 0.5060229927174461, "grad_norm": 0.1936994884439042, "learning_rate": 3.95253300995314e-05, "loss": 0.5953, "num_tokens": 1057189983.0, "step": 1381 }, { "epoch": 0.5063894105253516, "grad_norm": 0.2134354482464641, "learning_rate": 3.952435522969809e-05, "loss": 0.5962, "num_tokens": 1057877874.0, "step": 1382 }, { "epoch": 0.5067558283332569, "grad_norm": 0.1782027728033449, "learning_rate": 3.952337937320865e-05, "loss": 0.5889, "num_tokens": 1058752028.0, "step": 1383 }, { "epoch": 0.5071222461411624, "grad_norm": 0.25548506579346064, "learning_rate": 3.952240253011802e-05, "loss": 0.5791, "num_tokens": 1059513434.0, "step": 1384 }, { "epoch": 0.5074886639490679, "grad_norm": 0.2578302457515277, "learning_rate": 3.952142470048119e-05, "loss": 0.5928, "num_tokens": 1060171867.0, "step": 1385 }, { "epoch": 0.5078550817569734, "grad_norm": 0.2357202663349108, "learning_rate": 3.952044588435322e-05, "loss": 0.6094, "num_tokens": 1060905355.0, "step": 1386 }, { "epoch": 0.5082214995648788, "grad_norm": 0.21992004028820683, "learning_rate": 3.951946608178921e-05, "loss": 0.5959, "num_tokens": 1061685177.0, "step": 1387 }, { "epoch": 0.5085879173727843, "grad_norm": 0.23111512526695757, "learning_rate": 3.9518485292844345e-05, "loss": 0.595, "num_tokens": 1062399907.0, "step": 1388 }, { "epoch": 0.5089543351806898, "grad_norm": 0.1973631825482561, "learning_rate": 3.951750351757382e-05, "loss": 0.5486, "num_tokens": 1063190072.0, "step": 1389 }, { "epoch": 0.5093207529885952, "grad_norm": 0.2430405735836928, "learning_rate": 3.951652075603294e-05, "loss": 0.5817, "num_tokens": 1063936114.0, "step": 1390 }, { "epoch": 0.5096871707965007, "grad_norm": 0.2234192936931332, "learning_rate": 3.951553700827701e-05, "loss": 0.6152, "num_tokens": 1064675038.0, "step": 1391 }, { "epoch": 0.5100535886044062, "grad_norm": 0.21219613790131497, "learning_rate": 3.951455227436143e-05, "loss": 0.5845, "num_tokens": 1065425510.0, "step": 1392 }, { "epoch": 0.5104200064123117, "grad_norm": 0.27347469609018965, "learning_rate": 3.9513566554341635e-05, "loss": 0.6075, "num_tokens": 1066148618.0, "step": 1393 }, { "epoch": 0.5107864242202171, "grad_norm": 0.2238826015137209, "learning_rate": 3.951257984827314e-05, "loss": 0.5775, "num_tokens": 1066805551.0, "step": 1394 }, { "epoch": 0.5111528420281226, "grad_norm": 0.20674846430610325, "learning_rate": 3.951159215621147e-05, "loss": 0.5638, "num_tokens": 1067658580.0, "step": 1395 }, { "epoch": 0.5115192598360281, "grad_norm": 0.26067738045632866, "learning_rate": 3.951060347821226e-05, "loss": 0.6006, "num_tokens": 1068384535.0, "step": 1396 }, { "epoch": 0.5118856776439334, "grad_norm": 0.20012520617307816, "learning_rate": 3.950961381433117e-05, "loss": 0.5562, "num_tokens": 1069255629.0, "step": 1397 }, { "epoch": 0.5122520954518389, "grad_norm": 0.22837065293165743, "learning_rate": 3.95086231646239e-05, "loss": 0.5729, "num_tokens": 1069936903.0, "step": 1398 }, { "epoch": 0.5126185132597444, "grad_norm": 0.2679708950354936, "learning_rate": 3.950763152914625e-05, "loss": 0.5983, "num_tokens": 1070727834.0, "step": 1399 }, { "epoch": 0.5129849310676499, "grad_norm": 0.1763785291346294, "learning_rate": 3.9506638907954047e-05, "loss": 0.545, "num_tokens": 1071520892.0, "step": 1400 }, { "epoch": 0.5133513488755553, "grad_norm": 0.24779186700450778, "learning_rate": 3.950564530110316e-05, "loss": 0.6015, "num_tokens": 1072285424.0, "step": 1401 }, { "epoch": 0.5137177666834608, "grad_norm": 0.2098506857718754, "learning_rate": 3.950465070864955e-05, "loss": 0.5863, "num_tokens": 1073031305.0, "step": 1402 }, { "epoch": 0.5140841844913663, "grad_norm": 0.20587600576072512, "learning_rate": 3.9503655130649204e-05, "loss": 0.61, "num_tokens": 1073856061.0, "step": 1403 }, { "epoch": 0.5144506022992718, "grad_norm": 0.24141727436696672, "learning_rate": 3.950265856715818e-05, "loss": 0.5576, "num_tokens": 1074635810.0, "step": 1404 }, { "epoch": 0.5148170201071772, "grad_norm": 0.2089157326699597, "learning_rate": 3.950166101823258e-05, "loss": 0.5879, "num_tokens": 1075519865.0, "step": 1405 }, { "epoch": 0.5151834379150827, "grad_norm": 0.21259773686222444, "learning_rate": 3.950066248392859e-05, "loss": 0.592, "num_tokens": 1076267735.0, "step": 1406 }, { "epoch": 0.5155498557229882, "grad_norm": 0.2596159792661967, "learning_rate": 3.9499662964302396e-05, "loss": 0.6235, "num_tokens": 1076972040.0, "step": 1407 }, { "epoch": 0.5159162735308936, "grad_norm": 0.19361518577326434, "learning_rate": 3.94986624594103e-05, "loss": 0.5849, "num_tokens": 1077761892.0, "step": 1408 }, { "epoch": 0.5162826913387991, "grad_norm": 0.18110065910017145, "learning_rate": 3.9497660969308615e-05, "loss": 0.5736, "num_tokens": 1078598189.0, "step": 1409 }, { "epoch": 0.5166491091467045, "grad_norm": 0.21907979286388193, "learning_rate": 3.949665849405374e-05, "loss": 0.573, "num_tokens": 1079312971.0, "step": 1410 }, { "epoch": 0.51701552695461, "grad_norm": 0.18424071230284778, "learning_rate": 3.949565503370211e-05, "loss": 0.5963, "num_tokens": 1080164829.0, "step": 1411 }, { "epoch": 0.5173819447625154, "grad_norm": 0.19250421988611932, "learning_rate": 3.949465058831021e-05, "loss": 0.5953, "num_tokens": 1080961242.0, "step": 1412 }, { "epoch": 0.5177483625704209, "grad_norm": 0.20312856572829768, "learning_rate": 3.949364515793462e-05, "loss": 0.5691, "num_tokens": 1081742994.0, "step": 1413 }, { "epoch": 0.5181147803783264, "grad_norm": 0.18585238983224858, "learning_rate": 3.949263874263192e-05, "loss": 0.623, "num_tokens": 1082531304.0, "step": 1414 }, { "epoch": 0.5184811981862318, "grad_norm": 0.20488349838069048, "learning_rate": 3.949163134245879e-05, "loss": 0.5955, "num_tokens": 1083298867.0, "step": 1415 }, { "epoch": 0.5188476159941373, "grad_norm": 0.20618510997862816, "learning_rate": 3.9490622957471954e-05, "loss": 0.5999, "num_tokens": 1084084683.0, "step": 1416 }, { "epoch": 0.5192140338020428, "grad_norm": 0.17636027308095711, "learning_rate": 3.948961358772817e-05, "loss": 0.5426, "num_tokens": 1084918113.0, "step": 1417 }, { "epoch": 0.5195804516099483, "grad_norm": 0.21789181147679, "learning_rate": 3.9488603233284266e-05, "loss": 0.5617, "num_tokens": 1085820040.0, "step": 1418 }, { "epoch": 0.5199468694178537, "grad_norm": 0.24710981694111916, "learning_rate": 3.948759189419714e-05, "loss": 0.5629, "num_tokens": 1086534027.0, "step": 1419 }, { "epoch": 0.5203132872257592, "grad_norm": 0.20829980525295258, "learning_rate": 3.948657957052372e-05, "loss": 0.5627, "num_tokens": 1087302731.0, "step": 1420 }, { "epoch": 0.5206797050336647, "grad_norm": 0.23797992295802942, "learning_rate": 3.948556626232102e-05, "loss": 0.5725, "num_tokens": 1088051151.0, "step": 1421 }, { "epoch": 0.5210461228415701, "grad_norm": 0.22676209141208883, "learning_rate": 3.948455196964607e-05, "loss": 0.5646, "num_tokens": 1088877101.0, "step": 1422 }, { "epoch": 0.5214125406494756, "grad_norm": 0.1847768102292864, "learning_rate": 3.948353669255599e-05, "loss": 0.5326, "num_tokens": 1089856870.0, "step": 1423 }, { "epoch": 0.521778958457381, "grad_norm": 0.2087052239138468, "learning_rate": 3.9482520431107944e-05, "loss": 0.581, "num_tokens": 1090675038.0, "step": 1424 }, { "epoch": 0.5221453762652865, "grad_norm": 0.20539956075412757, "learning_rate": 3.948150318535914e-05, "loss": 0.5721, "num_tokens": 1091528434.0, "step": 1425 }, { "epoch": 0.5225117940731919, "grad_norm": 0.18999718316435632, "learning_rate": 3.948048495536686e-05, "loss": 0.5552, "num_tokens": 1092351955.0, "step": 1426 }, { "epoch": 0.5228782118810974, "grad_norm": 0.22249205473928624, "learning_rate": 3.947946574118842e-05, "loss": 0.6131, "num_tokens": 1093131819.0, "step": 1427 }, { "epoch": 0.5232446296890029, "grad_norm": 0.2054955704195233, "learning_rate": 3.947844554288122e-05, "loss": 0.5818, "num_tokens": 1093827784.0, "step": 1428 }, { "epoch": 0.5236110474969083, "grad_norm": 0.2035635940666988, "learning_rate": 3.9477424360502686e-05, "loss": 0.585, "num_tokens": 1094530252.0, "step": 1429 }, { "epoch": 0.5239774653048138, "grad_norm": 0.2374985081891767, "learning_rate": 3.947640219411032e-05, "loss": 0.5856, "num_tokens": 1095236193.0, "step": 1430 }, { "epoch": 0.5243438831127193, "grad_norm": 0.19894052663634137, "learning_rate": 3.9475379043761667e-05, "loss": 0.5735, "num_tokens": 1096029168.0, "step": 1431 }, { "epoch": 0.5247103009206248, "grad_norm": 0.2242977221667056, "learning_rate": 3.947435490951433e-05, "loss": 0.5622, "num_tokens": 1096675786.0, "step": 1432 }, { "epoch": 0.5250767187285302, "grad_norm": 0.2231786879282889, "learning_rate": 3.947332979142598e-05, "loss": 0.559, "num_tokens": 1097409109.0, "step": 1433 }, { "epoch": 0.5254431365364357, "grad_norm": 0.21844203304214643, "learning_rate": 3.947230368955432e-05, "loss": 0.5969, "num_tokens": 1098203700.0, "step": 1434 }, { "epoch": 0.5258095543443412, "grad_norm": 0.18231586518651166, "learning_rate": 3.947127660395715e-05, "loss": 0.5508, "num_tokens": 1099014908.0, "step": 1435 }, { "epoch": 0.5261759721522467, "grad_norm": 0.20519089597523968, "learning_rate": 3.947024853469226e-05, "loss": 0.6119, "num_tokens": 1099752600.0, "step": 1436 }, { "epoch": 0.526542389960152, "grad_norm": 0.20184647257088867, "learning_rate": 3.9469219481817545e-05, "loss": 0.596, "num_tokens": 1100440333.0, "step": 1437 }, { "epoch": 0.5269088077680575, "grad_norm": 0.18051199185097214, "learning_rate": 3.946818944539095e-05, "loss": 0.5335, "num_tokens": 1101321117.0, "step": 1438 }, { "epoch": 0.527275225575963, "grad_norm": 0.20341065778420453, "learning_rate": 3.9467158425470476e-05, "loss": 0.5698, "num_tokens": 1102049341.0, "step": 1439 }, { "epoch": 0.5276416433838684, "grad_norm": 0.21894594080613156, "learning_rate": 3.946612642211415e-05, "loss": 0.562, "num_tokens": 1102835476.0, "step": 1440 }, { "epoch": 0.5280080611917739, "grad_norm": 0.20224599101568158, "learning_rate": 3.946509343538009e-05, "loss": 0.6218, "num_tokens": 1103552113.0, "step": 1441 }, { "epoch": 0.5283744789996794, "grad_norm": 0.2285764451575475, "learning_rate": 3.946405946532645e-05, "loss": 0.5659, "num_tokens": 1104314527.0, "step": 1442 }, { "epoch": 0.5287408968075848, "grad_norm": 0.22147859113543436, "learning_rate": 3.9463024512011444e-05, "loss": 0.5664, "num_tokens": 1105038819.0, "step": 1443 }, { "epoch": 0.5291073146154903, "grad_norm": 0.1957531803170507, "learning_rate": 3.946198857549334e-05, "loss": 0.5648, "num_tokens": 1105805598.0, "step": 1444 }, { "epoch": 0.5294737324233958, "grad_norm": 0.254446550572177, "learning_rate": 3.9460951655830465e-05, "loss": 0.567, "num_tokens": 1106528837.0, "step": 1445 }, { "epoch": 0.5298401502313013, "grad_norm": 0.21264442941045542, "learning_rate": 3.9459913753081205e-05, "loss": 0.5648, "num_tokens": 1107296015.0, "step": 1446 }, { "epoch": 0.5302065680392067, "grad_norm": 0.18604830104424133, "learning_rate": 3.9458874867303996e-05, "loss": 0.561, "num_tokens": 1108047082.0, "step": 1447 }, { "epoch": 0.5305729858471122, "grad_norm": 0.24251996037360385, "learning_rate": 3.945783499855732e-05, "loss": 0.565, "num_tokens": 1108706680.0, "step": 1448 }, { "epoch": 0.5309394036550177, "grad_norm": 0.1906502518912467, "learning_rate": 3.945679414689974e-05, "loss": 0.5402, "num_tokens": 1109526790.0, "step": 1449 }, { "epoch": 0.531305821462923, "grad_norm": 0.2131854261422368, "learning_rate": 3.945575231238984e-05, "loss": 0.5521, "num_tokens": 1110304746.0, "step": 1450 }, { "epoch": 0.5316722392708285, "grad_norm": 0.2194086534782707, "learning_rate": 3.945470949508629e-05, "loss": 0.5489, "num_tokens": 1111093426.0, "step": 1451 }, { "epoch": 0.532038657078734, "grad_norm": 0.19592763402756677, "learning_rate": 3.945366569504779e-05, "loss": 0.5786, "num_tokens": 1111851863.0, "step": 1452 }, { "epoch": 0.5324050748866395, "grad_norm": 0.20730752883052567, "learning_rate": 3.945262091233312e-05, "loss": 0.5527, "num_tokens": 1112664297.0, "step": 1453 }, { "epoch": 0.5327714926945449, "grad_norm": 0.21031051653233201, "learning_rate": 3.94515751470011e-05, "loss": 0.588, "num_tokens": 1113311566.0, "step": 1454 }, { "epoch": 0.5331379105024504, "grad_norm": 0.23331381969154402, "learning_rate": 3.94505283991106e-05, "loss": 0.5835, "num_tokens": 1114031688.0, "step": 1455 }, { "epoch": 0.5335043283103559, "grad_norm": 0.21239615269533252, "learning_rate": 3.9449480668720566e-05, "loss": 0.5708, "num_tokens": 1114891027.0, "step": 1456 }, { "epoch": 0.5338707461182614, "grad_norm": 0.20674295471463078, "learning_rate": 3.944843195588999e-05, "loss": 0.5758, "num_tokens": 1115646421.0, "step": 1457 }, { "epoch": 0.5342371639261668, "grad_norm": 0.20987662501837537, "learning_rate": 3.944738226067791e-05, "loss": 0.5668, "num_tokens": 1116409006.0, "step": 1458 }, { "epoch": 0.5346035817340723, "grad_norm": 0.19076162439537997, "learning_rate": 3.944633158314341e-05, "loss": 0.5972, "num_tokens": 1117270112.0, "step": 1459 }, { "epoch": 0.5349699995419778, "grad_norm": 0.19148823114096652, "learning_rate": 3.944527992334567e-05, "loss": 0.5889, "num_tokens": 1118044116.0, "step": 1460 }, { "epoch": 0.5353364173498832, "grad_norm": 0.18302012872060616, "learning_rate": 3.9444227281343896e-05, "loss": 0.5833, "num_tokens": 1118780230.0, "step": 1461 }, { "epoch": 0.5357028351577887, "grad_norm": 0.20646263625199002, "learning_rate": 3.944317365719734e-05, "loss": 0.5843, "num_tokens": 1119484097.0, "step": 1462 }, { "epoch": 0.5360692529656942, "grad_norm": 0.17475868475973372, "learning_rate": 3.9442119050965334e-05, "loss": 0.5408, "num_tokens": 1120251106.0, "step": 1463 }, { "epoch": 0.5364356707735995, "grad_norm": 0.20932393488310627, "learning_rate": 3.944106346270726e-05, "loss": 0.6266, "num_tokens": 1120973360.0, "step": 1464 }, { "epoch": 0.536802088581505, "grad_norm": 0.19198507833669182, "learning_rate": 3.944000689248253e-05, "loss": 0.6002, "num_tokens": 1121700371.0, "step": 1465 }, { "epoch": 0.5371685063894105, "grad_norm": 0.19134208722843532, "learning_rate": 3.943894934035066e-05, "loss": 0.6025, "num_tokens": 1122471701.0, "step": 1466 }, { "epoch": 0.537534924197316, "grad_norm": 0.2032915022145935, "learning_rate": 3.943789080637116e-05, "loss": 0.5706, "num_tokens": 1123308342.0, "step": 1467 }, { "epoch": 0.5379013420052214, "grad_norm": 0.19849911155427072, "learning_rate": 3.943683129060365e-05, "loss": 0.5776, "num_tokens": 1124100743.0, "step": 1468 }, { "epoch": 0.5382677598131269, "grad_norm": 0.19805090254099084, "learning_rate": 3.943577079310778e-05, "loss": 0.5556, "num_tokens": 1124877644.0, "step": 1469 }, { "epoch": 0.5386341776210324, "grad_norm": 0.19712760742861035, "learning_rate": 3.943470931394325e-05, "loss": 0.5564, "num_tokens": 1125632765.0, "step": 1470 }, { "epoch": 0.5390005954289379, "grad_norm": 0.20871586514897472, "learning_rate": 3.9433646853169823e-05, "loss": 0.5805, "num_tokens": 1126484125.0, "step": 1471 }, { "epoch": 0.5393670132368433, "grad_norm": 0.20648827669558453, "learning_rate": 3.943258341084733e-05, "loss": 0.5473, "num_tokens": 1127304542.0, "step": 1472 }, { "epoch": 0.5397334310447488, "grad_norm": 0.22951726851875665, "learning_rate": 3.9431518987035634e-05, "loss": 0.5787, "num_tokens": 1128190071.0, "step": 1473 }, { "epoch": 0.5400998488526543, "grad_norm": 0.17625843114427614, "learning_rate": 3.943045358179467e-05, "loss": 0.5642, "num_tokens": 1128942346.0, "step": 1474 }, { "epoch": 0.5404662666605597, "grad_norm": 0.23038455845910735, "learning_rate": 3.942938719518442e-05, "loss": 0.5921, "num_tokens": 1129800318.0, "step": 1475 }, { "epoch": 0.5408326844684652, "grad_norm": 0.18148767773522645, "learning_rate": 3.9428319827264915e-05, "loss": 0.5541, "num_tokens": 1130548487.0, "step": 1476 }, { "epoch": 0.5411991022763706, "grad_norm": 0.2546768347760027, "learning_rate": 3.942725147809627e-05, "loss": 0.6039, "num_tokens": 1131328817.0, "step": 1477 }, { "epoch": 0.541565520084276, "grad_norm": 0.24978003067052398, "learning_rate": 3.942618214773862e-05, "loss": 0.6004, "num_tokens": 1132071603.0, "step": 1478 }, { "epoch": 0.5419319378921815, "grad_norm": 0.27282001154317287, "learning_rate": 3.942511183625217e-05, "loss": 0.5825, "num_tokens": 1132753126.0, "step": 1479 }, { "epoch": 0.542298355700087, "grad_norm": 0.24991760748154793, "learning_rate": 3.9424040543697184e-05, "loss": 0.5597, "num_tokens": 1133403408.0, "step": 1480 }, { "epoch": 0.5426647735079925, "grad_norm": 0.19558878491617973, "learning_rate": 3.942296827013398e-05, "loss": 0.5844, "num_tokens": 1134212111.0, "step": 1481 }, { "epoch": 0.5430311913158979, "grad_norm": 0.221906236960725, "learning_rate": 3.9421895015622936e-05, "loss": 0.5778, "num_tokens": 1135040316.0, "step": 1482 }, { "epoch": 0.5433976091238034, "grad_norm": 0.1997158930661991, "learning_rate": 3.9420820780224465e-05, "loss": 0.5814, "num_tokens": 1135748221.0, "step": 1483 }, { "epoch": 0.5437640269317089, "grad_norm": 0.2030582715683091, "learning_rate": 3.941974556399905e-05, "loss": 0.5763, "num_tokens": 1136428211.0, "step": 1484 }, { "epoch": 0.5441304447396144, "grad_norm": 0.20850203222127212, "learning_rate": 3.941866936700723e-05, "loss": 0.5714, "num_tokens": 1137275675.0, "step": 1485 }, { "epoch": 0.5444968625475198, "grad_norm": 0.22048195081376568, "learning_rate": 3.941759218930961e-05, "loss": 0.6034, "num_tokens": 1138083319.0, "step": 1486 }, { "epoch": 0.5448632803554253, "grad_norm": 0.2908406856205387, "learning_rate": 3.9416514030966816e-05, "loss": 0.6004, "num_tokens": 1138849162.0, "step": 1487 }, { "epoch": 0.5452296981633308, "grad_norm": 0.21590280845803184, "learning_rate": 3.9415434892039564e-05, "loss": 0.5802, "num_tokens": 1139681060.0, "step": 1488 }, { "epoch": 0.5455961159712363, "grad_norm": 0.19698667148621862, "learning_rate": 3.9414354772588605e-05, "loss": 0.5854, "num_tokens": 1140435623.0, "step": 1489 }, { "epoch": 0.5459625337791416, "grad_norm": 0.2000752040522926, "learning_rate": 3.9413273672674756e-05, "loss": 0.5465, "num_tokens": 1141280339.0, "step": 1490 }, { "epoch": 0.5463289515870471, "grad_norm": 0.1974937159486762, "learning_rate": 3.941219159235889e-05, "loss": 0.561, "num_tokens": 1142117911.0, "step": 1491 }, { "epoch": 0.5466953693949526, "grad_norm": 0.18521931775725173, "learning_rate": 3.9411108531701915e-05, "loss": 0.5944, "num_tokens": 1142817952.0, "step": 1492 }, { "epoch": 0.547061787202858, "grad_norm": 0.2029862129894048, "learning_rate": 3.941002449076482e-05, "loss": 0.5844, "num_tokens": 1143619876.0, "step": 1493 }, { "epoch": 0.5474282050107635, "grad_norm": 0.1792291993193856, "learning_rate": 3.940893946960865e-05, "loss": 0.5784, "num_tokens": 1144479263.0, "step": 1494 }, { "epoch": 0.547794622818669, "grad_norm": 0.18529441696669935, "learning_rate": 3.940785346829446e-05, "loss": 0.566, "num_tokens": 1145246598.0, "step": 1495 }, { "epoch": 0.5481610406265744, "grad_norm": 0.20591475584815186, "learning_rate": 3.940676648688343e-05, "loss": 0.5736, "num_tokens": 1145954686.0, "step": 1496 }, { "epoch": 0.5485274584344799, "grad_norm": 0.18758845997166518, "learning_rate": 3.940567852543675e-05, "loss": 0.5286, "num_tokens": 1146673990.0, "step": 1497 }, { "epoch": 0.5488938762423854, "grad_norm": 0.23690000071029832, "learning_rate": 3.940458958401565e-05, "loss": 0.554, "num_tokens": 1147506638.0, "step": 1498 }, { "epoch": 0.5492602940502909, "grad_norm": 0.2000479277452003, "learning_rate": 3.940349966268147e-05, "loss": 0.5777, "num_tokens": 1148100165.0, "step": 1499 }, { "epoch": 0.5496267118581963, "grad_norm": 0.20689592498550338, "learning_rate": 3.9402408761495565e-05, "loss": 0.6022, "num_tokens": 1148760577.0, "step": 1500 }, { "epoch": 0.5499931296661018, "grad_norm": 0.20100029219415244, "learning_rate": 3.9401316880519353e-05, "loss": 0.5768, "num_tokens": 1149537989.0, "step": 1501 }, { "epoch": 0.5503595474740073, "grad_norm": 0.1835409129690097, "learning_rate": 3.9400224019814305e-05, "loss": 0.5921, "num_tokens": 1150287865.0, "step": 1502 }, { "epoch": 0.5507259652819128, "grad_norm": 0.19200947069359375, "learning_rate": 3.9399130179441954e-05, "loss": 0.5565, "num_tokens": 1151101449.0, "step": 1503 }, { "epoch": 0.5510923830898181, "grad_norm": 0.20140923370179292, "learning_rate": 3.939803535946389e-05, "loss": 0.6149, "num_tokens": 1151837239.0, "step": 1504 }, { "epoch": 0.5514588008977236, "grad_norm": 0.22106466054516502, "learning_rate": 3.939693955994175e-05, "loss": 0.5773, "num_tokens": 1152638385.0, "step": 1505 }, { "epoch": 0.5518252187056291, "grad_norm": 0.2181430734193481, "learning_rate": 3.939584278093723e-05, "loss": 0.5885, "num_tokens": 1153388410.0, "step": 1506 }, { "epoch": 0.5521916365135345, "grad_norm": 0.17911310127580177, "learning_rate": 3.939474502251208e-05, "loss": 0.5655, "num_tokens": 1154205833.0, "step": 1507 }, { "epoch": 0.55255805432144, "grad_norm": 0.23270747779593245, "learning_rate": 3.9393646284728114e-05, "loss": 0.5795, "num_tokens": 1154866816.0, "step": 1508 }, { "epoch": 0.5529244721293455, "grad_norm": 0.19524488920387492, "learning_rate": 3.939254656764718e-05, "loss": 0.5797, "num_tokens": 1155663789.0, "step": 1509 }, { "epoch": 0.553290889937251, "grad_norm": 0.18753321871914003, "learning_rate": 3.9391445871331206e-05, "loss": 0.5777, "num_tokens": 1156393986.0, "step": 1510 }, { "epoch": 0.5536573077451564, "grad_norm": 0.19965250893267417, "learning_rate": 3.9390344195842144e-05, "loss": 0.5559, "num_tokens": 1157138579.0, "step": 1511 }, { "epoch": 0.5540237255530619, "grad_norm": 0.17872905683632637, "learning_rate": 3.9389241541242043e-05, "loss": 0.5415, "num_tokens": 1157887719.0, "step": 1512 }, { "epoch": 0.5543901433609674, "grad_norm": 0.18368765814593832, "learning_rate": 3.9388137907592975e-05, "loss": 0.6024, "num_tokens": 1158693376.0, "step": 1513 }, { "epoch": 0.5547565611688728, "grad_norm": 0.20445529096453505, "learning_rate": 3.938703329495708e-05, "loss": 0.5399, "num_tokens": 1159479579.0, "step": 1514 }, { "epoch": 0.5551229789767783, "grad_norm": 0.1696013384378078, "learning_rate": 3.938592770339655e-05, "loss": 0.5551, "num_tokens": 1160373352.0, "step": 1515 }, { "epoch": 0.5554893967846838, "grad_norm": 0.18738447937238228, "learning_rate": 3.938482113297362e-05, "loss": 0.5583, "num_tokens": 1161166443.0, "step": 1516 }, { "epoch": 0.5558558145925891, "grad_norm": 0.17230815516937034, "learning_rate": 3.938371358375061e-05, "loss": 0.5552, "num_tokens": 1161962238.0, "step": 1517 }, { "epoch": 0.5562222324004946, "grad_norm": 0.18038400991148, "learning_rate": 3.9382605055789864e-05, "loss": 0.6105, "num_tokens": 1162648686.0, "step": 1518 }, { "epoch": 0.5565886502084001, "grad_norm": 0.18177769954174933, "learning_rate": 3.9381495549153796e-05, "loss": 0.5592, "num_tokens": 1163497239.0, "step": 1519 }, { "epoch": 0.5569550680163056, "grad_norm": 0.19046541013192683, "learning_rate": 3.938038506390488e-05, "loss": 0.6109, "num_tokens": 1164245978.0, "step": 1520 }, { "epoch": 0.557321485824211, "grad_norm": 0.2215954654684588, "learning_rate": 3.937927360010564e-05, "loss": 0.5679, "num_tokens": 1165065507.0, "step": 1521 }, { "epoch": 0.5576879036321165, "grad_norm": 0.1876162656648088, "learning_rate": 3.9378161157818645e-05, "loss": 0.5696, "num_tokens": 1165739047.0, "step": 1522 }, { "epoch": 0.558054321440022, "grad_norm": 0.23189470121013672, "learning_rate": 3.937704773710653e-05, "loss": 0.5998, "num_tokens": 1166580810.0, "step": 1523 }, { "epoch": 0.5584207392479275, "grad_norm": 0.23060147495503555, "learning_rate": 3.937593333803199e-05, "loss": 0.5565, "num_tokens": 1167344941.0, "step": 1524 }, { "epoch": 0.5587871570558329, "grad_norm": 0.23296991050684962, "learning_rate": 3.937481796065776e-05, "loss": 0.5684, "num_tokens": 1168230247.0, "step": 1525 }, { "epoch": 0.5591535748637384, "grad_norm": 0.23743430514052527, "learning_rate": 3.937370160504664e-05, "loss": 0.5867, "num_tokens": 1169021900.0, "step": 1526 }, { "epoch": 0.5595199926716439, "grad_norm": 0.17717525985987692, "learning_rate": 3.937258427126148e-05, "loss": 0.5278, "num_tokens": 1169939533.0, "step": 1527 }, { "epoch": 0.5598864104795493, "grad_norm": 0.2151048295697959, "learning_rate": 3.9371465959365195e-05, "loss": 0.5592, "num_tokens": 1170812915.0, "step": 1528 }, { "epoch": 0.5602528282874548, "grad_norm": 0.17044908230851263, "learning_rate": 3.937034666942075e-05, "loss": 0.5261, "num_tokens": 1171634358.0, "step": 1529 }, { "epoch": 0.5606192460953602, "grad_norm": 0.22867691305745066, "learning_rate": 3.9369226401491144e-05, "loss": 0.5552, "num_tokens": 1172395588.0, "step": 1530 }, { "epoch": 0.5609856639032657, "grad_norm": 0.19301734265725265, "learning_rate": 3.936810515563947e-05, "loss": 0.563, "num_tokens": 1173083612.0, "step": 1531 }, { "epoch": 0.5613520817111711, "grad_norm": 0.19534151567685834, "learning_rate": 3.936698293192886e-05, "loss": 0.5582, "num_tokens": 1173851159.0, "step": 1532 }, { "epoch": 0.5617184995190766, "grad_norm": 0.19335253133407526, "learning_rate": 3.9365859730422473e-05, "loss": 0.5861, "num_tokens": 1174504751.0, "step": 1533 }, { "epoch": 0.5620849173269821, "grad_norm": 0.19541804256954545, "learning_rate": 3.936473555118357e-05, "loss": 0.5575, "num_tokens": 1175343453.0, "step": 1534 }, { "epoch": 0.5624513351348875, "grad_norm": 0.184861485457404, "learning_rate": 3.936361039427544e-05, "loss": 0.5629, "num_tokens": 1176105043.0, "step": 1535 }, { "epoch": 0.562817752942793, "grad_norm": 0.1925247322033622, "learning_rate": 3.936248425976142e-05, "loss": 0.5685, "num_tokens": 1176826132.0, "step": 1536 }, { "epoch": 0.5631841707506985, "grad_norm": 0.24658795547264445, "learning_rate": 3.936135714770493e-05, "loss": 0.5896, "num_tokens": 1177472927.0, "step": 1537 }, { "epoch": 0.563550588558604, "grad_norm": 0.2271931918775404, "learning_rate": 3.9360229058169414e-05, "loss": 0.5547, "num_tokens": 1178132760.0, "step": 1538 }, { "epoch": 0.5639170063665094, "grad_norm": 0.19828989230756788, "learning_rate": 3.93590999912184e-05, "loss": 0.5712, "num_tokens": 1178925731.0, "step": 1539 }, { "epoch": 0.5642834241744149, "grad_norm": 0.18518147745669433, "learning_rate": 3.9357969946915445e-05, "loss": 0.6304, "num_tokens": 1179696696.0, "step": 1540 }, { "epoch": 0.5646498419823204, "grad_norm": 0.1738116207560444, "learning_rate": 3.9356838925324175e-05, "loss": 0.5483, "num_tokens": 1180452405.0, "step": 1541 }, { "epoch": 0.5650162597902258, "grad_norm": 0.22953980591080922, "learning_rate": 3.9355706926508276e-05, "loss": 0.5679, "num_tokens": 1181115475.0, "step": 1542 }, { "epoch": 0.5653826775981313, "grad_norm": 0.2167176272609541, "learning_rate": 3.935457395053147e-05, "loss": 0.5498, "num_tokens": 1181788255.0, "step": 1543 }, { "epoch": 0.5657490954060367, "grad_norm": 0.21452307033976936, "learning_rate": 3.935343999745756e-05, "loss": 0.5685, "num_tokens": 1182606849.0, "step": 1544 }, { "epoch": 0.5661155132139422, "grad_norm": 0.21184151317729336, "learning_rate": 3.9352305067350374e-05, "loss": 0.5846, "num_tokens": 1183349687.0, "step": 1545 }, { "epoch": 0.5664819310218476, "grad_norm": 0.23029763987890867, "learning_rate": 3.935116916027383e-05, "loss": 0.5641, "num_tokens": 1183933116.0, "step": 1546 }, { "epoch": 0.5668483488297531, "grad_norm": 0.21218482065108935, "learning_rate": 3.935003227629186e-05, "loss": 0.5595, "num_tokens": 1184662565.0, "step": 1547 }, { "epoch": 0.5672147666376586, "grad_norm": 0.19437328524793615, "learning_rate": 3.934889441546849e-05, "loss": 0.5766, "num_tokens": 1185394930.0, "step": 1548 }, { "epoch": 0.567581184445564, "grad_norm": 0.16853002908449627, "learning_rate": 3.934775557786777e-05, "loss": 0.5463, "num_tokens": 1186307744.0, "step": 1549 }, { "epoch": 0.5679476022534695, "grad_norm": 0.2093702231861054, "learning_rate": 3.934661576355384e-05, "loss": 0.5564, "num_tokens": 1187085126.0, "step": 1550 }, { "epoch": 0.568314020061375, "grad_norm": 0.19508134174664582, "learning_rate": 3.934547497259085e-05, "loss": 0.5772, "num_tokens": 1187760223.0, "step": 1551 }, { "epoch": 0.5686804378692805, "grad_norm": 0.23598206644439926, "learning_rate": 3.934433320504304e-05, "loss": 0.5935, "num_tokens": 1188491987.0, "step": 1552 }, { "epoch": 0.5690468556771859, "grad_norm": 0.19593539612306252, "learning_rate": 3.9343190460974695e-05, "loss": 0.5848, "num_tokens": 1189230974.0, "step": 1553 }, { "epoch": 0.5694132734850914, "grad_norm": 0.19158255512723543, "learning_rate": 3.934204674045015e-05, "loss": 0.5753, "num_tokens": 1190025856.0, "step": 1554 }, { "epoch": 0.5697796912929969, "grad_norm": 0.2112853892308441, "learning_rate": 3.93409020435338e-05, "loss": 0.5852, "num_tokens": 1190887901.0, "step": 1555 }, { "epoch": 0.5701461091009024, "grad_norm": 0.22944925316584616, "learning_rate": 3.9339756370290094e-05, "loss": 0.5644, "num_tokens": 1191705060.0, "step": 1556 }, { "epoch": 0.5705125269088077, "grad_norm": 0.19096198653142524, "learning_rate": 3.933860972078354e-05, "loss": 0.5654, "num_tokens": 1192428508.0, "step": 1557 }, { "epoch": 0.5708789447167132, "grad_norm": 0.21658393532549378, "learning_rate": 3.933746209507868e-05, "loss": 0.5783, "num_tokens": 1193157501.0, "step": 1558 }, { "epoch": 0.5712453625246187, "grad_norm": 0.19855987544793177, "learning_rate": 3.9336313493240154e-05, "loss": 0.5674, "num_tokens": 1193943164.0, "step": 1559 }, { "epoch": 0.5716117803325241, "grad_norm": 0.18614478760700265, "learning_rate": 3.933516391533261e-05, "loss": 0.5729, "num_tokens": 1194579997.0, "step": 1560 }, { "epoch": 0.5719781981404296, "grad_norm": 0.18973920795461655, "learning_rate": 3.933401336142078e-05, "loss": 0.5512, "num_tokens": 1195292651.0, "step": 1561 }, { "epoch": 0.5723446159483351, "grad_norm": 0.19459974443041753, "learning_rate": 3.933286183156944e-05, "loss": 0.5413, "num_tokens": 1196041685.0, "step": 1562 }, { "epoch": 0.5727110337562406, "grad_norm": 0.21059238438285863, "learning_rate": 3.9331709325843423e-05, "loss": 0.6262, "num_tokens": 1196773075.0, "step": 1563 }, { "epoch": 0.573077451564146, "grad_norm": 0.19274663261330374, "learning_rate": 3.933055584430761e-05, "loss": 0.6033, "num_tokens": 1197498381.0, "step": 1564 }, { "epoch": 0.5734438693720515, "grad_norm": 0.2110541131361739, "learning_rate": 3.9329401387026964e-05, "loss": 0.5694, "num_tokens": 1198370180.0, "step": 1565 }, { "epoch": 0.573810287179957, "grad_norm": 0.18974327678252326, "learning_rate": 3.9328245954066474e-05, "loss": 0.5824, "num_tokens": 1199109195.0, "step": 1566 }, { "epoch": 0.5741767049878624, "grad_norm": 0.1882832387744147, "learning_rate": 3.932708954549119e-05, "loss": 0.5681, "num_tokens": 1199993261.0, "step": 1567 }, { "epoch": 0.5745431227957679, "grad_norm": 0.2114579128906114, "learning_rate": 3.932593216136621e-05, "loss": 0.5688, "num_tokens": 1200763832.0, "step": 1568 }, { "epoch": 0.5749095406036734, "grad_norm": 0.2903612314053886, "learning_rate": 3.932477380175672e-05, "loss": 0.5598, "num_tokens": 1201451971.0, "step": 1569 }, { "epoch": 0.5752759584115787, "grad_norm": 0.20514504611848655, "learning_rate": 3.932361446672792e-05, "loss": 0.5715, "num_tokens": 1202250894.0, "step": 1570 }, { "epoch": 0.5756423762194842, "grad_norm": 0.20651737980334925, "learning_rate": 3.9322454156345086e-05, "loss": 0.5893, "num_tokens": 1202926007.0, "step": 1571 }, { "epoch": 0.5760087940273897, "grad_norm": 0.20579649348915424, "learning_rate": 3.932129287067356e-05, "loss": 0.5699, "num_tokens": 1203553672.0, "step": 1572 }, { "epoch": 0.5763752118352952, "grad_norm": 0.20207404737513682, "learning_rate": 3.9320130609778706e-05, "loss": 0.5514, "num_tokens": 1204313051.0, "step": 1573 }, { "epoch": 0.5767416296432006, "grad_norm": 0.18485201826066558, "learning_rate": 3.931896737372597e-05, "loss": 0.5659, "num_tokens": 1205117946.0, "step": 1574 }, { "epoch": 0.5771080474511061, "grad_norm": 0.20522999451127708, "learning_rate": 3.9317803162580845e-05, "loss": 0.5834, "num_tokens": 1205984830.0, "step": 1575 }, { "epoch": 0.5774744652590116, "grad_norm": 0.2068626141065221, "learning_rate": 3.931663797640888e-05, "loss": 0.5834, "num_tokens": 1206650411.0, "step": 1576 }, { "epoch": 0.577840883066917, "grad_norm": 0.21735381484283298, "learning_rate": 3.931547181527566e-05, "loss": 0.5946, "num_tokens": 1207355979.0, "step": 1577 }, { "epoch": 0.5782073008748225, "grad_norm": 0.2641175692659959, "learning_rate": 3.9314304679246876e-05, "loss": 0.5691, "num_tokens": 1208206237.0, "step": 1578 }, { "epoch": 0.578573718682728, "grad_norm": 0.2359583250958034, "learning_rate": 3.931313656838821e-05, "loss": 0.556, "num_tokens": 1208865480.0, "step": 1579 }, { "epoch": 0.5789401364906335, "grad_norm": 0.23767912250421125, "learning_rate": 3.9311967482765434e-05, "loss": 0.5603, "num_tokens": 1209683133.0, "step": 1580 }, { "epoch": 0.579306554298539, "grad_norm": 0.2675398076826414, "learning_rate": 3.931079742244439e-05, "loss": 0.6087, "num_tokens": 1210372581.0, "step": 1581 }, { "epoch": 0.5796729721064444, "grad_norm": 0.17776493215107037, "learning_rate": 3.930962638749093e-05, "loss": 0.5754, "num_tokens": 1211123065.0, "step": 1582 }, { "epoch": 0.5800393899143499, "grad_norm": 0.19425677229724614, "learning_rate": 3.9308454377970996e-05, "loss": 0.5772, "num_tokens": 1211888442.0, "step": 1583 }, { "epoch": 0.5804058077222553, "grad_norm": 0.17933196068953092, "learning_rate": 3.9307281393950575e-05, "loss": 0.5611, "num_tokens": 1212598856.0, "step": 1584 }, { "epoch": 0.5807722255301607, "grad_norm": 0.1914777574224615, "learning_rate": 3.9306107435495714e-05, "loss": 0.5863, "num_tokens": 1213248507.0, "step": 1585 }, { "epoch": 0.5811386433380662, "grad_norm": 0.20433663588806536, "learning_rate": 3.93049325026725e-05, "loss": 0.6249, "num_tokens": 1213961702.0, "step": 1586 }, { "epoch": 0.5815050611459717, "grad_norm": 0.18067669952576293, "learning_rate": 3.930375659554708e-05, "loss": 0.5457, "num_tokens": 1214798828.0, "step": 1587 }, { "epoch": 0.5818714789538771, "grad_norm": 0.2002098696481826, "learning_rate": 3.930257971418567e-05, "loss": 0.5542, "num_tokens": 1215650802.0, "step": 1588 }, { "epoch": 0.5822378967617826, "grad_norm": 0.23514037946424207, "learning_rate": 3.930140185865453e-05, "loss": 0.6183, "num_tokens": 1216357756.0, "step": 1589 }, { "epoch": 0.5826043145696881, "grad_norm": 0.19814698398914504, "learning_rate": 3.930022302901997e-05, "loss": 0.5911, "num_tokens": 1216981627.0, "step": 1590 }, { "epoch": 0.5829707323775936, "grad_norm": 0.2465311646466884, "learning_rate": 3.929904322534837e-05, "loss": 0.5531, "num_tokens": 1217662532.0, "step": 1591 }, { "epoch": 0.583337150185499, "grad_norm": 0.1819934709816427, "learning_rate": 3.929786244770614e-05, "loss": 0.5614, "num_tokens": 1218436512.0, "step": 1592 }, { "epoch": 0.5837035679934045, "grad_norm": 0.19028006288515015, "learning_rate": 3.9296680696159775e-05, "loss": 0.5465, "num_tokens": 1219165826.0, "step": 1593 }, { "epoch": 0.58406998580131, "grad_norm": 0.20361397008968024, "learning_rate": 3.92954979707758e-05, "loss": 0.5746, "num_tokens": 1219843085.0, "step": 1594 }, { "epoch": 0.5844364036092154, "grad_norm": 0.2118138074983815, "learning_rate": 3.929431427162081e-05, "loss": 0.5548, "num_tokens": 1220572918.0, "step": 1595 }, { "epoch": 0.5848028214171209, "grad_norm": 0.18127126062877508, "learning_rate": 3.9293129598761455e-05, "loss": 0.5919, "num_tokens": 1221254033.0, "step": 1596 }, { "epoch": 0.5851692392250263, "grad_norm": 0.19207598138398158, "learning_rate": 3.929194395226442e-05, "loss": 0.5413, "num_tokens": 1222028396.0, "step": 1597 }, { "epoch": 0.5855356570329318, "grad_norm": 0.21703364939135406, "learning_rate": 3.929075733219647e-05, "loss": 0.5586, "num_tokens": 1222734276.0, "step": 1598 }, { "epoch": 0.5859020748408372, "grad_norm": 0.18243602036275422, "learning_rate": 3.928956973862441e-05, "loss": 0.5648, "num_tokens": 1223557266.0, "step": 1599 }, { "epoch": 0.5862684926487427, "grad_norm": 0.19265819544011079, "learning_rate": 3.9288381171615105e-05, "loss": 0.597, "num_tokens": 1224272128.0, "step": 1600 }, { "epoch": 0.5866349104566482, "grad_norm": 0.21264726924510932, "learning_rate": 3.928719163123548e-05, "loss": 0.5911, "num_tokens": 1225016906.0, "step": 1601 }, { "epoch": 0.5870013282645536, "grad_norm": 0.1702650726222326, "learning_rate": 3.92860011175525e-05, "loss": 0.5576, "num_tokens": 1225749615.0, "step": 1602 }, { "epoch": 0.5873677460724591, "grad_norm": 0.20793511185860214, "learning_rate": 3.9284809630633185e-05, "loss": 0.57, "num_tokens": 1226459763.0, "step": 1603 }, { "epoch": 0.5877341638803646, "grad_norm": 0.19872477320969675, "learning_rate": 3.928361717054464e-05, "loss": 0.6041, "num_tokens": 1227255488.0, "step": 1604 }, { "epoch": 0.5881005816882701, "grad_norm": 0.18311502225271653, "learning_rate": 3.928242373735399e-05, "loss": 0.575, "num_tokens": 1228040897.0, "step": 1605 }, { "epoch": 0.5884669994961755, "grad_norm": 0.19661896124993677, "learning_rate": 3.928122933112843e-05, "loss": 0.619, "num_tokens": 1228788979.0, "step": 1606 }, { "epoch": 0.588833417304081, "grad_norm": 0.16237969207780018, "learning_rate": 3.92800339519352e-05, "loss": 0.5323, "num_tokens": 1229581129.0, "step": 1607 }, { "epoch": 0.5891998351119865, "grad_norm": 0.19430937011612345, "learning_rate": 3.927883759984161e-05, "loss": 0.5685, "num_tokens": 1230228110.0, "step": 1608 }, { "epoch": 0.589566252919892, "grad_norm": 0.16173402344474133, "learning_rate": 3.927764027491502e-05, "loss": 0.5567, "num_tokens": 1231124925.0, "step": 1609 }, { "epoch": 0.5899326707277974, "grad_norm": 0.17292384322480722, "learning_rate": 3.9276441977222834e-05, "loss": 0.5738, "num_tokens": 1231969101.0, "step": 1610 }, { "epoch": 0.5902990885357028, "grad_norm": 0.16976362897203537, "learning_rate": 3.927524270683252e-05, "loss": 0.5443, "num_tokens": 1232748688.0, "step": 1611 }, { "epoch": 0.5906655063436083, "grad_norm": 0.1877058440385834, "learning_rate": 3.927404246381161e-05, "loss": 0.5806, "num_tokens": 1233544559.0, "step": 1612 }, { "epoch": 0.5910319241515137, "grad_norm": 0.18453279845015386, "learning_rate": 3.927284124822767e-05, "loss": 0.5627, "num_tokens": 1234336609.0, "step": 1613 }, { "epoch": 0.5913983419594192, "grad_norm": 0.16952167611544822, "learning_rate": 3.927163906014832e-05, "loss": 0.5473, "num_tokens": 1235127059.0, "step": 1614 }, { "epoch": 0.5917647597673247, "grad_norm": 0.17967818731074975, "learning_rate": 3.927043589964128e-05, "loss": 0.6073, "num_tokens": 1235855362.0, "step": 1615 }, { "epoch": 0.5921311775752301, "grad_norm": 0.17536281740374432, "learning_rate": 3.9269231766774256e-05, "loss": 0.5519, "num_tokens": 1236650933.0, "step": 1616 }, { "epoch": 0.5924975953831356, "grad_norm": 0.19424677532723944, "learning_rate": 3.926802666161506e-05, "loss": 0.5961, "num_tokens": 1237334665.0, "step": 1617 }, { "epoch": 0.5928640131910411, "grad_norm": 0.20739627967052587, "learning_rate": 3.9266820584231525e-05, "loss": 0.5884, "num_tokens": 1238031332.0, "step": 1618 }, { "epoch": 0.5932304309989466, "grad_norm": 0.20772462684519763, "learning_rate": 3.9265613534691575e-05, "loss": 0.6101, "num_tokens": 1238713959.0, "step": 1619 }, { "epoch": 0.593596848806852, "grad_norm": 0.21075531118273938, "learning_rate": 3.926440551306317e-05, "loss": 0.5598, "num_tokens": 1239358233.0, "step": 1620 }, { "epoch": 0.5939632666147575, "grad_norm": 0.19257566687242045, "learning_rate": 3.92631965194143e-05, "loss": 0.5672, "num_tokens": 1240094786.0, "step": 1621 }, { "epoch": 0.594329684422663, "grad_norm": 0.21734234924442822, "learning_rate": 3.9261986553813065e-05, "loss": 0.5837, "num_tokens": 1240779350.0, "step": 1622 }, { "epoch": 0.5946961022305685, "grad_norm": 0.18418217644702536, "learning_rate": 3.9260775616327566e-05, "loss": 0.5802, "num_tokens": 1241593339.0, "step": 1623 }, { "epoch": 0.5950625200384738, "grad_norm": 0.21279507880756288, "learning_rate": 3.925956370702599e-05, "loss": 0.58, "num_tokens": 1242376901.0, "step": 1624 }, { "epoch": 0.5954289378463793, "grad_norm": 0.1906196861756078, "learning_rate": 3.925835082597657e-05, "loss": 0.589, "num_tokens": 1243113577.0, "step": 1625 }, { "epoch": 0.5957953556542848, "grad_norm": 0.1933536432700751, "learning_rate": 3.9257136973247584e-05, "loss": 0.5777, "num_tokens": 1243829645.0, "step": 1626 }, { "epoch": 0.5961617734621902, "grad_norm": 0.19197045867100218, "learning_rate": 3.9255922148907394e-05, "loss": 0.5513, "num_tokens": 1244695305.0, "step": 1627 }, { "epoch": 0.5965281912700957, "grad_norm": 0.19368294752471465, "learning_rate": 3.9254706353024384e-05, "loss": 0.5728, "num_tokens": 1245515215.0, "step": 1628 }, { "epoch": 0.5968946090780012, "grad_norm": 0.20666378548091896, "learning_rate": 3.9253489585667e-05, "loss": 0.5633, "num_tokens": 1246331901.0, "step": 1629 }, { "epoch": 0.5972610268859067, "grad_norm": 0.20123577948666335, "learning_rate": 3.9252271846903755e-05, "loss": 0.5716, "num_tokens": 1247011958.0, "step": 1630 }, { "epoch": 0.5976274446938121, "grad_norm": 0.1974682808861828, "learning_rate": 3.925105313680322e-05, "loss": 0.5368, "num_tokens": 1247815663.0, "step": 1631 }, { "epoch": 0.5979938625017176, "grad_norm": 0.18679397074372472, "learning_rate": 3.9249833455434e-05, "loss": 0.5562, "num_tokens": 1248455018.0, "step": 1632 }, { "epoch": 0.5983602803096231, "grad_norm": 0.2571422487748846, "learning_rate": 3.924861280286476e-05, "loss": 0.5893, "num_tokens": 1249273215.0, "step": 1633 }, { "epoch": 0.5987266981175285, "grad_norm": 0.2100648154402742, "learning_rate": 3.924739117916424e-05, "loss": 0.5605, "num_tokens": 1249966389.0, "step": 1634 }, { "epoch": 0.599093115925434, "grad_norm": 0.2541567183546637, "learning_rate": 3.92461685844012e-05, "loss": 0.5885, "num_tokens": 1250757607.0, "step": 1635 }, { "epoch": 0.5994595337333395, "grad_norm": 0.25507815317318694, "learning_rate": 3.9244945018644494e-05, "loss": 0.558, "num_tokens": 1251463934.0, "step": 1636 }, { "epoch": 0.5998259515412449, "grad_norm": 0.20579235109728033, "learning_rate": 3.924372048196301e-05, "loss": 0.5834, "num_tokens": 1252254033.0, "step": 1637 }, { "epoch": 0.6001923693491503, "grad_norm": 0.2655509304032282, "learning_rate": 3.924249497442568e-05, "loss": 0.611, "num_tokens": 1253052102.0, "step": 1638 }, { "epoch": 0.6005587871570558, "grad_norm": 0.23606849623728146, "learning_rate": 3.924126849610151e-05, "loss": 0.5844, "num_tokens": 1253790980.0, "step": 1639 }, { "epoch": 0.6009252049649613, "grad_norm": 0.22906266213238186, "learning_rate": 3.924004104705955e-05, "loss": 0.5742, "num_tokens": 1254532028.0, "step": 1640 }, { "epoch": 0.6012916227728667, "grad_norm": 0.20832912845293083, "learning_rate": 3.9238812627368905e-05, "loss": 0.5709, "num_tokens": 1255256450.0, "step": 1641 }, { "epoch": 0.6016580405807722, "grad_norm": 0.2189220095319481, "learning_rate": 3.9237583237098744e-05, "loss": 0.5455, "num_tokens": 1255957274.0, "step": 1642 }, { "epoch": 0.6020244583886777, "grad_norm": 0.19028541366824828, "learning_rate": 3.923635287631828e-05, "loss": 0.5839, "num_tokens": 1256712249.0, "step": 1643 }, { "epoch": 0.6023908761965832, "grad_norm": 0.19270827971433857, "learning_rate": 3.9235121545096795e-05, "loss": 0.5596, "num_tokens": 1257615248.0, "step": 1644 }, { "epoch": 0.6027572940044886, "grad_norm": 0.17418878786869504, "learning_rate": 3.9233889243503596e-05, "loss": 0.5346, "num_tokens": 1258318436.0, "step": 1645 }, { "epoch": 0.6031237118123941, "grad_norm": 0.18505195959711754, "learning_rate": 3.923265597160809e-05, "loss": 0.5758, "num_tokens": 1259149548.0, "step": 1646 }, { "epoch": 0.6034901296202996, "grad_norm": 0.17241590694196626, "learning_rate": 3.9231421729479685e-05, "loss": 0.5889, "num_tokens": 1259984676.0, "step": 1647 }, { "epoch": 0.603856547428205, "grad_norm": 0.17791110522036205, "learning_rate": 3.9230186517187886e-05, "loss": 0.576, "num_tokens": 1260739840.0, "step": 1648 }, { "epoch": 0.6042229652361105, "grad_norm": 0.18613384122055737, "learning_rate": 3.922895033480223e-05, "loss": 0.5403, "num_tokens": 1261477897.0, "step": 1649 }, { "epoch": 0.604589383044016, "grad_norm": 0.22117205355362304, "learning_rate": 3.9227713182392336e-05, "loss": 0.5781, "num_tokens": 1262259275.0, "step": 1650 }, { "epoch": 0.6049558008519214, "grad_norm": 0.20369508052783455, "learning_rate": 3.9226475060027835e-05, "loss": 0.5666, "num_tokens": 1263087049.0, "step": 1651 }, { "epoch": 0.6053222186598268, "grad_norm": 0.22335621692560206, "learning_rate": 3.922523596777845e-05, "loss": 0.5851, "num_tokens": 1263888175.0, "step": 1652 }, { "epoch": 0.6056886364677323, "grad_norm": 0.23177524476295627, "learning_rate": 3.922399590571394e-05, "loss": 0.5483, "num_tokens": 1264720628.0, "step": 1653 }, { "epoch": 0.6060550542756378, "grad_norm": 0.19481272045013587, "learning_rate": 3.922275487390412e-05, "loss": 0.575, "num_tokens": 1265358794.0, "step": 1654 }, { "epoch": 0.6064214720835432, "grad_norm": 0.25748041852915926, "learning_rate": 3.922151287241886e-05, "loss": 0.545, "num_tokens": 1266164346.0, "step": 1655 }, { "epoch": 0.6067878898914487, "grad_norm": 0.26398449752189174, "learning_rate": 3.92202699013281e-05, "loss": 0.5883, "num_tokens": 1267001153.0, "step": 1656 }, { "epoch": 0.6071543076993542, "grad_norm": 0.17944651043723847, "learning_rate": 3.921902596070181e-05, "loss": 0.6051, "num_tokens": 1267770662.0, "step": 1657 }, { "epoch": 0.6075207255072597, "grad_norm": 0.20968717268890494, "learning_rate": 3.921778105061003e-05, "loss": 0.567, "num_tokens": 1268511811.0, "step": 1658 }, { "epoch": 0.6078871433151651, "grad_norm": 0.2282661186167328, "learning_rate": 3.9216535171122857e-05, "loss": 0.5598, "num_tokens": 1269372982.0, "step": 1659 }, { "epoch": 0.6082535611230706, "grad_norm": 0.1996562217633348, "learning_rate": 3.921528832231043e-05, "loss": 0.5628, "num_tokens": 1270017837.0, "step": 1660 }, { "epoch": 0.6086199789309761, "grad_norm": 0.17426699953797745, "learning_rate": 3.9214040504242946e-05, "loss": 0.5591, "num_tokens": 1270966942.0, "step": 1661 }, { "epoch": 0.6089863967388816, "grad_norm": 0.21069628304431762, "learning_rate": 3.921279171699066e-05, "loss": 0.598, "num_tokens": 1271702501.0, "step": 1662 }, { "epoch": 0.609352814546787, "grad_norm": 0.17136839419581704, "learning_rate": 3.9211541960623895e-05, "loss": 0.5683, "num_tokens": 1272576829.0, "step": 1663 }, { "epoch": 0.6097192323546924, "grad_norm": 0.188595596643417, "learning_rate": 3.9210291235212994e-05, "loss": 0.5728, "num_tokens": 1273268060.0, "step": 1664 }, { "epoch": 0.6100856501625979, "grad_norm": 0.20932949353931835, "learning_rate": 3.92090395408284e-05, "loss": 0.601, "num_tokens": 1274043127.0, "step": 1665 }, { "epoch": 0.6104520679705033, "grad_norm": 0.19059896441082624, "learning_rate": 3.9207786877540556e-05, "loss": 0.5962, "num_tokens": 1274771574.0, "step": 1666 }, { "epoch": 0.6108184857784088, "grad_norm": 0.17556162374275985, "learning_rate": 3.920653324542001e-05, "loss": 0.5372, "num_tokens": 1275638672.0, "step": 1667 }, { "epoch": 0.6111849035863143, "grad_norm": 0.19080503195978427, "learning_rate": 3.920527864453734e-05, "loss": 0.5554, "num_tokens": 1276327680.0, "step": 1668 }, { "epoch": 0.6115513213942197, "grad_norm": 0.18845725419554357, "learning_rate": 3.9204023074963184e-05, "loss": 0.5878, "num_tokens": 1277103551.0, "step": 1669 }, { "epoch": 0.6119177392021252, "grad_norm": 0.16805570361068337, "learning_rate": 3.920276653676823e-05, "loss": 0.5705, "num_tokens": 1277982258.0, "step": 1670 }, { "epoch": 0.6122841570100307, "grad_norm": 0.21400607098742025, "learning_rate": 3.9201509030023224e-05, "loss": 0.6005, "num_tokens": 1278924837.0, "step": 1671 }, { "epoch": 0.6126505748179362, "grad_norm": 0.21305834382809774, "learning_rate": 3.920025055479897e-05, "loss": 0.5356, "num_tokens": 1279715942.0, "step": 1672 }, { "epoch": 0.6130169926258416, "grad_norm": 0.23373260204593416, "learning_rate": 3.9198991111166316e-05, "loss": 0.576, "num_tokens": 1280474417.0, "step": 1673 }, { "epoch": 0.6133834104337471, "grad_norm": 0.18944052909038472, "learning_rate": 3.9197730699196175e-05, "loss": 0.5606, "num_tokens": 1281195334.0, "step": 1674 }, { "epoch": 0.6137498282416526, "grad_norm": 0.17781879342863227, "learning_rate": 3.9196469318959504e-05, "loss": 0.5975, "num_tokens": 1281898897.0, "step": 1675 }, { "epoch": 0.6141162460495581, "grad_norm": 0.18474303234114275, "learning_rate": 3.919520697052734e-05, "loss": 0.5638, "num_tokens": 1282652102.0, "step": 1676 }, { "epoch": 0.6144826638574634, "grad_norm": 0.18792580408584475, "learning_rate": 3.9193943653970735e-05, "loss": 0.5545, "num_tokens": 1283475026.0, "step": 1677 }, { "epoch": 0.6148490816653689, "grad_norm": 0.18250636419370617, "learning_rate": 3.919267936936082e-05, "loss": 0.6019, "num_tokens": 1284304956.0, "step": 1678 }, { "epoch": 0.6152154994732744, "grad_norm": 0.18708620738778664, "learning_rate": 3.919141411676879e-05, "loss": 0.5587, "num_tokens": 1285033256.0, "step": 1679 }, { "epoch": 0.6155819172811798, "grad_norm": 0.18607654340909688, "learning_rate": 3.919014789626586e-05, "loss": 0.5802, "num_tokens": 1285780090.0, "step": 1680 }, { "epoch": 0.6159483350890853, "grad_norm": 0.176982882746924, "learning_rate": 3.918888070792335e-05, "loss": 0.5714, "num_tokens": 1286614611.0, "step": 1681 }, { "epoch": 0.6163147528969908, "grad_norm": 0.19190823274306584, "learning_rate": 3.918761255181258e-05, "loss": 0.5944, "num_tokens": 1287341343.0, "step": 1682 }, { "epoch": 0.6166811707048963, "grad_norm": 0.19188171779084798, "learning_rate": 3.918634342800496e-05, "loss": 0.5779, "num_tokens": 1288085026.0, "step": 1683 }, { "epoch": 0.6170475885128017, "grad_norm": 0.18745971524241564, "learning_rate": 3.9185073336571935e-05, "loss": 0.5504, "num_tokens": 1288734107.0, "step": 1684 }, { "epoch": 0.6174140063207072, "grad_norm": 0.19595003253996382, "learning_rate": 3.9183802277585024e-05, "loss": 0.5633, "num_tokens": 1289340879.0, "step": 1685 }, { "epoch": 0.6177804241286127, "grad_norm": 0.20303576882281427, "learning_rate": 3.9182530251115794e-05, "loss": 0.6075, "num_tokens": 1290096669.0, "step": 1686 }, { "epoch": 0.6181468419365181, "grad_norm": 0.18908699630418363, "learning_rate": 3.918125725723585e-05, "loss": 0.5547, "num_tokens": 1290855287.0, "step": 1687 }, { "epoch": 0.6185132597444236, "grad_norm": 0.19491494316580485, "learning_rate": 3.917998329601686e-05, "loss": 0.5386, "num_tokens": 1291624052.0, "step": 1688 }, { "epoch": 0.6188796775523291, "grad_norm": 0.1991553049732098, "learning_rate": 3.917870836753058e-05, "loss": 0.5321, "num_tokens": 1292424161.0, "step": 1689 }, { "epoch": 0.6192460953602346, "grad_norm": 0.19765422555572562, "learning_rate": 3.917743247184876e-05, "loss": 0.5801, "num_tokens": 1293208425.0, "step": 1690 }, { "epoch": 0.6196125131681399, "grad_norm": 0.19111770182550594, "learning_rate": 3.917615560904324e-05, "loss": 0.5504, "num_tokens": 1294002824.0, "step": 1691 }, { "epoch": 0.6199789309760454, "grad_norm": 0.19625274634807993, "learning_rate": 3.9174877779185934e-05, "loss": 0.5595, "num_tokens": 1294682527.0, "step": 1692 }, { "epoch": 0.6203453487839509, "grad_norm": 0.18608136663591526, "learning_rate": 3.917359898234876e-05, "loss": 0.5699, "num_tokens": 1295488558.0, "step": 1693 }, { "epoch": 0.6207117665918563, "grad_norm": 0.1963201633853269, "learning_rate": 3.917231921860372e-05, "loss": 0.5607, "num_tokens": 1296153883.0, "step": 1694 }, { "epoch": 0.6210781843997618, "grad_norm": 0.2012468813412902, "learning_rate": 3.9171038488022876e-05, "loss": 0.5601, "num_tokens": 1296902956.0, "step": 1695 }, { "epoch": 0.6214446022076673, "grad_norm": 0.20705119481844161, "learning_rate": 3.916975679067834e-05, "loss": 0.5949, "num_tokens": 1297698471.0, "step": 1696 }, { "epoch": 0.6218110200155728, "grad_norm": 0.22877563093784745, "learning_rate": 3.9168474126642266e-05, "loss": 0.5751, "num_tokens": 1298469072.0, "step": 1697 }, { "epoch": 0.6221774378234782, "grad_norm": 0.18022881027680251, "learning_rate": 3.9167190495986864e-05, "loss": 0.5569, "num_tokens": 1299304074.0, "step": 1698 }, { "epoch": 0.6225438556313837, "grad_norm": 0.21107724884194723, "learning_rate": 3.9165905898784415e-05, "loss": 0.6064, "num_tokens": 1300043929.0, "step": 1699 }, { "epoch": 0.6229102734392892, "grad_norm": 0.2067505821601283, "learning_rate": 3.916462033510724e-05, "loss": 0.5699, "num_tokens": 1300842825.0, "step": 1700 }, { "epoch": 0.6232766912471946, "grad_norm": 0.17700205808095093, "learning_rate": 3.9163333805027725e-05, "loss": 0.5414, "num_tokens": 1301636293.0, "step": 1701 }, { "epoch": 0.6236431090551001, "grad_norm": 0.2370341996843488, "learning_rate": 3.9162046308618306e-05, "loss": 0.6032, "num_tokens": 1302392582.0, "step": 1702 }, { "epoch": 0.6240095268630056, "grad_norm": 0.18034522561324773, "learning_rate": 3.916075784595146e-05, "loss": 0.576, "num_tokens": 1303198072.0, "step": 1703 }, { "epoch": 0.624375944670911, "grad_norm": 0.18321579415490183, "learning_rate": 3.915946841709973e-05, "loss": 0.5234, "num_tokens": 1304104630.0, "step": 1704 }, { "epoch": 0.6247423624788164, "grad_norm": 0.19835849380953924, "learning_rate": 3.915817802213572e-05, "loss": 0.5684, "num_tokens": 1304896513.0, "step": 1705 }, { "epoch": 0.6251087802867219, "grad_norm": 0.19287438195353337, "learning_rate": 3.9156886661132085e-05, "loss": 0.5791, "num_tokens": 1305634875.0, "step": 1706 }, { "epoch": 0.6254751980946274, "grad_norm": 0.19331781850521415, "learning_rate": 3.915559433416152e-05, "loss": 0.5684, "num_tokens": 1306405155.0, "step": 1707 }, { "epoch": 0.6258416159025328, "grad_norm": 0.23982886140728746, "learning_rate": 3.915430104129681e-05, "loss": 0.5775, "num_tokens": 1307253285.0, "step": 1708 }, { "epoch": 0.6262080337104383, "grad_norm": 0.2339164751125073, "learning_rate": 3.915300678261073e-05, "loss": 0.5915, "num_tokens": 1308082172.0, "step": 1709 }, { "epoch": 0.6265744515183438, "grad_norm": 0.18825749592878205, "learning_rate": 3.915171155817618e-05, "loss": 0.5469, "num_tokens": 1308773921.0, "step": 1710 }, { "epoch": 0.6269408693262493, "grad_norm": 0.2509914715790982, "learning_rate": 3.9150415368066076e-05, "loss": 0.5867, "num_tokens": 1309544979.0, "step": 1711 }, { "epoch": 0.6273072871341547, "grad_norm": 0.19907976575628403, "learning_rate": 3.91491182123534e-05, "loss": 0.5454, "num_tokens": 1310279943.0, "step": 1712 }, { "epoch": 0.6276737049420602, "grad_norm": 0.19309737901039725, "learning_rate": 3.914782009111117e-05, "loss": 0.5411, "num_tokens": 1311077348.0, "step": 1713 }, { "epoch": 0.6280401227499657, "grad_norm": 0.19071053755280146, "learning_rate": 3.91465210044125e-05, "loss": 0.5782, "num_tokens": 1311807334.0, "step": 1714 }, { "epoch": 0.6284065405578712, "grad_norm": 0.1704941851741697, "learning_rate": 3.91452209523305e-05, "loss": 0.5936, "num_tokens": 1312588077.0, "step": 1715 }, { "epoch": 0.6287729583657766, "grad_norm": 0.1712757042857806, "learning_rate": 3.914391993493838e-05, "loss": 0.5308, "num_tokens": 1313474279.0, "step": 1716 }, { "epoch": 0.629139376173682, "grad_norm": 0.17976510731555068, "learning_rate": 3.91426179523094e-05, "loss": 0.5446, "num_tokens": 1314354629.0, "step": 1717 }, { "epoch": 0.6295057939815875, "grad_norm": 0.18565009211200484, "learning_rate": 3.914131500451684e-05, "loss": 0.5407, "num_tokens": 1315128525.0, "step": 1718 }, { "epoch": 0.6298722117894929, "grad_norm": 0.19665219330853664, "learning_rate": 3.9140011091634075e-05, "loss": 0.5545, "num_tokens": 1315951788.0, "step": 1719 }, { "epoch": 0.6302386295973984, "grad_norm": 0.21037773919044214, "learning_rate": 3.913870621373452e-05, "loss": 0.582, "num_tokens": 1316796808.0, "step": 1720 }, { "epoch": 0.6306050474053039, "grad_norm": 0.2104200334269707, "learning_rate": 3.913740037089164e-05, "loss": 0.5435, "num_tokens": 1317382609.0, "step": 1721 }, { "epoch": 0.6309714652132093, "grad_norm": 0.1844703541537951, "learning_rate": 3.9136093563178946e-05, "loss": 0.549, "num_tokens": 1318178863.0, "step": 1722 }, { "epoch": 0.6313378830211148, "grad_norm": 0.21343897393410086, "learning_rate": 3.913478579067003e-05, "loss": 0.5741, "num_tokens": 1318888224.0, "step": 1723 }, { "epoch": 0.6317043008290203, "grad_norm": 0.17018548555397814, "learning_rate": 3.91334770534385e-05, "loss": 0.5536, "num_tokens": 1319719330.0, "step": 1724 }, { "epoch": 0.6320707186369258, "grad_norm": 0.2001176117353387, "learning_rate": 3.9132167351558064e-05, "loss": 0.5468, "num_tokens": 1320519610.0, "step": 1725 }, { "epoch": 0.6324371364448312, "grad_norm": 0.17327082563187624, "learning_rate": 3.9130856685102455e-05, "loss": 0.5698, "num_tokens": 1321382539.0, "step": 1726 }, { "epoch": 0.6328035542527367, "grad_norm": 0.18176669968860712, "learning_rate": 3.912954505414546e-05, "loss": 0.5812, "num_tokens": 1322145901.0, "step": 1727 }, { "epoch": 0.6331699720606422, "grad_norm": 0.20052586719924523, "learning_rate": 3.912823245876093e-05, "loss": 0.5309, "num_tokens": 1323162900.0, "step": 1728 }, { "epoch": 0.6335363898685477, "grad_norm": 0.20108193750180195, "learning_rate": 3.912691889902276e-05, "loss": 0.559, "num_tokens": 1323914345.0, "step": 1729 }, { "epoch": 0.6339028076764531, "grad_norm": 0.17006547119575066, "learning_rate": 3.9125604375004906e-05, "loss": 0.5793, "num_tokens": 1324693487.0, "step": 1730 }, { "epoch": 0.6342692254843585, "grad_norm": 0.2000089235599305, "learning_rate": 3.91242888867814e-05, "loss": 0.5652, "num_tokens": 1325376896.0, "step": 1731 }, { "epoch": 0.634635643292264, "grad_norm": 0.1897253921306036, "learning_rate": 3.912297243442628e-05, "loss": 0.5832, "num_tokens": 1326228407.0, "step": 1732 }, { "epoch": 0.6350020611001694, "grad_norm": 0.18231562309366847, "learning_rate": 3.912165501801368e-05, "loss": 0.5904, "num_tokens": 1326974827.0, "step": 1733 }, { "epoch": 0.6353684789080749, "grad_norm": 0.18575131528637795, "learning_rate": 3.9120336637617763e-05, "loss": 0.5947, "num_tokens": 1327727360.0, "step": 1734 }, { "epoch": 0.6357348967159804, "grad_norm": 0.2076759917198541, "learning_rate": 3.911901729331277e-05, "loss": 0.5623, "num_tokens": 1328368549.0, "step": 1735 }, { "epoch": 0.6361013145238859, "grad_norm": 0.1895026698052844, "learning_rate": 3.911769698517297e-05, "loss": 0.5705, "num_tokens": 1329167735.0, "step": 1736 }, { "epoch": 0.6364677323317913, "grad_norm": 0.17189471413002366, "learning_rate": 3.91163757132727e-05, "loss": 0.5663, "num_tokens": 1329858104.0, "step": 1737 }, { "epoch": 0.6368341501396968, "grad_norm": 0.19808898929582805, "learning_rate": 3.911505347768636e-05, "loss": 0.5769, "num_tokens": 1330615148.0, "step": 1738 }, { "epoch": 0.6372005679476023, "grad_norm": 0.18206509909751592, "learning_rate": 3.911373027848839e-05, "loss": 0.5934, "num_tokens": 1331405228.0, "step": 1739 }, { "epoch": 0.6375669857555077, "grad_norm": 0.2088247326101793, "learning_rate": 3.911240611575328e-05, "loss": 0.563, "num_tokens": 1332087257.0, "step": 1740 }, { "epoch": 0.6379334035634132, "grad_norm": 0.18372142920719292, "learning_rate": 3.9111080989555596e-05, "loss": 0.5647, "num_tokens": 1332852598.0, "step": 1741 }, { "epoch": 0.6382998213713187, "grad_norm": 0.17572207840160523, "learning_rate": 3.9109754899969935e-05, "loss": 0.5629, "num_tokens": 1333678296.0, "step": 1742 }, { "epoch": 0.6386662391792242, "grad_norm": 0.19536982758164995, "learning_rate": 3.910842784707097e-05, "loss": 0.595, "num_tokens": 1334437593.0, "step": 1743 }, { "epoch": 0.6390326569871295, "grad_norm": 0.17099008249712788, "learning_rate": 3.9107099830933407e-05, "loss": 0.5632, "num_tokens": 1335256377.0, "step": 1744 }, { "epoch": 0.639399074795035, "grad_norm": 0.2157163465333828, "learning_rate": 3.910577085163201e-05, "loss": 0.5795, "num_tokens": 1335950717.0, "step": 1745 }, { "epoch": 0.6397654926029405, "grad_norm": 0.17435882813040446, "learning_rate": 3.910444090924162e-05, "loss": 0.5425, "num_tokens": 1336765433.0, "step": 1746 }, { "epoch": 0.6401319104108459, "grad_norm": 0.23280693788135293, "learning_rate": 3.910311000383711e-05, "loss": 0.6296, "num_tokens": 1337505575.0, "step": 1747 }, { "epoch": 0.6404983282187514, "grad_norm": 0.20809528774177666, "learning_rate": 3.910177813549341e-05, "loss": 0.5622, "num_tokens": 1338236151.0, "step": 1748 }, { "epoch": 0.6408647460266569, "grad_norm": 0.18669565044383166, "learning_rate": 3.91004453042855e-05, "loss": 0.5918, "num_tokens": 1339015687.0, "step": 1749 }, { "epoch": 0.6412311638345624, "grad_norm": 0.22594037673018053, "learning_rate": 3.909911151028843e-05, "loss": 0.5422, "num_tokens": 1339798288.0, "step": 1750 }, { "epoch": 0.6415975816424678, "grad_norm": 0.2437172621718761, "learning_rate": 3.909777675357729e-05, "loss": 0.5258, "num_tokens": 1340615261.0, "step": 1751 }, { "epoch": 0.6419639994503733, "grad_norm": 0.17135785875787568, "learning_rate": 3.909644103422724e-05, "loss": 0.5549, "num_tokens": 1341439678.0, "step": 1752 }, { "epoch": 0.6423304172582788, "grad_norm": 0.2141490927049503, "learning_rate": 3.909510435231347e-05, "loss": 0.6057, "num_tokens": 1342210960.0, "step": 1753 }, { "epoch": 0.6426968350661842, "grad_norm": 0.19030512964182633, "learning_rate": 3.909376670791125e-05, "loss": 0.5769, "num_tokens": 1343003830.0, "step": 1754 }, { "epoch": 0.6430632528740897, "grad_norm": 0.19818739943339655, "learning_rate": 3.909242810109588e-05, "loss": 0.5958, "num_tokens": 1343627752.0, "step": 1755 }, { "epoch": 0.6434296706819952, "grad_norm": 0.200473764462265, "learning_rate": 3.909108853194273e-05, "loss": 0.5546, "num_tokens": 1344446526.0, "step": 1756 }, { "epoch": 0.6437960884899007, "grad_norm": 0.20157170382730655, "learning_rate": 3.9089748000527225e-05, "loss": 0.5677, "num_tokens": 1345153109.0, "step": 1757 }, { "epoch": 0.644162506297806, "grad_norm": 0.21117011833041993, "learning_rate": 3.9088406506924844e-05, "loss": 0.639, "num_tokens": 1345864600.0, "step": 1758 }, { "epoch": 0.6445289241057115, "grad_norm": 0.19477780561001437, "learning_rate": 3.9087064051211106e-05, "loss": 0.5807, "num_tokens": 1346606673.0, "step": 1759 }, { "epoch": 0.644895341913617, "grad_norm": 0.18142808771021457, "learning_rate": 3.90857206334616e-05, "loss": 0.5326, "num_tokens": 1347477474.0, "step": 1760 }, { "epoch": 0.6452617597215224, "grad_norm": 0.1933486635005754, "learning_rate": 3.908437625375195e-05, "loss": 0.5411, "num_tokens": 1348208372.0, "step": 1761 }, { "epoch": 0.6456281775294279, "grad_norm": 0.21329092638410468, "learning_rate": 3.908303091215786e-05, "loss": 0.558, "num_tokens": 1348999854.0, "step": 1762 }, { "epoch": 0.6459945953373334, "grad_norm": 0.18701999058599109, "learning_rate": 3.908168460875508e-05, "loss": 0.5298, "num_tokens": 1349730553.0, "step": 1763 }, { "epoch": 0.6463610131452389, "grad_norm": 0.19525793528457264, "learning_rate": 3.908033734361939e-05, "loss": 0.5972, "num_tokens": 1350462577.0, "step": 1764 }, { "epoch": 0.6467274309531443, "grad_norm": 0.2281261683222654, "learning_rate": 3.907898911682667e-05, "loss": 0.5758, "num_tokens": 1351271259.0, "step": 1765 }, { "epoch": 0.6470938487610498, "grad_norm": 0.18779254931936715, "learning_rate": 3.907763992845281e-05, "loss": 0.5684, "num_tokens": 1352090702.0, "step": 1766 }, { "epoch": 0.6474602665689553, "grad_norm": 0.1699207316522776, "learning_rate": 3.907628977857378e-05, "loss": 0.5914, "num_tokens": 1352862816.0, "step": 1767 }, { "epoch": 0.6478266843768608, "grad_norm": 0.2436900790737895, "learning_rate": 3.9074938667265584e-05, "loss": 0.5676, "num_tokens": 1353663068.0, "step": 1768 }, { "epoch": 0.6481931021847662, "grad_norm": 0.19462133971475631, "learning_rate": 3.90735865946043e-05, "loss": 0.5936, "num_tokens": 1354418856.0, "step": 1769 }, { "epoch": 0.6485595199926717, "grad_norm": 0.19445929129911185, "learning_rate": 3.9072233560666055e-05, "loss": 0.55, "num_tokens": 1355217256.0, "step": 1770 }, { "epoch": 0.6489259378005771, "grad_norm": 0.20228392971674747, "learning_rate": 3.9070879565527024e-05, "loss": 0.5563, "num_tokens": 1356033007.0, "step": 1771 }, { "epoch": 0.6492923556084825, "grad_norm": 0.17971871596019756, "learning_rate": 3.906952460926344e-05, "loss": 0.5554, "num_tokens": 1356723941.0, "step": 1772 }, { "epoch": 0.649658773416388, "grad_norm": 0.2004445441417048, "learning_rate": 3.9068168691951595e-05, "loss": 0.5512, "num_tokens": 1357471706.0, "step": 1773 }, { "epoch": 0.6500251912242935, "grad_norm": 0.1699946646809195, "learning_rate": 3.9066811813667825e-05, "loss": 0.5465, "num_tokens": 1358256666.0, "step": 1774 }, { "epoch": 0.650391609032199, "grad_norm": 0.18239892289633333, "learning_rate": 3.906545397448852e-05, "loss": 0.5653, "num_tokens": 1358959532.0, "step": 1775 }, { "epoch": 0.6507580268401044, "grad_norm": 0.1965925362783708, "learning_rate": 3.906409517449014e-05, "loss": 0.5416, "num_tokens": 1359736217.0, "step": 1776 }, { "epoch": 0.6511244446480099, "grad_norm": 0.18800515262862436, "learning_rate": 3.9062735413749185e-05, "loss": 0.5668, "num_tokens": 1360465908.0, "step": 1777 }, { "epoch": 0.6514908624559154, "grad_norm": 0.24081202781742062, "learning_rate": 3.90613746923422e-05, "loss": 0.5602, "num_tokens": 1361271200.0, "step": 1778 }, { "epoch": 0.6518572802638208, "grad_norm": 0.24417346092706954, "learning_rate": 3.9060013010345814e-05, "loss": 0.5685, "num_tokens": 1362016443.0, "step": 1779 }, { "epoch": 0.6522236980717263, "grad_norm": 0.18652934557680959, "learning_rate": 3.905865036783668e-05, "loss": 0.5553, "num_tokens": 1362799325.0, "step": 1780 }, { "epoch": 0.6525901158796318, "grad_norm": 0.23529444854584583, "learning_rate": 3.9057286764891525e-05, "loss": 0.5893, "num_tokens": 1363650550.0, "step": 1781 }, { "epoch": 0.6529565336875373, "grad_norm": 0.18374650550823426, "learning_rate": 3.905592220158712e-05, "loss": 0.5315, "num_tokens": 1364346002.0, "step": 1782 }, { "epoch": 0.6533229514954427, "grad_norm": 0.2548393449697479, "learning_rate": 3.905455667800029e-05, "loss": 0.5662, "num_tokens": 1365157734.0, "step": 1783 }, { "epoch": 0.6536893693033481, "grad_norm": 0.21263908182182653, "learning_rate": 3.905319019420792e-05, "loss": 0.5774, "num_tokens": 1366050633.0, "step": 1784 }, { "epoch": 0.6540557871112536, "grad_norm": 0.21776299344408603, "learning_rate": 3.9051822750286936e-05, "loss": 0.554, "num_tokens": 1366706135.0, "step": 1785 }, { "epoch": 0.654422204919159, "grad_norm": 0.2532162934883602, "learning_rate": 3.905045434631435e-05, "loss": 0.5672, "num_tokens": 1367605858.0, "step": 1786 }, { "epoch": 0.6547886227270645, "grad_norm": 0.21647798164106669, "learning_rate": 3.9049084982367184e-05, "loss": 0.5486, "num_tokens": 1368373806.0, "step": 1787 }, { "epoch": 0.65515504053497, "grad_norm": 0.24424892902025355, "learning_rate": 3.9047714658522553e-05, "loss": 0.5754, "num_tokens": 1369192195.0, "step": 1788 }, { "epoch": 0.6555214583428755, "grad_norm": 0.2493734614180281, "learning_rate": 3.904634337485759e-05, "loss": 0.5607, "num_tokens": 1369961461.0, "step": 1789 }, { "epoch": 0.6558878761507809, "grad_norm": 0.21408948363691763, "learning_rate": 3.904497113144952e-05, "loss": 0.5576, "num_tokens": 1370929375.0, "step": 1790 }, { "epoch": 0.6562542939586864, "grad_norm": 0.19000825642304786, "learning_rate": 3.9043597928375586e-05, "loss": 0.5758, "num_tokens": 1371600300.0, "step": 1791 }, { "epoch": 0.6566207117665919, "grad_norm": 0.26605070804174585, "learning_rate": 3.9042223765713115e-05, "loss": 0.579, "num_tokens": 1372350252.0, "step": 1792 }, { "epoch": 0.6569871295744973, "grad_norm": 0.22946714544808988, "learning_rate": 3.9040848643539474e-05, "loss": 0.5506, "num_tokens": 1373164409.0, "step": 1793 }, { "epoch": 0.6573535473824028, "grad_norm": 0.17528575462247764, "learning_rate": 3.9039472561932074e-05, "loss": 0.5854, "num_tokens": 1373871790.0, "step": 1794 }, { "epoch": 0.6577199651903083, "grad_norm": 0.23372321577583166, "learning_rate": 3.903809552096841e-05, "loss": 0.5356, "num_tokens": 1374596357.0, "step": 1795 }, { "epoch": 0.6580863829982138, "grad_norm": 0.2115144286703397, "learning_rate": 3.903671752072599e-05, "loss": 0.5853, "num_tokens": 1375311267.0, "step": 1796 }, { "epoch": 0.6584528008061192, "grad_norm": 0.2180654849793084, "learning_rate": 3.903533856128241e-05, "loss": 0.603, "num_tokens": 1376037998.0, "step": 1797 }, { "epoch": 0.6588192186140246, "grad_norm": 0.2016015722652538, "learning_rate": 3.903395864271531e-05, "loss": 0.5554, "num_tokens": 1376690675.0, "step": 1798 }, { "epoch": 0.6591856364219301, "grad_norm": 0.19978327898077025, "learning_rate": 3.903257776510238e-05, "loss": 0.5689, "num_tokens": 1377392972.0, "step": 1799 }, { "epoch": 0.6595520542298355, "grad_norm": 0.18458294795173744, "learning_rate": 3.903119592852136e-05, "loss": 0.5762, "num_tokens": 1378092026.0, "step": 1800 }, { "epoch": 0.659918472037741, "grad_norm": 0.19908251054611895, "learning_rate": 3.9029813133050064e-05, "loss": 0.6069, "num_tokens": 1378823220.0, "step": 1801 }, { "epoch": 0.6602848898456465, "grad_norm": 0.18201865779776197, "learning_rate": 3.9028429378766334e-05, "loss": 0.578, "num_tokens": 1379583474.0, "step": 1802 }, { "epoch": 0.660651307653552, "grad_norm": 0.18354483715702072, "learning_rate": 3.902704466574808e-05, "loss": 0.5762, "num_tokens": 1380283364.0, "step": 1803 }, { "epoch": 0.6610177254614574, "grad_norm": 0.19215643182858932, "learning_rate": 3.9025658994073266e-05, "loss": 0.5665, "num_tokens": 1380993913.0, "step": 1804 }, { "epoch": 0.6613841432693629, "grad_norm": 0.1764877403897528, "learning_rate": 3.902427236381991e-05, "loss": 0.5474, "num_tokens": 1381747161.0, "step": 1805 }, { "epoch": 0.6617505610772684, "grad_norm": 0.20854350814172914, "learning_rate": 3.9022884775066085e-05, "loss": 0.5533, "num_tokens": 1382448932.0, "step": 1806 }, { "epoch": 0.6621169788851738, "grad_norm": 0.1890754466243229, "learning_rate": 3.90214962278899e-05, "loss": 0.5577, "num_tokens": 1383226413.0, "step": 1807 }, { "epoch": 0.6624833966930793, "grad_norm": 0.1889325795394409, "learning_rate": 3.902010672236955e-05, "loss": 0.5554, "num_tokens": 1383937426.0, "step": 1808 }, { "epoch": 0.6628498145009848, "grad_norm": 0.19766614633224605, "learning_rate": 3.9018716258583265e-05, "loss": 0.5598, "num_tokens": 1384716892.0, "step": 1809 }, { "epoch": 0.6632162323088903, "grad_norm": 0.16318811390405472, "learning_rate": 3.9017324836609315e-05, "loss": 0.5758, "num_tokens": 1385417490.0, "step": 1810 }, { "epoch": 0.6635826501167956, "grad_norm": 0.20387574014711954, "learning_rate": 3.901593245652606e-05, "loss": 0.6085, "num_tokens": 1386136544.0, "step": 1811 }, { "epoch": 0.6639490679247011, "grad_norm": 0.19135569600704955, "learning_rate": 3.9014539118411876e-05, "loss": 0.5999, "num_tokens": 1386837081.0, "step": 1812 }, { "epoch": 0.6643154857326066, "grad_norm": 0.1833340339181244, "learning_rate": 3.9013144822345225e-05, "loss": 0.5725, "num_tokens": 1387644949.0, "step": 1813 }, { "epoch": 0.664681903540512, "grad_norm": 0.1985917825226246, "learning_rate": 3.90117495684046e-05, "loss": 0.535, "num_tokens": 1388409062.0, "step": 1814 }, { "epoch": 0.6650483213484175, "grad_norm": 0.16417719520501478, "learning_rate": 3.901035335666856e-05, "loss": 0.5338, "num_tokens": 1389255992.0, "step": 1815 }, { "epoch": 0.665414739156323, "grad_norm": 0.21930303199710868, "learning_rate": 3.900895618721571e-05, "loss": 0.5445, "num_tokens": 1390065352.0, "step": 1816 }, { "epoch": 0.6657811569642285, "grad_norm": 0.1747739876026398, "learning_rate": 3.900755806012473e-05, "loss": 0.5622, "num_tokens": 1390897703.0, "step": 1817 }, { "epoch": 0.6661475747721339, "grad_norm": 0.20012010352460505, "learning_rate": 3.900615897547432e-05, "loss": 0.5592, "num_tokens": 1391658497.0, "step": 1818 }, { "epoch": 0.6665139925800394, "grad_norm": 0.2063530626014587, "learning_rate": 3.9004758933343255e-05, "loss": 0.5643, "num_tokens": 1392516897.0, "step": 1819 }, { "epoch": 0.6668804103879449, "grad_norm": 0.2077764381262234, "learning_rate": 3.900335793381036e-05, "loss": 0.5593, "num_tokens": 1393315353.0, "step": 1820 }, { "epoch": 0.6672468281958503, "grad_norm": 0.23327232772050663, "learning_rate": 3.900195597695451e-05, "loss": 0.5624, "num_tokens": 1394184330.0, "step": 1821 }, { "epoch": 0.6676132460037558, "grad_norm": 0.18310317201396747, "learning_rate": 3.900055306285465e-05, "loss": 0.6229, "num_tokens": 1394911736.0, "step": 1822 }, { "epoch": 0.6679796638116613, "grad_norm": 0.2500310877048738, "learning_rate": 3.899914919158975e-05, "loss": 0.578, "num_tokens": 1395690008.0, "step": 1823 }, { "epoch": 0.6683460816195667, "grad_norm": 0.22503979691960246, "learning_rate": 3.899774436323887e-05, "loss": 0.5863, "num_tokens": 1396304166.0, "step": 1824 }, { "epoch": 0.6687124994274721, "grad_norm": 0.17394305792070705, "learning_rate": 3.8996338577881095e-05, "loss": 0.533, "num_tokens": 1397117009.0, "step": 1825 }, { "epoch": 0.6690789172353776, "grad_norm": 0.25125206450483184, "learning_rate": 3.8994931835595576e-05, "loss": 0.5812, "num_tokens": 1397891937.0, "step": 1826 }, { "epoch": 0.6694453350432831, "grad_norm": 0.21367558123539185, "learning_rate": 3.899352413646151e-05, "loss": 0.5602, "num_tokens": 1398667766.0, "step": 1827 }, { "epoch": 0.6698117528511885, "grad_norm": 0.18808539042097336, "learning_rate": 3.899211548055816e-05, "loss": 0.5963, "num_tokens": 1399397290.0, "step": 1828 }, { "epoch": 0.670178170659094, "grad_norm": 0.22192935173976955, "learning_rate": 3.899070586796483e-05, "loss": 0.5774, "num_tokens": 1400110734.0, "step": 1829 }, { "epoch": 0.6705445884669995, "grad_norm": 0.19332917501895147, "learning_rate": 3.898929529876088e-05, "loss": 0.5852, "num_tokens": 1400936363.0, "step": 1830 }, { "epoch": 0.670911006274905, "grad_norm": 0.19557120915888285, "learning_rate": 3.898788377302575e-05, "loss": 0.6033, "num_tokens": 1401646466.0, "step": 1831 }, { "epoch": 0.6712774240828104, "grad_norm": 0.17254391290594695, "learning_rate": 3.898647129083889e-05, "loss": 0.5359, "num_tokens": 1402533079.0, "step": 1832 }, { "epoch": 0.6716438418907159, "grad_norm": 0.19572178785048536, "learning_rate": 3.8985057852279825e-05, "loss": 0.5615, "num_tokens": 1403258436.0, "step": 1833 }, { "epoch": 0.6720102596986214, "grad_norm": 0.16739175289575567, "learning_rate": 3.8983643457428144e-05, "loss": 0.5596, "num_tokens": 1404204889.0, "step": 1834 }, { "epoch": 0.6723766775065269, "grad_norm": 0.2103931325533166, "learning_rate": 3.898222810636349e-05, "loss": 0.5704, "num_tokens": 1405096485.0, "step": 1835 }, { "epoch": 0.6727430953144323, "grad_norm": 0.17749226242739363, "learning_rate": 3.8980811799165524e-05, "loss": 0.5728, "num_tokens": 1405857677.0, "step": 1836 }, { "epoch": 0.6731095131223378, "grad_norm": 0.19136152609443582, "learning_rate": 3.8979394535914e-05, "loss": 0.5708, "num_tokens": 1406552250.0, "step": 1837 }, { "epoch": 0.6734759309302432, "grad_norm": 0.17363325372354052, "learning_rate": 3.897797631668872e-05, "loss": 0.5623, "num_tokens": 1407362519.0, "step": 1838 }, { "epoch": 0.6738423487381486, "grad_norm": 0.17962849553337573, "learning_rate": 3.897655714156953e-05, "loss": 0.5373, "num_tokens": 1408112150.0, "step": 1839 }, { "epoch": 0.6742087665460541, "grad_norm": 0.17709232389684124, "learning_rate": 3.897513701063632e-05, "loss": 0.557, "num_tokens": 1408917095.0, "step": 1840 }, { "epoch": 0.6745751843539596, "grad_norm": 0.1631138189649445, "learning_rate": 3.8973715923969055e-05, "loss": 0.5366, "num_tokens": 1409718339.0, "step": 1841 }, { "epoch": 0.674941602161865, "grad_norm": 0.16525533736818668, "learning_rate": 3.897229388164774e-05, "loss": 0.5979, "num_tokens": 1410517159.0, "step": 1842 }, { "epoch": 0.6753080199697705, "grad_norm": 0.1723643449533341, "learning_rate": 3.897087088375246e-05, "loss": 0.5713, "num_tokens": 1411358484.0, "step": 1843 }, { "epoch": 0.675674437777676, "grad_norm": 0.18786618129834656, "learning_rate": 3.89694469303633e-05, "loss": 0.57, "num_tokens": 1412120075.0, "step": 1844 }, { "epoch": 0.6760408555855815, "grad_norm": 0.16537545931288788, "learning_rate": 3.8968022021560455e-05, "loss": 0.5826, "num_tokens": 1412937390.0, "step": 1845 }, { "epoch": 0.6764072733934869, "grad_norm": 0.19357420025409208, "learning_rate": 3.896659615742414e-05, "loss": 0.5984, "num_tokens": 1413640384.0, "step": 1846 }, { "epoch": 0.6767736912013924, "grad_norm": 0.19192404489143405, "learning_rate": 3.896516933803463e-05, "loss": 0.5562, "num_tokens": 1414437674.0, "step": 1847 }, { "epoch": 0.6771401090092979, "grad_norm": 0.2038297605669289, "learning_rate": 3.8963741563472276e-05, "loss": 0.5565, "num_tokens": 1415163191.0, "step": 1848 }, { "epoch": 0.6775065268172034, "grad_norm": 0.19191978837652138, "learning_rate": 3.896231283381745e-05, "loss": 0.5472, "num_tokens": 1416035130.0, "step": 1849 }, { "epoch": 0.6778729446251088, "grad_norm": 0.22660482435258789, "learning_rate": 3.89608831491506e-05, "loss": 0.6058, "num_tokens": 1416730208.0, "step": 1850 }, { "epoch": 0.6782393624330142, "grad_norm": 0.20714117643997138, "learning_rate": 3.89594525095522e-05, "loss": 0.5237, "num_tokens": 1417461409.0, "step": 1851 }, { "epoch": 0.6786057802409197, "grad_norm": 0.22130027336823305, "learning_rate": 3.895802091510282e-05, "loss": 0.5908, "num_tokens": 1418083556.0, "step": 1852 }, { "epoch": 0.6789721980488251, "grad_norm": 0.1911603764743615, "learning_rate": 3.8956588365883064e-05, "loss": 0.5618, "num_tokens": 1418761984.0, "step": 1853 }, { "epoch": 0.6793386158567306, "grad_norm": 0.17529227666376723, "learning_rate": 3.895515486197357e-05, "loss": 0.5517, "num_tokens": 1419528181.0, "step": 1854 }, { "epoch": 0.6797050336646361, "grad_norm": 0.18277921319346918, "learning_rate": 3.895372040345505e-05, "loss": 0.565, "num_tokens": 1420455530.0, "step": 1855 }, { "epoch": 0.6800714514725416, "grad_norm": 0.19124966197786822, "learning_rate": 3.895228499040828e-05, "loss": 0.5689, "num_tokens": 1421127107.0, "step": 1856 }, { "epoch": 0.680437869280447, "grad_norm": 0.1929163639967594, "learning_rate": 3.895084862291407e-05, "loss": 0.5764, "num_tokens": 1421759433.0, "step": 1857 }, { "epoch": 0.6808042870883525, "grad_norm": 0.224241602536335, "learning_rate": 3.894941130105329e-05, "loss": 0.5852, "num_tokens": 1422511762.0, "step": 1858 }, { "epoch": 0.681170704896258, "grad_norm": 0.1834920190825645, "learning_rate": 3.8947973024906855e-05, "loss": 0.6151, "num_tokens": 1423252126.0, "step": 1859 }, { "epoch": 0.6815371227041634, "grad_norm": 0.19758009162891907, "learning_rate": 3.894653379455576e-05, "loss": 0.5639, "num_tokens": 1424072573.0, "step": 1860 }, { "epoch": 0.6819035405120689, "grad_norm": 0.1877372947163573, "learning_rate": 3.8945093610081025e-05, "loss": 0.5893, "num_tokens": 1424816966.0, "step": 1861 }, { "epoch": 0.6822699583199744, "grad_norm": 0.18631008003843708, "learning_rate": 3.894365247156373e-05, "loss": 0.5775, "num_tokens": 1425673642.0, "step": 1862 }, { "epoch": 0.6826363761278799, "grad_norm": 0.17733477079039076, "learning_rate": 3.894221037908504e-05, "loss": 0.5614, "num_tokens": 1426435322.0, "step": 1863 }, { "epoch": 0.6830027939357852, "grad_norm": 0.20991134836085754, "learning_rate": 3.894076733272611e-05, "loss": 0.5765, "num_tokens": 1427133282.0, "step": 1864 }, { "epoch": 0.6833692117436907, "grad_norm": 0.1744586265001001, "learning_rate": 3.893932333256821e-05, "loss": 0.5799, "num_tokens": 1428033627.0, "step": 1865 }, { "epoch": 0.6837356295515962, "grad_norm": 0.19076661338017942, "learning_rate": 3.893787837869265e-05, "loss": 0.5476, "num_tokens": 1428785195.0, "step": 1866 }, { "epoch": 0.6841020473595016, "grad_norm": 0.20547640509633974, "learning_rate": 3.893643247118075e-05, "loss": 0.5804, "num_tokens": 1429512041.0, "step": 1867 }, { "epoch": 0.6844684651674071, "grad_norm": 0.18413709282570048, "learning_rate": 3.8934985610113954e-05, "loss": 0.5558, "num_tokens": 1430254150.0, "step": 1868 }, { "epoch": 0.6848348829753126, "grad_norm": 0.24453987424223092, "learning_rate": 3.89335377955737e-05, "loss": 0.5442, "num_tokens": 1430942491.0, "step": 1869 }, { "epoch": 0.6852013007832181, "grad_norm": 0.2097090772153771, "learning_rate": 3.893208902764151e-05, "loss": 0.5469, "num_tokens": 1431679685.0, "step": 1870 }, { "epoch": 0.6855677185911235, "grad_norm": 0.1674380610368447, "learning_rate": 3.8930639306398945e-05, "loss": 0.5561, "num_tokens": 1432534235.0, "step": 1871 }, { "epoch": 0.685934136399029, "grad_norm": 0.26802369246349395, "learning_rate": 3.8929188631927636e-05, "loss": 0.5954, "num_tokens": 1433317162.0, "step": 1872 }, { "epoch": 0.6863005542069345, "grad_norm": 0.249162061234076, "learning_rate": 3.892773700430926e-05, "loss": 0.5477, "num_tokens": 1434077987.0, "step": 1873 }, { "epoch": 0.68666697201484, "grad_norm": 0.1824892115439721, "learning_rate": 3.8926284423625554e-05, "loss": 0.5487, "num_tokens": 1434776281.0, "step": 1874 }, { "epoch": 0.6870333898227454, "grad_norm": 0.20085832621109292, "learning_rate": 3.892483088995827e-05, "loss": 0.5595, "num_tokens": 1435522231.0, "step": 1875 }, { "epoch": 0.6873998076306509, "grad_norm": 0.26064161567833477, "learning_rate": 3.892337640338928e-05, "loss": 0.5716, "num_tokens": 1436319101.0, "step": 1876 }, { "epoch": 0.6877662254385564, "grad_norm": 0.20312128221066256, "learning_rate": 3.892192096400045e-05, "loss": 0.567, "num_tokens": 1437146466.0, "step": 1877 }, { "epoch": 0.6881326432464617, "grad_norm": 0.21214119731031733, "learning_rate": 3.892046457187375e-05, "loss": 0.6162, "num_tokens": 1437731435.0, "step": 1878 }, { "epoch": 0.6884990610543672, "grad_norm": 0.20853734701452858, "learning_rate": 3.891900722709114e-05, "loss": 0.5486, "num_tokens": 1438443596.0, "step": 1879 }, { "epoch": 0.6888654788622727, "grad_norm": 0.17372493557553847, "learning_rate": 3.891754892973471e-05, "loss": 0.5286, "num_tokens": 1439170747.0, "step": 1880 }, { "epoch": 0.6892318966701781, "grad_norm": 0.21978976223704805, "learning_rate": 3.891608967988654e-05, "loss": 0.6122, "num_tokens": 1439816598.0, "step": 1881 }, { "epoch": 0.6895983144780836, "grad_norm": 0.18860990210428502, "learning_rate": 3.89146294776288e-05, "loss": 0.5396, "num_tokens": 1440589664.0, "step": 1882 }, { "epoch": 0.6899647322859891, "grad_norm": 0.21055669355758436, "learning_rate": 3.89131683230437e-05, "loss": 0.5765, "num_tokens": 1441267339.0, "step": 1883 }, { "epoch": 0.6903311500938946, "grad_norm": 0.22475778870227375, "learning_rate": 3.891170621621351e-05, "loss": 0.5785, "num_tokens": 1442036095.0, "step": 1884 }, { "epoch": 0.6906975679018, "grad_norm": 0.1916618960281402, "learning_rate": 3.891024315722053e-05, "loss": 0.5922, "num_tokens": 1442784009.0, "step": 1885 }, { "epoch": 0.6910639857097055, "grad_norm": 0.20630319766081667, "learning_rate": 3.8908779146147165e-05, "loss": 0.5676, "num_tokens": 1443593703.0, "step": 1886 }, { "epoch": 0.691430403517611, "grad_norm": 0.17779102264046664, "learning_rate": 3.890731418307581e-05, "loss": 0.5491, "num_tokens": 1444434020.0, "step": 1887 }, { "epoch": 0.6917968213255165, "grad_norm": 0.20928921282122862, "learning_rate": 3.890584826808897e-05, "loss": 0.5727, "num_tokens": 1445153595.0, "step": 1888 }, { "epoch": 0.6921632391334219, "grad_norm": 0.17029872620897596, "learning_rate": 3.890438140126916e-05, "loss": 0.5452, "num_tokens": 1445933371.0, "step": 1889 }, { "epoch": 0.6925296569413274, "grad_norm": 0.2062693228949634, "learning_rate": 3.890291358269899e-05, "loss": 0.6089, "num_tokens": 1446581018.0, "step": 1890 }, { "epoch": 0.6928960747492328, "grad_norm": 0.1788506451685047, "learning_rate": 3.8901444812461074e-05, "loss": 0.5501, "num_tokens": 1447377199.0, "step": 1891 }, { "epoch": 0.6932624925571382, "grad_norm": 0.22563417928342083, "learning_rate": 3.8899975090638124e-05, "loss": 0.5999, "num_tokens": 1448188647.0, "step": 1892 }, { "epoch": 0.6936289103650437, "grad_norm": 0.1945484322798396, "learning_rate": 3.889850441731289e-05, "loss": 0.5725, "num_tokens": 1449031838.0, "step": 1893 }, { "epoch": 0.6939953281729492, "grad_norm": 0.20792061728707695, "learning_rate": 3.8897032792568164e-05, "loss": 0.5989, "num_tokens": 1449850709.0, "step": 1894 }, { "epoch": 0.6943617459808546, "grad_norm": 0.21790458363409726, "learning_rate": 3.88955602164868e-05, "loss": 0.5569, "num_tokens": 1450566411.0, "step": 1895 }, { "epoch": 0.6947281637887601, "grad_norm": 0.22802941782378183, "learning_rate": 3.889408668915172e-05, "loss": 0.6107, "num_tokens": 1451270056.0, "step": 1896 }, { "epoch": 0.6950945815966656, "grad_norm": 0.23899004639480934, "learning_rate": 3.889261221064588e-05, "loss": 0.5997, "num_tokens": 1451982935.0, "step": 1897 }, { "epoch": 0.6954609994045711, "grad_norm": 0.20741597094234487, "learning_rate": 3.8891136781052296e-05, "loss": 0.5722, "num_tokens": 1452690291.0, "step": 1898 }, { "epoch": 0.6958274172124765, "grad_norm": 0.24020105029115044, "learning_rate": 3.888966040045403e-05, "loss": 0.5655, "num_tokens": 1453500848.0, "step": 1899 }, { "epoch": 0.696193835020382, "grad_norm": 0.18512146771956764, "learning_rate": 3.8888183068934216e-05, "loss": 0.5131, "num_tokens": 1454259556.0, "step": 1900 }, { "epoch": 0.6965602528282875, "grad_norm": 0.2078519981435248, "learning_rate": 3.8886704786576024e-05, "loss": 0.5592, "num_tokens": 1454961734.0, "step": 1901 }, { "epoch": 0.696926670636193, "grad_norm": 0.22073030331321308, "learning_rate": 3.888522555346269e-05, "loss": 0.5574, "num_tokens": 1455657359.0, "step": 1902 }, { "epoch": 0.6972930884440984, "grad_norm": 0.15505074613819225, "learning_rate": 3.88837453696775e-05, "loss": 0.5439, "num_tokens": 1456513092.0, "step": 1903 }, { "epoch": 0.6976595062520039, "grad_norm": 0.20242035840423672, "learning_rate": 3.8882264235303774e-05, "loss": 0.5697, "num_tokens": 1457288757.0, "step": 1904 }, { "epoch": 0.6980259240599093, "grad_norm": 0.1814360508894445, "learning_rate": 3.8880782150424926e-05, "loss": 0.5725, "num_tokens": 1458114314.0, "step": 1905 }, { "epoch": 0.6983923418678147, "grad_norm": 0.15399665459680212, "learning_rate": 3.8879299115124384e-05, "loss": 0.5555, "num_tokens": 1458921982.0, "step": 1906 }, { "epoch": 0.6987587596757202, "grad_norm": 0.18941357364272698, "learning_rate": 3.887781512948565e-05, "loss": 0.5431, "num_tokens": 1459756881.0, "step": 1907 }, { "epoch": 0.6991251774836257, "grad_norm": 0.17663325606045122, "learning_rate": 3.887633019359228e-05, "loss": 0.5631, "num_tokens": 1460502858.0, "step": 1908 }, { "epoch": 0.6994915952915312, "grad_norm": 0.16518434732073217, "learning_rate": 3.887484430752787e-05, "loss": 0.5624, "num_tokens": 1461361389.0, "step": 1909 }, { "epoch": 0.6998580130994366, "grad_norm": 0.19262415571447003, "learning_rate": 3.8873357471376094e-05, "loss": 0.5659, "num_tokens": 1462119891.0, "step": 1910 }, { "epoch": 0.7002244309073421, "grad_norm": 0.17914961357430667, "learning_rate": 3.887186968522064e-05, "loss": 0.5964, "num_tokens": 1462882805.0, "step": 1911 }, { "epoch": 0.7005908487152476, "grad_norm": 0.16966695688262112, "learning_rate": 3.8870380949145295e-05, "loss": 0.5713, "num_tokens": 1463707140.0, "step": 1912 }, { "epoch": 0.700957266523153, "grad_norm": 0.19548207787160843, "learning_rate": 3.886889126323387e-05, "loss": 0.5489, "num_tokens": 1464545537.0, "step": 1913 }, { "epoch": 0.7013236843310585, "grad_norm": 0.15655113950964902, "learning_rate": 3.8867400627570234e-05, "loss": 0.533, "num_tokens": 1465323906.0, "step": 1914 }, { "epoch": 0.701690102138964, "grad_norm": 0.17025366202451656, "learning_rate": 3.8865909042238316e-05, "loss": 0.562, "num_tokens": 1466116341.0, "step": 1915 }, { "epoch": 0.7020565199468695, "grad_norm": 0.17319317617544197, "learning_rate": 3.88644165073221e-05, "loss": 0.5686, "num_tokens": 1466875733.0, "step": 1916 }, { "epoch": 0.7024229377547749, "grad_norm": 0.18235552744194666, "learning_rate": 3.886292302290561e-05, "loss": 0.5741, "num_tokens": 1467558780.0, "step": 1917 }, { "epoch": 0.7027893555626803, "grad_norm": 0.16455829320305415, "learning_rate": 3.8861428589072936e-05, "loss": 0.5457, "num_tokens": 1468390810.0, "step": 1918 }, { "epoch": 0.7031557733705858, "grad_norm": 0.18995947819706924, "learning_rate": 3.8859933205908215e-05, "loss": 0.5421, "num_tokens": 1469306026.0, "step": 1919 }, { "epoch": 0.7035221911784912, "grad_norm": 0.1691601374319513, "learning_rate": 3.8858436873495645e-05, "loss": 0.5502, "num_tokens": 1470012100.0, "step": 1920 }, { "epoch": 0.7038886089863967, "grad_norm": 0.190408838105772, "learning_rate": 3.885693959191947e-05, "loss": 0.5481, "num_tokens": 1470714589.0, "step": 1921 }, { "epoch": 0.7042550267943022, "grad_norm": 0.17677697912990165, "learning_rate": 3.885544136126398e-05, "loss": 0.5279, "num_tokens": 1471588566.0, "step": 1922 }, { "epoch": 0.7046214446022077, "grad_norm": 0.17131820317446203, "learning_rate": 3.885394218161355e-05, "loss": 0.5563, "num_tokens": 1472376577.0, "step": 1923 }, { "epoch": 0.7049878624101131, "grad_norm": 0.18464929402460017, "learning_rate": 3.8852442053052576e-05, "loss": 0.5691, "num_tokens": 1473114258.0, "step": 1924 }, { "epoch": 0.7053542802180186, "grad_norm": 0.18206850886333373, "learning_rate": 3.885094097566551e-05, "loss": 0.5507, "num_tokens": 1473891421.0, "step": 1925 }, { "epoch": 0.7057206980259241, "grad_norm": 0.192138940233442, "learning_rate": 3.8849438949536875e-05, "loss": 0.5638, "num_tokens": 1474667626.0, "step": 1926 }, { "epoch": 0.7060871158338295, "grad_norm": 0.18399454491567532, "learning_rate": 3.884793597475124e-05, "loss": 0.5693, "num_tokens": 1475344711.0, "step": 1927 }, { "epoch": 0.706453533641735, "grad_norm": 0.1848266982435704, "learning_rate": 3.884643205139322e-05, "loss": 0.5844, "num_tokens": 1476003295.0, "step": 1928 }, { "epoch": 0.7068199514496405, "grad_norm": 0.2064261685388308, "learning_rate": 3.884492717954749e-05, "loss": 0.569, "num_tokens": 1476782188.0, "step": 1929 }, { "epoch": 0.707186369257546, "grad_norm": 0.17450974326880514, "learning_rate": 3.884342135929877e-05, "loss": 0.5749, "num_tokens": 1477514602.0, "step": 1930 }, { "epoch": 0.7075527870654513, "grad_norm": 0.17606751460200085, "learning_rate": 3.884191459073185e-05, "loss": 0.5302, "num_tokens": 1478231868.0, "step": 1931 }, { "epoch": 0.7079192048733568, "grad_norm": 0.24226917138075704, "learning_rate": 3.8840406873931574e-05, "loss": 0.6008, "num_tokens": 1478999621.0, "step": 1932 }, { "epoch": 0.7082856226812623, "grad_norm": 0.1884083102246825, "learning_rate": 3.883889820898281e-05, "loss": 0.5465, "num_tokens": 1479796482.0, "step": 1933 }, { "epoch": 0.7086520404891677, "grad_norm": 0.22624033084947756, "learning_rate": 3.883738859597051e-05, "loss": 0.547, "num_tokens": 1480578047.0, "step": 1934 }, { "epoch": 0.7090184582970732, "grad_norm": 0.21751712539156315, "learning_rate": 3.883587803497966e-05, "loss": 0.5301, "num_tokens": 1481375386.0, "step": 1935 }, { "epoch": 0.7093848761049787, "grad_norm": 0.22172977815424166, "learning_rate": 3.883436652609531e-05, "loss": 0.5497, "num_tokens": 1482009630.0, "step": 1936 }, { "epoch": 0.7097512939128842, "grad_norm": 0.19330200227818517, "learning_rate": 3.883285406940257e-05, "loss": 0.5829, "num_tokens": 1482650934.0, "step": 1937 }, { "epoch": 0.7101177117207896, "grad_norm": 0.19698500482474815, "learning_rate": 3.883134066498659e-05, "loss": 0.5452, "num_tokens": 1483420707.0, "step": 1938 }, { "epoch": 0.7104841295286951, "grad_norm": 0.16070856824062765, "learning_rate": 3.882982631293257e-05, "loss": 0.5524, "num_tokens": 1484218634.0, "step": 1939 }, { "epoch": 0.7108505473366006, "grad_norm": 0.2064455813185155, "learning_rate": 3.882831101332578e-05, "loss": 0.5881, "num_tokens": 1484923939.0, "step": 1940 }, { "epoch": 0.711216965144506, "grad_norm": 0.18876573498273905, "learning_rate": 3.8826794766251526e-05, "loss": 0.5774, "num_tokens": 1485733704.0, "step": 1941 }, { "epoch": 0.7115833829524115, "grad_norm": 0.19297550808822297, "learning_rate": 3.882527757179519e-05, "loss": 0.5525, "num_tokens": 1486453268.0, "step": 1942 }, { "epoch": 0.711949800760317, "grad_norm": 0.222431899050055, "learning_rate": 3.882375943004217e-05, "loss": 0.5589, "num_tokens": 1487212370.0, "step": 1943 }, { "epoch": 0.7123162185682225, "grad_norm": 0.19396920113619584, "learning_rate": 3.882224034107796e-05, "loss": 0.596, "num_tokens": 1487892756.0, "step": 1944 }, { "epoch": 0.7126826363761278, "grad_norm": 0.2309177290209356, "learning_rate": 3.8820720304988084e-05, "loss": 0.5565, "num_tokens": 1488704115.0, "step": 1945 }, { "epoch": 0.7130490541840333, "grad_norm": 0.21652236290139124, "learning_rate": 3.881919932185812e-05, "loss": 0.5468, "num_tokens": 1489448920.0, "step": 1946 }, { "epoch": 0.7134154719919388, "grad_norm": 0.1964472759420169, "learning_rate": 3.88176773917737e-05, "loss": 0.5795, "num_tokens": 1490265964.0, "step": 1947 }, { "epoch": 0.7137818897998442, "grad_norm": 0.20379444256047988, "learning_rate": 3.8816154514820524e-05, "loss": 0.5747, "num_tokens": 1491118755.0, "step": 1948 }, { "epoch": 0.7141483076077497, "grad_norm": 0.17360650947249137, "learning_rate": 3.881463069108432e-05, "loss": 0.5728, "num_tokens": 1491952184.0, "step": 1949 }, { "epoch": 0.7145147254156552, "grad_norm": 0.1806845256318923, "learning_rate": 3.881310592065088e-05, "loss": 0.5298, "num_tokens": 1492689352.0, "step": 1950 }, { "epoch": 0.7148811432235607, "grad_norm": 0.19825914087008645, "learning_rate": 3.8811580203606065e-05, "loss": 0.6166, "num_tokens": 1493450369.0, "step": 1951 }, { "epoch": 0.7152475610314661, "grad_norm": 0.17617984391755592, "learning_rate": 3.8810053540035764e-05, "loss": 0.5815, "num_tokens": 1494180204.0, "step": 1952 }, { "epoch": 0.7156139788393716, "grad_norm": 0.18123359074473974, "learning_rate": 3.880852593002594e-05, "loss": 0.5534, "num_tokens": 1494918496.0, "step": 1953 }, { "epoch": 0.7159803966472771, "grad_norm": 0.17981334620661232, "learning_rate": 3.8806997373662596e-05, "loss": 0.5403, "num_tokens": 1495599144.0, "step": 1954 }, { "epoch": 0.7163468144551826, "grad_norm": 0.21805054339630303, "learning_rate": 3.8805467871031785e-05, "loss": 0.5453, "num_tokens": 1496323128.0, "step": 1955 }, { "epoch": 0.716713232263088, "grad_norm": 0.18220275596820865, "learning_rate": 3.880393742221964e-05, "loss": 0.602, "num_tokens": 1497024979.0, "step": 1956 }, { "epoch": 0.7170796500709935, "grad_norm": 0.23205267241862348, "learning_rate": 3.8802406027312316e-05, "loss": 0.5506, "num_tokens": 1497810225.0, "step": 1957 }, { "epoch": 0.7174460678788989, "grad_norm": 0.1733800731772977, "learning_rate": 3.880087368639603e-05, "loss": 0.5786, "num_tokens": 1498630238.0, "step": 1958 }, { "epoch": 0.7178124856868043, "grad_norm": 0.20133088939698246, "learning_rate": 3.879934039955707e-05, "loss": 0.5402, "num_tokens": 1499365168.0, "step": 1959 }, { "epoch": 0.7181789034947098, "grad_norm": 0.19178878924086876, "learning_rate": 3.8797806166881746e-05, "loss": 0.5364, "num_tokens": 1500234768.0, "step": 1960 }, { "epoch": 0.7185453213026153, "grad_norm": 0.18537306525905012, "learning_rate": 3.8796270988456446e-05, "loss": 0.5697, "num_tokens": 1500962017.0, "step": 1961 }, { "epoch": 0.7189117391105208, "grad_norm": 0.21876590303301083, "learning_rate": 3.8794734864367615e-05, "loss": 0.5861, "num_tokens": 1501725035.0, "step": 1962 }, { "epoch": 0.7192781569184262, "grad_norm": 0.16359472188346294, "learning_rate": 3.879319779470172e-05, "loss": 0.5413, "num_tokens": 1502487919.0, "step": 1963 }, { "epoch": 0.7196445747263317, "grad_norm": 0.16493913573340047, "learning_rate": 3.879165977954531e-05, "loss": 0.5345, "num_tokens": 1503281513.0, "step": 1964 }, { "epoch": 0.7200109925342372, "grad_norm": 0.1779450198298733, "learning_rate": 3.879012081898498e-05, "loss": 0.6038, "num_tokens": 1503966126.0, "step": 1965 }, { "epoch": 0.7203774103421426, "grad_norm": 0.1841227480248361, "learning_rate": 3.8788580913107374e-05, "loss": 0.5981, "num_tokens": 1504688075.0, "step": 1966 }, { "epoch": 0.7207438281500481, "grad_norm": 0.18267606939631031, "learning_rate": 3.878704006199919e-05, "loss": 0.5925, "num_tokens": 1505382176.0, "step": 1967 }, { "epoch": 0.7211102459579536, "grad_norm": 0.18755626345915752, "learning_rate": 3.878549826574719e-05, "loss": 0.5868, "num_tokens": 1506087319.0, "step": 1968 }, { "epoch": 0.7214766637658591, "grad_norm": 0.17921042017305328, "learning_rate": 3.8783955524438176e-05, "loss": 0.5546, "num_tokens": 1506853157.0, "step": 1969 }, { "epoch": 0.7218430815737645, "grad_norm": 0.1801232228471687, "learning_rate": 3.8782411838158996e-05, "loss": 0.5477, "num_tokens": 1507691784.0, "step": 1970 }, { "epoch": 0.7222094993816699, "grad_norm": 0.17293887643857797, "learning_rate": 3.878086720699658e-05, "loss": 0.5224, "num_tokens": 1508414389.0, "step": 1971 }, { "epoch": 0.7225759171895754, "grad_norm": 0.20711272309432452, "learning_rate": 3.877932163103788e-05, "loss": 0.5676, "num_tokens": 1509138612.0, "step": 1972 }, { "epoch": 0.7229423349974808, "grad_norm": 0.16697388632245655, "learning_rate": 3.877777511036992e-05, "loss": 0.5862, "num_tokens": 1509969058.0, "step": 1973 }, { "epoch": 0.7233087528053863, "grad_norm": 0.20479090868776953, "learning_rate": 3.8776227645079785e-05, "loss": 0.5755, "num_tokens": 1510707668.0, "step": 1974 }, { "epoch": 0.7236751706132918, "grad_norm": 0.18691644736336585, "learning_rate": 3.8774679235254574e-05, "loss": 0.5778, "num_tokens": 1511571570.0, "step": 1975 }, { "epoch": 0.7240415884211973, "grad_norm": 0.19272537763563427, "learning_rate": 3.877312988098148e-05, "loss": 0.5512, "num_tokens": 1512204800.0, "step": 1976 }, { "epoch": 0.7244080062291027, "grad_norm": 0.21155343648508626, "learning_rate": 3.8771579582347744e-05, "loss": 0.5595, "num_tokens": 1513017738.0, "step": 1977 }, { "epoch": 0.7247744240370082, "grad_norm": 0.19059217191048805, "learning_rate": 3.8770028339440634e-05, "loss": 0.5785, "num_tokens": 1513765906.0, "step": 1978 }, { "epoch": 0.7251408418449137, "grad_norm": 0.2089927973428334, "learning_rate": 3.8768476152347496e-05, "loss": 0.582, "num_tokens": 1514666574.0, "step": 1979 }, { "epoch": 0.7255072596528191, "grad_norm": 0.186781315412279, "learning_rate": 3.876692302115573e-05, "loss": 0.5969, "num_tokens": 1515394628.0, "step": 1980 }, { "epoch": 0.7258736774607246, "grad_norm": 0.2072849623259543, "learning_rate": 3.876536894595277e-05, "loss": 0.5497, "num_tokens": 1516165352.0, "step": 1981 }, { "epoch": 0.7262400952686301, "grad_norm": 0.18314067432709447, "learning_rate": 3.8763813926826106e-05, "loss": 0.5884, "num_tokens": 1516948207.0, "step": 1982 }, { "epoch": 0.7266065130765356, "grad_norm": 0.22152419338913448, "learning_rate": 3.8762257963863296e-05, "loss": 0.5976, "num_tokens": 1517693466.0, "step": 1983 }, { "epoch": 0.726972930884441, "grad_norm": 0.19945126840445399, "learning_rate": 3.8760701057151955e-05, "loss": 0.6101, "num_tokens": 1518394436.0, "step": 1984 }, { "epoch": 0.7273393486923464, "grad_norm": 0.18031467423142153, "learning_rate": 3.875914320677973e-05, "loss": 0.5595, "num_tokens": 1519108549.0, "step": 1985 }, { "epoch": 0.7277057665002519, "grad_norm": 0.18894692437719202, "learning_rate": 3.8757584412834324e-05, "loss": 0.5483, "num_tokens": 1519833093.0, "step": 1986 }, { "epoch": 0.7280721843081573, "grad_norm": 0.21079172811041805, "learning_rate": 3.875602467540351e-05, "loss": 0.5811, "num_tokens": 1520491601.0, "step": 1987 }, { "epoch": 0.7284386021160628, "grad_norm": 0.17923670871485778, "learning_rate": 3.8754463994575104e-05, "loss": 0.5723, "num_tokens": 1521193439.0, "step": 1988 }, { "epoch": 0.7288050199239683, "grad_norm": 0.19271183552016266, "learning_rate": 3.875290237043697e-05, "loss": 0.5424, "num_tokens": 1521862844.0, "step": 1989 }, { "epoch": 0.7291714377318738, "grad_norm": 0.18677369815127787, "learning_rate": 3.8751339803077036e-05, "loss": 0.5389, "num_tokens": 1522805215.0, "step": 1990 }, { "epoch": 0.7295378555397792, "grad_norm": 0.18434852385178996, "learning_rate": 3.874977629258327e-05, "loss": 0.5547, "num_tokens": 1523582889.0, "step": 1991 }, { "epoch": 0.7299042733476847, "grad_norm": 0.21159140030826046, "learning_rate": 3.874821183904372e-05, "loss": 0.56, "num_tokens": 1524358761.0, "step": 1992 }, { "epoch": 0.7302706911555902, "grad_norm": 0.18494926944551984, "learning_rate": 3.874664644254644e-05, "loss": 0.6076, "num_tokens": 1524974110.0, "step": 1993 }, { "epoch": 0.7306371089634957, "grad_norm": 0.22156441523181725, "learning_rate": 3.8745080103179585e-05, "loss": 0.574, "num_tokens": 1525759971.0, "step": 1994 }, { "epoch": 0.7310035267714011, "grad_norm": 0.19644085974187744, "learning_rate": 3.874351282103134e-05, "loss": 0.5216, "num_tokens": 1526615228.0, "step": 1995 }, { "epoch": 0.7313699445793066, "grad_norm": 0.18258523375456062, "learning_rate": 3.8741944596189937e-05, "loss": 0.5542, "num_tokens": 1527256578.0, "step": 1996 }, { "epoch": 0.7317363623872121, "grad_norm": 0.18473663495386, "learning_rate": 3.874037542874368e-05, "loss": 0.5505, "num_tokens": 1527988418.0, "step": 1997 }, { "epoch": 0.7321027801951174, "grad_norm": 0.21580199866008806, "learning_rate": 3.873880531878091e-05, "loss": 0.5372, "num_tokens": 1528763512.0, "step": 1998 }, { "epoch": 0.7324691980030229, "grad_norm": 0.1698048222988175, "learning_rate": 3.873723426639003e-05, "loss": 0.5946, "num_tokens": 1529610336.0, "step": 1999 }, { "epoch": 0.7328356158109284, "grad_norm": 0.21476715979891992, "learning_rate": 3.873566227165949e-05, "loss": 0.5721, "num_tokens": 1530358919.0, "step": 2000 }, { "epoch": 0.7332020336188338, "grad_norm": 0.17542536110272605, "learning_rate": 3.873408933467782e-05, "loss": 0.5644, "num_tokens": 1531107478.0, "step": 2001 }, { "epoch": 0.7335684514267393, "grad_norm": 0.17707423301222416, "learning_rate": 3.873251545553353e-05, "loss": 0.5689, "num_tokens": 1531872865.0, "step": 2002 }, { "epoch": 0.7339348692346448, "grad_norm": 0.1790513287331369, "learning_rate": 3.873094063431528e-05, "loss": 0.539, "num_tokens": 1532644474.0, "step": 2003 }, { "epoch": 0.7343012870425503, "grad_norm": 0.17376182298699444, "learning_rate": 3.872936487111171e-05, "loss": 0.5895, "num_tokens": 1533426512.0, "step": 2004 }, { "epoch": 0.7346677048504557, "grad_norm": 0.1790706682522081, "learning_rate": 3.872778816601155e-05, "loss": 0.5896, "num_tokens": 1534194342.0, "step": 2005 }, { "epoch": 0.7350341226583612, "grad_norm": 0.16892250388749, "learning_rate": 3.872621051910357e-05, "loss": 0.5772, "num_tokens": 1535129718.0, "step": 2006 }, { "epoch": 0.7354005404662667, "grad_norm": 0.17424576404241898, "learning_rate": 3.8724631930476585e-05, "loss": 0.5711, "num_tokens": 1535799739.0, "step": 2007 }, { "epoch": 0.7357669582741722, "grad_norm": 0.18394437210147543, "learning_rate": 3.872305240021949e-05, "loss": 0.6007, "num_tokens": 1536536825.0, "step": 2008 }, { "epoch": 0.7361333760820776, "grad_norm": 0.2272398220696485, "learning_rate": 3.87214719284212e-05, "loss": 0.5823, "num_tokens": 1537224586.0, "step": 2009 }, { "epoch": 0.7364997938899831, "grad_norm": 0.18040658665509962, "learning_rate": 3.87198905151707e-05, "loss": 0.5721, "num_tokens": 1538022989.0, "step": 2010 }, { "epoch": 0.7368662116978885, "grad_norm": 0.17583303985896412, "learning_rate": 3.871830816055703e-05, "loss": 0.5387, "num_tokens": 1538722521.0, "step": 2011 }, { "epoch": 0.7372326295057939, "grad_norm": 0.21227875660347104, "learning_rate": 3.871672486466929e-05, "loss": 0.5438, "num_tokens": 1539549000.0, "step": 2012 }, { "epoch": 0.7375990473136994, "grad_norm": 0.1675862432670323, "learning_rate": 3.871514062759661e-05, "loss": 0.5466, "num_tokens": 1540265316.0, "step": 2013 }, { "epoch": 0.7379654651216049, "grad_norm": 0.17760461908280448, "learning_rate": 3.8713555449428186e-05, "loss": 0.5437, "num_tokens": 1541155520.0, "step": 2014 }, { "epoch": 0.7383318829295104, "grad_norm": 0.17914996186570595, "learning_rate": 3.8711969330253274e-05, "loss": 0.5871, "num_tokens": 1541922157.0, "step": 2015 }, { "epoch": 0.7386983007374158, "grad_norm": 0.17643704000926436, "learning_rate": 3.871038227016117e-05, "loss": 0.5766, "num_tokens": 1542614763.0, "step": 2016 }, { "epoch": 0.7390647185453213, "grad_norm": 0.18133310698773492, "learning_rate": 3.870879426924123e-05, "loss": 0.5881, "num_tokens": 1543371074.0, "step": 2017 }, { "epoch": 0.7394311363532268, "grad_norm": 0.1902179836323258, "learning_rate": 3.8707205327582865e-05, "loss": 0.5531, "num_tokens": 1544186478.0, "step": 2018 }, { "epoch": 0.7397975541611322, "grad_norm": 0.20962341007193744, "learning_rate": 3.870561544527552e-05, "loss": 0.5819, "num_tokens": 1544787038.0, "step": 2019 }, { "epoch": 0.7401639719690377, "grad_norm": 0.1910440207316012, "learning_rate": 3.8704024622408734e-05, "loss": 0.5626, "num_tokens": 1545496350.0, "step": 2020 }, { "epoch": 0.7405303897769432, "grad_norm": 0.19320234688994584, "learning_rate": 3.8702432859072055e-05, "loss": 0.5789, "num_tokens": 1546301866.0, "step": 2021 }, { "epoch": 0.7408968075848487, "grad_norm": 0.19014323139631822, "learning_rate": 3.870084015535511e-05, "loss": 0.6084, "num_tokens": 1547013276.0, "step": 2022 }, { "epoch": 0.7412632253927541, "grad_norm": 0.20816459118950545, "learning_rate": 3.8699246511347566e-05, "loss": 0.5748, "num_tokens": 1547756953.0, "step": 2023 }, { "epoch": 0.7416296432006596, "grad_norm": 0.1781902151723423, "learning_rate": 3.869765192713916e-05, "loss": 0.5313, "num_tokens": 1548592198.0, "step": 2024 }, { "epoch": 0.741996061008565, "grad_norm": 0.1777410163057693, "learning_rate": 3.8696056402819654e-05, "loss": 0.5767, "num_tokens": 1549411236.0, "step": 2025 }, { "epoch": 0.7423624788164704, "grad_norm": 0.1764764528080366, "learning_rate": 3.869445993847889e-05, "loss": 0.5691, "num_tokens": 1550140534.0, "step": 2026 }, { "epoch": 0.7427288966243759, "grad_norm": 0.17172574505691476, "learning_rate": 3.8692862534206745e-05, "loss": 0.5391, "num_tokens": 1550878378.0, "step": 2027 }, { "epoch": 0.7430953144322814, "grad_norm": 0.20305924423773297, "learning_rate": 3.869126419009317e-05, "loss": 0.5583, "num_tokens": 1551495394.0, "step": 2028 }, { "epoch": 0.7434617322401869, "grad_norm": 0.17817687663534237, "learning_rate": 3.868966490622814e-05, "loss": 0.5637, "num_tokens": 1552204652.0, "step": 2029 }, { "epoch": 0.7438281500480923, "grad_norm": 0.18164254517037923, "learning_rate": 3.86880646827017e-05, "loss": 0.5697, "num_tokens": 1552977106.0, "step": 2030 }, { "epoch": 0.7441945678559978, "grad_norm": 0.20293483213146638, "learning_rate": 3.868646351960395e-05, "loss": 0.5555, "num_tokens": 1553801892.0, "step": 2031 }, { "epoch": 0.7445609856639033, "grad_norm": 0.2005328675153905, "learning_rate": 3.868486141702504e-05, "loss": 0.5655, "num_tokens": 1554505760.0, "step": 2032 }, { "epoch": 0.7449274034718087, "grad_norm": 0.17662561794548517, "learning_rate": 3.8683258375055165e-05, "loss": 0.5393, "num_tokens": 1555346662.0, "step": 2033 }, { "epoch": 0.7452938212797142, "grad_norm": 0.1795148018104176, "learning_rate": 3.8681654393784594e-05, "loss": 0.5394, "num_tokens": 1556216194.0, "step": 2034 }, { "epoch": 0.7456602390876197, "grad_norm": 0.21043908691019414, "learning_rate": 3.8680049473303616e-05, "loss": 0.5725, "num_tokens": 1557153825.0, "step": 2035 }, { "epoch": 0.7460266568955252, "grad_norm": 0.17150020348385472, "learning_rate": 3.8678443613702594e-05, "loss": 0.5388, "num_tokens": 1558049241.0, "step": 2036 }, { "epoch": 0.7463930747034306, "grad_norm": 0.1831568057425788, "learning_rate": 3.8676836815071955e-05, "loss": 0.58, "num_tokens": 1558790187.0, "step": 2037 }, { "epoch": 0.746759492511336, "grad_norm": 0.19981938777314304, "learning_rate": 3.867522907750216e-05, "loss": 0.5426, "num_tokens": 1559592659.0, "step": 2038 }, { "epoch": 0.7471259103192415, "grad_norm": 0.18108246482068083, "learning_rate": 3.867362040108371e-05, "loss": 0.558, "num_tokens": 1560344189.0, "step": 2039 }, { "epoch": 0.7474923281271469, "grad_norm": 0.19494847744891328, "learning_rate": 3.86720107859072e-05, "loss": 0.5861, "num_tokens": 1561072716.0, "step": 2040 }, { "epoch": 0.7478587459350524, "grad_norm": 0.2067457970993894, "learning_rate": 3.867040023206324e-05, "loss": 0.5445, "num_tokens": 1561828933.0, "step": 2041 }, { "epoch": 0.7482251637429579, "grad_norm": 0.18148936331708962, "learning_rate": 3.866878873964252e-05, "loss": 0.5757, "num_tokens": 1562615408.0, "step": 2042 }, { "epoch": 0.7485915815508634, "grad_norm": 0.23121588767620338, "learning_rate": 3.866717630873576e-05, "loss": 0.5464, "num_tokens": 1563290646.0, "step": 2043 }, { "epoch": 0.7489579993587688, "grad_norm": 0.19309978620505105, "learning_rate": 3.866556293943374e-05, "loss": 0.5586, "num_tokens": 1564049666.0, "step": 2044 }, { "epoch": 0.7493244171666743, "grad_norm": 0.16452081878412333, "learning_rate": 3.866394863182731e-05, "loss": 0.5479, "num_tokens": 1564817776.0, "step": 2045 }, { "epoch": 0.7496908349745798, "grad_norm": 0.1895596945471829, "learning_rate": 3.866233338600735e-05, "loss": 0.5773, "num_tokens": 1565582474.0, "step": 2046 }, { "epoch": 0.7500572527824853, "grad_norm": 0.19785848817862747, "learning_rate": 3.866071720206479e-05, "loss": 0.5699, "num_tokens": 1566459124.0, "step": 2047 }, { "epoch": 0.7504236705903907, "grad_norm": 0.17526639474846922, "learning_rate": 3.8659100080090655e-05, "loss": 0.561, "num_tokens": 1567052153.0, "step": 2048 }, { "epoch": 0.7507900883982962, "grad_norm": 0.19152848064972455, "learning_rate": 3.8657482020175964e-05, "loss": 0.546, "num_tokens": 1567790695.0, "step": 2049 }, { "epoch": 0.7511565062062017, "grad_norm": 0.2192944666504224, "learning_rate": 3.8655863022411823e-05, "loss": 0.5585, "num_tokens": 1568611952.0, "step": 2050 }, { "epoch": 0.7515229240141071, "grad_norm": 0.16182459797397059, "learning_rate": 3.8654243086889396e-05, "loss": 0.5768, "num_tokens": 1569362322.0, "step": 2051 }, { "epoch": 0.7518893418220125, "grad_norm": 0.19019654424012986, "learning_rate": 3.865262221369988e-05, "loss": 0.5663, "num_tokens": 1570108703.0, "step": 2052 }, { "epoch": 0.752255759629918, "grad_norm": 0.1873706757395241, "learning_rate": 3.8651000402934535e-05, "loss": 0.555, "num_tokens": 1570948494.0, "step": 2053 }, { "epoch": 0.7526221774378234, "grad_norm": 0.1802104165482124, "learning_rate": 3.864937765468467e-05, "loss": 0.5821, "num_tokens": 1571736883.0, "step": 2054 }, { "epoch": 0.7529885952457289, "grad_norm": 0.1772928375942287, "learning_rate": 3.864775396904165e-05, "loss": 0.5655, "num_tokens": 1572478120.0, "step": 2055 }, { "epoch": 0.7533550130536344, "grad_norm": 0.16520165330742617, "learning_rate": 3.86461293460969e-05, "loss": 0.5581, "num_tokens": 1573268544.0, "step": 2056 }, { "epoch": 0.7537214308615399, "grad_norm": 0.1853864279370797, "learning_rate": 3.864450378594187e-05, "loss": 0.5577, "num_tokens": 1573974781.0, "step": 2057 }, { "epoch": 0.7540878486694453, "grad_norm": 0.19065269430649215, "learning_rate": 3.8642877288668096e-05, "loss": 0.5868, "num_tokens": 1574783379.0, "step": 2058 }, { "epoch": 0.7544542664773508, "grad_norm": 0.18736677015453113, "learning_rate": 3.8641249854367154e-05, "loss": 0.5397, "num_tokens": 1575442689.0, "step": 2059 }, { "epoch": 0.7548206842852563, "grad_norm": 0.17594485879804533, "learning_rate": 3.863962148313067e-05, "loss": 0.5409, "num_tokens": 1576263645.0, "step": 2060 }, { "epoch": 0.7551871020931618, "grad_norm": 0.19084280007296503, "learning_rate": 3.8637992175050326e-05, "loss": 0.5475, "num_tokens": 1577004589.0, "step": 2061 }, { "epoch": 0.7555535199010672, "grad_norm": 0.20072968176764158, "learning_rate": 3.8636361930217846e-05, "loss": 0.5665, "num_tokens": 1577687330.0, "step": 2062 }, { "epoch": 0.7559199377089727, "grad_norm": 0.21397914236686663, "learning_rate": 3.863473074872503e-05, "loss": 0.5823, "num_tokens": 1578409809.0, "step": 2063 }, { "epoch": 0.7562863555168782, "grad_norm": 0.17246296164849467, "learning_rate": 3.86330986306637e-05, "loss": 0.5694, "num_tokens": 1579178778.0, "step": 2064 }, { "epoch": 0.7566527733247835, "grad_norm": 0.19797860433133319, "learning_rate": 3.863146557612577e-05, "loss": 0.5803, "num_tokens": 1579874905.0, "step": 2065 }, { "epoch": 0.757019191132689, "grad_norm": 0.17484110422753074, "learning_rate": 3.862983158520316e-05, "loss": 0.576, "num_tokens": 1580620577.0, "step": 2066 }, { "epoch": 0.7573856089405945, "grad_norm": 0.18133045713284712, "learning_rate": 3.8628196657987885e-05, "loss": 0.5564, "num_tokens": 1581361321.0, "step": 2067 }, { "epoch": 0.7577520267485, "grad_norm": 0.21929915010600123, "learning_rate": 3.862656079457199e-05, "loss": 0.5801, "num_tokens": 1582148024.0, "step": 2068 }, { "epoch": 0.7581184445564054, "grad_norm": 0.18811886271233833, "learning_rate": 3.862492399504757e-05, "loss": 0.5636, "num_tokens": 1582916960.0, "step": 2069 }, { "epoch": 0.7584848623643109, "grad_norm": 0.17198905394109, "learning_rate": 3.862328625950679e-05, "loss": 0.5427, "num_tokens": 1583831044.0, "step": 2070 }, { "epoch": 0.7588512801722164, "grad_norm": 0.17809917224610142, "learning_rate": 3.862164758804185e-05, "loss": 0.5635, "num_tokens": 1584728981.0, "step": 2071 }, { "epoch": 0.7592176979801218, "grad_norm": 0.19821204266227732, "learning_rate": 3.862000798074502e-05, "loss": 0.5889, "num_tokens": 1585390278.0, "step": 2072 }, { "epoch": 0.7595841157880273, "grad_norm": 0.2076853660954187, "learning_rate": 3.86183674377086e-05, "loss": 0.5642, "num_tokens": 1586220725.0, "step": 2073 }, { "epoch": 0.7599505335959328, "grad_norm": 0.2060180068119518, "learning_rate": 3.861672595902496e-05, "loss": 0.5771, "num_tokens": 1586909861.0, "step": 2074 }, { "epoch": 0.7603169514038383, "grad_norm": 0.18221925900073907, "learning_rate": 3.861508354478653e-05, "loss": 0.549, "num_tokens": 1587808279.0, "step": 2075 }, { "epoch": 0.7606833692117437, "grad_norm": 0.20458779974895952, "learning_rate": 3.8613440195085763e-05, "loss": 0.5484, "num_tokens": 1588439263.0, "step": 2076 }, { "epoch": 0.7610497870196492, "grad_norm": 0.21581146315364802, "learning_rate": 3.86117959100152e-05, "loss": 0.5746, "num_tokens": 1589201687.0, "step": 2077 }, { "epoch": 0.7614162048275546, "grad_norm": 0.165978925594322, "learning_rate": 3.861015068966741e-05, "loss": 0.5597, "num_tokens": 1590117849.0, "step": 2078 }, { "epoch": 0.76178262263546, "grad_norm": 0.22368517103873156, "learning_rate": 3.860850453413502e-05, "loss": 0.5605, "num_tokens": 1590917762.0, "step": 2079 }, { "epoch": 0.7621490404433655, "grad_norm": 0.16440809606816112, "learning_rate": 3.860685744351071e-05, "loss": 0.5543, "num_tokens": 1591637333.0, "step": 2080 }, { "epoch": 0.762515458251271, "grad_norm": 0.18909070514504328, "learning_rate": 3.860520941788722e-05, "loss": 0.5676, "num_tokens": 1592270685.0, "step": 2081 }, { "epoch": 0.7628818760591765, "grad_norm": 0.17873689853041524, "learning_rate": 3.860356045735734e-05, "loss": 0.5211, "num_tokens": 1593131524.0, "step": 2082 }, { "epoch": 0.7632482938670819, "grad_norm": 0.16966067719314784, "learning_rate": 3.86019105620139e-05, "loss": 0.5766, "num_tokens": 1593905086.0, "step": 2083 }, { "epoch": 0.7636147116749874, "grad_norm": 0.17533989327957913, "learning_rate": 3.86002597319498e-05, "loss": 0.5662, "num_tokens": 1594718432.0, "step": 2084 }, { "epoch": 0.7639811294828929, "grad_norm": 0.1727464360544634, "learning_rate": 3.859860796725798e-05, "loss": 0.5612, "num_tokens": 1595482449.0, "step": 2085 }, { "epoch": 0.7643475472907983, "grad_norm": 0.22991802723571367, "learning_rate": 3.859695526803145e-05, "loss": 0.5864, "num_tokens": 1596301740.0, "step": 2086 }, { "epoch": 0.7647139650987038, "grad_norm": 0.17675111809351693, "learning_rate": 3.859530163436324e-05, "loss": 0.5707, "num_tokens": 1597155009.0, "step": 2087 }, { "epoch": 0.7650803829066093, "grad_norm": 0.19085369865172755, "learning_rate": 3.859364706634646e-05, "loss": 0.5711, "num_tokens": 1597934808.0, "step": 2088 }, { "epoch": 0.7654468007145148, "grad_norm": 0.21932531821193432, "learning_rate": 3.8591991564074274e-05, "loss": 0.563, "num_tokens": 1598616701.0, "step": 2089 }, { "epoch": 0.7658132185224202, "grad_norm": 0.1885537124262083, "learning_rate": 3.8590335127639894e-05, "loss": 0.5662, "num_tokens": 1599420658.0, "step": 2090 }, { "epoch": 0.7661796363303257, "grad_norm": 0.17591859884548008, "learning_rate": 3.858867775713657e-05, "loss": 0.5532, "num_tokens": 1600152245.0, "step": 2091 }, { "epoch": 0.7665460541382311, "grad_norm": 0.20135156054282427, "learning_rate": 3.8587019452657606e-05, "loss": 0.591, "num_tokens": 1600894713.0, "step": 2092 }, { "epoch": 0.7669124719461365, "grad_norm": 0.18862047763161155, "learning_rate": 3.8585360214296383e-05, "loss": 0.579, "num_tokens": 1601723493.0, "step": 2093 }, { "epoch": 0.767278889754042, "grad_norm": 0.18102969789402965, "learning_rate": 3.8583700042146314e-05, "loss": 0.568, "num_tokens": 1602630756.0, "step": 2094 }, { "epoch": 0.7676453075619475, "grad_norm": 0.20356140925020533, "learning_rate": 3.858203893630088e-05, "loss": 0.5733, "num_tokens": 1603310785.0, "step": 2095 }, { "epoch": 0.768011725369853, "grad_norm": 0.18200401031643995, "learning_rate": 3.858037689685359e-05, "loss": 0.5538, "num_tokens": 1604046466.0, "step": 2096 }, { "epoch": 0.7683781431777584, "grad_norm": 0.18797102303017754, "learning_rate": 3.857871392389802e-05, "loss": 0.5387, "num_tokens": 1604877693.0, "step": 2097 }, { "epoch": 0.7687445609856639, "grad_norm": 0.18451353995174072, "learning_rate": 3.857705001752781e-05, "loss": 0.5611, "num_tokens": 1605661345.0, "step": 2098 }, { "epoch": 0.7691109787935694, "grad_norm": 0.16853864616921244, "learning_rate": 3.8575385177836636e-05, "loss": 0.5777, "num_tokens": 1606370145.0, "step": 2099 }, { "epoch": 0.7694773966014749, "grad_norm": 0.182056929767482, "learning_rate": 3.857371940491824e-05, "loss": 0.5485, "num_tokens": 1607108758.0, "step": 2100 }, { "epoch": 0.7698438144093803, "grad_norm": 0.19654200234975167, "learning_rate": 3.8572052698866385e-05, "loss": 0.5352, "num_tokens": 1607896101.0, "step": 2101 }, { "epoch": 0.7702102322172858, "grad_norm": 0.17725466455249186, "learning_rate": 3.857038505977493e-05, "loss": 0.5296, "num_tokens": 1608629091.0, "step": 2102 }, { "epoch": 0.7705766500251913, "grad_norm": 0.18907300030598925, "learning_rate": 3.8568716487737766e-05, "loss": 0.5423, "num_tokens": 1609576315.0, "step": 2103 }, { "epoch": 0.7709430678330967, "grad_norm": 0.19139479759307368, "learning_rate": 3.856704698284883e-05, "loss": 0.5577, "num_tokens": 1610342723.0, "step": 2104 }, { "epoch": 0.7713094856410021, "grad_norm": 0.19668145909136137, "learning_rate": 3.856537654520212e-05, "loss": 0.5434, "num_tokens": 1611180901.0, "step": 2105 }, { "epoch": 0.7716759034489076, "grad_norm": 0.17125357674424413, "learning_rate": 3.856370517489168e-05, "loss": 0.5304, "num_tokens": 1612247662.0, "step": 2106 }, { "epoch": 0.772042321256813, "grad_norm": 0.17355225944408886, "learning_rate": 3.8562032872011626e-05, "loss": 0.5303, "num_tokens": 1613109064.0, "step": 2107 }, { "epoch": 0.7724087390647185, "grad_norm": 0.17012638721307843, "learning_rate": 3.85603596366561e-05, "loss": 0.5575, "num_tokens": 1613908038.0, "step": 2108 }, { "epoch": 0.772775156872624, "grad_norm": 0.15848550588918855, "learning_rate": 3.855868546891931e-05, "loss": 0.5488, "num_tokens": 1614634722.0, "step": 2109 }, { "epoch": 0.7731415746805295, "grad_norm": 0.18759060185413856, "learning_rate": 3.855701036889552e-05, "loss": 0.6148, "num_tokens": 1615234178.0, "step": 2110 }, { "epoch": 0.7735079924884349, "grad_norm": 0.1743849705362489, "learning_rate": 3.855533433667903e-05, "loss": 0.5827, "num_tokens": 1615902480.0, "step": 2111 }, { "epoch": 0.7738744102963404, "grad_norm": 0.21297837573638725, "learning_rate": 3.855365737236422e-05, "loss": 0.6302, "num_tokens": 1616537371.0, "step": 2112 }, { "epoch": 0.7742408281042459, "grad_norm": 1.1548198974835067, "learning_rate": 3.8551979476045495e-05, "loss": 0.5505, "num_tokens": 1617219475.0, "step": 2113 }, { "epoch": 0.7746072459121514, "grad_norm": 0.2060939220911373, "learning_rate": 3.8550300647817326e-05, "loss": 0.5293, "num_tokens": 1617941877.0, "step": 2114 }, { "epoch": 0.7749736637200568, "grad_norm": 0.1976722995334403, "learning_rate": 3.8548620887774236e-05, "loss": 0.5832, "num_tokens": 1618667086.0, "step": 2115 }, { "epoch": 0.7753400815279623, "grad_norm": 0.1832096345394424, "learning_rate": 3.854694019601081e-05, "loss": 0.5794, "num_tokens": 1619369945.0, "step": 2116 }, { "epoch": 0.7757064993358678, "grad_norm": 0.1867110577554903, "learning_rate": 3.854525857262165e-05, "loss": 0.5521, "num_tokens": 1620090420.0, "step": 2117 }, { "epoch": 0.7760729171437731, "grad_norm": 0.20343173674055345, "learning_rate": 3.854357601770145e-05, "loss": 0.608, "num_tokens": 1620817120.0, "step": 2118 }, { "epoch": 0.7764393349516786, "grad_norm": 0.18636260782196984, "learning_rate": 3.854189253134494e-05, "loss": 0.5743, "num_tokens": 1621477566.0, "step": 2119 }, { "epoch": 0.7768057527595841, "grad_norm": 0.1894287791291684, "learning_rate": 3.8540208113646894e-05, "loss": 0.5398, "num_tokens": 1622230628.0, "step": 2120 }, { "epoch": 0.7771721705674896, "grad_norm": 0.18608476318939907, "learning_rate": 3.853852276470217e-05, "loss": 0.5875, "num_tokens": 1622978381.0, "step": 2121 }, { "epoch": 0.777538588375395, "grad_norm": 0.20039439986957264, "learning_rate": 3.853683648460565e-05, "loss": 0.5908, "num_tokens": 1623805098.0, "step": 2122 }, { "epoch": 0.7779050061833005, "grad_norm": 0.19676814751628072, "learning_rate": 3.8535149273452245e-05, "loss": 0.6005, "num_tokens": 1624478980.0, "step": 2123 }, { "epoch": 0.778271423991206, "grad_norm": 0.2224064882650871, "learning_rate": 3.853346113133698e-05, "loss": 0.6234, "num_tokens": 1625240652.0, "step": 2124 }, { "epoch": 0.7786378417991114, "grad_norm": 0.18200565424110576, "learning_rate": 3.85317720583549e-05, "loss": 0.5747, "num_tokens": 1626078791.0, "step": 2125 }, { "epoch": 0.7790042596070169, "grad_norm": 0.17813030507109254, "learning_rate": 3.853008205460109e-05, "loss": 0.546, "num_tokens": 1626757018.0, "step": 2126 }, { "epoch": 0.7793706774149224, "grad_norm": 0.19629364118636342, "learning_rate": 3.85283911201707e-05, "loss": 0.5811, "num_tokens": 1627579105.0, "step": 2127 }, { "epoch": 0.7797370952228279, "grad_norm": 0.1652472478294749, "learning_rate": 3.852669925515895e-05, "loss": 0.5354, "num_tokens": 1628499428.0, "step": 2128 }, { "epoch": 0.7801035130307333, "grad_norm": 0.17899975113808525, "learning_rate": 3.8525006459661075e-05, "loss": 0.5948, "num_tokens": 1629163230.0, "step": 2129 }, { "epoch": 0.7804699308386388, "grad_norm": 0.18210993515007748, "learning_rate": 3.852331273377239e-05, "loss": 0.5461, "num_tokens": 1630059568.0, "step": 2130 }, { "epoch": 0.7808363486465443, "grad_norm": 0.19560742655881466, "learning_rate": 3.852161807758827e-05, "loss": 0.5466, "num_tokens": 1630778932.0, "step": 2131 }, { "epoch": 0.7812027664544496, "grad_norm": 0.17851919951046344, "learning_rate": 3.85199224912041e-05, "loss": 0.5487, "num_tokens": 1631537418.0, "step": 2132 }, { "epoch": 0.7815691842623551, "grad_norm": 0.1948958013631015, "learning_rate": 3.8518225974715366e-05, "loss": 0.5614, "num_tokens": 1632225752.0, "step": 2133 }, { "epoch": 0.7819356020702606, "grad_norm": 0.2748437832132273, "learning_rate": 3.851652852821758e-05, "loss": 0.6027, "num_tokens": 1632836996.0, "step": 2134 }, { "epoch": 0.782302019878166, "grad_norm": 0.17965522510642426, "learning_rate": 3.851483015180631e-05, "loss": 0.5397, "num_tokens": 1633550681.0, "step": 2135 }, { "epoch": 0.7826684376860715, "grad_norm": 0.1936724144958701, "learning_rate": 3.851313084557718e-05, "loss": 0.5523, "num_tokens": 1634328865.0, "step": 2136 }, { "epoch": 0.783034855493977, "grad_norm": 0.19754498907400858, "learning_rate": 3.8511430609625856e-05, "loss": 0.5486, "num_tokens": 1634982260.0, "step": 2137 }, { "epoch": 0.7834012733018825, "grad_norm": 0.1932201575247118, "learning_rate": 3.850972944404808e-05, "loss": 0.6082, "num_tokens": 1635615428.0, "step": 2138 }, { "epoch": 0.783767691109788, "grad_norm": 0.165129532880853, "learning_rate": 3.850802734893961e-05, "loss": 0.5641, "num_tokens": 1636381968.0, "step": 2139 }, { "epoch": 0.7841341089176934, "grad_norm": 0.18721773072557996, "learning_rate": 3.8506324324396305e-05, "loss": 0.5926, "num_tokens": 1637147735.0, "step": 2140 }, { "epoch": 0.7845005267255989, "grad_norm": 0.16783026382775024, "learning_rate": 3.850462037051402e-05, "loss": 0.5473, "num_tokens": 1637929182.0, "step": 2141 }, { "epoch": 0.7848669445335044, "grad_norm": 0.15674830235147466, "learning_rate": 3.850291548738871e-05, "loss": 0.562, "num_tokens": 1638712306.0, "step": 2142 }, { "epoch": 0.7852333623414098, "grad_norm": 0.17385236383248306, "learning_rate": 3.850120967511636e-05, "loss": 0.5576, "num_tokens": 1639537118.0, "step": 2143 }, { "epoch": 0.7855997801493153, "grad_norm": 0.17359489869542524, "learning_rate": 3.8499502933793006e-05, "loss": 0.5509, "num_tokens": 1640281619.0, "step": 2144 }, { "epoch": 0.7859661979572207, "grad_norm": 0.1755765929913961, "learning_rate": 3.8497795263514744e-05, "loss": 0.5519, "num_tokens": 1641128141.0, "step": 2145 }, { "epoch": 0.7863326157651261, "grad_norm": 0.16372671645280482, "learning_rate": 3.849608666437772e-05, "loss": 0.5644, "num_tokens": 1641885320.0, "step": 2146 }, { "epoch": 0.7866990335730316, "grad_norm": 0.1895076915149787, "learning_rate": 3.849437713647813e-05, "loss": 0.5982, "num_tokens": 1642598154.0, "step": 2147 }, { "epoch": 0.7870654513809371, "grad_norm": 0.1664972354200428, "learning_rate": 3.849266667991222e-05, "loss": 0.5403, "num_tokens": 1643422968.0, "step": 2148 }, { "epoch": 0.7874318691888426, "grad_norm": 0.17279278631959172, "learning_rate": 3.84909552947763e-05, "loss": 0.5788, "num_tokens": 1644145106.0, "step": 2149 }, { "epoch": 0.787798286996748, "grad_norm": 0.18051713744255107, "learning_rate": 3.848924298116671e-05, "loss": 0.5661, "num_tokens": 1644927611.0, "step": 2150 }, { "epoch": 0.7881647048046535, "grad_norm": 0.166016662482481, "learning_rate": 3.848752973917988e-05, "loss": 0.5914, "num_tokens": 1645626849.0, "step": 2151 }, { "epoch": 0.788531122612559, "grad_norm": 0.17010219564565549, "learning_rate": 3.848581556891225e-05, "loss": 0.5693, "num_tokens": 1646356536.0, "step": 2152 }, { "epoch": 0.7888975404204644, "grad_norm": 0.15711891953640836, "learning_rate": 3.848410047046035e-05, "loss": 0.5392, "num_tokens": 1647195942.0, "step": 2153 }, { "epoch": 0.7892639582283699, "grad_norm": 0.18755487181552535, "learning_rate": 3.848238444392071e-05, "loss": 0.5791, "num_tokens": 1647938244.0, "step": 2154 }, { "epoch": 0.7896303760362754, "grad_norm": 0.18369372824657068, "learning_rate": 3.848066748938998e-05, "loss": 0.5709, "num_tokens": 1648835959.0, "step": 2155 }, { "epoch": 0.7899967938441809, "grad_norm": 0.16269793635418478, "learning_rate": 3.847894960696481e-05, "loss": 0.5165, "num_tokens": 1649643626.0, "step": 2156 }, { "epoch": 0.7903632116520863, "grad_norm": 0.16765997662613522, "learning_rate": 3.8477230796741925e-05, "loss": 0.5595, "num_tokens": 1650455752.0, "step": 2157 }, { "epoch": 0.7907296294599917, "grad_norm": 0.17822804534258352, "learning_rate": 3.8475511058818094e-05, "loss": 0.5335, "num_tokens": 1651217804.0, "step": 2158 }, { "epoch": 0.7910960472678972, "grad_norm": 0.16352246274013912, "learning_rate": 3.8473790393290154e-05, "loss": 0.5513, "num_tokens": 1652128547.0, "step": 2159 }, { "epoch": 0.7914624650758026, "grad_norm": 0.1732455183451952, "learning_rate": 3.847206880025496e-05, "loss": 0.5269, "num_tokens": 1652921970.0, "step": 2160 }, { "epoch": 0.7918288828837081, "grad_norm": 0.16750835164648234, "learning_rate": 3.847034627980946e-05, "loss": 0.5593, "num_tokens": 1653621241.0, "step": 2161 }, { "epoch": 0.7921953006916136, "grad_norm": 0.16775016497049933, "learning_rate": 3.846862283205063e-05, "loss": 0.5331, "num_tokens": 1654398713.0, "step": 2162 }, { "epoch": 0.7925617184995191, "grad_norm": 0.1781996538311491, "learning_rate": 3.84668984570755e-05, "loss": 0.6038, "num_tokens": 1655064472.0, "step": 2163 }, { "epoch": 0.7929281363074245, "grad_norm": 0.17466474776739202, "learning_rate": 3.846517315498116e-05, "loss": 0.5614, "num_tokens": 1655806362.0, "step": 2164 }, { "epoch": 0.79329455411533, "grad_norm": 0.1618656268937306, "learning_rate": 3.8463446925864746e-05, "loss": 0.4906, "num_tokens": 1656459096.0, "step": 2165 }, { "epoch": 0.7936609719232355, "grad_norm": 0.17434687681926708, "learning_rate": 3.8461719769823445e-05, "loss": 0.5768, "num_tokens": 1657325814.0, "step": 2166 }, { "epoch": 0.794027389731141, "grad_norm": 0.15998498987636525, "learning_rate": 3.845999168695451e-05, "loss": 0.5532, "num_tokens": 1658053547.0, "step": 2167 }, { "epoch": 0.7943938075390464, "grad_norm": 0.16337513164357806, "learning_rate": 3.845826267735522e-05, "loss": 0.5481, "num_tokens": 1658747926.0, "step": 2168 }, { "epoch": 0.7947602253469519, "grad_norm": 0.17210649065523975, "learning_rate": 3.8456532741122944e-05, "loss": 0.6105, "num_tokens": 1659429606.0, "step": 2169 }, { "epoch": 0.7951266431548574, "grad_norm": 0.16037239191121955, "learning_rate": 3.845480187835505e-05, "loss": 0.5516, "num_tokens": 1660059224.0, "step": 2170 }, { "epoch": 0.7954930609627628, "grad_norm": 0.16752295119978144, "learning_rate": 3.8453070089149016e-05, "loss": 0.5687, "num_tokens": 1660913395.0, "step": 2171 }, { "epoch": 0.7958594787706682, "grad_norm": 0.16276440378327972, "learning_rate": 3.845133737360233e-05, "loss": 0.5309, "num_tokens": 1661705395.0, "step": 2172 }, { "epoch": 0.7962258965785737, "grad_norm": 0.15982791542988686, "learning_rate": 3.844960373181255e-05, "loss": 0.5292, "num_tokens": 1662523999.0, "step": 2173 }, { "epoch": 0.7965923143864792, "grad_norm": 0.16894055443807635, "learning_rate": 3.84478691638773e-05, "loss": 0.5665, "num_tokens": 1663312228.0, "step": 2174 }, { "epoch": 0.7969587321943846, "grad_norm": 0.18033307730724768, "learning_rate": 3.8446133669894216e-05, "loss": 0.5749, "num_tokens": 1664016257.0, "step": 2175 }, { "epoch": 0.7973251500022901, "grad_norm": 0.16068005211105338, "learning_rate": 3.8444397249961014e-05, "loss": 0.5318, "num_tokens": 1664799756.0, "step": 2176 }, { "epoch": 0.7976915678101956, "grad_norm": 0.171852225188168, "learning_rate": 3.844265990417547e-05, "loss": 0.5719, "num_tokens": 1665515095.0, "step": 2177 }, { "epoch": 0.798057985618101, "grad_norm": 0.16334041557761972, "learning_rate": 3.8440921632635394e-05, "loss": 0.5818, "num_tokens": 1666270854.0, "step": 2178 }, { "epoch": 0.7984244034260065, "grad_norm": 0.14709295092098335, "learning_rate": 3.8439182435438646e-05, "loss": 0.548, "num_tokens": 1667121752.0, "step": 2179 }, { "epoch": 0.798790821233912, "grad_norm": 0.1641597182685175, "learning_rate": 3.8437442312683155e-05, "loss": 0.5396, "num_tokens": 1667918848.0, "step": 2180 }, { "epoch": 0.7991572390418175, "grad_norm": 0.16780296819422108, "learning_rate": 3.843570126446689e-05, "loss": 0.5931, "num_tokens": 1668695284.0, "step": 2181 }, { "epoch": 0.7995236568497229, "grad_norm": 0.16444573282783165, "learning_rate": 3.843395929088789e-05, "loss": 0.5523, "num_tokens": 1669480877.0, "step": 2182 }, { "epoch": 0.7998900746576284, "grad_norm": 0.1912177092968345, "learning_rate": 3.843221639204422e-05, "loss": 0.5776, "num_tokens": 1670201731.0, "step": 2183 }, { "epoch": 0.8002564924655339, "grad_norm": 0.16905998726315738, "learning_rate": 3.843047256803399e-05, "loss": 0.6125, "num_tokens": 1671065339.0, "step": 2184 }, { "epoch": 0.8006229102734392, "grad_norm": 0.1713129283922117, "learning_rate": 3.842872781895541e-05, "loss": 0.543, "num_tokens": 1671758575.0, "step": 2185 }, { "epoch": 0.8009893280813447, "grad_norm": 0.18876423942591547, "learning_rate": 3.84269821449067e-05, "loss": 0.5352, "num_tokens": 1672478778.0, "step": 2186 }, { "epoch": 0.8013557458892502, "grad_norm": 0.1862344494399041, "learning_rate": 3.8425235545986144e-05, "loss": 0.5378, "num_tokens": 1673379029.0, "step": 2187 }, { "epoch": 0.8017221636971557, "grad_norm": 5.7083935882499315, "learning_rate": 3.842348802229208e-05, "loss": 0.5629, "num_tokens": 1674066525.0, "step": 2188 }, { "epoch": 0.8020885815050611, "grad_norm": 0.22334231418523115, "learning_rate": 3.84217395739229e-05, "loss": 0.5576, "num_tokens": 1674823325.0, "step": 2189 }, { "epoch": 0.8024549993129666, "grad_norm": 0.17639572257197644, "learning_rate": 3.8419990200977046e-05, "loss": 0.5756, "num_tokens": 1675593209.0, "step": 2190 }, { "epoch": 0.8028214171208721, "grad_norm": 0.40700444590720775, "learning_rate": 3.841823990355301e-05, "loss": 0.5448, "num_tokens": 1676373975.0, "step": 2191 }, { "epoch": 0.8031878349287775, "grad_norm": 0.22650676345742007, "learning_rate": 3.8416488681749336e-05, "loss": 0.5555, "num_tokens": 1677019030.0, "step": 2192 }, { "epoch": 0.803554252736683, "grad_norm": 0.1930800806528012, "learning_rate": 3.8414736535664613e-05, "loss": 0.5608, "num_tokens": 1677665348.0, "step": 2193 }, { "epoch": 0.8039206705445885, "grad_norm": 0.19962384806413158, "learning_rate": 3.84129834653975e-05, "loss": 0.5369, "num_tokens": 1678478025.0, "step": 2194 }, { "epoch": 0.804287088352494, "grad_norm": 0.17410975583311047, "learning_rate": 3.84112294710467e-05, "loss": 0.5144, "num_tokens": 1679223131.0, "step": 2195 }, { "epoch": 0.8046535061603994, "grad_norm": 0.1785362187449325, "learning_rate": 3.840947455271097e-05, "loss": 0.566, "num_tokens": 1679993376.0, "step": 2196 }, { "epoch": 0.8050199239683049, "grad_norm": 0.17887894755410771, "learning_rate": 3.84077187104891e-05, "loss": 0.5267, "num_tokens": 1680767770.0, "step": 2197 }, { "epoch": 0.8053863417762103, "grad_norm": 0.18803770519792035, "learning_rate": 3.840596194447995e-05, "loss": 0.5715, "num_tokens": 1681537007.0, "step": 2198 }, { "epoch": 0.8057527595841157, "grad_norm": 0.1947534238915855, "learning_rate": 3.840420425478245e-05, "loss": 0.5252, "num_tokens": 1682384847.0, "step": 2199 }, { "epoch": 0.8061191773920212, "grad_norm": 0.19792038831686512, "learning_rate": 3.8402445641495544e-05, "loss": 0.5297, "num_tokens": 1683086755.0, "step": 2200 }, { "epoch": 0.8064855951999267, "grad_norm": 0.21753953010308308, "learning_rate": 3.8400686104718244e-05, "loss": 0.6204, "num_tokens": 1683839619.0, "step": 2201 }, { "epoch": 0.8068520130078322, "grad_norm": 0.17749787550432633, "learning_rate": 3.8398925644549624e-05, "loss": 0.5269, "num_tokens": 1684541464.0, "step": 2202 }, { "epoch": 0.8072184308157376, "grad_norm": 0.18229584659659073, "learning_rate": 3.8397164261088786e-05, "loss": 0.5595, "num_tokens": 1685267504.0, "step": 2203 }, { "epoch": 0.8075848486236431, "grad_norm": 0.17379420065472947, "learning_rate": 3.839540195443492e-05, "loss": 0.5606, "num_tokens": 1686042866.0, "step": 2204 }, { "epoch": 0.8079512664315486, "grad_norm": 0.17425689428462365, "learning_rate": 3.839363872468724e-05, "loss": 0.5824, "num_tokens": 1686712643.0, "step": 2205 }, { "epoch": 0.808317684239454, "grad_norm": 0.16142810369056967, "learning_rate": 3.8391874571945015e-05, "loss": 0.5564, "num_tokens": 1687452109.0, "step": 2206 }, { "epoch": 0.8086841020473595, "grad_norm": 0.2010015560056412, "learning_rate": 3.8390109496307566e-05, "loss": 0.5758, "num_tokens": 1688200402.0, "step": 2207 }, { "epoch": 0.809050519855265, "grad_norm": 0.1730649072488557, "learning_rate": 3.838834349787428e-05, "loss": 0.5733, "num_tokens": 1688952252.0, "step": 2208 }, { "epoch": 0.8094169376631705, "grad_norm": 0.18770020611276622, "learning_rate": 3.838657657674459e-05, "loss": 0.5503, "num_tokens": 1689715782.0, "step": 2209 }, { "epoch": 0.8097833554710759, "grad_norm": 0.22006449608306222, "learning_rate": 3.838480873301796e-05, "loss": 0.5887, "num_tokens": 1690420986.0, "step": 2210 }, { "epoch": 0.8101497732789814, "grad_norm": 0.1909821467552275, "learning_rate": 3.838303996679394e-05, "loss": 0.6, "num_tokens": 1691121735.0, "step": 2211 }, { "epoch": 0.8105161910868868, "grad_norm": 0.2326139619625985, "learning_rate": 3.8381270278172114e-05, "loss": 0.5651, "num_tokens": 1691820891.0, "step": 2212 }, { "epoch": 0.8108826088947922, "grad_norm": 0.21478152381674304, "learning_rate": 3.8379499667252106e-05, "loss": 0.5443, "num_tokens": 1692613738.0, "step": 2213 }, { "epoch": 0.8112490267026977, "grad_norm": 0.17365460997489865, "learning_rate": 3.837772813413361e-05, "loss": 0.5499, "num_tokens": 1693358943.0, "step": 2214 }, { "epoch": 0.8116154445106032, "grad_norm": 0.20557477646271805, "learning_rate": 3.837595567891637e-05, "loss": 0.5889, "num_tokens": 1694079560.0, "step": 2215 }, { "epoch": 0.8119818623185087, "grad_norm": 0.21965325040349637, "learning_rate": 3.8374182301700186e-05, "loss": 0.6047, "num_tokens": 1694773440.0, "step": 2216 }, { "epoch": 0.8123482801264141, "grad_norm": 0.17928615091832167, "learning_rate": 3.837240800258489e-05, "loss": 0.5413, "num_tokens": 1695499543.0, "step": 2217 }, { "epoch": 0.8127146979343196, "grad_norm": 0.2018160013479832, "learning_rate": 3.8370632781670385e-05, "loss": 0.5849, "num_tokens": 1696220938.0, "step": 2218 }, { "epoch": 0.8130811157422251, "grad_norm": 0.19624771130186025, "learning_rate": 3.836885663905661e-05, "loss": 0.5259, "num_tokens": 1696941953.0, "step": 2219 }, { "epoch": 0.8134475335501306, "grad_norm": 0.16945667921770133, "learning_rate": 3.836707957484358e-05, "loss": 0.5567, "num_tokens": 1697857086.0, "step": 2220 }, { "epoch": 0.813813951358036, "grad_norm": 0.19640928220568668, "learning_rate": 3.8365301589131337e-05, "loss": 0.5978, "num_tokens": 1698619561.0, "step": 2221 }, { "epoch": 0.8141803691659415, "grad_norm": 0.17912691917145623, "learning_rate": 3.836352268201999e-05, "loss": 0.55, "num_tokens": 1699511757.0, "step": 2222 }, { "epoch": 0.814546786973847, "grad_norm": 0.18018663572088653, "learning_rate": 3.83617428536097e-05, "loss": 0.538, "num_tokens": 1700282664.0, "step": 2223 }, { "epoch": 0.8149132047817524, "grad_norm": 0.1914686985205793, "learning_rate": 3.835996210400066e-05, "loss": 0.5427, "num_tokens": 1701077574.0, "step": 2224 }, { "epoch": 0.8152796225896578, "grad_norm": 0.18155409859686877, "learning_rate": 3.8358180433293145e-05, "loss": 0.5615, "num_tokens": 1701957340.0, "step": 2225 }, { "epoch": 0.8156460403975633, "grad_norm": 0.19368799205594475, "learning_rate": 3.835639784158745e-05, "loss": 0.5566, "num_tokens": 1702866451.0, "step": 2226 }, { "epoch": 0.8160124582054687, "grad_norm": 0.23889753800728974, "learning_rate": 3.8354614328983956e-05, "loss": 0.6069, "num_tokens": 1703635726.0, "step": 2227 }, { "epoch": 0.8163788760133742, "grad_norm": 0.196416115541282, "learning_rate": 3.835282989558307e-05, "loss": 0.5453, "num_tokens": 1704440266.0, "step": 2228 }, { "epoch": 0.8167452938212797, "grad_norm": 0.15952403449563204, "learning_rate": 3.835104454148526e-05, "loss": 0.532, "num_tokens": 1705233567.0, "step": 2229 }, { "epoch": 0.8171117116291852, "grad_norm": 0.18965962951304338, "learning_rate": 3.834925826679104e-05, "loss": 0.5299, "num_tokens": 1706005519.0, "step": 2230 }, { "epoch": 0.8174781294370906, "grad_norm": 0.1733446191895498, "learning_rate": 3.834747107160099e-05, "loss": 0.5624, "num_tokens": 1706767032.0, "step": 2231 }, { "epoch": 0.8178445472449961, "grad_norm": 0.17758692581937208, "learning_rate": 3.834568295601572e-05, "loss": 0.5608, "num_tokens": 1707596141.0, "step": 2232 }, { "epoch": 0.8182109650529016, "grad_norm": 0.18302876274531243, "learning_rate": 3.834389392013592e-05, "loss": 0.5723, "num_tokens": 1708335616.0, "step": 2233 }, { "epoch": 0.8185773828608071, "grad_norm": 0.1761164505481142, "learning_rate": 3.834210396406231e-05, "loss": 0.5688, "num_tokens": 1709163318.0, "step": 2234 }, { "epoch": 0.8189438006687125, "grad_norm": 0.17094195199767323, "learning_rate": 3.8340313087895665e-05, "loss": 0.563, "num_tokens": 1710009643.0, "step": 2235 }, { "epoch": 0.819310218476618, "grad_norm": 0.1574314150090249, "learning_rate": 3.833852129173682e-05, "loss": 0.5558, "num_tokens": 1710768702.0, "step": 2236 }, { "epoch": 0.8196766362845235, "grad_norm": 0.15759686366660006, "learning_rate": 3.833672857568665e-05, "loss": 0.5392, "num_tokens": 1711593425.0, "step": 2237 }, { "epoch": 0.820043054092429, "grad_norm": 0.17014955031802304, "learning_rate": 3.833493493984609e-05, "loss": 0.5333, "num_tokens": 1712395311.0, "step": 2238 }, { "epoch": 0.8204094719003343, "grad_norm": 0.15616062424843166, "learning_rate": 3.833314038431613e-05, "loss": 0.5582, "num_tokens": 1713133257.0, "step": 2239 }, { "epoch": 0.8207758897082398, "grad_norm": 0.18103612638334157, "learning_rate": 3.8331344909197804e-05, "loss": 0.5601, "num_tokens": 1713898025.0, "step": 2240 }, { "epoch": 0.8211423075161453, "grad_norm": 0.18756843202921938, "learning_rate": 3.832954851459221e-05, "loss": 0.5745, "num_tokens": 1714546463.0, "step": 2241 }, { "epoch": 0.8215087253240507, "grad_norm": 0.17442008647419963, "learning_rate": 3.832775120060047e-05, "loss": 0.5275, "num_tokens": 1715435798.0, "step": 2242 }, { "epoch": 0.8218751431319562, "grad_norm": 0.16088569563935343, "learning_rate": 3.832595296732378e-05, "loss": 0.5717, "num_tokens": 1716219345.0, "step": 2243 }, { "epoch": 0.8222415609398617, "grad_norm": 0.19264958157785453, "learning_rate": 3.83241538148634e-05, "loss": 0.5654, "num_tokens": 1716979506.0, "step": 2244 }, { "epoch": 0.8226079787477671, "grad_norm": 0.17282252445101015, "learning_rate": 3.832235374332062e-05, "loss": 0.5656, "num_tokens": 1717752280.0, "step": 2245 }, { "epoch": 0.8229743965556726, "grad_norm": 0.15747759942495382, "learning_rate": 3.832055275279678e-05, "loss": 0.5261, "num_tokens": 1718471318.0, "step": 2246 }, { "epoch": 0.8233408143635781, "grad_norm": 0.14674777250437307, "learning_rate": 3.831875084339329e-05, "loss": 0.5238, "num_tokens": 1719430593.0, "step": 2247 }, { "epoch": 0.8237072321714836, "grad_norm": 0.19484521184814707, "learning_rate": 3.8316948015211584e-05, "loss": 0.561, "num_tokens": 1720095905.0, "step": 2248 }, { "epoch": 0.824073649979389, "grad_norm": 0.1869174898471406, "learning_rate": 3.831514426835317e-05, "loss": 0.5761, "num_tokens": 1720880565.0, "step": 2249 }, { "epoch": 0.8244400677872945, "grad_norm": 0.17068207446318875, "learning_rate": 3.831333960291962e-05, "loss": 0.5635, "num_tokens": 1721720844.0, "step": 2250 }, { "epoch": 0.8248064855952, "grad_norm": 0.22086016739705716, "learning_rate": 3.831153401901252e-05, "loss": 0.5791, "num_tokens": 1722399149.0, "step": 2251 }, { "epoch": 0.8251729034031053, "grad_norm": 0.17586855295682077, "learning_rate": 3.8309727516733545e-05, "loss": 0.5917, "num_tokens": 1723161195.0, "step": 2252 }, { "epoch": 0.8255393212110108, "grad_norm": 0.19018186274833568, "learning_rate": 3.830792009618439e-05, "loss": 0.5342, "num_tokens": 1723959067.0, "step": 2253 }, { "epoch": 0.8259057390189163, "grad_norm": 0.20939739458647458, "learning_rate": 3.8306111757466815e-05, "loss": 0.5587, "num_tokens": 1724670052.0, "step": 2254 }, { "epoch": 0.8262721568268218, "grad_norm": 0.18792002851813355, "learning_rate": 3.830430250068265e-05, "loss": 0.5632, "num_tokens": 1725423248.0, "step": 2255 }, { "epoch": 0.8266385746347272, "grad_norm": 0.1731143385681808, "learning_rate": 3.830249232593375e-05, "loss": 0.5767, "num_tokens": 1726053638.0, "step": 2256 }, { "epoch": 0.8270049924426327, "grad_norm": 0.164322323936967, "learning_rate": 3.830068123332203e-05, "loss": 0.5576, "num_tokens": 1726846977.0, "step": 2257 }, { "epoch": 0.8273714102505382, "grad_norm": 0.1642444012390676, "learning_rate": 3.8298869222949455e-05, "loss": 0.5522, "num_tokens": 1727694140.0, "step": 2258 }, { "epoch": 0.8277378280584436, "grad_norm": 0.17492440745701193, "learning_rate": 3.829705629491805e-05, "loss": 0.5557, "num_tokens": 1728495982.0, "step": 2259 }, { "epoch": 0.8281042458663491, "grad_norm": 0.1976873538002075, "learning_rate": 3.8295242449329884e-05, "loss": 0.5262, "num_tokens": 1729354355.0, "step": 2260 }, { "epoch": 0.8284706636742546, "grad_norm": 0.17219577718161894, "learning_rate": 3.829342768628709e-05, "loss": 0.558, "num_tokens": 1730072547.0, "step": 2261 }, { "epoch": 0.8288370814821601, "grad_norm": 0.19220104397355034, "learning_rate": 3.829161200589182e-05, "loss": 0.5556, "num_tokens": 1730802031.0, "step": 2262 }, { "epoch": 0.8292034992900655, "grad_norm": 0.16251992649389163, "learning_rate": 3.828979540824633e-05, "loss": 0.5168, "num_tokens": 1731515795.0, "step": 2263 }, { "epoch": 0.829569917097971, "grad_norm": 0.16687181343891583, "learning_rate": 3.828797789345288e-05, "loss": 0.531, "num_tokens": 1732290942.0, "step": 2264 }, { "epoch": 0.8299363349058764, "grad_norm": 0.18840303724360435, "learning_rate": 3.82861594616138e-05, "loss": 0.5418, "num_tokens": 1733241444.0, "step": 2265 }, { "epoch": 0.8303027527137818, "grad_norm": 0.1740582316198685, "learning_rate": 3.8284340112831475e-05, "loss": 0.5521, "num_tokens": 1733967571.0, "step": 2266 }, { "epoch": 0.8306691705216873, "grad_norm": 0.20324342237875917, "learning_rate": 3.8282519847208334e-05, "loss": 0.5523, "num_tokens": 1734841449.0, "step": 2267 }, { "epoch": 0.8310355883295928, "grad_norm": 0.17574646938145108, "learning_rate": 3.828069866484687e-05, "loss": 0.5717, "num_tokens": 1735563401.0, "step": 2268 }, { "epoch": 0.8314020061374983, "grad_norm": 0.20940013284086437, "learning_rate": 3.827887656584961e-05, "loss": 0.5972, "num_tokens": 1736294761.0, "step": 2269 }, { "epoch": 0.8317684239454037, "grad_norm": 0.1847651754432558, "learning_rate": 3.827705355031914e-05, "loss": 0.5376, "num_tokens": 1737130191.0, "step": 2270 }, { "epoch": 0.8321348417533092, "grad_norm": 0.16846803282549824, "learning_rate": 3.8275229618358116e-05, "loss": 0.5599, "num_tokens": 1737860421.0, "step": 2271 }, { "epoch": 0.8325012595612147, "grad_norm": 0.2182939449286808, "learning_rate": 3.827340477006921e-05, "loss": 0.5779, "num_tokens": 1738547088.0, "step": 2272 }, { "epoch": 0.8328676773691202, "grad_norm": 0.17740281863504712, "learning_rate": 3.8271579005555185e-05, "loss": 0.5739, "num_tokens": 1739369382.0, "step": 2273 }, { "epoch": 0.8332340951770256, "grad_norm": 0.1720204359233602, "learning_rate": 3.826975232491881e-05, "loss": 0.5586, "num_tokens": 1740155981.0, "step": 2274 }, { "epoch": 0.8336005129849311, "grad_norm": 0.21221741189885382, "learning_rate": 3.826792472826295e-05, "loss": 0.5819, "num_tokens": 1740946174.0, "step": 2275 }, { "epoch": 0.8339669307928366, "grad_norm": 0.1659341518500693, "learning_rate": 3.826609621569049e-05, "loss": 0.5522, "num_tokens": 1741720169.0, "step": 2276 }, { "epoch": 0.834333348600742, "grad_norm": 0.2395838746106157, "learning_rate": 3.826426678730439e-05, "loss": 0.5437, "num_tokens": 1742469868.0, "step": 2277 }, { "epoch": 0.8346997664086475, "grad_norm": 0.19243746766436642, "learning_rate": 3.826243644320764e-05, "loss": 0.5488, "num_tokens": 1743215206.0, "step": 2278 }, { "epoch": 0.8350661842165529, "grad_norm": 0.17886507218365466, "learning_rate": 3.82606051835033e-05, "loss": 0.5981, "num_tokens": 1744000817.0, "step": 2279 }, { "epoch": 0.8354326020244583, "grad_norm": 0.19881986733674337, "learning_rate": 3.825877300829448e-05, "loss": 0.5426, "num_tokens": 1744713672.0, "step": 2280 }, { "epoch": 0.8357990198323638, "grad_norm": 0.16074804862704453, "learning_rate": 3.825693991768431e-05, "loss": 0.5508, "num_tokens": 1745486971.0, "step": 2281 }, { "epoch": 0.8361654376402693, "grad_norm": 0.16550559790417427, "learning_rate": 3.8255105911776025e-05, "loss": 0.5556, "num_tokens": 1746267245.0, "step": 2282 }, { "epoch": 0.8365318554481748, "grad_norm": 0.19553325997873472, "learning_rate": 3.825327099067287e-05, "loss": 0.565, "num_tokens": 1747024403.0, "step": 2283 }, { "epoch": 0.8368982732560802, "grad_norm": 0.1751096560793186, "learning_rate": 3.825143515447815e-05, "loss": 0.556, "num_tokens": 1747872482.0, "step": 2284 }, { "epoch": 0.8372646910639857, "grad_norm": 0.1820140991768338, "learning_rate": 3.824959840329523e-05, "loss": 0.5507, "num_tokens": 1748606097.0, "step": 2285 }, { "epoch": 0.8376311088718912, "grad_norm": 0.20681895692129965, "learning_rate": 3.8247760737227534e-05, "loss": 0.5754, "num_tokens": 1749403736.0, "step": 2286 }, { "epoch": 0.8379975266797967, "grad_norm": 0.18503347037325257, "learning_rate": 3.8245922156378504e-05, "loss": 0.5682, "num_tokens": 1750132648.0, "step": 2287 }, { "epoch": 0.8383639444877021, "grad_norm": 0.1699496814780728, "learning_rate": 3.8244082660851676e-05, "loss": 0.5516, "num_tokens": 1750912918.0, "step": 2288 }, { "epoch": 0.8387303622956076, "grad_norm": 0.16506676161622985, "learning_rate": 3.824224225075061e-05, "loss": 0.5308, "num_tokens": 1751851451.0, "step": 2289 }, { "epoch": 0.8390967801035131, "grad_norm": 0.19601914307252505, "learning_rate": 3.824040092617893e-05, "loss": 0.5798, "num_tokens": 1752602519.0, "step": 2290 }, { "epoch": 0.8394631979114185, "grad_norm": 0.17613205995205197, "learning_rate": 3.823855868724029e-05, "loss": 0.5495, "num_tokens": 1753321819.0, "step": 2291 }, { "epoch": 0.8398296157193239, "grad_norm": 0.15483363520050064, "learning_rate": 3.823671553403843e-05, "loss": 0.4964, "num_tokens": 1754209591.0, "step": 2292 }, { "epoch": 0.8401960335272294, "grad_norm": 0.16559896684595385, "learning_rate": 3.823487146667711e-05, "loss": 0.5292, "num_tokens": 1754895515.0, "step": 2293 }, { "epoch": 0.8405624513351349, "grad_norm": 0.16378460920317228, "learning_rate": 3.8233026485260164e-05, "loss": 0.5174, "num_tokens": 1755652319.0, "step": 2294 }, { "epoch": 0.8409288691430403, "grad_norm": 0.1786575255902884, "learning_rate": 3.8231180589891464e-05, "loss": 0.585, "num_tokens": 1756333383.0, "step": 2295 }, { "epoch": 0.8412952869509458, "grad_norm": 0.19032601332496316, "learning_rate": 3.822933378067494e-05, "loss": 0.5762, "num_tokens": 1757161521.0, "step": 2296 }, { "epoch": 0.8416617047588513, "grad_norm": 0.1586902084549927, "learning_rate": 3.822748605771457e-05, "loss": 0.5837, "num_tokens": 1758060601.0, "step": 2297 }, { "epoch": 0.8420281225667567, "grad_norm": 0.16363572189690553, "learning_rate": 3.822563742111438e-05, "loss": 0.5529, "num_tokens": 1758827469.0, "step": 2298 }, { "epoch": 0.8423945403746622, "grad_norm": 0.17798607521746954, "learning_rate": 3.8223787870978456e-05, "loss": 0.539, "num_tokens": 1759499767.0, "step": 2299 }, { "epoch": 0.8427609581825677, "grad_norm": 0.17168514422489956, "learning_rate": 3.822193740741093e-05, "loss": 0.5512, "num_tokens": 1760331402.0, "step": 2300 }, { "epoch": 0.8431273759904732, "grad_norm": 0.1529059035630808, "learning_rate": 3.8220086030515994e-05, "loss": 0.5475, "num_tokens": 1761130133.0, "step": 2301 }, { "epoch": 0.8434937937983786, "grad_norm": 0.16777520709178434, "learning_rate": 3.821823374039787e-05, "loss": 0.5924, "num_tokens": 1761953422.0, "step": 2302 }, { "epoch": 0.8438602116062841, "grad_norm": 0.16229776470290785, "learning_rate": 3.821638053716086e-05, "loss": 0.5274, "num_tokens": 1762733928.0, "step": 2303 }, { "epoch": 0.8442266294141896, "grad_norm": 0.16790447786377216, "learning_rate": 3.8214526420909295e-05, "loss": 0.5572, "num_tokens": 1763399979.0, "step": 2304 }, { "epoch": 0.8445930472220949, "grad_norm": 0.17383110232023416, "learning_rate": 3.821267139174757e-05, "loss": 0.5261, "num_tokens": 1764165987.0, "step": 2305 }, { "epoch": 0.8449594650300004, "grad_norm": 0.18487008773558036, "learning_rate": 3.821081544978012e-05, "loss": 0.5983, "num_tokens": 1764804548.0, "step": 2306 }, { "epoch": 0.8453258828379059, "grad_norm": 0.16029861886442293, "learning_rate": 3.820895859511144e-05, "loss": 0.5619, "num_tokens": 1765596232.0, "step": 2307 }, { "epoch": 0.8456923006458114, "grad_norm": 0.19217672307876682, "learning_rate": 3.820710082784608e-05, "loss": 0.5189, "num_tokens": 1766364353.0, "step": 2308 }, { "epoch": 0.8460587184537168, "grad_norm": 0.1943124021185507, "learning_rate": 3.820524214808863e-05, "loss": 0.5513, "num_tokens": 1767139517.0, "step": 2309 }, { "epoch": 0.8464251362616223, "grad_norm": 0.17162974629917854, "learning_rate": 3.8203382555943744e-05, "loss": 0.5307, "num_tokens": 1767848672.0, "step": 2310 }, { "epoch": 0.8467915540695278, "grad_norm": 0.20548970187891744, "learning_rate": 3.8201522051516114e-05, "loss": 0.5711, "num_tokens": 1768504975.0, "step": 2311 }, { "epoch": 0.8471579718774332, "grad_norm": 0.18192525027043285, "learning_rate": 3.819966063491049e-05, "loss": 0.5637, "num_tokens": 1769359066.0, "step": 2312 }, { "epoch": 0.8475243896853387, "grad_norm": 0.22286224993740406, "learning_rate": 3.819779830623168e-05, "loss": 0.5597, "num_tokens": 1770096296.0, "step": 2313 }, { "epoch": 0.8478908074932442, "grad_norm": 0.18567844926683066, "learning_rate": 3.819593506558454e-05, "loss": 0.5402, "num_tokens": 1770896231.0, "step": 2314 }, { "epoch": 0.8482572253011497, "grad_norm": 0.19915639494553516, "learning_rate": 3.819407091307396e-05, "loss": 0.5679, "num_tokens": 1771716500.0, "step": 2315 }, { "epoch": 0.8486236431090551, "grad_norm": 0.1937780844219677, "learning_rate": 3.81922058488049e-05, "loss": 0.5464, "num_tokens": 1772453011.0, "step": 2316 }, { "epoch": 0.8489900609169606, "grad_norm": 0.22843137171481498, "learning_rate": 3.819033987288237e-05, "loss": 0.5867, "num_tokens": 1773177260.0, "step": 2317 }, { "epoch": 0.8493564787248661, "grad_norm": 0.20612416616803256, "learning_rate": 3.818847298541144e-05, "loss": 0.5814, "num_tokens": 1773875495.0, "step": 2318 }, { "epoch": 0.8497228965327714, "grad_norm": 0.1813736767931942, "learning_rate": 3.818660518649719e-05, "loss": 0.5595, "num_tokens": 1774615508.0, "step": 2319 }, { "epoch": 0.8500893143406769, "grad_norm": 0.21384192236227972, "learning_rate": 3.8184736476244816e-05, "loss": 0.5498, "num_tokens": 1775423414.0, "step": 2320 }, { "epoch": 0.8504557321485824, "grad_norm": 0.17486475650042738, "learning_rate": 3.8182866854759504e-05, "loss": 0.5694, "num_tokens": 1776217958.0, "step": 2321 }, { "epoch": 0.8508221499564879, "grad_norm": 0.1984025795894448, "learning_rate": 3.818099632214652e-05, "loss": 0.5414, "num_tokens": 1776951841.0, "step": 2322 }, { "epoch": 0.8511885677643933, "grad_norm": 0.23114264884187075, "learning_rate": 3.817912487851119e-05, "loss": 0.5678, "num_tokens": 1777711929.0, "step": 2323 }, { "epoch": 0.8515549855722988, "grad_norm": 0.19868243041534478, "learning_rate": 3.817725252395888e-05, "loss": 0.543, "num_tokens": 1778380987.0, "step": 2324 }, { "epoch": 0.8519214033802043, "grad_norm": 0.26040884047666335, "learning_rate": 3.8175379258594994e-05, "loss": 0.603, "num_tokens": 1779158757.0, "step": 2325 }, { "epoch": 0.8522878211881098, "grad_norm": 0.17260135817400385, "learning_rate": 3.8173505082525014e-05, "loss": 0.5373, "num_tokens": 1779977004.0, "step": 2326 }, { "epoch": 0.8526542389960152, "grad_norm": 0.22358552424292097, "learning_rate": 3.8171629995854446e-05, "loss": 0.5295, "num_tokens": 1780884015.0, "step": 2327 }, { "epoch": 0.8530206568039207, "grad_norm": 0.2670262555518843, "learning_rate": 3.816975399868888e-05, "loss": 0.5526, "num_tokens": 1781781425.0, "step": 2328 }, { "epoch": 0.8533870746118262, "grad_norm": 0.17303837033290856, "learning_rate": 3.816787709113392e-05, "loss": 0.5434, "num_tokens": 1782421248.0, "step": 2329 }, { "epoch": 0.8537534924197316, "grad_norm": 0.2314516075764027, "learning_rate": 3.816599927329525e-05, "loss": 0.5855, "num_tokens": 1783326933.0, "step": 2330 }, { "epoch": 0.8541199102276371, "grad_norm": 0.2023294691076417, "learning_rate": 3.816412054527859e-05, "loss": 0.5889, "num_tokens": 1784128563.0, "step": 2331 }, { "epoch": 0.8544863280355425, "grad_norm": 0.2080233997192728, "learning_rate": 3.816224090718972e-05, "loss": 0.5837, "num_tokens": 1784912776.0, "step": 2332 }, { "epoch": 0.854852745843448, "grad_norm": 0.21100123948772298, "learning_rate": 3.816036035913446e-05, "loss": 0.5704, "num_tokens": 1785511027.0, "step": 2333 }, { "epoch": 0.8552191636513534, "grad_norm": 0.18367039971332275, "learning_rate": 3.8158478901218706e-05, "loss": 0.5154, "num_tokens": 1786267002.0, "step": 2334 }, { "epoch": 0.8555855814592589, "grad_norm": 0.17348096429944881, "learning_rate": 3.8156596533548366e-05, "loss": 0.538, "num_tokens": 1786964995.0, "step": 2335 }, { "epoch": 0.8559519992671644, "grad_norm": 0.1668519205394908, "learning_rate": 3.815471325622944e-05, "loss": 0.5602, "num_tokens": 1787650668.0, "step": 2336 }, { "epoch": 0.8563184170750698, "grad_norm": 0.18423363679703905, "learning_rate": 3.8152829069367944e-05, "loss": 0.5492, "num_tokens": 1788362513.0, "step": 2337 }, { "epoch": 0.8566848348829753, "grad_norm": 0.15536270787517273, "learning_rate": 3.815094397306998e-05, "loss": 0.5627, "num_tokens": 1789144678.0, "step": 2338 }, { "epoch": 0.8570512526908808, "grad_norm": 0.1663569326194693, "learning_rate": 3.8149057967441655e-05, "loss": 0.5642, "num_tokens": 1789847818.0, "step": 2339 }, { "epoch": 0.8574176704987863, "grad_norm": 0.15464534073063907, "learning_rate": 3.8147171052589186e-05, "loss": 0.5374, "num_tokens": 1790773600.0, "step": 2340 }, { "epoch": 0.8577840883066917, "grad_norm": 0.16369609269818455, "learning_rate": 3.814528322861879e-05, "loss": 0.5518, "num_tokens": 1791630471.0, "step": 2341 }, { "epoch": 0.8581505061145972, "grad_norm": 0.17465668842075108, "learning_rate": 3.8143394495636764e-05, "loss": 0.5587, "num_tokens": 1792384087.0, "step": 2342 }, { "epoch": 0.8585169239225027, "grad_norm": 0.1670772878890968, "learning_rate": 3.8141504853749436e-05, "loss": 0.5811, "num_tokens": 1793212781.0, "step": 2343 }, { "epoch": 0.8588833417304081, "grad_norm": 0.15855633465069366, "learning_rate": 3.813961430306321e-05, "loss": 0.505, "num_tokens": 1793896762.0, "step": 2344 }, { "epoch": 0.8592497595383135, "grad_norm": 0.17848190560848984, "learning_rate": 3.813772284368452e-05, "loss": 0.5571, "num_tokens": 1794682282.0, "step": 2345 }, { "epoch": 0.859616177346219, "grad_norm": 0.16046039726174607, "learning_rate": 3.813583047571986e-05, "loss": 0.54, "num_tokens": 1795369861.0, "step": 2346 }, { "epoch": 0.8599825951541245, "grad_norm": 0.17097925103578301, "learning_rate": 3.813393719927579e-05, "loss": 0.5296, "num_tokens": 1796169325.0, "step": 2347 }, { "epoch": 0.8603490129620299, "grad_norm": 0.17403506023900978, "learning_rate": 3.813204301445888e-05, "loss": 0.5395, "num_tokens": 1796929106.0, "step": 2348 }, { "epoch": 0.8607154307699354, "grad_norm": 0.17321478701570858, "learning_rate": 3.813014792137579e-05, "loss": 0.5751, "num_tokens": 1797747161.0, "step": 2349 }, { "epoch": 0.8610818485778409, "grad_norm": 0.18920244279723641, "learning_rate": 3.812825192013321e-05, "loss": 0.5924, "num_tokens": 1798544068.0, "step": 2350 }, { "epoch": 0.8614482663857463, "grad_norm": 0.16881656806780088, "learning_rate": 3.812635501083789e-05, "loss": 0.5655, "num_tokens": 1799254018.0, "step": 2351 }, { "epoch": 0.8618146841936518, "grad_norm": 0.17362625467340864, "learning_rate": 3.8124457193596644e-05, "loss": 0.5519, "num_tokens": 1799953610.0, "step": 2352 }, { "epoch": 0.8621811020015573, "grad_norm": 0.18319948522550425, "learning_rate": 3.8122558468516303e-05, "loss": 0.5531, "num_tokens": 1800632751.0, "step": 2353 }, { "epoch": 0.8625475198094628, "grad_norm": 0.16783805624259263, "learning_rate": 3.812065883570379e-05, "loss": 0.5788, "num_tokens": 1801403733.0, "step": 2354 }, { "epoch": 0.8629139376173682, "grad_norm": 0.18585014822667728, "learning_rate": 3.811875829526603e-05, "loss": 0.5829, "num_tokens": 1802126619.0, "step": 2355 }, { "epoch": 0.8632803554252737, "grad_norm": 0.17067554565474619, "learning_rate": 3.8116856847310056e-05, "loss": 0.5261, "num_tokens": 1802889982.0, "step": 2356 }, { "epoch": 0.8636467732331792, "grad_norm": 0.17555299724432194, "learning_rate": 3.811495449194291e-05, "loss": 0.562, "num_tokens": 1803515910.0, "step": 2357 }, { "epoch": 0.8640131910410846, "grad_norm": 0.19204299163440797, "learning_rate": 3.81130512292717e-05, "loss": 0.5826, "num_tokens": 1804287311.0, "step": 2358 }, { "epoch": 0.86437960884899, "grad_norm": 0.19499590072594564, "learning_rate": 3.8111147059403576e-05, "loss": 0.5824, "num_tokens": 1804973618.0, "step": 2359 }, { "epoch": 0.8647460266568955, "grad_norm": 0.1865182652457786, "learning_rate": 3.8109241982445756e-05, "loss": 0.5749, "num_tokens": 1805765404.0, "step": 2360 }, { "epoch": 0.865112444464801, "grad_norm": 0.17343137801446507, "learning_rate": 3.81073359985055e-05, "loss": 0.5465, "num_tokens": 1806563330.0, "step": 2361 }, { "epoch": 0.8654788622727064, "grad_norm": 0.17138100017944857, "learning_rate": 3.810542910769011e-05, "loss": 0.5432, "num_tokens": 1807402713.0, "step": 2362 }, { "epoch": 0.8658452800806119, "grad_norm": 0.17459283648583118, "learning_rate": 3.810352131010696e-05, "loss": 0.5936, "num_tokens": 1808242314.0, "step": 2363 }, { "epoch": 0.8662116978885174, "grad_norm": 0.16884668174264875, "learning_rate": 3.810161260586345e-05, "loss": 0.5491, "num_tokens": 1809059706.0, "step": 2364 }, { "epoch": 0.8665781156964228, "grad_norm": 0.1578922792113427, "learning_rate": 3.809970299506705e-05, "loss": 0.5375, "num_tokens": 1809869027.0, "step": 2365 }, { "epoch": 0.8669445335043283, "grad_norm": 0.18764133146952974, "learning_rate": 3.8097792477825276e-05, "loss": 0.5452, "num_tokens": 1810654634.0, "step": 2366 }, { "epoch": 0.8673109513122338, "grad_norm": 0.16731334450810337, "learning_rate": 3.8095881054245694e-05, "loss": 0.5564, "num_tokens": 1811361915.0, "step": 2367 }, { "epoch": 0.8676773691201393, "grad_norm": 0.16873395745637487, "learning_rate": 3.809396872443592e-05, "loss": 0.5603, "num_tokens": 1812180349.0, "step": 2368 }, { "epoch": 0.8680437869280447, "grad_norm": 0.16493491406555846, "learning_rate": 3.8092055488503624e-05, "loss": 0.5256, "num_tokens": 1812944464.0, "step": 2369 }, { "epoch": 0.8684102047359502, "grad_norm": 0.20810390789622973, "learning_rate": 3.809014134655652e-05, "loss": 0.5692, "num_tokens": 1813737093.0, "step": 2370 }, { "epoch": 0.8687766225438557, "grad_norm": 0.17179937592211927, "learning_rate": 3.8088226298702384e-05, "loss": 0.589, "num_tokens": 1814501639.0, "step": 2371 }, { "epoch": 0.869143040351761, "grad_norm": 0.1792341878890769, "learning_rate": 3.808631034504903e-05, "loss": 0.5615, "num_tokens": 1815265581.0, "step": 2372 }, { "epoch": 0.8695094581596665, "grad_norm": 0.15785678267496206, "learning_rate": 3.808439348570433e-05, "loss": 0.5304, "num_tokens": 1816082311.0, "step": 2373 }, { "epoch": 0.869875875967572, "grad_norm": 0.15703727713216062, "learning_rate": 3.808247572077622e-05, "loss": 0.5444, "num_tokens": 1816935653.0, "step": 2374 }, { "epoch": 0.8702422937754775, "grad_norm": 0.18503358447600413, "learning_rate": 3.808055705037266e-05, "loss": 0.5251, "num_tokens": 1817767951.0, "step": 2375 }, { "epoch": 0.8706087115833829, "grad_norm": 0.16249382651112307, "learning_rate": 3.807863747460168e-05, "loss": 0.5528, "num_tokens": 1818606069.0, "step": 2376 }, { "epoch": 0.8709751293912884, "grad_norm": 0.1864897223886094, "learning_rate": 3.807671699357136e-05, "loss": 0.5498, "num_tokens": 1819422865.0, "step": 2377 }, { "epoch": 0.8713415471991939, "grad_norm": 0.18911781483340015, "learning_rate": 3.807479560738981e-05, "loss": 0.607, "num_tokens": 1820109852.0, "step": 2378 }, { "epoch": 0.8717079650070994, "grad_norm": 0.16810314172723767, "learning_rate": 3.807287331616524e-05, "loss": 0.5507, "num_tokens": 1820924386.0, "step": 2379 }, { "epoch": 0.8720743828150048, "grad_norm": 0.17207973638641377, "learning_rate": 3.807095012000584e-05, "loss": 0.5674, "num_tokens": 1821634475.0, "step": 2380 }, { "epoch": 0.8724408006229103, "grad_norm": 0.2518087625547774, "learning_rate": 3.806902601901992e-05, "loss": 0.5941, "num_tokens": 1822295434.0, "step": 2381 }, { "epoch": 0.8728072184308158, "grad_norm": 0.18047788854548297, "learning_rate": 3.8067101013315795e-05, "loss": 0.5561, "num_tokens": 1823129584.0, "step": 2382 }, { "epoch": 0.8731736362387212, "grad_norm": 0.17438902888067645, "learning_rate": 3.806517510300186e-05, "loss": 0.5292, "num_tokens": 1823958659.0, "step": 2383 }, { "epoch": 0.8735400540466267, "grad_norm": 0.23090062273536557, "learning_rate": 3.8063248288186524e-05, "loss": 0.6129, "num_tokens": 1824734451.0, "step": 2384 }, { "epoch": 0.8739064718545322, "grad_norm": 0.1663721647479159, "learning_rate": 3.806132056897829e-05, "loss": 0.5352, "num_tokens": 1825640109.0, "step": 2385 }, { "epoch": 0.8742728896624375, "grad_norm": 0.186999964098797, "learning_rate": 3.805939194548569e-05, "loss": 0.5392, "num_tokens": 1826451363.0, "step": 2386 }, { "epoch": 0.874639307470343, "grad_norm": 0.20879467095801546, "learning_rate": 3.805746241781732e-05, "loss": 0.5877, "num_tokens": 1827267222.0, "step": 2387 }, { "epoch": 0.8750057252782485, "grad_norm": 0.15536038059750784, "learning_rate": 3.805553198608179e-05, "loss": 0.5313, "num_tokens": 1828120877.0, "step": 2388 }, { "epoch": 0.875372143086154, "grad_norm": 0.18343258156919834, "learning_rate": 3.80536006503878e-05, "loss": 0.5539, "num_tokens": 1828850945.0, "step": 2389 }, { "epoch": 0.8757385608940594, "grad_norm": 0.1655192273489349, "learning_rate": 3.805166841084409e-05, "loss": 0.5729, "num_tokens": 1829589045.0, "step": 2390 }, { "epoch": 0.8761049787019649, "grad_norm": 0.1600304635558872, "learning_rate": 3.804973526755945e-05, "loss": 0.5513, "num_tokens": 1830400053.0, "step": 2391 }, { "epoch": 0.8764713965098704, "grad_norm": 0.1716182350609048, "learning_rate": 3.804780122064272e-05, "loss": 0.5637, "num_tokens": 1831021335.0, "step": 2392 }, { "epoch": 0.8768378143177759, "grad_norm": 0.1845772609377923, "learning_rate": 3.8045866270202787e-05, "loss": 0.5465, "num_tokens": 1831692329.0, "step": 2393 }, { "epoch": 0.8772042321256813, "grad_norm": 0.1623337090367395, "learning_rate": 3.8043930416348595e-05, "loss": 0.5514, "num_tokens": 1832378566.0, "step": 2394 }, { "epoch": 0.8775706499335868, "grad_norm": 0.1984434106524693, "learning_rate": 3.804199365918913e-05, "loss": 0.5728, "num_tokens": 1833041348.0, "step": 2395 }, { "epoch": 0.8779370677414923, "grad_norm": 0.19898632051946522, "learning_rate": 3.804005599883344e-05, "loss": 0.586, "num_tokens": 1833824941.0, "step": 2396 }, { "epoch": 0.8783034855493977, "grad_norm": 0.19633980506245635, "learning_rate": 3.803811743539062e-05, "loss": 0.5877, "num_tokens": 1834693286.0, "step": 2397 }, { "epoch": 0.8786699033573032, "grad_norm": 0.1614390201958632, "learning_rate": 3.803617796896982e-05, "loss": 0.5651, "num_tokens": 1835431091.0, "step": 2398 }, { "epoch": 0.8790363211652086, "grad_norm": 0.19835829027451873, "learning_rate": 3.803423759968023e-05, "loss": 0.5679, "num_tokens": 1836327536.0, "step": 2399 }, { "epoch": 0.879402738973114, "grad_norm": 0.17358721715852146, "learning_rate": 3.80322963276311e-05, "loss": 0.5384, "num_tokens": 1837081231.0, "step": 2400 }, { "epoch": 0.8797691567810195, "grad_norm": 0.1677116545914469, "learning_rate": 3.803035415293172e-05, "loss": 0.529, "num_tokens": 1837964433.0, "step": 2401 }, { "epoch": 0.880135574588925, "grad_norm": 0.16645354097786036, "learning_rate": 3.802841107569144e-05, "loss": 0.5421, "num_tokens": 1838559490.0, "step": 2402 }, { "epoch": 0.8805019923968305, "grad_norm": 0.16474732486072236, "learning_rate": 3.8026467096019664e-05, "loss": 0.5618, "num_tokens": 1839421475.0, "step": 2403 }, { "epoch": 0.8808684102047359, "grad_norm": 0.1736571332154546, "learning_rate": 3.802452221402584e-05, "loss": 0.5537, "num_tokens": 1840245533.0, "step": 2404 }, { "epoch": 0.8812348280126414, "grad_norm": 0.178421749859383, "learning_rate": 3.802257642981947e-05, "loss": 0.5789, "num_tokens": 1840943262.0, "step": 2405 }, { "epoch": 0.8816012458205469, "grad_norm": 0.2003288658002871, "learning_rate": 3.8020629743510104e-05, "loss": 0.5505, "num_tokens": 1841624609.0, "step": 2406 }, { "epoch": 0.8819676636284524, "grad_norm": 0.18494216071347255, "learning_rate": 3.801868215520735e-05, "loss": 0.5407, "num_tokens": 1842301439.0, "step": 2407 }, { "epoch": 0.8823340814363578, "grad_norm": 0.1880701113047146, "learning_rate": 3.801673366502084e-05, "loss": 0.5279, "num_tokens": 1843212033.0, "step": 2408 }, { "epoch": 0.8827004992442633, "grad_norm": 0.16444711566077438, "learning_rate": 3.8014784273060305e-05, "loss": 0.5475, "num_tokens": 1843977050.0, "step": 2409 }, { "epoch": 0.8830669170521688, "grad_norm": 0.15922380794203742, "learning_rate": 3.8012833979435486e-05, "loss": 0.5321, "num_tokens": 1844705552.0, "step": 2410 }, { "epoch": 0.8834333348600742, "grad_norm": 0.17606955250437614, "learning_rate": 3.8010882784256194e-05, "loss": 0.5577, "num_tokens": 1845395898.0, "step": 2411 }, { "epoch": 0.8837997526679796, "grad_norm": 0.17593435580525935, "learning_rate": 3.800893068763228e-05, "loss": 0.5576, "num_tokens": 1846241402.0, "step": 2412 }, { "epoch": 0.8841661704758851, "grad_norm": 0.18215560661749733, "learning_rate": 3.8006977689673654e-05, "loss": 0.5093, "num_tokens": 1846889066.0, "step": 2413 }, { "epoch": 0.8845325882837906, "grad_norm": 0.194711343318726, "learning_rate": 3.800502379049027e-05, "loss": 0.56, "num_tokens": 1847570771.0, "step": 2414 }, { "epoch": 0.884899006091696, "grad_norm": 0.21191231750655273, "learning_rate": 3.800306899019214e-05, "loss": 0.559, "num_tokens": 1848229783.0, "step": 2415 }, { "epoch": 0.8852654238996015, "grad_norm": 0.2017417248126458, "learning_rate": 3.8001113288889314e-05, "loss": 0.562, "num_tokens": 1848972287.0, "step": 2416 }, { "epoch": 0.885631841707507, "grad_norm": 0.1816190586692792, "learning_rate": 3.799915668669192e-05, "loss": 0.5493, "num_tokens": 1849778990.0, "step": 2417 }, { "epoch": 0.8859982595154124, "grad_norm": 0.18399314381301574, "learning_rate": 3.799719918371011e-05, "loss": 0.5774, "num_tokens": 1850533001.0, "step": 2418 }, { "epoch": 0.8863646773233179, "grad_norm": 0.1675224895505425, "learning_rate": 3.799524078005409e-05, "loss": 0.5242, "num_tokens": 1851302333.0, "step": 2419 }, { "epoch": 0.8867310951312234, "grad_norm": 0.1993940705100294, "learning_rate": 3.799328147583412e-05, "loss": 0.5691, "num_tokens": 1852151773.0, "step": 2420 }, { "epoch": 0.8870975129391289, "grad_norm": 0.1621594470337466, "learning_rate": 3.799132127116052e-05, "loss": 0.5612, "num_tokens": 1852925622.0, "step": 2421 }, { "epoch": 0.8874639307470343, "grad_norm": 0.18927936600498246, "learning_rate": 3.7989360166143666e-05, "loss": 0.5614, "num_tokens": 1853652025.0, "step": 2422 }, { "epoch": 0.8878303485549398, "grad_norm": 0.17109379705643027, "learning_rate": 3.7987398160893945e-05, "loss": 0.53, "num_tokens": 1854457690.0, "step": 2423 }, { "epoch": 0.8881967663628453, "grad_norm": 0.19335251190558897, "learning_rate": 3.798543525552184e-05, "loss": 0.5596, "num_tokens": 1855220944.0, "step": 2424 }, { "epoch": 0.8885631841707508, "grad_norm": 0.1720855510579747, "learning_rate": 3.798347145013786e-05, "loss": 0.5306, "num_tokens": 1855928210.0, "step": 2425 }, { "epoch": 0.8889296019786561, "grad_norm": 0.16809515282447635, "learning_rate": 3.798150674485257e-05, "loss": 0.5851, "num_tokens": 1856574269.0, "step": 2426 }, { "epoch": 0.8892960197865616, "grad_norm": 0.17485847191899748, "learning_rate": 3.797954113977659e-05, "loss": 0.535, "num_tokens": 1857409355.0, "step": 2427 }, { "epoch": 0.8896624375944671, "grad_norm": 0.17534888317039998, "learning_rate": 3.797757463502059e-05, "loss": 0.5516, "num_tokens": 1858200701.0, "step": 2428 }, { "epoch": 0.8900288554023725, "grad_norm": 0.1738064840333265, "learning_rate": 3.7975607230695287e-05, "loss": 0.5454, "num_tokens": 1858932524.0, "step": 2429 }, { "epoch": 0.890395273210278, "grad_norm": 0.1926982862128436, "learning_rate": 3.7973638926911436e-05, "loss": 0.5648, "num_tokens": 1859615155.0, "step": 2430 }, { "epoch": 0.8907616910181835, "grad_norm": 0.16405289722650904, "learning_rate": 3.797166972377988e-05, "loss": 0.5774, "num_tokens": 1860412677.0, "step": 2431 }, { "epoch": 0.891128108826089, "grad_norm": 0.174534074360289, "learning_rate": 3.796969962141147e-05, "loss": 0.5545, "num_tokens": 1861163085.0, "step": 2432 }, { "epoch": 0.8914945266339944, "grad_norm": 0.1900785156238651, "learning_rate": 3.796772861991714e-05, "loss": 0.5531, "num_tokens": 1861998990.0, "step": 2433 }, { "epoch": 0.8918609444418999, "grad_norm": 0.17284933500022565, "learning_rate": 3.796575671940785e-05, "loss": 0.5663, "num_tokens": 1862752197.0, "step": 2434 }, { "epoch": 0.8922273622498054, "grad_norm": 0.17323852042079999, "learning_rate": 3.7963783919994635e-05, "loss": 0.5479, "num_tokens": 1863466932.0, "step": 2435 }, { "epoch": 0.8925937800577108, "grad_norm": 0.16681037734619242, "learning_rate": 3.796181022178855e-05, "loss": 0.5474, "num_tokens": 1864274376.0, "step": 2436 }, { "epoch": 0.8929601978656163, "grad_norm": 0.17480572475852438, "learning_rate": 3.7959835624900737e-05, "loss": 0.5737, "num_tokens": 1865004445.0, "step": 2437 }, { "epoch": 0.8933266156735218, "grad_norm": 0.17932784666491886, "learning_rate": 3.795786012944236e-05, "loss": 0.5235, "num_tokens": 1865850147.0, "step": 2438 }, { "epoch": 0.8936930334814271, "grad_norm": 0.18039082622551303, "learning_rate": 3.795588373552463e-05, "loss": 0.5921, "num_tokens": 1866514607.0, "step": 2439 }, { "epoch": 0.8940594512893326, "grad_norm": 0.15150346809583856, "learning_rate": 3.795390644325885e-05, "loss": 0.5506, "num_tokens": 1867277816.0, "step": 2440 }, { "epoch": 0.8944258690972381, "grad_norm": 0.1706824574421137, "learning_rate": 3.795192825275633e-05, "loss": 0.531, "num_tokens": 1868052598.0, "step": 2441 }, { "epoch": 0.8947922869051436, "grad_norm": 0.16659810180578188, "learning_rate": 3.7949949164128444e-05, "loss": 0.5602, "num_tokens": 1868821767.0, "step": 2442 }, { "epoch": 0.895158704713049, "grad_norm": 0.16627080340644154, "learning_rate": 3.7947969177486625e-05, "loss": 0.5541, "num_tokens": 1869647743.0, "step": 2443 }, { "epoch": 0.8955251225209545, "grad_norm": 0.20241465212294418, "learning_rate": 3.7945988292942346e-05, "loss": 0.598, "num_tokens": 1870292784.0, "step": 2444 }, { "epoch": 0.89589154032886, "grad_norm": 0.18439430948596114, "learning_rate": 3.794400651060714e-05, "loss": 0.5601, "num_tokens": 1870943276.0, "step": 2445 }, { "epoch": 0.8962579581367655, "grad_norm": 0.18199219582514733, "learning_rate": 3.7942023830592566e-05, "loss": 0.5497, "num_tokens": 1871824949.0, "step": 2446 }, { "epoch": 0.8966243759446709, "grad_norm": 0.16748490707961566, "learning_rate": 3.794004025301028e-05, "loss": 0.5613, "num_tokens": 1872606763.0, "step": 2447 }, { "epoch": 0.8969907937525764, "grad_norm": 0.17707425531788493, "learning_rate": 3.793805577797195e-05, "loss": 0.5645, "num_tokens": 1873381640.0, "step": 2448 }, { "epoch": 0.8973572115604819, "grad_norm": 0.19352070260254048, "learning_rate": 3.793607040558931e-05, "loss": 0.552, "num_tokens": 1874163814.0, "step": 2449 }, { "epoch": 0.8977236293683873, "grad_norm": 0.1766678760731324, "learning_rate": 3.793408413597413e-05, "loss": 0.5401, "num_tokens": 1874993398.0, "step": 2450 }, { "epoch": 0.8980900471762928, "grad_norm": 0.2321280290494625, "learning_rate": 3.7932096969238244e-05, "loss": 0.561, "num_tokens": 1875620382.0, "step": 2451 }, { "epoch": 0.8984564649841982, "grad_norm": 0.20321071246664066, "learning_rate": 3.793010890549354e-05, "loss": 0.5633, "num_tokens": 1876501553.0, "step": 2452 }, { "epoch": 0.8988228827921037, "grad_norm": 0.23943078915656327, "learning_rate": 3.792811994485194e-05, "loss": 0.5565, "num_tokens": 1877320964.0, "step": 2453 }, { "epoch": 0.8991893006000091, "grad_norm": 0.2138626930058881, "learning_rate": 3.7926130087425447e-05, "loss": 0.5565, "num_tokens": 1878157352.0, "step": 2454 }, { "epoch": 0.8995557184079146, "grad_norm": 0.17416913008834028, "learning_rate": 3.792413933332607e-05, "loss": 0.5622, "num_tokens": 1879053997.0, "step": 2455 }, { "epoch": 0.8999221362158201, "grad_norm": 0.1926967328087641, "learning_rate": 3.7922147682665895e-05, "loss": 0.5241, "num_tokens": 1879751603.0, "step": 2456 }, { "epoch": 0.9002885540237255, "grad_norm": 0.16552963535585546, "learning_rate": 3.792015513555707e-05, "loss": 0.5533, "num_tokens": 1880474187.0, "step": 2457 }, { "epoch": 0.900654971831631, "grad_norm": 0.1949718712696232, "learning_rate": 3.791816169211178e-05, "loss": 0.5596, "num_tokens": 1881190314.0, "step": 2458 }, { "epoch": 0.9010213896395365, "grad_norm": 0.18456935104683186, "learning_rate": 3.791616735244224e-05, "loss": 0.5744, "num_tokens": 1882080065.0, "step": 2459 }, { "epoch": 0.901387807447442, "grad_norm": 0.1626644650215596, "learning_rate": 3.791417211666075e-05, "loss": 0.5527, "num_tokens": 1882853121.0, "step": 2460 }, { "epoch": 0.9017542252553474, "grad_norm": 0.18549791197175988, "learning_rate": 3.7912175984879656e-05, "loss": 0.5592, "num_tokens": 1883558194.0, "step": 2461 }, { "epoch": 0.9021206430632529, "grad_norm": 0.18220005669684172, "learning_rate": 3.791017895721132e-05, "loss": 0.5686, "num_tokens": 1884365721.0, "step": 2462 }, { "epoch": 0.9024870608711584, "grad_norm": 0.19193622667655166, "learning_rate": 3.790818103376819e-05, "loss": 0.5473, "num_tokens": 1885040887.0, "step": 2463 }, { "epoch": 0.9028534786790638, "grad_norm": 0.1581357970987574, "learning_rate": 3.790618221466275e-05, "loss": 0.5409, "num_tokens": 1885795306.0, "step": 2464 }, { "epoch": 0.9032198964869693, "grad_norm": 0.19300662743577057, "learning_rate": 3.7904182500007546e-05, "loss": 0.5854, "num_tokens": 1886512382.0, "step": 2465 }, { "epoch": 0.9035863142948747, "grad_norm": 0.16062357052533546, "learning_rate": 3.790218188991516e-05, "loss": 0.5163, "num_tokens": 1887378297.0, "step": 2466 }, { "epoch": 0.9039527321027802, "grad_norm": 0.18393624117794316, "learning_rate": 3.790018038449823e-05, "loss": 0.5946, "num_tokens": 1888106760.0, "step": 2467 }, { "epoch": 0.9043191499106856, "grad_norm": 0.18396087043184378, "learning_rate": 3.7898177983869446e-05, "loss": 0.5289, "num_tokens": 1888792957.0, "step": 2468 }, { "epoch": 0.9046855677185911, "grad_norm": 0.18239131374527034, "learning_rate": 3.789617468814155e-05, "loss": 0.5543, "num_tokens": 1889375834.0, "step": 2469 }, { "epoch": 0.9050519855264966, "grad_norm": 0.19121244281620986, "learning_rate": 3.789417049742732e-05, "loss": 0.5468, "num_tokens": 1890081940.0, "step": 2470 }, { "epoch": 0.905418403334402, "grad_norm": 0.18173126569391768, "learning_rate": 3.789216541183961e-05, "loss": 0.5465, "num_tokens": 1890773787.0, "step": 2471 }, { "epoch": 0.9057848211423075, "grad_norm": 0.1701277881178445, "learning_rate": 3.789015943149131e-05, "loss": 0.5112, "num_tokens": 1891629255.0, "step": 2472 }, { "epoch": 0.906151238950213, "grad_norm": 0.1829919081981449, "learning_rate": 3.7888152556495343e-05, "loss": 0.5485, "num_tokens": 1892488748.0, "step": 2473 }, { "epoch": 0.9065176567581185, "grad_norm": 0.15681542902005763, "learning_rate": 3.788614478696472e-05, "loss": 0.5163, "num_tokens": 1893230108.0, "step": 2474 }, { "epoch": 0.9068840745660239, "grad_norm": 0.18145130298295956, "learning_rate": 3.788413612301247e-05, "loss": 0.5317, "num_tokens": 1894079177.0, "step": 2475 }, { "epoch": 0.9072504923739294, "grad_norm": 0.16608083623187597, "learning_rate": 3.7882126564751695e-05, "loss": 0.5287, "num_tokens": 1894906762.0, "step": 2476 }, { "epoch": 0.9076169101818349, "grad_norm": 0.17261059375600002, "learning_rate": 3.788011611229553e-05, "loss": 0.5698, "num_tokens": 1895752232.0, "step": 2477 }, { "epoch": 0.9079833279897404, "grad_norm": 0.19353174858234087, "learning_rate": 3.787810476575717e-05, "loss": 0.5649, "num_tokens": 1896505877.0, "step": 2478 }, { "epoch": 0.9083497457976457, "grad_norm": 0.18101046478161398, "learning_rate": 3.787609252524985e-05, "loss": 0.5207, "num_tokens": 1897377715.0, "step": 2479 }, { "epoch": 0.9087161636055512, "grad_norm": 0.16686680686785924, "learning_rate": 3.787407939088688e-05, "loss": 0.5182, "num_tokens": 1898096944.0, "step": 2480 }, { "epoch": 0.9090825814134567, "grad_norm": 0.1710656496979781, "learning_rate": 3.7872065362781586e-05, "loss": 0.572, "num_tokens": 1898753649.0, "step": 2481 }, { "epoch": 0.9094489992213621, "grad_norm": 0.18200197594245762, "learning_rate": 3.787005044104738e-05, "loss": 0.5866, "num_tokens": 1899458820.0, "step": 2482 }, { "epoch": 0.9098154170292676, "grad_norm": 0.18082993676846695, "learning_rate": 3.786803462579768e-05, "loss": 0.5975, "num_tokens": 1900211606.0, "step": 2483 }, { "epoch": 0.9101818348371731, "grad_norm": 0.18124348989031375, "learning_rate": 3.7866017917146004e-05, "loss": 0.5505, "num_tokens": 1900886211.0, "step": 2484 }, { "epoch": 0.9105482526450785, "grad_norm": 0.16438914504608615, "learning_rate": 3.786400031520589e-05, "loss": 0.5225, "num_tokens": 1901754246.0, "step": 2485 }, { "epoch": 0.910914670452984, "grad_norm": 0.16480244750087977, "learning_rate": 3.786198182009093e-05, "loss": 0.565, "num_tokens": 1902467305.0, "step": 2486 }, { "epoch": 0.9112810882608895, "grad_norm": 0.19566791794657415, "learning_rate": 3.7859962431914774e-05, "loss": 0.5789, "num_tokens": 1903244432.0, "step": 2487 }, { "epoch": 0.911647506068795, "grad_norm": 0.16721460003518832, "learning_rate": 3.785794215079111e-05, "loss": 0.51, "num_tokens": 1903966818.0, "step": 2488 }, { "epoch": 0.9120139238767004, "grad_norm": 0.16736761371944606, "learning_rate": 3.785592097683369e-05, "loss": 0.5454, "num_tokens": 1904815866.0, "step": 2489 }, { "epoch": 0.9123803416846059, "grad_norm": 0.18613726794963528, "learning_rate": 3.785389891015631e-05, "loss": 0.5785, "num_tokens": 1905486055.0, "step": 2490 }, { "epoch": 0.9127467594925114, "grad_norm": 0.15649549596666215, "learning_rate": 3.785187595087281e-05, "loss": 0.5437, "num_tokens": 1906259572.0, "step": 2491 }, { "epoch": 0.9131131773004167, "grad_norm": 0.16487646891890434, "learning_rate": 3.7849852099097095e-05, "loss": 0.5643, "num_tokens": 1906980772.0, "step": 2492 }, { "epoch": 0.9134795951083222, "grad_norm": 0.16890714045320268, "learning_rate": 3.784782735494311e-05, "loss": 0.5475, "num_tokens": 1907672622.0, "step": 2493 }, { "epoch": 0.9138460129162277, "grad_norm": 0.19185803054328746, "learning_rate": 3.784580171852485e-05, "loss": 0.5777, "num_tokens": 1908356139.0, "step": 2494 }, { "epoch": 0.9142124307241332, "grad_norm": 0.1805241118097873, "learning_rate": 3.7843775189956364e-05, "loss": 0.5734, "num_tokens": 1909069120.0, "step": 2495 }, { "epoch": 0.9145788485320386, "grad_norm": 0.17851302685693324, "learning_rate": 3.784174776935174e-05, "loss": 0.5561, "num_tokens": 1909846327.0, "step": 2496 }, { "epoch": 0.9149452663399441, "grad_norm": 0.17518731331046147, "learning_rate": 3.7839719456825145e-05, "loss": 0.5759, "num_tokens": 1910662386.0, "step": 2497 }, { "epoch": 0.9153116841478496, "grad_norm": 0.20623865876918493, "learning_rate": 3.783769025249076e-05, "loss": 0.5381, "num_tokens": 1911435369.0, "step": 2498 }, { "epoch": 0.915678101955755, "grad_norm": 0.18643975503503007, "learning_rate": 3.783566015646285e-05, "loss": 0.563, "num_tokens": 1912155082.0, "step": 2499 }, { "epoch": 0.9160445197636605, "grad_norm": 0.18778201811427045, "learning_rate": 3.783362916885569e-05, "loss": 0.5782, "num_tokens": 1912828499.0, "step": 2500 }, { "epoch": 0.916410937571566, "grad_norm": 0.22965666263159634, "learning_rate": 3.783159728978364e-05, "loss": 0.5263, "num_tokens": 1913660699.0, "step": 2501 }, { "epoch": 0.9167773553794715, "grad_norm": 0.1486594434768562, "learning_rate": 3.7829564519361114e-05, "loss": 0.5154, "num_tokens": 1914507803.0, "step": 2502 }, { "epoch": 0.9171437731873769, "grad_norm": 0.19213672772682008, "learning_rate": 3.782753085770254e-05, "loss": 0.5535, "num_tokens": 1915252264.0, "step": 2503 }, { "epoch": 0.9175101909952824, "grad_norm": 0.2089223453010983, "learning_rate": 3.782549630492243e-05, "loss": 0.5664, "num_tokens": 1915916943.0, "step": 2504 }, { "epoch": 0.9178766088031879, "grad_norm": 0.16698194030869531, "learning_rate": 3.782346086113532e-05, "loss": 0.5641, "num_tokens": 1916749043.0, "step": 2505 }, { "epoch": 0.9182430266110932, "grad_norm": 0.18084344490980037, "learning_rate": 3.782142452645582e-05, "loss": 0.5775, "num_tokens": 1917426690.0, "step": 2506 }, { "epoch": 0.9186094444189987, "grad_norm": 0.1651200251484024, "learning_rate": 3.781938730099858e-05, "loss": 0.575, "num_tokens": 1918250393.0, "step": 2507 }, { "epoch": 0.9189758622269042, "grad_norm": 0.15820748911070182, "learning_rate": 3.781734918487829e-05, "loss": 0.5361, "num_tokens": 1919021892.0, "step": 2508 }, { "epoch": 0.9193422800348097, "grad_norm": 0.1830021236124236, "learning_rate": 3.7815310178209726e-05, "loss": 0.5756, "num_tokens": 1919686917.0, "step": 2509 }, { "epoch": 0.9197086978427151, "grad_norm": 0.14446771257358407, "learning_rate": 3.7813270281107654e-05, "loss": 0.5229, "num_tokens": 1920519826.0, "step": 2510 }, { "epoch": 0.9200751156506206, "grad_norm": 0.16462170692314668, "learning_rate": 3.7811229493686933e-05, "loss": 0.5927, "num_tokens": 1921280353.0, "step": 2511 }, { "epoch": 0.9204415334585261, "grad_norm": 0.1646354506758045, "learning_rate": 3.7809187816062483e-05, "loss": 0.5633, "num_tokens": 1922035820.0, "step": 2512 }, { "epoch": 0.9208079512664316, "grad_norm": 0.1646685192230421, "learning_rate": 3.780714524834924e-05, "loss": 0.5797, "num_tokens": 1922826796.0, "step": 2513 }, { "epoch": 0.921174369074337, "grad_norm": 0.1501378218955539, "learning_rate": 3.780510179066221e-05, "loss": 0.5412, "num_tokens": 1923670232.0, "step": 2514 }, { "epoch": 0.9215407868822425, "grad_norm": 0.15786104656612285, "learning_rate": 3.780305744311643e-05, "loss": 0.5098, "num_tokens": 1924553355.0, "step": 2515 }, { "epoch": 0.921907204690148, "grad_norm": 0.16755324915707856, "learning_rate": 3.780101220582701e-05, "loss": 0.5406, "num_tokens": 1925309547.0, "step": 2516 }, { "epoch": 0.9222736224980534, "grad_norm": 0.18404688919456144, "learning_rate": 3.779896607890912e-05, "loss": 0.58, "num_tokens": 1926029908.0, "step": 2517 }, { "epoch": 0.9226400403059589, "grad_norm": 0.15397909066397644, "learning_rate": 3.7796919062477925e-05, "loss": 0.5341, "num_tokens": 1926966921.0, "step": 2518 }, { "epoch": 0.9230064581138643, "grad_norm": 0.17236429876800463, "learning_rate": 3.7794871156648695e-05, "loss": 0.5543, "num_tokens": 1927795935.0, "step": 2519 }, { "epoch": 0.9233728759217698, "grad_norm": 0.1927786336869217, "learning_rate": 3.779282236153674e-05, "loss": 0.5887, "num_tokens": 1928517787.0, "step": 2520 }, { "epoch": 0.9237392937296752, "grad_norm": 0.18506989757987943, "learning_rate": 3.779077267725739e-05, "loss": 0.5484, "num_tokens": 1929203489.0, "step": 2521 }, { "epoch": 0.9241057115375807, "grad_norm": 0.19476153363486598, "learning_rate": 3.7788722103926063e-05, "loss": 0.5611, "num_tokens": 1929947958.0, "step": 2522 }, { "epoch": 0.9244721293454862, "grad_norm": 0.18044439328663814, "learning_rate": 3.778667064165821e-05, "loss": 0.545, "num_tokens": 1930783882.0, "step": 2523 }, { "epoch": 0.9248385471533916, "grad_norm": 0.18268631118221246, "learning_rate": 3.778461829056932e-05, "loss": 0.6323, "num_tokens": 1931485012.0, "step": 2524 }, { "epoch": 0.9252049649612971, "grad_norm": 0.1838714606863029, "learning_rate": 3.7782565050774954e-05, "loss": 0.5662, "num_tokens": 1932269435.0, "step": 2525 }, { "epoch": 0.9255713827692026, "grad_norm": 0.1662282019193688, "learning_rate": 3.778051092239071e-05, "loss": 0.5424, "num_tokens": 1932950474.0, "step": 2526 }, { "epoch": 0.9259378005771081, "grad_norm": 0.16912835643868068, "learning_rate": 3.777845590553224e-05, "loss": 0.5691, "num_tokens": 1933708743.0, "step": 2527 }, { "epoch": 0.9263042183850135, "grad_norm": 0.17722082351350457, "learning_rate": 3.777640000031525e-05, "loss": 0.5773, "num_tokens": 1934519010.0, "step": 2528 }, { "epoch": 0.926670636192919, "grad_norm": 0.16276206387109604, "learning_rate": 3.777434320685549e-05, "loss": 0.5467, "num_tokens": 1935251106.0, "step": 2529 }, { "epoch": 0.9270370540008245, "grad_norm": 0.1824509330803335, "learning_rate": 3.777228552526876e-05, "loss": 0.544, "num_tokens": 1936011331.0, "step": 2530 }, { "epoch": 0.92740347180873, "grad_norm": 0.18594604135704437, "learning_rate": 3.7770226955670904e-05, "loss": 0.5614, "num_tokens": 1936817827.0, "step": 2531 }, { "epoch": 0.9277698896166354, "grad_norm": 0.1618822418678898, "learning_rate": 3.776816749817783e-05, "loss": 0.5341, "num_tokens": 1937515836.0, "step": 2532 }, { "epoch": 0.9281363074245408, "grad_norm": 0.1741775181322028, "learning_rate": 3.77661071529055e-05, "loss": 0.5532, "num_tokens": 1938281339.0, "step": 2533 }, { "epoch": 0.9285027252324463, "grad_norm": 0.1979838030902989, "learning_rate": 3.77640459199699e-05, "loss": 0.5825, "num_tokens": 1938996800.0, "step": 2534 }, { "epoch": 0.9288691430403517, "grad_norm": 0.17632254403974087, "learning_rate": 3.776198379948709e-05, "loss": 0.5823, "num_tokens": 1939759435.0, "step": 2535 }, { "epoch": 0.9292355608482572, "grad_norm": 0.17234916379407617, "learning_rate": 3.7759920791573166e-05, "loss": 0.5487, "num_tokens": 1940508118.0, "step": 2536 }, { "epoch": 0.9296019786561627, "grad_norm": 0.19412331782933412, "learning_rate": 3.7757856896344284e-05, "loss": 0.5753, "num_tokens": 1941294953.0, "step": 2537 }, { "epoch": 0.9299683964640681, "grad_norm": 0.17559993007807087, "learning_rate": 3.775579211391665e-05, "loss": 0.5801, "num_tokens": 1942092364.0, "step": 2538 }, { "epoch": 0.9303348142719736, "grad_norm": 0.16899406215621682, "learning_rate": 3.77537264444065e-05, "loss": 0.5484, "num_tokens": 1942821462.0, "step": 2539 }, { "epoch": 0.9307012320798791, "grad_norm": 0.19853988908629813, "learning_rate": 3.775165988793014e-05, "loss": 0.5408, "num_tokens": 1943513295.0, "step": 2540 }, { "epoch": 0.9310676498877846, "grad_norm": 0.16919664455582392, "learning_rate": 3.7749592444603935e-05, "loss": 0.521, "num_tokens": 1944315042.0, "step": 2541 }, { "epoch": 0.93143406769569, "grad_norm": 0.19153830695212007, "learning_rate": 3.7747524114544275e-05, "loss": 0.5454, "num_tokens": 1945036748.0, "step": 2542 }, { "epoch": 0.9318004855035955, "grad_norm": 0.21746212833114825, "learning_rate": 3.774545489786761e-05, "loss": 0.5541, "num_tokens": 1945764776.0, "step": 2543 }, { "epoch": 0.932166903311501, "grad_norm": 0.18308819637766952, "learning_rate": 3.774338479469045e-05, "loss": 0.5676, "num_tokens": 1946457501.0, "step": 2544 }, { "epoch": 0.9325333211194065, "grad_norm": 0.2243343674550402, "learning_rate": 3.774131380512933e-05, "loss": 0.5464, "num_tokens": 1947252843.0, "step": 2545 }, { "epoch": 0.9328997389273118, "grad_norm": 0.17542573759043423, "learning_rate": 3.773924192930087e-05, "loss": 0.5469, "num_tokens": 1948012799.0, "step": 2546 }, { "epoch": 0.9332661567352173, "grad_norm": 0.20065200851179768, "learning_rate": 3.7737169167321706e-05, "loss": 0.5373, "num_tokens": 1948766812.0, "step": 2547 }, { "epoch": 0.9336325745431228, "grad_norm": 0.20998986448813736, "learning_rate": 3.773509551930855e-05, "loss": 0.5628, "num_tokens": 1949534486.0, "step": 2548 }, { "epoch": 0.9339989923510282, "grad_norm": 0.1632859207064322, "learning_rate": 3.773302098537814e-05, "loss": 0.5484, "num_tokens": 1950422008.0, "step": 2549 }, { "epoch": 0.9343654101589337, "grad_norm": 0.16585407636186528, "learning_rate": 3.7730945565647286e-05, "loss": 0.5435, "num_tokens": 1951253124.0, "step": 2550 }, { "epoch": 0.9347318279668392, "grad_norm": 0.18128856338652857, "learning_rate": 3.772886926023284e-05, "loss": 0.5415, "num_tokens": 1951865571.0, "step": 2551 }, { "epoch": 0.9350982457747447, "grad_norm": 0.15860791689540232, "learning_rate": 3.772679206925169e-05, "loss": 0.5608, "num_tokens": 1952624982.0, "step": 2552 }, { "epoch": 0.9354646635826501, "grad_norm": 0.22126591396763723, "learning_rate": 3.77247139928208e-05, "loss": 0.5445, "num_tokens": 1953331994.0, "step": 2553 }, { "epoch": 0.9358310813905556, "grad_norm": 0.1645285370240112, "learning_rate": 3.772263503105716e-05, "loss": 0.5261, "num_tokens": 1954197473.0, "step": 2554 }, { "epoch": 0.9361974991984611, "grad_norm": 0.20477376007196765, "learning_rate": 3.772055518407782e-05, "loss": 0.5659, "num_tokens": 1954930267.0, "step": 2555 }, { "epoch": 0.9365639170063665, "grad_norm": 0.1639441546352037, "learning_rate": 3.7718474451999885e-05, "loss": 0.5328, "num_tokens": 1955732740.0, "step": 2556 }, { "epoch": 0.936930334814272, "grad_norm": 0.21068173580272212, "learning_rate": 3.77163928349405e-05, "loss": 0.5609, "num_tokens": 1956510313.0, "step": 2557 }, { "epoch": 0.9372967526221775, "grad_norm": 0.17663437488673253, "learning_rate": 3.771431033301687e-05, "loss": 0.5541, "num_tokens": 1957225144.0, "step": 2558 }, { "epoch": 0.9376631704300828, "grad_norm": 0.17894278484208584, "learning_rate": 3.771222694634624e-05, "loss": 0.5425, "num_tokens": 1957942205.0, "step": 2559 }, { "epoch": 0.9380295882379883, "grad_norm": 0.22275480646580284, "learning_rate": 3.771014267504591e-05, "loss": 0.5502, "num_tokens": 1958636355.0, "step": 2560 }, { "epoch": 0.9383960060458938, "grad_norm": 0.19117371891336718, "learning_rate": 3.770805751923322e-05, "loss": 0.6125, "num_tokens": 1959316373.0, "step": 2561 }, { "epoch": 0.9387624238537993, "grad_norm": 0.16757686227705157, "learning_rate": 3.770597147902558e-05, "loss": 0.5085, "num_tokens": 1960150634.0, "step": 2562 }, { "epoch": 0.9391288416617047, "grad_norm": 0.17064453701061416, "learning_rate": 3.770388455454043e-05, "loss": 0.5337, "num_tokens": 1960974455.0, "step": 2563 }, { "epoch": 0.9394952594696102, "grad_norm": 0.17906363867133876, "learning_rate": 3.7701796745895277e-05, "loss": 0.5672, "num_tokens": 1961592837.0, "step": 2564 }, { "epoch": 0.9398616772775157, "grad_norm": 0.18693910425304144, "learning_rate": 3.7699708053207665e-05, "loss": 0.5569, "num_tokens": 1962291404.0, "step": 2565 }, { "epoch": 0.9402280950854212, "grad_norm": 0.15627771828536935, "learning_rate": 3.769761847659519e-05, "loss": 0.5303, "num_tokens": 1963112080.0, "step": 2566 }, { "epoch": 0.9405945128933266, "grad_norm": 0.1692358391640219, "learning_rate": 3.76955280161755e-05, "loss": 0.5107, "num_tokens": 1963917997.0, "step": 2567 }, { "epoch": 0.9409609307012321, "grad_norm": 0.1623150981592218, "learning_rate": 3.769343667206629e-05, "loss": 0.5111, "num_tokens": 1964855362.0, "step": 2568 }, { "epoch": 0.9413273485091376, "grad_norm": 0.19268554506122512, "learning_rate": 3.769134444438531e-05, "loss": 0.6, "num_tokens": 1965561696.0, "step": 2569 }, { "epoch": 0.941693766317043, "grad_norm": 0.15874315022513424, "learning_rate": 3.768925133325036e-05, "loss": 0.5753, "num_tokens": 1966306040.0, "step": 2570 }, { "epoch": 0.9420601841249485, "grad_norm": 0.15995283830561388, "learning_rate": 3.7687157338779274e-05, "loss": 0.5324, "num_tokens": 1967077755.0, "step": 2571 }, { "epoch": 0.942426601932854, "grad_norm": 0.14896231072738414, "learning_rate": 3.768506246108996e-05, "loss": 0.5401, "num_tokens": 1967887207.0, "step": 2572 }, { "epoch": 0.9427930197407594, "grad_norm": 0.16250178989685288, "learning_rate": 3.768296670030036e-05, "loss": 0.5611, "num_tokens": 1968761627.0, "step": 2573 }, { "epoch": 0.9431594375486648, "grad_norm": 0.16804014987762514, "learning_rate": 3.768087005652846e-05, "loss": 0.5538, "num_tokens": 1969497383.0, "step": 2574 }, { "epoch": 0.9435258553565703, "grad_norm": 0.19785279951233958, "learning_rate": 3.7678772529892325e-05, "loss": 0.5656, "num_tokens": 1970254534.0, "step": 2575 }, { "epoch": 0.9438922731644758, "grad_norm": 0.16259996197116344, "learning_rate": 3.7676674120510034e-05, "loss": 0.5455, "num_tokens": 1970996662.0, "step": 2576 }, { "epoch": 0.9442586909723812, "grad_norm": 0.17043221658750107, "learning_rate": 3.767457482849974e-05, "loss": 0.5449, "num_tokens": 1971752075.0, "step": 2577 }, { "epoch": 0.9446251087802867, "grad_norm": 0.16797126802987933, "learning_rate": 3.7672474653979635e-05, "loss": 0.5146, "num_tokens": 1972605875.0, "step": 2578 }, { "epoch": 0.9449915265881922, "grad_norm": 0.2245446078576814, "learning_rate": 3.767037359706796e-05, "loss": 0.5449, "num_tokens": 1973432244.0, "step": 2579 }, { "epoch": 0.9453579443960977, "grad_norm": 0.1983035308174498, "learning_rate": 3.766827165788301e-05, "loss": 0.5496, "num_tokens": 1974122803.0, "step": 2580 }, { "epoch": 0.9457243622040031, "grad_norm": 0.19392598247947793, "learning_rate": 3.766616883654313e-05, "loss": 0.5572, "num_tokens": 1974842290.0, "step": 2581 }, { "epoch": 0.9460907800119086, "grad_norm": 0.16830554444829096, "learning_rate": 3.7664065133166713e-05, "loss": 0.5961, "num_tokens": 1975576598.0, "step": 2582 }, { "epoch": 0.9464571978198141, "grad_norm": 0.19438103592829276, "learning_rate": 3.7661960547872196e-05, "loss": 0.5678, "num_tokens": 1976274350.0, "step": 2583 }, { "epoch": 0.9468236156277196, "grad_norm": 0.15382613986263702, "learning_rate": 3.765985508077809e-05, "loss": 0.5653, "num_tokens": 1977130907.0, "step": 2584 }, { "epoch": 0.947190033435625, "grad_norm": 0.16356369627887035, "learning_rate": 3.7657748732002913e-05, "loss": 0.5245, "num_tokens": 1977896093.0, "step": 2585 }, { "epoch": 0.9475564512435304, "grad_norm": 0.16172964763251046, "learning_rate": 3.765564150166526e-05, "loss": 0.5627, "num_tokens": 1978693070.0, "step": 2586 }, { "epoch": 0.9479228690514359, "grad_norm": 0.1722922220124875, "learning_rate": 3.765353338988379e-05, "loss": 0.5521, "num_tokens": 1979377934.0, "step": 2587 }, { "epoch": 0.9482892868593413, "grad_norm": 0.16825048864974243, "learning_rate": 3.7651424396777184e-05, "loss": 0.5795, "num_tokens": 1980052109.0, "step": 2588 }, { "epoch": 0.9486557046672468, "grad_norm": 0.1563193710018505, "learning_rate": 3.7649314522464176e-05, "loss": 0.5311, "num_tokens": 1980729297.0, "step": 2589 }, { "epoch": 0.9490221224751523, "grad_norm": 0.1739481994405319, "learning_rate": 3.7647203767063566e-05, "loss": 0.5534, "num_tokens": 1981565072.0, "step": 2590 }, { "epoch": 0.9493885402830577, "grad_norm": 0.14963199965463375, "learning_rate": 3.764509213069419e-05, "loss": 0.5502, "num_tokens": 1982426373.0, "step": 2591 }, { "epoch": 0.9497549580909632, "grad_norm": 0.16633087326567972, "learning_rate": 3.764297961347493e-05, "loss": 0.5416, "num_tokens": 1983185036.0, "step": 2592 }, { "epoch": 0.9501213758988687, "grad_norm": 0.16714463535915708, "learning_rate": 3.7640866215524736e-05, "loss": 0.5504, "num_tokens": 1984012136.0, "step": 2593 }, { "epoch": 0.9504877937067742, "grad_norm": 0.14146996746409848, "learning_rate": 3.763875193696259e-05, "loss": 0.5441, "num_tokens": 1984863456.0, "step": 2594 }, { "epoch": 0.9508542115146796, "grad_norm": 0.163514175582592, "learning_rate": 3.763663677790754e-05, "loss": 0.5343, "num_tokens": 1985518241.0, "step": 2595 }, { "epoch": 0.9512206293225851, "grad_norm": 0.1491424355014571, "learning_rate": 3.7634520738478657e-05, "loss": 0.5186, "num_tokens": 1986363862.0, "step": 2596 }, { "epoch": 0.9515870471304906, "grad_norm": 0.16519444246008413, "learning_rate": 3.7632403818795095e-05, "loss": 0.5692, "num_tokens": 1987072379.0, "step": 2597 }, { "epoch": 0.951953464938396, "grad_norm": 0.1648081019249579, "learning_rate": 3.7630286018976024e-05, "loss": 0.5425, "num_tokens": 1987783858.0, "step": 2598 }, { "epoch": 0.9523198827463014, "grad_norm": 0.16914426691889708, "learning_rate": 3.76281673391407e-05, "loss": 0.5545, "num_tokens": 1988536477.0, "step": 2599 }, { "epoch": 0.9526863005542069, "grad_norm": 0.1758259192833516, "learning_rate": 3.7626047779408395e-05, "loss": 0.5694, "num_tokens": 1989324287.0, "step": 2600 }, { "epoch": 0.9530527183621124, "grad_norm": 0.1711983245451427, "learning_rate": 3.762392733989844e-05, "loss": 0.5703, "num_tokens": 1990108627.0, "step": 2601 }, { "epoch": 0.9534191361700178, "grad_norm": 0.1583313836422678, "learning_rate": 3.762180602073023e-05, "loss": 0.5184, "num_tokens": 1990792039.0, "step": 2602 }, { "epoch": 0.9537855539779233, "grad_norm": 0.21985019246222234, "learning_rate": 3.761968382202321e-05, "loss": 0.5503, "num_tokens": 1991552438.0, "step": 2603 }, { "epoch": 0.9541519717858288, "grad_norm": 0.19922491245753646, "learning_rate": 3.7617560743896834e-05, "loss": 0.5663, "num_tokens": 1992284009.0, "step": 2604 }, { "epoch": 0.9545183895937343, "grad_norm": 0.19341188779597637, "learning_rate": 3.7615436786470665e-05, "loss": 0.5643, "num_tokens": 1993003580.0, "step": 2605 }, { "epoch": 0.9548848074016397, "grad_norm": 0.19286498411591482, "learning_rate": 3.761331194986427e-05, "loss": 0.5597, "num_tokens": 1993758494.0, "step": 2606 }, { "epoch": 0.9552512252095452, "grad_norm": 0.17466760481828192, "learning_rate": 3.761118623419729e-05, "loss": 0.5502, "num_tokens": 1994485067.0, "step": 2607 }, { "epoch": 0.9556176430174507, "grad_norm": 0.2439986081633539, "learning_rate": 3.7609059639589395e-05, "loss": 0.5791, "num_tokens": 1995354486.0, "step": 2608 }, { "epoch": 0.9559840608253561, "grad_norm": 0.17320519282511868, "learning_rate": 3.760693216616033e-05, "loss": 0.5883, "num_tokens": 1996106553.0, "step": 2609 }, { "epoch": 0.9563504786332616, "grad_norm": 0.20062798382805733, "learning_rate": 3.7604803814029865e-05, "loss": 0.5725, "num_tokens": 1996839476.0, "step": 2610 }, { "epoch": 0.9567168964411671, "grad_norm": 0.16454315320980664, "learning_rate": 3.760267458331784e-05, "loss": 0.577, "num_tokens": 1997579945.0, "step": 2611 }, { "epoch": 0.9570833142490726, "grad_norm": 0.18049687717789512, "learning_rate": 3.760054447414412e-05, "loss": 0.5246, "num_tokens": 1998324370.0, "step": 2612 }, { "epoch": 0.9574497320569779, "grad_norm": 0.1819235513901427, "learning_rate": 3.759841348662865e-05, "loss": 0.538, "num_tokens": 1999128430.0, "step": 2613 }, { "epoch": 0.9578161498648834, "grad_norm": 0.1963232902815944, "learning_rate": 3.7596281620891406e-05, "loss": 0.5953, "num_tokens": 1999862198.0, "step": 2614 }, { "epoch": 0.9581825676727889, "grad_norm": 0.18870787549391482, "learning_rate": 3.7594148877052416e-05, "loss": 0.5528, "num_tokens": 2000587578.0, "step": 2615 }, { "epoch": 0.9585489854806943, "grad_norm": 0.1712816603102092, "learning_rate": 3.759201525523176e-05, "loss": 0.5693, "num_tokens": 2001392377.0, "step": 2616 }, { "epoch": 0.9589154032885998, "grad_norm": 0.18140168101226667, "learning_rate": 3.758988075554955e-05, "loss": 0.5221, "num_tokens": 2002122219.0, "step": 2617 }, { "epoch": 0.9592818210965053, "grad_norm": 0.19414091114657106, "learning_rate": 3.758774537812598e-05, "loss": 0.5439, "num_tokens": 2002818029.0, "step": 2618 }, { "epoch": 0.9596482389044108, "grad_norm": 0.17395870955681952, "learning_rate": 3.7585609123081266e-05, "loss": 0.537, "num_tokens": 2003675672.0, "step": 2619 }, { "epoch": 0.9600146567123162, "grad_norm": 0.208752444645244, "learning_rate": 3.758347199053568e-05, "loss": 0.5543, "num_tokens": 2004382600.0, "step": 2620 }, { "epoch": 0.9603810745202217, "grad_norm": 0.15546691326888248, "learning_rate": 3.758133398060957e-05, "loss": 0.5423, "num_tokens": 2005205763.0, "step": 2621 }, { "epoch": 0.9607474923281272, "grad_norm": 0.21678131185854888, "learning_rate": 3.757919509342328e-05, "loss": 0.5456, "num_tokens": 2006103273.0, "step": 2622 }, { "epoch": 0.9611139101360326, "grad_norm": 0.15059213469159666, "learning_rate": 3.757705532909725e-05, "loss": 0.5118, "num_tokens": 2006885639.0, "step": 2623 }, { "epoch": 0.9614803279439381, "grad_norm": 0.18529294406330774, "learning_rate": 3.7574914687751956e-05, "loss": 0.5676, "num_tokens": 2007661330.0, "step": 2624 }, { "epoch": 0.9618467457518436, "grad_norm": 0.17204662021775843, "learning_rate": 3.757277316950791e-05, "loss": 0.5627, "num_tokens": 2008453525.0, "step": 2625 }, { "epoch": 0.962213163559749, "grad_norm": 0.19735965261583852, "learning_rate": 3.757063077448569e-05, "loss": 0.5813, "num_tokens": 2009195377.0, "step": 2626 }, { "epoch": 0.9625795813676544, "grad_norm": 0.16932138939490446, "learning_rate": 3.756848750280592e-05, "loss": 0.549, "num_tokens": 2009945804.0, "step": 2627 }, { "epoch": 0.9629459991755599, "grad_norm": 0.16932782474630417, "learning_rate": 3.756634335458926e-05, "loss": 0.5721, "num_tokens": 2010766587.0, "step": 2628 }, { "epoch": 0.9633124169834654, "grad_norm": 0.13899952780419447, "learning_rate": 3.756419832995643e-05, "loss": 0.5055, "num_tokens": 2011534535.0, "step": 2629 }, { "epoch": 0.9636788347913708, "grad_norm": 0.17735734980686693, "learning_rate": 3.756205242902822e-05, "loss": 0.5413, "num_tokens": 2012336485.0, "step": 2630 }, { "epoch": 0.9640452525992763, "grad_norm": 0.16819622259288666, "learning_rate": 3.7559905651925426e-05, "loss": 0.564, "num_tokens": 2013117217.0, "step": 2631 }, { "epoch": 0.9644116704071818, "grad_norm": 0.17096119536201673, "learning_rate": 3.755775799876892e-05, "loss": 0.5667, "num_tokens": 2013800510.0, "step": 2632 }, { "epoch": 0.9647780882150873, "grad_norm": 0.1697227165679207, "learning_rate": 3.755560946967963e-05, "loss": 0.5438, "num_tokens": 2014576972.0, "step": 2633 }, { "epoch": 0.9651445060229927, "grad_norm": 0.15899482470231696, "learning_rate": 3.7553460064778514e-05, "loss": 0.5541, "num_tokens": 2015359283.0, "step": 2634 }, { "epoch": 0.9655109238308982, "grad_norm": 0.1927750606505753, "learning_rate": 3.7551309784186584e-05, "loss": 0.5789, "num_tokens": 2016129909.0, "step": 2635 }, { "epoch": 0.9658773416388037, "grad_norm": 0.16583505372299576, "learning_rate": 3.754915862802491e-05, "loss": 0.5864, "num_tokens": 2016813544.0, "step": 2636 }, { "epoch": 0.9662437594467091, "grad_norm": 0.18483393672359363, "learning_rate": 3.754700659641461e-05, "loss": 0.5251, "num_tokens": 2017583186.0, "step": 2637 }, { "epoch": 0.9666101772546146, "grad_norm": 0.1717791489614113, "learning_rate": 3.754485368947684e-05, "loss": 0.5249, "num_tokens": 2018362864.0, "step": 2638 }, { "epoch": 0.96697659506252, "grad_norm": 0.1668071437151092, "learning_rate": 3.754269990733282e-05, "loss": 0.5719, "num_tokens": 2019073249.0, "step": 2639 }, { "epoch": 0.9673430128704255, "grad_norm": 0.19442021457075478, "learning_rate": 3.7540545250103805e-05, "loss": 0.5608, "num_tokens": 2019729679.0, "step": 2640 }, { "epoch": 0.9677094306783309, "grad_norm": 0.16098992712199384, "learning_rate": 3.753838971791112e-05, "loss": 0.5574, "num_tokens": 2020499245.0, "step": 2641 }, { "epoch": 0.9680758484862364, "grad_norm": 0.15770520206711328, "learning_rate": 3.753623331087611e-05, "loss": 0.5428, "num_tokens": 2021217591.0, "step": 2642 }, { "epoch": 0.9684422662941419, "grad_norm": 0.1589978788983505, "learning_rate": 3.753407602912019e-05, "loss": 0.545, "num_tokens": 2022125492.0, "step": 2643 }, { "epoch": 0.9688086841020473, "grad_norm": 0.1557613714109691, "learning_rate": 3.7531917872764824e-05, "loss": 0.5342, "num_tokens": 2022800515.0, "step": 2644 }, { "epoch": 0.9691751019099528, "grad_norm": 0.16877667290535547, "learning_rate": 3.752975884193152e-05, "loss": 0.5861, "num_tokens": 2023569374.0, "step": 2645 }, { "epoch": 0.9695415197178583, "grad_norm": 0.1572324560387091, "learning_rate": 3.7527598936741825e-05, "loss": 0.5456, "num_tokens": 2024352213.0, "step": 2646 }, { "epoch": 0.9699079375257638, "grad_norm": 0.15330012037067278, "learning_rate": 3.752543815731736e-05, "loss": 0.5234, "num_tokens": 2025071492.0, "step": 2647 }, { "epoch": 0.9702743553336692, "grad_norm": 0.16535204158333858, "learning_rate": 3.752327650377977e-05, "loss": 0.6007, "num_tokens": 2025719584.0, "step": 2648 }, { "epoch": 0.9706407731415747, "grad_norm": 0.17122885343761957, "learning_rate": 3.752111397625077e-05, "loss": 0.6029, "num_tokens": 2026435703.0, "step": 2649 }, { "epoch": 0.9710071909494802, "grad_norm": 0.1510894283181921, "learning_rate": 3.751895057485211e-05, "loss": 0.5519, "num_tokens": 2027114891.0, "step": 2650 }, { "epoch": 0.9713736087573857, "grad_norm": 0.16443222805781224, "learning_rate": 3.7516786299705595e-05, "loss": 0.5203, "num_tokens": 2027930613.0, "step": 2651 }, { "epoch": 0.9717400265652911, "grad_norm": 0.15265793788114781, "learning_rate": 3.751462115093307e-05, "loss": 0.5537, "num_tokens": 2028760101.0, "step": 2652 }, { "epoch": 0.9721064443731965, "grad_norm": 0.1564708444455352, "learning_rate": 3.751245512865645e-05, "loss": 0.5358, "num_tokens": 2029489055.0, "step": 2653 }, { "epoch": 0.972472862181102, "grad_norm": 0.1567481693242451, "learning_rate": 3.751028823299768e-05, "loss": 0.5658, "num_tokens": 2030247686.0, "step": 2654 }, { "epoch": 0.9728392799890074, "grad_norm": 0.1563704655741386, "learning_rate": 3.7508120464078756e-05, "loss": 0.5154, "num_tokens": 2030990941.0, "step": 2655 }, { "epoch": 0.9732056977969129, "grad_norm": 0.1603349586226755, "learning_rate": 3.750595182202174e-05, "loss": 0.5406, "num_tokens": 2031830975.0, "step": 2656 }, { "epoch": 0.9735721156048184, "grad_norm": 0.1497869000256443, "learning_rate": 3.750378230694872e-05, "loss": 0.5364, "num_tokens": 2032730683.0, "step": 2657 }, { "epoch": 0.9739385334127239, "grad_norm": 0.15968935265096043, "learning_rate": 3.750161191898185e-05, "loss": 0.5287, "num_tokens": 2033571447.0, "step": 2658 }, { "epoch": 0.9743049512206293, "grad_norm": 0.16939308346957047, "learning_rate": 3.749944065824332e-05, "loss": 0.5849, "num_tokens": 2034340443.0, "step": 2659 }, { "epoch": 0.9746713690285348, "grad_norm": 0.18879832924253281, "learning_rate": 3.7497268524855384e-05, "loss": 0.556, "num_tokens": 2035008075.0, "step": 2660 }, { "epoch": 0.9750377868364403, "grad_norm": 0.17680197204001288, "learning_rate": 3.749509551894034e-05, "loss": 0.5721, "num_tokens": 2035726515.0, "step": 2661 }, { "epoch": 0.9754042046443457, "grad_norm": 0.15976238294883793, "learning_rate": 3.749292164062052e-05, "loss": 0.5387, "num_tokens": 2036553380.0, "step": 2662 }, { "epoch": 0.9757706224522512, "grad_norm": 0.1764140413322989, "learning_rate": 3.749074689001832e-05, "loss": 0.5749, "num_tokens": 2037376775.0, "step": 2663 }, { "epoch": 0.9761370402601567, "grad_norm": 0.181301776993558, "learning_rate": 3.74885712672562e-05, "loss": 0.608, "num_tokens": 2038137830.0, "step": 2664 }, { "epoch": 0.9765034580680622, "grad_norm": 2.6975923779843614, "learning_rate": 3.7486394772456634e-05, "loss": 0.5683, "num_tokens": 2038947880.0, "step": 2665 }, { "epoch": 0.9768698758759675, "grad_norm": 0.17942205523385618, "learning_rate": 3.748421740574217e-05, "loss": 0.5385, "num_tokens": 2039709428.0, "step": 2666 }, { "epoch": 0.977236293683873, "grad_norm": 0.17420617045340342, "learning_rate": 3.748203916723539e-05, "loss": 0.5845, "num_tokens": 2040466135.0, "step": 2667 }, { "epoch": 0.9776027114917785, "grad_norm": 0.17253553002236927, "learning_rate": 3.747986005705894e-05, "loss": 0.5755, "num_tokens": 2041209325.0, "step": 2668 }, { "epoch": 0.9779691292996839, "grad_norm": 0.18424282551788382, "learning_rate": 3.747768007533552e-05, "loss": 0.526, "num_tokens": 2042054002.0, "step": 2669 }, { "epoch": 0.9783355471075894, "grad_norm": 0.32318550834087784, "learning_rate": 3.747549922218784e-05, "loss": 0.5578, "num_tokens": 2042860298.0, "step": 2670 }, { "epoch": 0.9787019649154949, "grad_norm": 0.7306671405297737, "learning_rate": 3.747331749773872e-05, "loss": 0.5653, "num_tokens": 2043543202.0, "step": 2671 }, { "epoch": 0.9790683827234004, "grad_norm": 0.23403908190349776, "learning_rate": 3.747113490211096e-05, "loss": 0.568, "num_tokens": 2044279334.0, "step": 2672 }, { "epoch": 0.9794348005313058, "grad_norm": 0.1637432900808479, "learning_rate": 3.746895143542747e-05, "loss": 0.5304, "num_tokens": 2045065050.0, "step": 2673 }, { "epoch": 0.9798012183392113, "grad_norm": 0.16237668546713313, "learning_rate": 3.746676709781117e-05, "loss": 0.5478, "num_tokens": 2045804270.0, "step": 2674 }, { "epoch": 0.9801676361471168, "grad_norm": 0.16146120040569578, "learning_rate": 3.746458188938506e-05, "loss": 0.5455, "num_tokens": 2046575032.0, "step": 2675 }, { "epoch": 0.9805340539550222, "grad_norm": 0.16365174389108095, "learning_rate": 3.7462395810272146e-05, "loss": 0.567, "num_tokens": 2047252089.0, "step": 2676 }, { "epoch": 0.9809004717629277, "grad_norm": 0.16307132204671196, "learning_rate": 3.746020886059553e-05, "loss": 0.5506, "num_tokens": 2047975903.0, "step": 2677 }, { "epoch": 0.9812668895708332, "grad_norm": 0.16865519477070792, "learning_rate": 3.7458021040478326e-05, "loss": 0.5672, "num_tokens": 2048772197.0, "step": 2678 }, { "epoch": 0.9816333073787387, "grad_norm": 0.17135262848325905, "learning_rate": 3.745583235004373e-05, "loss": 0.54, "num_tokens": 2049560975.0, "step": 2679 }, { "epoch": 0.981999725186644, "grad_norm": 0.15783588282190794, "learning_rate": 3.7453642789414945e-05, "loss": 0.5229, "num_tokens": 2050254116.0, "step": 2680 }, { "epoch": 0.9823661429945495, "grad_norm": 0.24596919728294528, "learning_rate": 3.7451452358715276e-05, "loss": 0.5461, "num_tokens": 2051062620.0, "step": 2681 }, { "epoch": 0.982732560802455, "grad_norm": 0.6498474708465954, "learning_rate": 3.7449261058068026e-05, "loss": 0.5392, "num_tokens": 2051784468.0, "step": 2682 }, { "epoch": 0.9830989786103604, "grad_norm": 0.3113116448605952, "learning_rate": 3.744706888759659e-05, "loss": 0.547, "num_tokens": 2052581189.0, "step": 2683 }, { "epoch": 0.9834653964182659, "grad_norm": 0.33194420734539576, "learning_rate": 3.744487584742437e-05, "loss": 0.5303, "num_tokens": 2053265706.0, "step": 2684 }, { "epoch": 0.9838318142261714, "grad_norm": 0.2956933005304857, "learning_rate": 3.744268193767485e-05, "loss": 0.5409, "num_tokens": 2054099964.0, "step": 2685 }, { "epoch": 0.9841982320340769, "grad_norm": 0.3761395728666429, "learning_rate": 3.7440487158471556e-05, "loss": 0.5587, "num_tokens": 2054900382.0, "step": 2686 }, { "epoch": 0.9845646498419823, "grad_norm": 0.17120392101285634, "learning_rate": 3.7438291509938045e-05, "loss": 0.5495, "num_tokens": 2055657632.0, "step": 2687 }, { "epoch": 0.9849310676498878, "grad_norm": 0.20987257842681054, "learning_rate": 3.743609499219795e-05, "loss": 0.556, "num_tokens": 2056454432.0, "step": 2688 }, { "epoch": 0.9852974854577933, "grad_norm": 0.17228624222570843, "learning_rate": 3.7433897605374935e-05, "loss": 0.551, "num_tokens": 2057259796.0, "step": 2689 }, { "epoch": 0.9856639032656987, "grad_norm": 0.17075984124989538, "learning_rate": 3.743169934959271e-05, "loss": 0.5668, "num_tokens": 2057978215.0, "step": 2690 }, { "epoch": 0.9860303210736042, "grad_norm": 0.16085318755249817, "learning_rate": 3.7429500224975046e-05, "loss": 0.56, "num_tokens": 2058948466.0, "step": 2691 }, { "epoch": 0.9863967388815097, "grad_norm": 0.19104434059144473, "learning_rate": 3.7427300231645764e-05, "loss": 0.5301, "num_tokens": 2059684918.0, "step": 2692 }, { "epoch": 0.986763156689415, "grad_norm": 0.17566422738460796, "learning_rate": 3.742509936972872e-05, "loss": 0.5665, "num_tokens": 2060477309.0, "step": 2693 }, { "epoch": 0.9871295744973205, "grad_norm": 0.18690768495055066, "learning_rate": 3.742289763934783e-05, "loss": 0.5675, "num_tokens": 2061232859.0, "step": 2694 }, { "epoch": 0.987495992305226, "grad_norm": 0.16947122281290652, "learning_rate": 3.742069504062706e-05, "loss": 0.5339, "num_tokens": 2062064000.0, "step": 2695 }, { "epoch": 0.9878624101131315, "grad_norm": 0.1847830051328475, "learning_rate": 3.741849157369041e-05, "loss": 0.5657, "num_tokens": 2062853110.0, "step": 2696 }, { "epoch": 0.988228827921037, "grad_norm": 0.2099058027445005, "learning_rate": 3.741628723866195e-05, "loss": 0.5678, "num_tokens": 2063505155.0, "step": 2697 }, { "epoch": 0.9885952457289424, "grad_norm": 0.19998966733846144, "learning_rate": 3.741408203566578e-05, "loss": 0.5773, "num_tokens": 2064258835.0, "step": 2698 }, { "epoch": 0.9889616635368479, "grad_norm": 0.19083886369628372, "learning_rate": 3.741187596482607e-05, "loss": 0.5282, "num_tokens": 2065046627.0, "step": 2699 }, { "epoch": 0.9893280813447534, "grad_norm": 0.1801105031472215, "learning_rate": 3.740966902626701e-05, "loss": 0.5622, "num_tokens": 2065661212.0, "step": 2700 }, { "epoch": 0.9896944991526588, "grad_norm": 0.18847040892362868, "learning_rate": 3.7407461220112866e-05, "loss": 0.5391, "num_tokens": 2066359237.0, "step": 2701 }, { "epoch": 0.9900609169605643, "grad_norm": 0.21960823588421674, "learning_rate": 3.7405252546487946e-05, "loss": 0.5759, "num_tokens": 2067123017.0, "step": 2702 }, { "epoch": 0.9904273347684698, "grad_norm": 0.17561011060624132, "learning_rate": 3.740304300551658e-05, "loss": 0.5476, "num_tokens": 2067929777.0, "step": 2703 }, { "epoch": 0.9907937525763753, "grad_norm": 0.21044792245422014, "learning_rate": 3.740083259732319e-05, "loss": 0.5487, "num_tokens": 2068740965.0, "step": 2704 }, { "epoch": 0.9911601703842807, "grad_norm": 0.21989648804350762, "learning_rate": 3.739862132203223e-05, "loss": 0.547, "num_tokens": 2069479997.0, "step": 2705 }, { "epoch": 0.9915265881921861, "grad_norm": 0.17029464874791805, "learning_rate": 3.7396409179768186e-05, "loss": 0.5589, "num_tokens": 2070153044.0, "step": 2706 }, { "epoch": 0.9918930060000916, "grad_norm": 0.18749075507503563, "learning_rate": 3.7394196170655615e-05, "loss": 0.5537, "num_tokens": 2070986809.0, "step": 2707 }, { "epoch": 0.992259423807997, "grad_norm": 0.17715917759675817, "learning_rate": 3.73919822948191e-05, "loss": 0.5669, "num_tokens": 2071722958.0, "step": 2708 }, { "epoch": 0.9926258416159025, "grad_norm": 0.19593374448484768, "learning_rate": 3.73897675523833e-05, "loss": 0.5941, "num_tokens": 2072426043.0, "step": 2709 }, { "epoch": 0.992992259423808, "grad_norm": 0.17190442654749544, "learning_rate": 3.738755194347291e-05, "loss": 0.5376, "num_tokens": 2073193753.0, "step": 2710 }, { "epoch": 0.9933586772317134, "grad_norm": 0.1860211784031511, "learning_rate": 3.7385335468212666e-05, "loss": 0.5608, "num_tokens": 2074014026.0, "step": 2711 }, { "epoch": 0.9937250950396189, "grad_norm": 0.2040212735684436, "learning_rate": 3.7383118126727364e-05, "loss": 0.5861, "num_tokens": 2074710613.0, "step": 2712 }, { "epoch": 0.9940915128475244, "grad_norm": 0.17821990347063704, "learning_rate": 3.738089991914184e-05, "loss": 0.5338, "num_tokens": 2075439002.0, "step": 2713 }, { "epoch": 0.9944579306554299, "grad_norm": 0.193794635573459, "learning_rate": 3.737868084558099e-05, "loss": 0.5441, "num_tokens": 2076243495.0, "step": 2714 }, { "epoch": 0.9948243484633353, "grad_norm": 0.17993378622540274, "learning_rate": 3.7376460906169746e-05, "loss": 0.5443, "num_tokens": 2077055829.0, "step": 2715 }, { "epoch": 0.9951907662712408, "grad_norm": 0.1751461128980927, "learning_rate": 3.73742401010331e-05, "loss": 0.5499, "num_tokens": 2077771449.0, "step": 2716 }, { "epoch": 0.9955571840791463, "grad_norm": 0.2046970436067949, "learning_rate": 3.7372018430296095e-05, "loss": 0.5375, "num_tokens": 2078538408.0, "step": 2717 }, { "epoch": 0.9959236018870518, "grad_norm": 0.1579389121172516, "learning_rate": 3.736979589408379e-05, "loss": 0.5591, "num_tokens": 2079404674.0, "step": 2718 }, { "epoch": 0.9962900196949572, "grad_norm": 0.1643614410348871, "learning_rate": 3.736757249252135e-05, "loss": 0.5628, "num_tokens": 2080191076.0, "step": 2719 }, { "epoch": 0.9966564375028626, "grad_norm": 0.1701540438737136, "learning_rate": 3.736534822573393e-05, "loss": 0.5473, "num_tokens": 2080909897.0, "step": 2720 }, { "epoch": 0.9970228553107681, "grad_norm": 0.16581346639264996, "learning_rate": 3.736312309384678e-05, "loss": 0.5572, "num_tokens": 2081707998.0, "step": 2721 }, { "epoch": 0.9973892731186735, "grad_norm": 0.16652249086612347, "learning_rate": 3.7360897096985166e-05, "loss": 0.5371, "num_tokens": 2082398615.0, "step": 2722 }, { "epoch": 0.997755690926579, "grad_norm": 0.1595565618065521, "learning_rate": 3.735867023527443e-05, "loss": 0.5319, "num_tokens": 2083172463.0, "step": 2723 }, { "epoch": 0.9981221087344845, "grad_norm": 0.14968165939430358, "learning_rate": 3.7356442508839934e-05, "loss": 0.5422, "num_tokens": 2083951263.0, "step": 2724 }, { "epoch": 0.99848852654239, "grad_norm": 0.15298437693826758, "learning_rate": 3.7354213917807115e-05, "loss": 0.5316, "num_tokens": 2084806590.0, "step": 2725 }, { "epoch": 0.9988549443502954, "grad_norm": 0.16395587741201412, "learning_rate": 3.7351984462301435e-05, "loss": 0.5775, "num_tokens": 2085681376.0, "step": 2726 }, { "epoch": 0.9992213621582009, "grad_norm": 0.16620248865025644, "learning_rate": 3.734975414244843e-05, "loss": 0.5719, "num_tokens": 2086353893.0, "step": 2727 }, { "epoch": 0.9995877799661064, "grad_norm": 0.17526465364229432, "learning_rate": 3.734752295837366e-05, "loss": 0.5347, "num_tokens": 2087106713.0, "step": 2728 }, { "epoch": 0.9999541977740118, "grad_norm": 0.17807274685680563, "learning_rate": 3.7345290910202756e-05, "loss": 0.5465, "num_tokens": 2087766118.0, "step": 2729 }, { "epoch": 1.0, "grad_norm": 0.17807274685680563, "learning_rate": 3.734305799806138e-05, "loss": 0.6561, "num_tokens": 2087833456.0, "step": 2730 }, { "epoch": 1.0003664178079055, "grad_norm": 0.5211851296811167, "learning_rate": 3.734082422207525e-05, "loss": 0.5184, "num_tokens": 2088717753.0, "step": 2731 }, { "epoch": 1.000732835615811, "grad_norm": 0.2266731764316758, "learning_rate": 3.7338589582370136e-05, "loss": 0.5304, "num_tokens": 2089570767.0, "step": 2732 }, { "epoch": 1.0010992534237164, "grad_norm": 0.17659873206168536, "learning_rate": 3.733635407907184e-05, "loss": 0.49, "num_tokens": 2090258013.0, "step": 2733 }, { "epoch": 1.001465671231622, "grad_norm": 0.19873600849310533, "learning_rate": 3.7334117712306244e-05, "loss": 0.4807, "num_tokens": 2090904671.0, "step": 2734 }, { "epoch": 1.0018320890395274, "grad_norm": 0.18969330551535896, "learning_rate": 3.733188048219925e-05, "loss": 0.4919, "num_tokens": 2091740441.0, "step": 2735 }, { "epoch": 1.0021985068474328, "grad_norm": 0.18828853571247497, "learning_rate": 3.732964238887681e-05, "loss": 0.4861, "num_tokens": 2092450252.0, "step": 2736 }, { "epoch": 1.0025649246553383, "grad_norm": 0.17193779944054863, "learning_rate": 3.732740343246495e-05, "loss": 0.5442, "num_tokens": 2093136089.0, "step": 2737 }, { "epoch": 1.0029313424632438, "grad_norm": 0.1722242408691039, "learning_rate": 3.7325163613089717e-05, "loss": 0.4961, "num_tokens": 2093986687.0, "step": 2738 }, { "epoch": 1.0032977602711493, "grad_norm": 0.15387359210654686, "learning_rate": 3.7322922930877223e-05, "loss": 0.5387, "num_tokens": 2094823208.0, "step": 2739 }, { "epoch": 1.0036641780790547, "grad_norm": 0.15006565158671023, "learning_rate": 3.7320681385953616e-05, "loss": 0.484, "num_tokens": 2095637287.0, "step": 2740 }, { "epoch": 1.0040305958869602, "grad_norm": 0.17211348755170236, "learning_rate": 3.731843897844511e-05, "loss": 0.5058, "num_tokens": 2096500607.0, "step": 2741 }, { "epoch": 1.0043970136948657, "grad_norm": 0.17258324596172622, "learning_rate": 3.731619570847794e-05, "loss": 0.5304, "num_tokens": 2097180079.0, "step": 2742 }, { "epoch": 1.0047634315027711, "grad_norm": 0.15841794811796037, "learning_rate": 3.731395157617843e-05, "loss": 0.5124, "num_tokens": 2097955764.0, "step": 2743 }, { "epoch": 1.0051298493106764, "grad_norm": 0.1724790286575845, "learning_rate": 3.731170658167291e-05, "loss": 0.512, "num_tokens": 2098808267.0, "step": 2744 }, { "epoch": 1.0054962671185819, "grad_norm": 0.18244529939590867, "learning_rate": 3.730946072508778e-05, "loss": 0.4991, "num_tokens": 2099589771.0, "step": 2745 }, { "epoch": 1.0058626849264873, "grad_norm": 0.17457250774224997, "learning_rate": 3.730721400654949e-05, "loss": 0.5518, "num_tokens": 2100304728.0, "step": 2746 }, { "epoch": 1.0062291027343928, "grad_norm": 0.15596229769559403, "learning_rate": 3.730496642618454e-05, "loss": 0.4806, "num_tokens": 2101080605.0, "step": 2747 }, { "epoch": 1.0065955205422983, "grad_norm": 0.18362621408804014, "learning_rate": 3.7302717984119474e-05, "loss": 0.4987, "num_tokens": 2101917411.0, "step": 2748 }, { "epoch": 1.0069619383502038, "grad_norm": 0.15076792070947934, "learning_rate": 3.7300468680480864e-05, "loss": 0.4993, "num_tokens": 2102808058.0, "step": 2749 }, { "epoch": 1.0073283561581092, "grad_norm": 0.14923281788647236, "learning_rate": 3.7298218515395375e-05, "loss": 0.4852, "num_tokens": 2103570282.0, "step": 2750 }, { "epoch": 1.0076947739660147, "grad_norm": 0.1575226263368277, "learning_rate": 3.729596748898968e-05, "loss": 0.4868, "num_tokens": 2104247711.0, "step": 2751 }, { "epoch": 1.0080611917739202, "grad_norm": 0.1685784791967435, "learning_rate": 3.729371560139052e-05, "loss": 0.511, "num_tokens": 2105139073.0, "step": 2752 }, { "epoch": 1.0084276095818256, "grad_norm": 0.15642582447685124, "learning_rate": 3.7291462852724686e-05, "loss": 0.4897, "num_tokens": 2105830607.0, "step": 2753 }, { "epoch": 1.0087940273897311, "grad_norm": 0.16890375349607528, "learning_rate": 3.7289209243119e-05, "loss": 0.5107, "num_tokens": 2106684665.0, "step": 2754 }, { "epoch": 1.0091604451976366, "grad_norm": 0.15610393581683682, "learning_rate": 3.728695477270037e-05, "loss": 0.5211, "num_tokens": 2107424126.0, "step": 2755 }, { "epoch": 1.009526863005542, "grad_norm": 0.17397501609925142, "learning_rate": 3.72846994415957e-05, "loss": 0.5534, "num_tokens": 2108132573.0, "step": 2756 }, { "epoch": 1.0098932808134475, "grad_norm": 0.14625513218530725, "learning_rate": 3.728244324993197e-05, "loss": 0.4686, "num_tokens": 2108908936.0, "step": 2757 }, { "epoch": 1.010259698621353, "grad_norm": 0.16824218918675227, "learning_rate": 3.728018619783623e-05, "loss": 0.5042, "num_tokens": 2109588071.0, "step": 2758 }, { "epoch": 1.0106261164292585, "grad_norm": 0.14094846068073333, "learning_rate": 3.727792828543554e-05, "loss": 0.5298, "num_tokens": 2110438972.0, "step": 2759 }, { "epoch": 1.010992534237164, "grad_norm": 0.17205480626344719, "learning_rate": 3.727566951285703e-05, "loss": 0.5055, "num_tokens": 2111304034.0, "step": 2760 }, { "epoch": 1.0113589520450694, "grad_norm": 0.1582939595911215, "learning_rate": 3.7273409880227876e-05, "loss": 0.515, "num_tokens": 2112100329.0, "step": 2761 }, { "epoch": 1.011725369852975, "grad_norm": 0.15687084147184055, "learning_rate": 3.7271149387675295e-05, "loss": 0.498, "num_tokens": 2112895191.0, "step": 2762 }, { "epoch": 1.0120917876608804, "grad_norm": 0.166587459273804, "learning_rate": 3.726888803532655e-05, "loss": 0.5316, "num_tokens": 2113671229.0, "step": 2763 }, { "epoch": 1.0124582054687858, "grad_norm": 0.1608406836251887, "learning_rate": 3.726662582330898e-05, "loss": 0.5179, "num_tokens": 2114370653.0, "step": 2764 }, { "epoch": 1.0128246232766913, "grad_norm": 0.1826842890594443, "learning_rate": 3.726436275174993e-05, "loss": 0.4802, "num_tokens": 2115127293.0, "step": 2765 }, { "epoch": 1.0131910410845968, "grad_norm": 0.1761439572289294, "learning_rate": 3.7262098820776835e-05, "loss": 0.499, "num_tokens": 2115933388.0, "step": 2766 }, { "epoch": 1.0135574588925023, "grad_norm": 0.14483426501113564, "learning_rate": 3.7259834030517145e-05, "loss": 0.4946, "num_tokens": 2116799820.0, "step": 2767 }, { "epoch": 1.0139238767004077, "grad_norm": 0.20304419546064928, "learning_rate": 3.7257568381098385e-05, "loss": 0.5126, "num_tokens": 2117689792.0, "step": 2768 }, { "epoch": 1.0142902945083132, "grad_norm": 0.196901842003913, "learning_rate": 3.72553018726481e-05, "loss": 0.4985, "num_tokens": 2118373801.0, "step": 2769 }, { "epoch": 1.0146567123162185, "grad_norm": 0.16469518649714182, "learning_rate": 3.725303450529391e-05, "loss": 0.5329, "num_tokens": 2119177984.0, "step": 2770 }, { "epoch": 1.015023130124124, "grad_norm": 0.18355837602106917, "learning_rate": 3.7250766279163465e-05, "loss": 0.5048, "num_tokens": 2119881939.0, "step": 2771 }, { "epoch": 1.0153895479320294, "grad_norm": 0.1470428229806655, "learning_rate": 3.724849719438448e-05, "loss": 0.4933, "num_tokens": 2120769632.0, "step": 2772 }, { "epoch": 1.0157559657399349, "grad_norm": 0.1583727834793356, "learning_rate": 3.72462272510847e-05, "loss": 0.5169, "num_tokens": 2121422422.0, "step": 2773 }, { "epoch": 1.0161223835478403, "grad_norm": 0.1646830408289508, "learning_rate": 3.7243956449391925e-05, "loss": 0.5264, "num_tokens": 2122218757.0, "step": 2774 }, { "epoch": 1.0164888013557458, "grad_norm": 0.1412939348914618, "learning_rate": 3.724168478943401e-05, "loss": 0.505, "num_tokens": 2122997475.0, "step": 2775 }, { "epoch": 1.0168552191636513, "grad_norm": 0.1597900797840105, "learning_rate": 3.723941227133887e-05, "loss": 0.4668, "num_tokens": 2123668503.0, "step": 2776 }, { "epoch": 1.0172216369715568, "grad_norm": 0.14655379665063942, "learning_rate": 3.723713889523442e-05, "loss": 0.4982, "num_tokens": 2124445896.0, "step": 2777 }, { "epoch": 1.0175880547794622, "grad_norm": 0.13870175050539527, "learning_rate": 3.723486466124869e-05, "loss": 0.4715, "num_tokens": 2125171207.0, "step": 2778 }, { "epoch": 1.0179544725873677, "grad_norm": 0.1744405805464192, "learning_rate": 3.72325895695097e-05, "loss": 0.541, "num_tokens": 2125838133.0, "step": 2779 }, { "epoch": 1.0183208903952732, "grad_norm": 0.16885193143690214, "learning_rate": 3.7230313620145554e-05, "loss": 0.5366, "num_tokens": 2126526561.0, "step": 2780 }, { "epoch": 1.0186873082031787, "grad_norm": 0.1644640684485996, "learning_rate": 3.7228036813284385e-05, "loss": 0.5263, "num_tokens": 2127265336.0, "step": 2781 }, { "epoch": 1.0190537260110841, "grad_norm": 0.15007309916697695, "learning_rate": 3.722575914905438e-05, "loss": 0.5061, "num_tokens": 2128045354.0, "step": 2782 }, { "epoch": 1.0194201438189896, "grad_norm": 0.14872371835309442, "learning_rate": 3.7223480627583786e-05, "loss": 0.4717, "num_tokens": 2128756483.0, "step": 2783 }, { "epoch": 1.019786561626895, "grad_norm": 0.16287480719729358, "learning_rate": 3.722120124900088e-05, "loss": 0.5185, "num_tokens": 2129514109.0, "step": 2784 }, { "epoch": 1.0201529794348005, "grad_norm": 0.14348599058282266, "learning_rate": 3.7218921013434e-05, "loss": 0.4633, "num_tokens": 2130281024.0, "step": 2785 }, { "epoch": 1.020519397242706, "grad_norm": 0.16328080508697926, "learning_rate": 3.721663992101152e-05, "loss": 0.5245, "num_tokens": 2130983554.0, "step": 2786 }, { "epoch": 1.0208858150506115, "grad_norm": 0.15391951711617752, "learning_rate": 3.721435797186188e-05, "loss": 0.4733, "num_tokens": 2131663172.0, "step": 2787 }, { "epoch": 1.021252232858517, "grad_norm": 0.1719356785516987, "learning_rate": 3.7212075166113556e-05, "loss": 0.4955, "num_tokens": 2132433126.0, "step": 2788 }, { "epoch": 1.0216186506664224, "grad_norm": 0.14732008952986814, "learning_rate": 3.720979150389508e-05, "loss": 0.497, "num_tokens": 2133119631.0, "step": 2789 }, { "epoch": 1.021985068474328, "grad_norm": 0.15471441576385514, "learning_rate": 3.720750698533501e-05, "loss": 0.4865, "num_tokens": 2133900174.0, "step": 2790 }, { "epoch": 1.0223514862822334, "grad_norm": 0.16269381091589663, "learning_rate": 3.720522161056199e-05, "loss": 0.4741, "num_tokens": 2134616780.0, "step": 2791 }, { "epoch": 1.0227179040901389, "grad_norm": 0.16873765190164328, "learning_rate": 3.720293537970467e-05, "loss": 0.5272, "num_tokens": 2135334971.0, "step": 2792 }, { "epoch": 1.0230843218980443, "grad_norm": 0.15850074236017336, "learning_rate": 3.7200648292891785e-05, "loss": 0.5071, "num_tokens": 2136127808.0, "step": 2793 }, { "epoch": 1.0234507397059498, "grad_norm": 0.1459759734022456, "learning_rate": 3.719836035025209e-05, "loss": 0.4839, "num_tokens": 2136870831.0, "step": 2794 }, { "epoch": 1.0238171575138553, "grad_norm": 0.19957053931179763, "learning_rate": 3.719607155191442e-05, "loss": 0.4985, "num_tokens": 2137700087.0, "step": 2795 }, { "epoch": 1.0241835753217607, "grad_norm": 0.1686483275732822, "learning_rate": 3.719378189800762e-05, "loss": 0.5187, "num_tokens": 2138485696.0, "step": 2796 }, { "epoch": 1.024549993129666, "grad_norm": 0.16800282349127696, "learning_rate": 3.719149138866061e-05, "loss": 0.499, "num_tokens": 2139259323.0, "step": 2797 }, { "epoch": 1.0249164109375715, "grad_norm": 0.17633799039545794, "learning_rate": 3.7189200024002345e-05, "loss": 0.4914, "num_tokens": 2140014554.0, "step": 2798 }, { "epoch": 1.025282828745477, "grad_norm": 0.15451126348805513, "learning_rate": 3.718690780416184e-05, "loss": 0.4849, "num_tokens": 2140784132.0, "step": 2799 }, { "epoch": 1.0256492465533824, "grad_norm": 0.1934329445718545, "learning_rate": 3.718461472926814e-05, "loss": 0.5031, "num_tokens": 2141483374.0, "step": 2800 }, { "epoch": 1.0260156643612879, "grad_norm": 0.1776411049878431, "learning_rate": 3.718232079945037e-05, "loss": 0.5199, "num_tokens": 2142179937.0, "step": 2801 }, { "epoch": 1.0263820821691934, "grad_norm": 0.1587147357001108, "learning_rate": 3.7180026014837663e-05, "loss": 0.5303, "num_tokens": 2142889628.0, "step": 2802 }, { "epoch": 1.0267484999770988, "grad_norm": 0.18853409751561587, "learning_rate": 3.7177730375559234e-05, "loss": 0.4974, "num_tokens": 2143635709.0, "step": 2803 }, { "epoch": 1.0271149177850043, "grad_norm": 0.18046509789276743, "learning_rate": 3.7175433881744324e-05, "loss": 0.5169, "num_tokens": 2144326964.0, "step": 2804 }, { "epoch": 1.0274813355929098, "grad_norm": 0.1551019718140442, "learning_rate": 3.717313653352224e-05, "loss": 0.4961, "num_tokens": 2145066833.0, "step": 2805 }, { "epoch": 1.0278477534008152, "grad_norm": 0.17837679517883323, "learning_rate": 3.717083833102231e-05, "loss": 0.5069, "num_tokens": 2145803085.0, "step": 2806 }, { "epoch": 1.0282141712087207, "grad_norm": 0.14711744909449606, "learning_rate": 3.716853927437394e-05, "loss": 0.4833, "num_tokens": 2146581308.0, "step": 2807 }, { "epoch": 1.0285805890166262, "grad_norm": 0.15432986179154695, "learning_rate": 3.716623936370657e-05, "loss": 0.4981, "num_tokens": 2147478341.0, "step": 2808 }, { "epoch": 1.0289470068245317, "grad_norm": 0.15633002938633472, "learning_rate": 3.7163938599149686e-05, "loss": 0.5311, "num_tokens": 2148320753.0, "step": 2809 }, { "epoch": 1.0293134246324371, "grad_norm": 0.1557355534780668, "learning_rate": 3.716163698083283e-05, "loss": 0.4832, "num_tokens": 2149137629.0, "step": 2810 }, { "epoch": 1.0296798424403426, "grad_norm": 0.16741636467052468, "learning_rate": 3.715933450888559e-05, "loss": 0.5065, "num_tokens": 2149979446.0, "step": 2811 }, { "epoch": 1.030046260248248, "grad_norm": 0.17449944863526115, "learning_rate": 3.715703118343759e-05, "loss": 0.5368, "num_tokens": 2150704986.0, "step": 2812 }, { "epoch": 1.0304126780561536, "grad_norm": 0.16079760053935285, "learning_rate": 3.715472700461852e-05, "loss": 0.5134, "num_tokens": 2151416876.0, "step": 2813 }, { "epoch": 1.030779095864059, "grad_norm": 0.14975859104888864, "learning_rate": 3.7152421972558105e-05, "loss": 0.5353, "num_tokens": 2152232781.0, "step": 2814 }, { "epoch": 1.0311455136719645, "grad_norm": 0.18447549736362442, "learning_rate": 3.7150116087386125e-05, "loss": 0.5505, "num_tokens": 2153051744.0, "step": 2815 }, { "epoch": 1.03151193147987, "grad_norm": 0.17849291076935464, "learning_rate": 3.7147809349232404e-05, "loss": 0.5666, "num_tokens": 2153642692.0, "step": 2816 }, { "epoch": 1.0318783492877754, "grad_norm": 0.18143262141701277, "learning_rate": 3.714550175822682e-05, "loss": 0.5276, "num_tokens": 2154396134.0, "step": 2817 }, { "epoch": 1.032244767095681, "grad_norm": 0.15883781027496796, "learning_rate": 3.7143193314499296e-05, "loss": 0.4584, "num_tokens": 2155218391.0, "step": 2818 }, { "epoch": 1.0326111849035864, "grad_norm": 0.1605939688292572, "learning_rate": 3.714088401817979e-05, "loss": 0.4755, "num_tokens": 2155973712.0, "step": 2819 }, { "epoch": 1.0329776027114919, "grad_norm": 0.15466090185702044, "learning_rate": 3.713857386939834e-05, "loss": 0.5022, "num_tokens": 2156829369.0, "step": 2820 }, { "epoch": 1.0333440205193973, "grad_norm": 0.153255742710628, "learning_rate": 3.713626286828499e-05, "loss": 0.5139, "num_tokens": 2157820652.0, "step": 2821 }, { "epoch": 1.0337104383273028, "grad_norm": 0.17049714209809172, "learning_rate": 3.713395101496988e-05, "loss": 0.5056, "num_tokens": 2158531209.0, "step": 2822 }, { "epoch": 1.0340768561352083, "grad_norm": 0.16527405327709432, "learning_rate": 3.713163830958314e-05, "loss": 0.4871, "num_tokens": 2159208904.0, "step": 2823 }, { "epoch": 1.0344432739431135, "grad_norm": 0.16792618035943765, "learning_rate": 3.7129324752255005e-05, "loss": 0.5052, "num_tokens": 2160079927.0, "step": 2824 }, { "epoch": 1.034809691751019, "grad_norm": 0.1906914843843095, "learning_rate": 3.712701034311572e-05, "loss": 0.5052, "num_tokens": 2160797927.0, "step": 2825 }, { "epoch": 1.0351761095589245, "grad_norm": 0.15878472639834473, "learning_rate": 3.7124695082295594e-05, "loss": 0.4786, "num_tokens": 2161567363.0, "step": 2826 }, { "epoch": 1.03554252736683, "grad_norm": 0.1759713718144911, "learning_rate": 3.7122378969924996e-05, "loss": 0.5152, "num_tokens": 2162419837.0, "step": 2827 }, { "epoch": 1.0359089451747354, "grad_norm": 0.19290441176220063, "learning_rate": 3.71200620061343e-05, "loss": 0.5304, "num_tokens": 2163090264.0, "step": 2828 }, { "epoch": 1.036275362982641, "grad_norm": 0.14776024154150338, "learning_rate": 3.711774419105397e-05, "loss": 0.5113, "num_tokens": 2163839461.0, "step": 2829 }, { "epoch": 1.0366417807905464, "grad_norm": 0.19046909873303652, "learning_rate": 3.711542552481451e-05, "loss": 0.5064, "num_tokens": 2164659110.0, "step": 2830 }, { "epoch": 1.0370081985984518, "grad_norm": 0.16710969135292275, "learning_rate": 3.711310600754645e-05, "loss": 0.5231, "num_tokens": 2165391973.0, "step": 2831 }, { "epoch": 1.0373746164063573, "grad_norm": 0.15883710557942704, "learning_rate": 3.7110785639380404e-05, "loss": 0.4832, "num_tokens": 2166053777.0, "step": 2832 }, { "epoch": 1.0377410342142628, "grad_norm": 0.19264413962322388, "learning_rate": 3.7108464420446996e-05, "loss": 0.4889, "num_tokens": 2166837270.0, "step": 2833 }, { "epoch": 1.0381074520221683, "grad_norm": 0.1776177413388952, "learning_rate": 3.7106142350876915e-05, "loss": 0.5247, "num_tokens": 2167569572.0, "step": 2834 }, { "epoch": 1.0384738698300737, "grad_norm": 0.14573385018291174, "learning_rate": 3.710381943080091e-05, "loss": 0.489, "num_tokens": 2168344732.0, "step": 2835 }, { "epoch": 1.0388402876379792, "grad_norm": 0.19845549616845445, "learning_rate": 3.710149566034976e-05, "loss": 0.5482, "num_tokens": 2169091302.0, "step": 2836 }, { "epoch": 1.0392067054458847, "grad_norm": 0.1600610881456943, "learning_rate": 3.7099171039654294e-05, "loss": 0.5438, "num_tokens": 2169764128.0, "step": 2837 }, { "epoch": 1.0395731232537901, "grad_norm": 0.15246795127069235, "learning_rate": 3.70968455688454e-05, "loss": 0.5728, "num_tokens": 2170562724.0, "step": 2838 }, { "epoch": 1.0399395410616956, "grad_norm": 0.19207547786896206, "learning_rate": 3.7094519248054e-05, "loss": 0.541, "num_tokens": 2171219443.0, "step": 2839 }, { "epoch": 1.040305958869601, "grad_norm": 0.17718309025919826, "learning_rate": 3.709219207741108e-05, "loss": 0.5047, "num_tokens": 2171932169.0, "step": 2840 }, { "epoch": 1.0406723766775066, "grad_norm": 0.1474146769195095, "learning_rate": 3.708986405704765e-05, "loss": 0.4992, "num_tokens": 2172598418.0, "step": 2841 }, { "epoch": 1.041038794485412, "grad_norm": 0.1827879862280798, "learning_rate": 3.70875351870948e-05, "loss": 0.4911, "num_tokens": 2173406160.0, "step": 2842 }, { "epoch": 1.0414052122933175, "grad_norm": 0.16958575378026206, "learning_rate": 3.7085205467683633e-05, "loss": 0.5108, "num_tokens": 2174100032.0, "step": 2843 }, { "epoch": 1.041771630101223, "grad_norm": 0.16152642546874293, "learning_rate": 3.708287489894533e-05, "loss": 0.5017, "num_tokens": 2174931649.0, "step": 2844 }, { "epoch": 1.0421380479091285, "grad_norm": 0.16161420363691076, "learning_rate": 3.7080543481011094e-05, "loss": 0.5285, "num_tokens": 2175718277.0, "step": 2845 }, { "epoch": 1.042504465717034, "grad_norm": 0.16865433100658045, "learning_rate": 3.7078211214012195e-05, "loss": 0.5308, "num_tokens": 2176562016.0, "step": 2846 }, { "epoch": 1.0428708835249394, "grad_norm": 0.1597444446319541, "learning_rate": 3.707587809807995e-05, "loss": 0.5125, "num_tokens": 2177374047.0, "step": 2847 }, { "epoch": 1.0432373013328449, "grad_norm": 0.15426820486466883, "learning_rate": 3.707354413334571e-05, "loss": 0.5119, "num_tokens": 2178177423.0, "step": 2848 }, { "epoch": 1.0436037191407503, "grad_norm": 0.14784937771314532, "learning_rate": 3.707120931994089e-05, "loss": 0.4978, "num_tokens": 2179057718.0, "step": 2849 }, { "epoch": 1.0439701369486558, "grad_norm": 0.18687884934319743, "learning_rate": 3.706887365799694e-05, "loss": 0.512, "num_tokens": 2179820656.0, "step": 2850 }, { "epoch": 1.044336554756561, "grad_norm": 0.1500236078588642, "learning_rate": 3.706653714764535e-05, "loss": 0.5026, "num_tokens": 2180612051.0, "step": 2851 }, { "epoch": 1.0447029725644665, "grad_norm": 0.18268986911493498, "learning_rate": 3.706419978901769e-05, "loss": 0.55, "num_tokens": 2181308984.0, "step": 2852 }, { "epoch": 1.045069390372372, "grad_norm": 0.1569666231586783, "learning_rate": 3.706186158224555e-05, "loss": 0.5253, "num_tokens": 2182082698.0, "step": 2853 }, { "epoch": 1.0454358081802775, "grad_norm": 0.19487449804872076, "learning_rate": 3.705952252746056e-05, "loss": 0.5069, "num_tokens": 2182808881.0, "step": 2854 }, { "epoch": 1.045802225988183, "grad_norm": 0.17038294586588063, "learning_rate": 3.705718262479445e-05, "loss": 0.5249, "num_tokens": 2183531793.0, "step": 2855 }, { "epoch": 1.0461686437960884, "grad_norm": 0.16222018849608388, "learning_rate": 3.705484187437893e-05, "loss": 0.4896, "num_tokens": 2184258282.0, "step": 2856 }, { "epoch": 1.046535061603994, "grad_norm": 0.1585235369917694, "learning_rate": 3.7052500276345805e-05, "loss": 0.4999, "num_tokens": 2185064063.0, "step": 2857 }, { "epoch": 1.0469014794118994, "grad_norm": 0.18144971198171825, "learning_rate": 3.70501578308269e-05, "loss": 0.4875, "num_tokens": 2185732030.0, "step": 2858 }, { "epoch": 1.0472678972198048, "grad_norm": 0.1778840975673306, "learning_rate": 3.704781453795411e-05, "loss": 0.5176, "num_tokens": 2186493907.0, "step": 2859 }, { "epoch": 1.0476343150277103, "grad_norm": 0.170337404396032, "learning_rate": 3.704547039785936e-05, "loss": 0.5186, "num_tokens": 2187455545.0, "step": 2860 }, { "epoch": 1.0480007328356158, "grad_norm": 0.16678271884267207, "learning_rate": 3.704312541067463e-05, "loss": 0.5325, "num_tokens": 2188227680.0, "step": 2861 }, { "epoch": 1.0483671506435213, "grad_norm": 0.17667415584969032, "learning_rate": 3.704077957653194e-05, "loss": 0.4888, "num_tokens": 2188976729.0, "step": 2862 }, { "epoch": 1.0487335684514267, "grad_norm": 0.1553843097042789, "learning_rate": 3.703843289556339e-05, "loss": 0.5174, "num_tokens": 2189661634.0, "step": 2863 }, { "epoch": 1.0490999862593322, "grad_norm": 0.1853454307830892, "learning_rate": 3.703608536790108e-05, "loss": 0.5163, "num_tokens": 2190392870.0, "step": 2864 }, { "epoch": 1.0494664040672377, "grad_norm": 0.21194425195802774, "learning_rate": 3.703373699367719e-05, "loss": 0.4887, "num_tokens": 2191015410.0, "step": 2865 }, { "epoch": 1.0498328218751432, "grad_norm": 0.17872442938260696, "learning_rate": 3.7031387773023935e-05, "loss": 0.5289, "num_tokens": 2191676353.0, "step": 2866 }, { "epoch": 1.0501992396830486, "grad_norm": 0.1607807159753383, "learning_rate": 3.7029037706073575e-05, "loss": 0.4789, "num_tokens": 2192349844.0, "step": 2867 }, { "epoch": 1.050565657490954, "grad_norm": 0.230566686501408, "learning_rate": 3.702668679295844e-05, "loss": 0.5304, "num_tokens": 2193257461.0, "step": 2868 }, { "epoch": 1.0509320752988596, "grad_norm": 0.19638472402432744, "learning_rate": 3.7024335033810885e-05, "loss": 0.5045, "num_tokens": 2193968848.0, "step": 2869 }, { "epoch": 1.051298493106765, "grad_norm": 0.15943134794096434, "learning_rate": 3.702198242876331e-05, "loss": 0.5074, "num_tokens": 2194737388.0, "step": 2870 }, { "epoch": 1.0516649109146705, "grad_norm": 0.1881934339311293, "learning_rate": 3.701962897794817e-05, "loss": 0.4818, "num_tokens": 2195538278.0, "step": 2871 }, { "epoch": 1.052031328722576, "grad_norm": 0.16088684804629422, "learning_rate": 3.701727468149798e-05, "loss": 0.5561, "num_tokens": 2196301700.0, "step": 2872 }, { "epoch": 1.0523977465304815, "grad_norm": 0.16588478250641042, "learning_rate": 3.7014919539545285e-05, "loss": 0.5057, "num_tokens": 2196965847.0, "step": 2873 }, { "epoch": 1.052764164338387, "grad_norm": 0.20042645519202926, "learning_rate": 3.701256355222269e-05, "loss": 0.5013, "num_tokens": 2197762104.0, "step": 2874 }, { "epoch": 1.0531305821462924, "grad_norm": 0.1549081169055677, "learning_rate": 3.701020671966284e-05, "loss": 0.4839, "num_tokens": 2198430929.0, "step": 2875 }, { "epoch": 1.0534969999541979, "grad_norm": 0.18489186224120938, "learning_rate": 3.700784904199842e-05, "loss": 0.4954, "num_tokens": 2199177475.0, "step": 2876 }, { "epoch": 1.0538634177621033, "grad_norm": 0.17857531864967016, "learning_rate": 3.700549051936218e-05, "loss": 0.5142, "num_tokens": 2199915049.0, "step": 2877 }, { "epoch": 1.0542298355700086, "grad_norm": 0.1556384074758268, "learning_rate": 3.7003131151886915e-05, "loss": 0.487, "num_tokens": 2200629401.0, "step": 2878 }, { "epoch": 1.054596253377914, "grad_norm": 0.1532128588010719, "learning_rate": 3.700077093970545e-05, "loss": 0.5019, "num_tokens": 2201360597.0, "step": 2879 }, { "epoch": 1.0549626711858195, "grad_norm": 0.16042855497017455, "learning_rate": 3.6998409882950674e-05, "loss": 0.4874, "num_tokens": 2202122539.0, "step": 2880 }, { "epoch": 1.055329088993725, "grad_norm": 0.14461195906724736, "learning_rate": 3.699604798175553e-05, "loss": 0.4938, "num_tokens": 2202868832.0, "step": 2881 }, { "epoch": 1.0556955068016305, "grad_norm": 0.1501205315643935, "learning_rate": 3.699368523625297e-05, "loss": 0.5205, "num_tokens": 2203722967.0, "step": 2882 }, { "epoch": 1.056061924609536, "grad_norm": 0.15187234636202662, "learning_rate": 3.699132164657606e-05, "loss": 0.5314, "num_tokens": 2204510249.0, "step": 2883 }, { "epoch": 1.0564283424174414, "grad_norm": 0.16960865764141386, "learning_rate": 3.698895721285784e-05, "loss": 0.4867, "num_tokens": 2205197664.0, "step": 2884 }, { "epoch": 1.056794760225347, "grad_norm": 0.14131818547161368, "learning_rate": 3.698659193523145e-05, "loss": 0.5053, "num_tokens": 2205983110.0, "step": 2885 }, { "epoch": 1.0571611780332524, "grad_norm": 0.1564604977279255, "learning_rate": 3.698422581383006e-05, "loss": 0.5119, "num_tokens": 2206827274.0, "step": 2886 }, { "epoch": 1.0575275958411579, "grad_norm": 0.16157159468069204, "learning_rate": 3.6981858848786886e-05, "loss": 0.4971, "num_tokens": 2207585922.0, "step": 2887 }, { "epoch": 1.0578940136490633, "grad_norm": 0.16757197902344076, "learning_rate": 3.697949104023519e-05, "loss": 0.5231, "num_tokens": 2208339192.0, "step": 2888 }, { "epoch": 1.0582604314569688, "grad_norm": 0.16155186851733033, "learning_rate": 3.6977122388308284e-05, "loss": 0.5283, "num_tokens": 2209131172.0, "step": 2889 }, { "epoch": 1.0586268492648743, "grad_norm": 0.19389850574618014, "learning_rate": 3.6974752893139525e-05, "loss": 0.492, "num_tokens": 2209737915.0, "step": 2890 }, { "epoch": 1.0589932670727797, "grad_norm": 0.17609722342350168, "learning_rate": 3.697238255486233e-05, "loss": 0.5254, "num_tokens": 2210555313.0, "step": 2891 }, { "epoch": 1.0593596848806852, "grad_norm": 0.17872087169830478, "learning_rate": 3.697001137361014e-05, "loss": 0.5284, "num_tokens": 2211307365.0, "step": 2892 }, { "epoch": 1.0597261026885907, "grad_norm": 0.17394788100756123, "learning_rate": 3.696763934951648e-05, "loss": 0.529, "num_tokens": 2212053743.0, "step": 2893 }, { "epoch": 1.0600925204964962, "grad_norm": 0.18383914951181618, "learning_rate": 3.696526648271487e-05, "loss": 0.5119, "num_tokens": 2212723491.0, "step": 2894 }, { "epoch": 1.0604589383044016, "grad_norm": 0.20477245649434714, "learning_rate": 3.696289277333893e-05, "loss": 0.5691, "num_tokens": 2213385158.0, "step": 2895 }, { "epoch": 1.060825356112307, "grad_norm": 0.1639130902655811, "learning_rate": 3.696051822152229e-05, "loss": 0.5394, "num_tokens": 2214183462.0, "step": 2896 }, { "epoch": 1.0611917739202126, "grad_norm": 0.17873523959927343, "learning_rate": 3.695814282739866e-05, "loss": 0.4932, "num_tokens": 2214923695.0, "step": 2897 }, { "epoch": 1.061558191728118, "grad_norm": 0.169891766754911, "learning_rate": 3.695576659110176e-05, "loss": 0.4988, "num_tokens": 2215671251.0, "step": 2898 }, { "epoch": 1.0619246095360235, "grad_norm": 0.17716783923060525, "learning_rate": 3.695338951276539e-05, "loss": 0.4772, "num_tokens": 2216457043.0, "step": 2899 }, { "epoch": 1.062291027343929, "grad_norm": 0.15459472016135528, "learning_rate": 3.6951011592523375e-05, "loss": 0.5073, "num_tokens": 2217142860.0, "step": 2900 }, { "epoch": 1.0626574451518345, "grad_norm": 0.17837760986413567, "learning_rate": 3.6948632830509606e-05, "loss": 0.5113, "num_tokens": 2217936519.0, "step": 2901 }, { "epoch": 1.06302386295974, "grad_norm": 0.17275496953962077, "learning_rate": 3.6946253226858e-05, "loss": 0.5016, "num_tokens": 2218602740.0, "step": 2902 }, { "epoch": 1.0633902807676452, "grad_norm": 0.18002606041288396, "learning_rate": 3.6943872781702543e-05, "loss": 0.5116, "num_tokens": 2219250944.0, "step": 2903 }, { "epoch": 1.0637566985755509, "grad_norm": 0.15833275892247153, "learning_rate": 3.694149149517726e-05, "loss": 0.5235, "num_tokens": 2219951516.0, "step": 2904 }, { "epoch": 1.0641231163834561, "grad_norm": 0.16050962466439017, "learning_rate": 3.6939109367416206e-05, "loss": 0.5185, "num_tokens": 2220718780.0, "step": 2905 }, { "epoch": 1.0644895341913616, "grad_norm": 0.17187386774230456, "learning_rate": 3.6936726398553524e-05, "loss": 0.5414, "num_tokens": 2221417720.0, "step": 2906 }, { "epoch": 1.064855951999267, "grad_norm": 0.19858831829131882, "learning_rate": 3.693434258872336e-05, "loss": 0.4983, "num_tokens": 2222163105.0, "step": 2907 }, { "epoch": 1.0652223698071726, "grad_norm": 0.15001904957438147, "learning_rate": 3.693195793805994e-05, "loss": 0.4953, "num_tokens": 2222845791.0, "step": 2908 }, { "epoch": 1.065588787615078, "grad_norm": 0.18775743624770247, "learning_rate": 3.692957244669752e-05, "loss": 0.5127, "num_tokens": 2223688556.0, "step": 2909 }, { "epoch": 1.0659552054229835, "grad_norm": 0.15850207434295865, "learning_rate": 3.692718611477041e-05, "loss": 0.551, "num_tokens": 2224490908.0, "step": 2910 }, { "epoch": 1.066321623230889, "grad_norm": 0.20870758518463245, "learning_rate": 3.692479894241296e-05, "loss": 0.5131, "num_tokens": 2225118336.0, "step": 2911 }, { "epoch": 1.0666880410387944, "grad_norm": 0.19768243437787256, "learning_rate": 3.692241092975958e-05, "loss": 0.4888, "num_tokens": 2225909832.0, "step": 2912 }, { "epoch": 1.0670544588467, "grad_norm": 0.16221608579157318, "learning_rate": 3.69200220769447e-05, "loss": 0.5025, "num_tokens": 2226602085.0, "step": 2913 }, { "epoch": 1.0674208766546054, "grad_norm": 0.19981581640391757, "learning_rate": 3.6917632384102846e-05, "loss": 0.4937, "num_tokens": 2227287412.0, "step": 2914 }, { "epoch": 1.0677872944625109, "grad_norm": 0.15352899402616202, "learning_rate": 3.6915241851368544e-05, "loss": 0.5322, "num_tokens": 2228120566.0, "step": 2915 }, { "epoch": 1.0681537122704163, "grad_norm": 0.18169915589555713, "learning_rate": 3.691285047887639e-05, "loss": 0.4969, "num_tokens": 2228956017.0, "step": 2916 }, { "epoch": 1.0685201300783218, "grad_norm": 0.15502826054721994, "learning_rate": 3.691045826676102e-05, "loss": 0.5, "num_tokens": 2229708280.0, "step": 2917 }, { "epoch": 1.0688865478862273, "grad_norm": 0.16410982827178802, "learning_rate": 3.690806521515714e-05, "loss": 0.493, "num_tokens": 2230471322.0, "step": 2918 }, { "epoch": 1.0692529656941328, "grad_norm": 0.17469451334738956, "learning_rate": 3.690567132419945e-05, "loss": 0.5212, "num_tokens": 2231109433.0, "step": 2919 }, { "epoch": 1.0696193835020382, "grad_norm": 0.158612668749135, "learning_rate": 3.690327659402276e-05, "loss": 0.4916, "num_tokens": 2231899846.0, "step": 2920 }, { "epoch": 1.0699858013099437, "grad_norm": 0.167856201613886, "learning_rate": 3.690088102476188e-05, "loss": 0.5489, "num_tokens": 2232653557.0, "step": 2921 }, { "epoch": 1.0703522191178492, "grad_norm": 0.16027796928455054, "learning_rate": 3.689848461655169e-05, "loss": 0.4991, "num_tokens": 2233397856.0, "step": 2922 }, { "epoch": 1.0707186369257546, "grad_norm": 0.17078891147376885, "learning_rate": 3.689608736952712e-05, "loss": 0.5244, "num_tokens": 2234198230.0, "step": 2923 }, { "epoch": 1.0710850547336601, "grad_norm": 0.17531004764044167, "learning_rate": 3.689368928382313e-05, "loss": 0.5208, "num_tokens": 2234981520.0, "step": 2924 }, { "epoch": 1.0714514725415656, "grad_norm": 0.14151083870402656, "learning_rate": 3.6891290359574745e-05, "loss": 0.5109, "num_tokens": 2235751142.0, "step": 2925 }, { "epoch": 1.071817890349471, "grad_norm": 0.17303367330674618, "learning_rate": 3.6888890596917024e-05, "loss": 0.4817, "num_tokens": 2236479277.0, "step": 2926 }, { "epoch": 1.0721843081573765, "grad_norm": 0.17011156565945068, "learning_rate": 3.688648999598508e-05, "loss": 0.5008, "num_tokens": 2237276142.0, "step": 2927 }, { "epoch": 1.072550725965282, "grad_norm": 0.18263039044621882, "learning_rate": 3.6884088556914075e-05, "loss": 0.5325, "num_tokens": 2238027212.0, "step": 2928 }, { "epoch": 1.0729171437731875, "grad_norm": 0.17370108548196736, "learning_rate": 3.6881686279839206e-05, "loss": 0.4874, "num_tokens": 2238842122.0, "step": 2929 }, { "epoch": 1.0732835615810927, "grad_norm": 0.16324054120636572, "learning_rate": 3.687928316489573e-05, "loss": 0.4973, "num_tokens": 2239725111.0, "step": 2930 }, { "epoch": 1.0736499793889984, "grad_norm": 0.19890333028334428, "learning_rate": 3.687687921221895e-05, "loss": 0.5146, "num_tokens": 2240460952.0, "step": 2931 }, { "epoch": 1.0740163971969037, "grad_norm": 0.13739000374760799, "learning_rate": 3.6874474421944214e-05, "loss": 0.497, "num_tokens": 2241237527.0, "step": 2932 }, { "epoch": 1.0743828150048091, "grad_norm": 0.17894292505376103, "learning_rate": 3.687206879420691e-05, "loss": 0.4734, "num_tokens": 2241880860.0, "step": 2933 }, { "epoch": 1.0747492328127146, "grad_norm": 0.15437329739536226, "learning_rate": 3.686966232914249e-05, "loss": 0.5071, "num_tokens": 2242729623.0, "step": 2934 }, { "epoch": 1.07511565062062, "grad_norm": 0.16990290695057522, "learning_rate": 3.686725502688643e-05, "loss": 0.555, "num_tokens": 2243396792.0, "step": 2935 }, { "epoch": 1.0754820684285256, "grad_norm": 0.1576227443491719, "learning_rate": 3.686484688757427e-05, "loss": 0.5116, "num_tokens": 2244289900.0, "step": 2936 }, { "epoch": 1.075848486236431, "grad_norm": 0.14995767088333786, "learning_rate": 3.68624379113416e-05, "loss": 0.5481, "num_tokens": 2245039828.0, "step": 2937 }, { "epoch": 1.0762149040443365, "grad_norm": 0.18623853606590646, "learning_rate": 3.686002809832405e-05, "loss": 0.4971, "num_tokens": 2245809246.0, "step": 2938 }, { "epoch": 1.076581321852242, "grad_norm": 0.14677894879446024, "learning_rate": 3.685761744865729e-05, "loss": 0.5185, "num_tokens": 2246652382.0, "step": 2939 }, { "epoch": 1.0769477396601475, "grad_norm": 0.15495612419460822, "learning_rate": 3.6855205962477046e-05, "loss": 0.5215, "num_tokens": 2247531733.0, "step": 2940 }, { "epoch": 1.077314157468053, "grad_norm": 0.15463572896106328, "learning_rate": 3.685279363991909e-05, "loss": 0.5086, "num_tokens": 2248301964.0, "step": 2941 }, { "epoch": 1.0776805752759584, "grad_norm": 0.17082575679692408, "learning_rate": 3.685038048111925e-05, "loss": 0.5051, "num_tokens": 2249111671.0, "step": 2942 }, { "epoch": 1.0780469930838639, "grad_norm": 0.15844094201873168, "learning_rate": 3.684796648621337e-05, "loss": 0.5138, "num_tokens": 2249893316.0, "step": 2943 }, { "epoch": 1.0784134108917693, "grad_norm": 0.1524364722959673, "learning_rate": 3.684555165533739e-05, "loss": 0.4917, "num_tokens": 2250688079.0, "step": 2944 }, { "epoch": 1.0787798286996748, "grad_norm": 0.17029190750290457, "learning_rate": 3.684313598862724e-05, "loss": 0.5161, "num_tokens": 2251518490.0, "step": 2945 }, { "epoch": 1.0791462465075803, "grad_norm": 0.16162494620585754, "learning_rate": 3.684071948621895e-05, "loss": 0.4986, "num_tokens": 2252277667.0, "step": 2946 }, { "epoch": 1.0795126643154858, "grad_norm": 0.14676675333380762, "learning_rate": 3.683830214824856e-05, "loss": 0.5032, "num_tokens": 2253107441.0, "step": 2947 }, { "epoch": 1.0798790821233912, "grad_norm": 0.1616910127275054, "learning_rate": 3.683588397485219e-05, "loss": 0.4796, "num_tokens": 2253842730.0, "step": 2948 }, { "epoch": 1.0802454999312967, "grad_norm": 0.15326359463907313, "learning_rate": 3.683346496616597e-05, "loss": 0.4887, "num_tokens": 2254550882.0, "step": 2949 }, { "epoch": 1.0806119177392022, "grad_norm": 0.16734548204349314, "learning_rate": 3.6831045122326096e-05, "loss": 0.4962, "num_tokens": 2255291061.0, "step": 2950 }, { "epoch": 1.0809783355471076, "grad_norm": 0.15336696618377088, "learning_rate": 3.6828624443468817e-05, "loss": 0.5321, "num_tokens": 2255991694.0, "step": 2951 }, { "epoch": 1.0813447533550131, "grad_norm": 0.17772293523916588, "learning_rate": 3.682620292973042e-05, "loss": 0.547, "num_tokens": 2256706929.0, "step": 2952 }, { "epoch": 1.0817111711629186, "grad_norm": 0.15037822932826836, "learning_rate": 3.682378058124724e-05, "loss": 0.4802, "num_tokens": 2257529293.0, "step": 2953 }, { "epoch": 1.082077588970824, "grad_norm": 0.16315240923324703, "learning_rate": 3.682135739815566e-05, "loss": 0.4859, "num_tokens": 2258328644.0, "step": 2954 }, { "epoch": 1.0824440067787295, "grad_norm": 0.14656553110855197, "learning_rate": 3.6818933380592114e-05, "loss": 0.5082, "num_tokens": 2259133447.0, "step": 2955 }, { "epoch": 1.082810424586635, "grad_norm": 0.1461455072935149, "learning_rate": 3.6816508528693074e-05, "loss": 0.4667, "num_tokens": 2259966007.0, "step": 2956 }, { "epoch": 1.0831768423945403, "grad_norm": 0.16556724702291536, "learning_rate": 3.6814082842595065e-05, "loss": 0.5174, "num_tokens": 2260674903.0, "step": 2957 }, { "epoch": 1.0835432602024457, "grad_norm": 0.14721171516066936, "learning_rate": 3.681165632243466e-05, "loss": 0.5033, "num_tokens": 2261519351.0, "step": 2958 }, { "epoch": 1.0839096780103512, "grad_norm": 0.1538375501801897, "learning_rate": 3.6809228968348465e-05, "loss": 0.4993, "num_tokens": 2262242551.0, "step": 2959 }, { "epoch": 1.0842760958182567, "grad_norm": 0.16202645202394297, "learning_rate": 3.680680078047317e-05, "loss": 0.5139, "num_tokens": 2263031426.0, "step": 2960 }, { "epoch": 1.0846425136261622, "grad_norm": 0.178309215618974, "learning_rate": 3.680437175894546e-05, "loss": 0.5087, "num_tokens": 2263740699.0, "step": 2961 }, { "epoch": 1.0850089314340676, "grad_norm": 0.1662378608118618, "learning_rate": 3.680194190390211e-05, "loss": 0.5375, "num_tokens": 2264513209.0, "step": 2962 }, { "epoch": 1.085375349241973, "grad_norm": 0.17296006637364714, "learning_rate": 3.6799511215479926e-05, "loss": 0.557, "num_tokens": 2265137890.0, "step": 2963 }, { "epoch": 1.0857417670498786, "grad_norm": 0.1608765774005849, "learning_rate": 3.679707969381575e-05, "loss": 0.5012, "num_tokens": 2265844970.0, "step": 2964 }, { "epoch": 1.086108184857784, "grad_norm": 0.14565416476000978, "learning_rate": 3.679464733904649e-05, "loss": 0.5169, "num_tokens": 2266648335.0, "step": 2965 }, { "epoch": 1.0864746026656895, "grad_norm": 0.15320967291106585, "learning_rate": 3.6792214151309095e-05, "loss": 0.4918, "num_tokens": 2267412919.0, "step": 2966 }, { "epoch": 1.086841020473595, "grad_norm": 0.15951801743471888, "learning_rate": 3.678978013074055e-05, "loss": 0.5166, "num_tokens": 2268078387.0, "step": 2967 }, { "epoch": 1.0872074382815005, "grad_norm": 0.1369578619139743, "learning_rate": 3.678734527747789e-05, "loss": 0.4685, "num_tokens": 2268897109.0, "step": 2968 }, { "epoch": 1.087573856089406, "grad_norm": 0.16675406718470082, "learning_rate": 3.6784909591658225e-05, "loss": 0.5115, "num_tokens": 2269585950.0, "step": 2969 }, { "epoch": 1.0879402738973114, "grad_norm": 0.1611533340309759, "learning_rate": 3.678247307341866e-05, "loss": 0.5244, "num_tokens": 2270394471.0, "step": 2970 }, { "epoch": 1.0883066917052169, "grad_norm": 0.1813240339223596, "learning_rate": 3.6780035722896406e-05, "loss": 0.5031, "num_tokens": 2271145982.0, "step": 2971 }, { "epoch": 1.0886731095131224, "grad_norm": 0.15622672109304458, "learning_rate": 3.6777597540228666e-05, "loss": 0.4945, "num_tokens": 2271935651.0, "step": 2972 }, { "epoch": 1.0890395273210278, "grad_norm": 0.166237640947709, "learning_rate": 3.6775158525552724e-05, "loss": 0.4907, "num_tokens": 2272705776.0, "step": 2973 }, { "epoch": 1.0894059451289333, "grad_norm": 0.1438641688397723, "learning_rate": 3.677271867900591e-05, "loss": 0.4884, "num_tokens": 2273431900.0, "step": 2974 }, { "epoch": 1.0897723629368388, "grad_norm": 0.15777246239241904, "learning_rate": 3.677027800072557e-05, "loss": 0.508, "num_tokens": 2274162298.0, "step": 2975 }, { "epoch": 1.0901387807447442, "grad_norm": 0.18717594524891126, "learning_rate": 3.676783649084914e-05, "loss": 0.5432, "num_tokens": 2274873959.0, "step": 2976 }, { "epoch": 1.0905051985526497, "grad_norm": 0.1733497960386583, "learning_rate": 3.676539414951407e-05, "loss": 0.5388, "num_tokens": 2275472965.0, "step": 2977 }, { "epoch": 1.0908716163605552, "grad_norm": 0.14752010245270886, "learning_rate": 3.6762950976857874e-05, "loss": 0.5236, "num_tokens": 2276219644.0, "step": 2978 }, { "epoch": 1.0912380341684607, "grad_norm": 0.17083922289389766, "learning_rate": 3.676050697301811e-05, "loss": 0.4862, "num_tokens": 2277063351.0, "step": 2979 }, { "epoch": 1.0916044519763661, "grad_norm": 0.1691171068407259, "learning_rate": 3.675806213813237e-05, "loss": 0.514, "num_tokens": 2277653190.0, "step": 2980 }, { "epoch": 1.0919708697842716, "grad_norm": 0.18345243802944142, "learning_rate": 3.675561647233832e-05, "loss": 0.492, "num_tokens": 2278467795.0, "step": 2981 }, { "epoch": 1.092337287592177, "grad_norm": 0.15105613385537986, "learning_rate": 3.675316997577364e-05, "loss": 0.4701, "num_tokens": 2279220682.0, "step": 2982 }, { "epoch": 1.0927037054000825, "grad_norm": 0.16189988069290015, "learning_rate": 3.675072264857608e-05, "loss": 0.4982, "num_tokens": 2280120527.0, "step": 2983 }, { "epoch": 1.0930701232079878, "grad_norm": 0.16127351210150903, "learning_rate": 3.674827449088343e-05, "loss": 0.523, "num_tokens": 2280917583.0, "step": 2984 }, { "epoch": 1.0934365410158933, "grad_norm": 0.17880434332319428, "learning_rate": 3.674582550283352e-05, "loss": 0.5095, "num_tokens": 2281700486.0, "step": 2985 }, { "epoch": 1.0938029588237987, "grad_norm": 0.16663830590397335, "learning_rate": 3.674337568456424e-05, "loss": 0.521, "num_tokens": 2282394097.0, "step": 2986 }, { "epoch": 1.0941693766317042, "grad_norm": 0.17134763373917772, "learning_rate": 3.6740925036213505e-05, "loss": 0.5, "num_tokens": 2283111685.0, "step": 2987 }, { "epoch": 1.0945357944396097, "grad_norm": 0.15126551188737725, "learning_rate": 3.673847355791931e-05, "loss": 0.5048, "num_tokens": 2283844464.0, "step": 2988 }, { "epoch": 1.0949022122475152, "grad_norm": 0.154181551642721, "learning_rate": 3.6736021249819675e-05, "loss": 0.4767, "num_tokens": 2284671459.0, "step": 2989 }, { "epoch": 1.0952686300554206, "grad_norm": 0.1717298302850861, "learning_rate": 3.673356811205266e-05, "loss": 0.5422, "num_tokens": 2285455131.0, "step": 2990 }, { "epoch": 1.095635047863326, "grad_norm": 0.17014657295439484, "learning_rate": 3.6731114144756396e-05, "loss": 0.48, "num_tokens": 2286179862.0, "step": 2991 }, { "epoch": 1.0960014656712316, "grad_norm": 0.1448082940777005, "learning_rate": 3.6728659348069034e-05, "loss": 0.4655, "num_tokens": 2286987809.0, "step": 2992 }, { "epoch": 1.096367883479137, "grad_norm": 0.2040088736688293, "learning_rate": 3.672620372212878e-05, "loss": 0.5253, "num_tokens": 2287618251.0, "step": 2993 }, { "epoch": 1.0967343012870425, "grad_norm": 0.18228595479130152, "learning_rate": 3.6723747267073904e-05, "loss": 0.5157, "num_tokens": 2288329707.0, "step": 2994 }, { "epoch": 1.097100719094948, "grad_norm": 0.17133969788898212, "learning_rate": 3.67212899830427e-05, "loss": 0.4929, "num_tokens": 2289034628.0, "step": 2995 }, { "epoch": 1.0974671369028535, "grad_norm": 0.2225925922971639, "learning_rate": 3.671883187017353e-05, "loss": 0.4663, "num_tokens": 2289866497.0, "step": 2996 }, { "epoch": 1.097833554710759, "grad_norm": 0.1546195613767152, "learning_rate": 3.671637292860477e-05, "loss": 0.4721, "num_tokens": 2290704813.0, "step": 2997 }, { "epoch": 1.0981999725186644, "grad_norm": 0.18546934328080364, "learning_rate": 3.671391315847489e-05, "loss": 0.4982, "num_tokens": 2291568488.0, "step": 2998 }, { "epoch": 1.0985663903265699, "grad_norm": 0.203754036137524, "learning_rate": 3.671145255992235e-05, "loss": 0.5091, "num_tokens": 2292317534.0, "step": 2999 }, { "epoch": 1.0989328081344754, "grad_norm": 0.16678144750674867, "learning_rate": 3.6708991133085714e-05, "loss": 0.4956, "num_tokens": 2293008339.0, "step": 3000 }, { "epoch": 1.0992992259423808, "grad_norm": 0.19079396317927708, "learning_rate": 3.670652887810355e-05, "loss": 0.5168, "num_tokens": 2293887768.0, "step": 3001 }, { "epoch": 1.0996656437502863, "grad_norm": 0.18346307334461248, "learning_rate": 3.670406579511449e-05, "loss": 0.4932, "num_tokens": 2294678098.0, "step": 3002 }, { "epoch": 1.1000320615581918, "grad_norm": 0.15277335282907006, "learning_rate": 3.670160188425721e-05, "loss": 0.5407, "num_tokens": 2295416961.0, "step": 3003 }, { "epoch": 1.1003984793660972, "grad_norm": 0.1606008009645219, "learning_rate": 3.669913714567044e-05, "loss": 0.5202, "num_tokens": 2296235460.0, "step": 3004 }, { "epoch": 1.1007648971740027, "grad_norm": 0.18114565388974987, "learning_rate": 3.6696671579492933e-05, "loss": 0.4756, "num_tokens": 2297054970.0, "step": 3005 }, { "epoch": 1.1011313149819082, "grad_norm": 0.15885138862208828, "learning_rate": 3.6694205185863524e-05, "loss": 0.5202, "num_tokens": 2297851827.0, "step": 3006 }, { "epoch": 1.1014977327898137, "grad_norm": 0.17382945602166272, "learning_rate": 3.669173796492107e-05, "loss": 0.5289, "num_tokens": 2298632835.0, "step": 3007 }, { "epoch": 1.1018641505977191, "grad_norm": 0.16920531112560624, "learning_rate": 3.668926991680448e-05, "loss": 0.5372, "num_tokens": 2299297003.0, "step": 3008 }, { "epoch": 1.1022305684056246, "grad_norm": 0.1516022519516047, "learning_rate": 3.668680104165271e-05, "loss": 0.4982, "num_tokens": 2300021215.0, "step": 3009 }, { "epoch": 1.10259698621353, "grad_norm": 0.15384565300574968, "learning_rate": 3.668433133960475e-05, "loss": 0.5066, "num_tokens": 2300935143.0, "step": 3010 }, { "epoch": 1.1029634040214353, "grad_norm": 0.14148732614895404, "learning_rate": 3.668186081079967e-05, "loss": 0.5033, "num_tokens": 2301683354.0, "step": 3011 }, { "epoch": 1.1033298218293408, "grad_norm": 0.14123560854621933, "learning_rate": 3.667938945537656e-05, "loss": 0.492, "num_tokens": 2302442810.0, "step": 3012 }, { "epoch": 1.1036962396372463, "grad_norm": 0.16800466661977034, "learning_rate": 3.6676917273474556e-05, "loss": 0.5056, "num_tokens": 2303155871.0, "step": 3013 }, { "epoch": 1.1040626574451518, "grad_norm": 0.1643907748218865, "learning_rate": 3.6674444265232846e-05, "loss": 0.484, "num_tokens": 2303835435.0, "step": 3014 }, { "epoch": 1.1044290752530572, "grad_norm": 0.15928322841882536, "learning_rate": 3.667197043079067e-05, "loss": 0.538, "num_tokens": 2304625546.0, "step": 3015 }, { "epoch": 1.1047954930609627, "grad_norm": 0.1538326013734548, "learning_rate": 3.666949577028731e-05, "loss": 0.4889, "num_tokens": 2305400978.0, "step": 3016 }, { "epoch": 1.1051619108688682, "grad_norm": 0.1530338881713503, "learning_rate": 3.6667020283862094e-05, "loss": 0.4882, "num_tokens": 2306168712.0, "step": 3017 }, { "epoch": 1.1055283286767736, "grad_norm": 0.1539738558224865, "learning_rate": 3.666454397165439e-05, "loss": 0.493, "num_tokens": 2306771994.0, "step": 3018 }, { "epoch": 1.1058947464846791, "grad_norm": 0.17021615212686028, "learning_rate": 3.6662066833803624e-05, "loss": 0.5147, "num_tokens": 2307432774.0, "step": 3019 }, { "epoch": 1.1062611642925846, "grad_norm": 0.16486021887887048, "learning_rate": 3.665958887044928e-05, "loss": 0.5392, "num_tokens": 2308164473.0, "step": 3020 }, { "epoch": 1.10662758210049, "grad_norm": 0.16193279304563615, "learning_rate": 3.665711008173084e-05, "loss": 0.468, "num_tokens": 2308992516.0, "step": 3021 }, { "epoch": 1.1069939999083955, "grad_norm": 0.17521456626087595, "learning_rate": 3.665463046778789e-05, "loss": 0.5575, "num_tokens": 2309632515.0, "step": 3022 }, { "epoch": 1.107360417716301, "grad_norm": 0.1588571054644955, "learning_rate": 3.6652150028760026e-05, "loss": 0.5108, "num_tokens": 2310368751.0, "step": 3023 }, { "epoch": 1.1077268355242065, "grad_norm": 0.1808266995481405, "learning_rate": 3.664966876478691e-05, "loss": 0.5102, "num_tokens": 2311163854.0, "step": 3024 }, { "epoch": 1.108093253332112, "grad_norm": 0.16636986532399103, "learning_rate": 3.664718667600823e-05, "loss": 0.5222, "num_tokens": 2311887000.0, "step": 3025 }, { "epoch": 1.1084596711400174, "grad_norm": 0.1605757690687926, "learning_rate": 3.6644703762563745e-05, "loss": 0.5052, "num_tokens": 2312706124.0, "step": 3026 }, { "epoch": 1.108826088947923, "grad_norm": 0.16910039083055547, "learning_rate": 3.664222002459324e-05, "loss": 0.5152, "num_tokens": 2313550948.0, "step": 3027 }, { "epoch": 1.1091925067558284, "grad_norm": 0.17357519246124994, "learning_rate": 3.6639735462236555e-05, "loss": 0.5621, "num_tokens": 2314282622.0, "step": 3028 }, { "epoch": 1.1095589245637338, "grad_norm": 0.19425689564313753, "learning_rate": 3.6637250075633577e-05, "loss": 0.5352, "num_tokens": 2315020980.0, "step": 3029 }, { "epoch": 1.1099253423716393, "grad_norm": 0.18022802142194255, "learning_rate": 3.6634763864924244e-05, "loss": 0.5499, "num_tokens": 2315854170.0, "step": 3030 }, { "epoch": 1.1102917601795448, "grad_norm": 0.1885467524091528, "learning_rate": 3.663227683024853e-05, "loss": 0.5363, "num_tokens": 2316650014.0, "step": 3031 }, { "epoch": 1.1106581779874503, "grad_norm": 0.16239023159265822, "learning_rate": 3.662978897174645e-05, "loss": 0.4934, "num_tokens": 2317402686.0, "step": 3032 }, { "epoch": 1.1110245957953557, "grad_norm": 0.20517859123918944, "learning_rate": 3.662730028955809e-05, "loss": 0.5373, "num_tokens": 2318161950.0, "step": 3033 }, { "epoch": 1.1113910136032612, "grad_norm": 0.16466424670739557, "learning_rate": 3.6624810783823564e-05, "loss": 0.5037, "num_tokens": 2318911172.0, "step": 3034 }, { "epoch": 1.1117574314111667, "grad_norm": 0.18417835132782728, "learning_rate": 3.6622320454683035e-05, "loss": 0.5509, "num_tokens": 2319566129.0, "step": 3035 }, { "epoch": 1.1121238492190721, "grad_norm": 0.1747551050628786, "learning_rate": 3.661982930227671e-05, "loss": 0.4999, "num_tokens": 2320311082.0, "step": 3036 }, { "epoch": 1.1124902670269776, "grad_norm": 0.14645211974710154, "learning_rate": 3.661733732674486e-05, "loss": 0.4841, "num_tokens": 2321150955.0, "step": 3037 }, { "epoch": 1.1128566848348829, "grad_norm": 0.16573690320229098, "learning_rate": 3.6614844528227765e-05, "loss": 0.5164, "num_tokens": 2321999772.0, "step": 3038 }, { "epoch": 1.1132231026427883, "grad_norm": 0.1481372661644725, "learning_rate": 3.661235090686579e-05, "loss": 0.499, "num_tokens": 2322772251.0, "step": 3039 }, { "epoch": 1.1135895204506938, "grad_norm": 0.1554446985404105, "learning_rate": 3.6609856462799324e-05, "loss": 0.4767, "num_tokens": 2323588270.0, "step": 3040 }, { "epoch": 1.1139559382585993, "grad_norm": 0.1560816092967435, "learning_rate": 3.660736119616882e-05, "loss": 0.5309, "num_tokens": 2324417340.0, "step": 3041 }, { "epoch": 1.1143223560665048, "grad_norm": 0.15282244123643413, "learning_rate": 3.660486510711475e-05, "loss": 0.4929, "num_tokens": 2325200058.0, "step": 3042 }, { "epoch": 1.1146887738744102, "grad_norm": 0.1581199589141775, "learning_rate": 3.660236819577767e-05, "loss": 0.5307, "num_tokens": 2325916609.0, "step": 3043 }, { "epoch": 1.1150551916823157, "grad_norm": 0.18872971978922162, "learning_rate": 3.659987046229814e-05, "loss": 0.5354, "num_tokens": 2326671836.0, "step": 3044 }, { "epoch": 1.1154216094902212, "grad_norm": 0.15268450804548053, "learning_rate": 3.65973719068168e-05, "loss": 0.4793, "num_tokens": 2327364027.0, "step": 3045 }, { "epoch": 1.1157880272981267, "grad_norm": 0.1650443775315327, "learning_rate": 3.659487252947432e-05, "loss": 0.4958, "num_tokens": 2328208649.0, "step": 3046 }, { "epoch": 1.1161544451060321, "grad_norm": 0.17024531393658762, "learning_rate": 3.659237233041142e-05, "loss": 0.5141, "num_tokens": 2328845366.0, "step": 3047 }, { "epoch": 1.1165208629139376, "grad_norm": 0.15827303606881893, "learning_rate": 3.6589871309768874e-05, "loss": 0.5421, "num_tokens": 2329582755.0, "step": 3048 }, { "epoch": 1.116887280721843, "grad_norm": 0.1849178536691577, "learning_rate": 3.6587369467687477e-05, "loss": 0.4921, "num_tokens": 2330364099.0, "step": 3049 }, { "epoch": 1.1172536985297485, "grad_norm": 0.1533462962371167, "learning_rate": 3.6584866804308093e-05, "loss": 0.5028, "num_tokens": 2331045302.0, "step": 3050 }, { "epoch": 1.117620116337654, "grad_norm": 0.158650747888117, "learning_rate": 3.6582363319771644e-05, "loss": 0.5124, "num_tokens": 2331737649.0, "step": 3051 }, { "epoch": 1.1179865341455595, "grad_norm": 0.1642912932712474, "learning_rate": 3.657985901421906e-05, "loss": 0.4824, "num_tokens": 2332549004.0, "step": 3052 }, { "epoch": 1.118352951953465, "grad_norm": 0.16115877662601744, "learning_rate": 3.657735388779135e-05, "loss": 0.5075, "num_tokens": 2333298166.0, "step": 3053 }, { "epoch": 1.1187193697613704, "grad_norm": 0.17623141283804453, "learning_rate": 3.6574847940629555e-05, "loss": 0.5214, "num_tokens": 2334100433.0, "step": 3054 }, { "epoch": 1.119085787569276, "grad_norm": 0.16923514680438864, "learning_rate": 3.657234117287477e-05, "loss": 0.5523, "num_tokens": 2334785744.0, "step": 3055 }, { "epoch": 1.1194522053771814, "grad_norm": 0.17246900607959548, "learning_rate": 3.656983358466811e-05, "loss": 0.5053, "num_tokens": 2335457025.0, "step": 3056 }, { "epoch": 1.1198186231850868, "grad_norm": 0.20168027574510536, "learning_rate": 3.656732517615079e-05, "loss": 0.5101, "num_tokens": 2336144582.0, "step": 3057 }, { "epoch": 1.1201850409929923, "grad_norm": 0.1684571564524584, "learning_rate": 3.6564815947464004e-05, "loss": 0.5514, "num_tokens": 2336936089.0, "step": 3058 }, { "epoch": 1.1205514588008978, "grad_norm": 0.18459873469003896, "learning_rate": 3.656230589874905e-05, "loss": 0.5199, "num_tokens": 2337671430.0, "step": 3059 }, { "epoch": 1.1209178766088033, "grad_norm": 0.21597744107864117, "learning_rate": 3.655979503014725e-05, "loss": 0.5474, "num_tokens": 2338335174.0, "step": 3060 }, { "epoch": 1.1212842944167087, "grad_norm": 0.17240523937528357, "learning_rate": 3.6557283341799956e-05, "loss": 0.479, "num_tokens": 2339000864.0, "step": 3061 }, { "epoch": 1.1216507122246142, "grad_norm": 0.18005593305944603, "learning_rate": 3.655477083384859e-05, "loss": 0.5488, "num_tokens": 2339677573.0, "step": 3062 }, { "epoch": 1.1220171300325197, "grad_norm": 0.17891228839522128, "learning_rate": 3.655225750643461e-05, "loss": 0.4896, "num_tokens": 2340565209.0, "step": 3063 }, { "epoch": 1.1223835478404252, "grad_norm": 0.16027158542184708, "learning_rate": 3.6549743359699525e-05, "loss": 0.5064, "num_tokens": 2341297028.0, "step": 3064 }, { "epoch": 1.1227499656483304, "grad_norm": 0.17374409730869075, "learning_rate": 3.6547228393784876e-05, "loss": 0.5228, "num_tokens": 2341916600.0, "step": 3065 }, { "epoch": 1.1231163834562359, "grad_norm": 0.19929391063572455, "learning_rate": 3.654471260883227e-05, "loss": 0.5159, "num_tokens": 2342731833.0, "step": 3066 }, { "epoch": 1.1234828012641414, "grad_norm": 0.17192943704018226, "learning_rate": 3.654219600498335e-05, "loss": 0.5423, "num_tokens": 2343428595.0, "step": 3067 }, { "epoch": 1.1238492190720468, "grad_norm": 0.14568244635157018, "learning_rate": 3.65396785823798e-05, "loss": 0.5338, "num_tokens": 2344312489.0, "step": 3068 }, { "epoch": 1.1242156368799523, "grad_norm": 0.20268911216613636, "learning_rate": 3.6537160341163353e-05, "loss": 0.5528, "num_tokens": 2345063814.0, "step": 3069 }, { "epoch": 1.1245820546878578, "grad_norm": 0.18408081013082161, "learning_rate": 3.653464128147581e-05, "loss": 0.4783, "num_tokens": 2345747202.0, "step": 3070 }, { "epoch": 1.1249484724957632, "grad_norm": 0.13558216246819155, "learning_rate": 3.6532121403458984e-05, "loss": 0.4502, "num_tokens": 2346567868.0, "step": 3071 }, { "epoch": 1.1253148903036687, "grad_norm": 0.2053327660343316, "learning_rate": 3.652960070725475e-05, "loss": 0.5274, "num_tokens": 2347296178.0, "step": 3072 }, { "epoch": 1.1256813081115742, "grad_norm": 0.17611929036341428, "learning_rate": 3.652707919300502e-05, "loss": 0.5055, "num_tokens": 2348043773.0, "step": 3073 }, { "epoch": 1.1260477259194797, "grad_norm": 0.16055172216944258, "learning_rate": 3.652455686085178e-05, "loss": 0.4991, "num_tokens": 2348853168.0, "step": 3074 }, { "epoch": 1.1264141437273851, "grad_norm": 0.14283567825210636, "learning_rate": 3.652203371093703e-05, "loss": 0.4793, "num_tokens": 2349768443.0, "step": 3075 }, { "epoch": 1.1267805615352906, "grad_norm": 0.18578449482034612, "learning_rate": 3.651950974340283e-05, "loss": 0.493, "num_tokens": 2350464173.0, "step": 3076 }, { "epoch": 1.127146979343196, "grad_norm": 0.16439036480614985, "learning_rate": 3.651698495839129e-05, "loss": 0.5212, "num_tokens": 2351233166.0, "step": 3077 }, { "epoch": 1.1275133971511015, "grad_norm": 0.1782950417950175, "learning_rate": 3.651445935604455e-05, "loss": 0.5059, "num_tokens": 2352031250.0, "step": 3078 }, { "epoch": 1.127879814959007, "grad_norm": 0.16671350836965115, "learning_rate": 3.651193293650482e-05, "loss": 0.4991, "num_tokens": 2352817668.0, "step": 3079 }, { "epoch": 1.1282462327669125, "grad_norm": 0.1567463403925067, "learning_rate": 3.650940569991432e-05, "loss": 0.5034, "num_tokens": 2353617585.0, "step": 3080 }, { "epoch": 1.128612650574818, "grad_norm": 0.15993400147255726, "learning_rate": 3.650687764641537e-05, "loss": 0.5089, "num_tokens": 2354407877.0, "step": 3081 }, { "epoch": 1.1289790683827234, "grad_norm": 0.1759186721703438, "learning_rate": 3.6504348776150276e-05, "loss": 0.514, "num_tokens": 2355191552.0, "step": 3082 }, { "epoch": 1.129345486190629, "grad_norm": 0.1784517029297514, "learning_rate": 3.650181908926143e-05, "loss": 0.5338, "num_tokens": 2355933730.0, "step": 3083 }, { "epoch": 1.1297119039985344, "grad_norm": 0.1653507691448674, "learning_rate": 3.6499288585891265e-05, "loss": 0.5258, "num_tokens": 2356639742.0, "step": 3084 }, { "epoch": 1.1300783218064399, "grad_norm": 0.15698715764129523, "learning_rate": 3.649675726618225e-05, "loss": 0.4784, "num_tokens": 2357489908.0, "step": 3085 }, { "epoch": 1.1304447396143453, "grad_norm": 0.15884159056516373, "learning_rate": 3.649422513027689e-05, "loss": 0.5089, "num_tokens": 2358234428.0, "step": 3086 }, { "epoch": 1.1308111574222508, "grad_norm": 0.1577427822098448, "learning_rate": 3.6491692178317755e-05, "loss": 0.5111, "num_tokens": 2358962472.0, "step": 3087 }, { "epoch": 1.1311775752301563, "grad_norm": 0.15346640183733393, "learning_rate": 3.6489158410447475e-05, "loss": 0.4989, "num_tokens": 2359812792.0, "step": 3088 }, { "epoch": 1.1315439930380617, "grad_norm": 0.16739961066701103, "learning_rate": 3.6486623826808674e-05, "loss": 0.4984, "num_tokens": 2360612920.0, "step": 3089 }, { "epoch": 1.131910410845967, "grad_norm": 0.15478769644753634, "learning_rate": 3.648408842754409e-05, "loss": 0.5231, "num_tokens": 2361482067.0, "step": 3090 }, { "epoch": 1.1322768286538727, "grad_norm": 0.16755001790545332, "learning_rate": 3.648155221279644e-05, "loss": 0.5281, "num_tokens": 2362190636.0, "step": 3091 }, { "epoch": 1.132643246461778, "grad_norm": 0.15875307642517403, "learning_rate": 3.6479015182708526e-05, "loss": 0.5043, "num_tokens": 2362939989.0, "step": 3092 }, { "epoch": 1.1330096642696834, "grad_norm": 0.1592999838533204, "learning_rate": 3.6476477337423204e-05, "loss": 0.4877, "num_tokens": 2363722319.0, "step": 3093 }, { "epoch": 1.1333760820775889, "grad_norm": 0.14810064184665095, "learning_rate": 3.647393867708334e-05, "loss": 0.5047, "num_tokens": 2364517132.0, "step": 3094 }, { "epoch": 1.1337424998854944, "grad_norm": 0.18548294864243128, "learning_rate": 3.647139920183187e-05, "loss": 0.4965, "num_tokens": 2365238989.0, "step": 3095 }, { "epoch": 1.1341089176933998, "grad_norm": 0.16138837636838788, "learning_rate": 3.646885891181178e-05, "loss": 0.5176, "num_tokens": 2366120666.0, "step": 3096 }, { "epoch": 1.1344753355013053, "grad_norm": 0.20482098044095823, "learning_rate": 3.646631780716609e-05, "loss": 0.5203, "num_tokens": 2366856761.0, "step": 3097 }, { "epoch": 1.1348417533092108, "grad_norm": 0.17991827983443193, "learning_rate": 3.646377588803786e-05, "loss": 0.4987, "num_tokens": 2367717258.0, "step": 3098 }, { "epoch": 1.1352081711171162, "grad_norm": 0.1519415704555204, "learning_rate": 3.646123315457022e-05, "loss": 0.4859, "num_tokens": 2368556467.0, "step": 3099 }, { "epoch": 1.1355745889250217, "grad_norm": 0.1827287697462959, "learning_rate": 3.645868960690632e-05, "loss": 0.5073, "num_tokens": 2369260083.0, "step": 3100 }, { "epoch": 1.1359410067329272, "grad_norm": 0.1675875399348352, "learning_rate": 3.6456145245189375e-05, "loss": 0.485, "num_tokens": 2370001120.0, "step": 3101 }, { "epoch": 1.1363074245408327, "grad_norm": 0.14820793556195386, "learning_rate": 3.645360006956262e-05, "loss": 0.5087, "num_tokens": 2370708299.0, "step": 3102 }, { "epoch": 1.1366738423487381, "grad_norm": 0.16543493114913574, "learning_rate": 3.645105408016937e-05, "loss": 0.5099, "num_tokens": 2371445827.0, "step": 3103 }, { "epoch": 1.1370402601566436, "grad_norm": 0.19593758242136666, "learning_rate": 3.6448507277152976e-05, "loss": 0.5145, "num_tokens": 2372150715.0, "step": 3104 }, { "epoch": 1.137406677964549, "grad_norm": 0.13884280772229496, "learning_rate": 3.6445959660656816e-05, "loss": 0.4812, "num_tokens": 2372902281.0, "step": 3105 }, { "epoch": 1.1377730957724546, "grad_norm": 0.18977581677731306, "learning_rate": 3.644341123082431e-05, "loss": 0.4918, "num_tokens": 2373607278.0, "step": 3106 }, { "epoch": 1.13813951358036, "grad_norm": 0.18601127149983437, "learning_rate": 3.644086198779898e-05, "loss": 0.5291, "num_tokens": 2374335483.0, "step": 3107 }, { "epoch": 1.1385059313882655, "grad_norm": 0.15284617907226447, "learning_rate": 3.643831193172432e-05, "loss": 0.5199, "num_tokens": 2375066936.0, "step": 3108 }, { "epoch": 1.138872349196171, "grad_norm": 0.19523528328061565, "learning_rate": 3.643576106274391e-05, "loss": 0.4948, "num_tokens": 2375828790.0, "step": 3109 }, { "epoch": 1.1392387670040764, "grad_norm": 0.1692524124979597, "learning_rate": 3.643320938100138e-05, "loss": 0.5093, "num_tokens": 2376542451.0, "step": 3110 }, { "epoch": 1.139605184811982, "grad_norm": 0.1496343478149975, "learning_rate": 3.643065688664039e-05, "loss": 0.4929, "num_tokens": 2377391500.0, "step": 3111 }, { "epoch": 1.1399716026198874, "grad_norm": 0.225247754540048, "learning_rate": 3.6428103579804636e-05, "loss": 0.5549, "num_tokens": 2378071410.0, "step": 3112 }, { "epoch": 1.1403380204277929, "grad_norm": 0.17098560186670972, "learning_rate": 3.6425549460637895e-05, "loss": 0.5216, "num_tokens": 2378806300.0, "step": 3113 }, { "epoch": 1.1407044382356983, "grad_norm": 0.1828947830120226, "learning_rate": 3.6422994529283964e-05, "loss": 0.5301, "num_tokens": 2379522540.0, "step": 3114 }, { "epoch": 1.1410708560436038, "grad_norm": 0.16636054547604176, "learning_rate": 3.642043878588668e-05, "loss": 0.5043, "num_tokens": 2380333504.0, "step": 3115 }, { "epoch": 1.1414372738515093, "grad_norm": 0.18540764687956868, "learning_rate": 3.641788223058995e-05, "loss": 0.5408, "num_tokens": 2381015702.0, "step": 3116 }, { "epoch": 1.1418036916594145, "grad_norm": 0.1464239930772066, "learning_rate": 3.64153248635377e-05, "loss": 0.4906, "num_tokens": 2381830513.0, "step": 3117 }, { "epoch": 1.1421701094673202, "grad_norm": 0.14931273611147092, "learning_rate": 3.641276668487392e-05, "loss": 0.4689, "num_tokens": 2382514527.0, "step": 3118 }, { "epoch": 1.1425365272752255, "grad_norm": 0.1869784679907627, "learning_rate": 3.641020769474265e-05, "loss": 0.5007, "num_tokens": 2383331321.0, "step": 3119 }, { "epoch": 1.142902945083131, "grad_norm": 0.1636310503313159, "learning_rate": 3.6407647893287955e-05, "loss": 0.5591, "num_tokens": 2384136297.0, "step": 3120 }, { "epoch": 1.1432693628910364, "grad_norm": 0.15971877941213644, "learning_rate": 3.640508728065396e-05, "loss": 0.4937, "num_tokens": 2384916220.0, "step": 3121 }, { "epoch": 1.143635780698942, "grad_norm": 0.17453402978000312, "learning_rate": 3.640252585698483e-05, "loss": 0.4981, "num_tokens": 2385703558.0, "step": 3122 }, { "epoch": 1.1440021985068474, "grad_norm": 0.18154266016069026, "learning_rate": 3.639996362242479e-05, "loss": 0.5571, "num_tokens": 2386475361.0, "step": 3123 }, { "epoch": 1.1443686163147528, "grad_norm": 0.1485242603007214, "learning_rate": 3.639740057711809e-05, "loss": 0.518, "num_tokens": 2387235970.0, "step": 3124 }, { "epoch": 1.1447350341226583, "grad_norm": 0.17468763534445894, "learning_rate": 3.639483672120902e-05, "loss": 0.48, "num_tokens": 2387998804.0, "step": 3125 }, { "epoch": 1.1451014519305638, "grad_norm": 0.13886533468995638, "learning_rate": 3.639227205484196e-05, "loss": 0.5011, "num_tokens": 2388808356.0, "step": 3126 }, { "epoch": 1.1454678697384693, "grad_norm": 0.1454749653764975, "learning_rate": 3.638970657816128e-05, "loss": 0.488, "num_tokens": 2389551202.0, "step": 3127 }, { "epoch": 1.1458342875463747, "grad_norm": 0.1646702456106913, "learning_rate": 3.638714029131144e-05, "loss": 0.5123, "num_tokens": 2390315990.0, "step": 3128 }, { "epoch": 1.1462007053542802, "grad_norm": 0.1521032296681127, "learning_rate": 3.638457319443692e-05, "loss": 0.4914, "num_tokens": 2391006062.0, "step": 3129 }, { "epoch": 1.1465671231621857, "grad_norm": 0.1665801400900973, "learning_rate": 3.638200528768225e-05, "loss": 0.4909, "num_tokens": 2391723711.0, "step": 3130 }, { "epoch": 1.1469335409700911, "grad_norm": 0.16975090355595915, "learning_rate": 3.637943657119201e-05, "loss": 0.5107, "num_tokens": 2392420004.0, "step": 3131 }, { "epoch": 1.1472999587779966, "grad_norm": 0.16678172331677749, "learning_rate": 3.6376867045110824e-05, "loss": 0.4856, "num_tokens": 2393144390.0, "step": 3132 }, { "epoch": 1.147666376585902, "grad_norm": 0.19365666187233405, "learning_rate": 3.6374296709583357e-05, "loss": 0.497, "num_tokens": 2393984305.0, "step": 3133 }, { "epoch": 1.1480327943938076, "grad_norm": 0.14726308965546203, "learning_rate": 3.6371725564754336e-05, "loss": 0.4719, "num_tokens": 2394754176.0, "step": 3134 }, { "epoch": 1.148399212201713, "grad_norm": 0.1786863646768443, "learning_rate": 3.636915361076851e-05, "loss": 0.5057, "num_tokens": 2395568775.0, "step": 3135 }, { "epoch": 1.1487656300096185, "grad_norm": 0.20072607081727453, "learning_rate": 3.636658084777069e-05, "loss": 0.5333, "num_tokens": 2396269100.0, "step": 3136 }, { "epoch": 1.149132047817524, "grad_norm": 0.15600603947357178, "learning_rate": 3.636400727590573e-05, "loss": 0.5041, "num_tokens": 2397151229.0, "step": 3137 }, { "epoch": 1.1494984656254295, "grad_norm": 0.16803828793927006, "learning_rate": 3.6361432895318516e-05, "loss": 0.5008, "num_tokens": 2397926166.0, "step": 3138 }, { "epoch": 1.149864883433335, "grad_norm": 0.1920267072254678, "learning_rate": 3.6358857706154005e-05, "loss": 0.4983, "num_tokens": 2398755543.0, "step": 3139 }, { "epoch": 1.1502313012412404, "grad_norm": 0.15556392498347624, "learning_rate": 3.635628170855717e-05, "loss": 0.5265, "num_tokens": 2399465436.0, "step": 3140 }, { "epoch": 1.1505977190491459, "grad_norm": 0.15019862657157973, "learning_rate": 3.6353704902673066e-05, "loss": 0.5122, "num_tokens": 2400371323.0, "step": 3141 }, { "epoch": 1.1509641368570513, "grad_norm": 0.19180529873443303, "learning_rate": 3.635112728864676e-05, "loss": 0.5224, "num_tokens": 2401180003.0, "step": 3142 }, { "epoch": 1.1513305546649568, "grad_norm": 0.154481821713769, "learning_rate": 3.634854886662337e-05, "loss": 0.4857, "num_tokens": 2401949137.0, "step": 3143 }, { "epoch": 1.151696972472862, "grad_norm": 0.15553262160668163, "learning_rate": 3.6345969636748076e-05, "loss": 0.5107, "num_tokens": 2402675830.0, "step": 3144 }, { "epoch": 1.1520633902807678, "grad_norm": 0.15914675738754014, "learning_rate": 3.6343389599166094e-05, "loss": 0.4664, "num_tokens": 2403482995.0, "step": 3145 }, { "epoch": 1.152429808088673, "grad_norm": 0.15107849807105486, "learning_rate": 3.634080875402268e-05, "loss": 0.5005, "num_tokens": 2404294804.0, "step": 3146 }, { "epoch": 1.1527962258965785, "grad_norm": 0.16096195023811613, "learning_rate": 3.633822710146315e-05, "loss": 0.5125, "num_tokens": 2405006709.0, "step": 3147 }, { "epoch": 1.153162643704484, "grad_norm": 0.17571380762019423, "learning_rate": 3.6335644641632836e-05, "loss": 0.5339, "num_tokens": 2405758332.0, "step": 3148 }, { "epoch": 1.1535290615123894, "grad_norm": 0.16588167088100175, "learning_rate": 3.6333061374677156e-05, "loss": 0.506, "num_tokens": 2406502859.0, "step": 3149 }, { "epoch": 1.153895479320295, "grad_norm": 0.1695919123207216, "learning_rate": 3.633047730074155e-05, "loss": 0.5097, "num_tokens": 2407247492.0, "step": 3150 }, { "epoch": 1.1542618971282004, "grad_norm": 0.15843331131602884, "learning_rate": 3.63278924199715e-05, "loss": 0.5252, "num_tokens": 2407940332.0, "step": 3151 }, { "epoch": 1.1546283149361058, "grad_norm": 0.18244086557514927, "learning_rate": 3.632530673251254e-05, "loss": 0.586, "num_tokens": 2408641695.0, "step": 3152 }, { "epoch": 1.1549947327440113, "grad_norm": 0.16346090485640802, "learning_rate": 3.6322720238510246e-05, "loss": 0.5277, "num_tokens": 2409300490.0, "step": 3153 }, { "epoch": 1.1553611505519168, "grad_norm": 0.16055762093569978, "learning_rate": 3.632013293811026e-05, "loss": 0.5088, "num_tokens": 2410099515.0, "step": 3154 }, { "epoch": 1.1557275683598223, "grad_norm": 0.16784294169169886, "learning_rate": 3.6317544831458233e-05, "loss": 0.5144, "num_tokens": 2410934396.0, "step": 3155 }, { "epoch": 1.1560939861677277, "grad_norm": 0.1766779709618348, "learning_rate": 3.6314955918699894e-05, "loss": 0.5377, "num_tokens": 2411580094.0, "step": 3156 }, { "epoch": 1.1564604039756332, "grad_norm": 0.19029659035428187, "learning_rate": 3.6312366199981004e-05, "loss": 0.5159, "num_tokens": 2412497451.0, "step": 3157 }, { "epoch": 1.1568268217835387, "grad_norm": 0.1509540878483695, "learning_rate": 3.630977567544735e-05, "loss": 0.5066, "num_tokens": 2413235483.0, "step": 3158 }, { "epoch": 1.1571932395914442, "grad_norm": 0.160906306224681, "learning_rate": 3.63071843452448e-05, "loss": 0.5257, "num_tokens": 2413961180.0, "step": 3159 }, { "epoch": 1.1575596573993496, "grad_norm": 0.1778385373919443, "learning_rate": 3.6304592209519254e-05, "loss": 0.5228, "num_tokens": 2414700106.0, "step": 3160 }, { "epoch": 1.157926075207255, "grad_norm": 0.16152591110409134, "learning_rate": 3.630199926841665e-05, "loss": 0.5076, "num_tokens": 2415432197.0, "step": 3161 }, { "epoch": 1.1582924930151606, "grad_norm": 0.19387503206847734, "learning_rate": 3.6299405522082974e-05, "loss": 0.5212, "num_tokens": 2416183487.0, "step": 3162 }, { "epoch": 1.158658910823066, "grad_norm": 0.17572474024100876, "learning_rate": 3.629681097066425e-05, "loss": 0.5298, "num_tokens": 2416847850.0, "step": 3163 }, { "epoch": 1.1590253286309715, "grad_norm": 0.1807306033101281, "learning_rate": 3.6294215614306576e-05, "loss": 0.5251, "num_tokens": 2417502199.0, "step": 3164 }, { "epoch": 1.159391746438877, "grad_norm": 0.1848644231677983, "learning_rate": 3.629161945315606e-05, "loss": 0.5348, "num_tokens": 2418233246.0, "step": 3165 }, { "epoch": 1.1597581642467825, "grad_norm": 0.15308428425238182, "learning_rate": 3.628902248735888e-05, "loss": 0.493, "num_tokens": 2419124107.0, "step": 3166 }, { "epoch": 1.160124582054688, "grad_norm": 0.16458227529600652, "learning_rate": 3.6286424717061255e-05, "loss": 0.4964, "num_tokens": 2419860879.0, "step": 3167 }, { "epoch": 1.1604909998625934, "grad_norm": 0.15800572496121518, "learning_rate": 3.6283826142409424e-05, "loss": 0.4994, "num_tokens": 2420645989.0, "step": 3168 }, { "epoch": 1.1608574176704989, "grad_norm": 0.17070271151696603, "learning_rate": 3.628122676354971e-05, "loss": 0.5573, "num_tokens": 2421307358.0, "step": 3169 }, { "epoch": 1.1612238354784044, "grad_norm": 0.14088155106915218, "learning_rate": 3.627862658062846e-05, "loss": 0.4783, "num_tokens": 2422131530.0, "step": 3170 }, { "epoch": 1.1615902532863096, "grad_norm": 0.18539985172723197, "learning_rate": 3.627602559379206e-05, "loss": 0.5293, "num_tokens": 2422944055.0, "step": 3171 }, { "epoch": 1.1619566710942153, "grad_norm": 0.15632465723115882, "learning_rate": 3.627342380318697e-05, "loss": 0.5037, "num_tokens": 2423703898.0, "step": 3172 }, { "epoch": 1.1623230889021205, "grad_norm": 0.1608829865688931, "learning_rate": 3.627082120895965e-05, "loss": 0.5, "num_tokens": 2424449475.0, "step": 3173 }, { "epoch": 1.162689506710026, "grad_norm": 0.1796313117870203, "learning_rate": 3.626821781125666e-05, "loss": 0.4989, "num_tokens": 2425164959.0, "step": 3174 }, { "epoch": 1.1630559245179315, "grad_norm": 0.16039896447630578, "learning_rate": 3.6265613610224554e-05, "loss": 0.5275, "num_tokens": 2425949690.0, "step": 3175 }, { "epoch": 1.163422342325837, "grad_norm": 0.16630916189122213, "learning_rate": 3.626300860600996e-05, "loss": 0.5211, "num_tokens": 2426639621.0, "step": 3176 }, { "epoch": 1.1637887601337424, "grad_norm": 0.1519852093327594, "learning_rate": 3.626040279875956e-05, "loss": 0.484, "num_tokens": 2427396575.0, "step": 3177 }, { "epoch": 1.164155177941648, "grad_norm": 0.17605986086334124, "learning_rate": 3.6257796188620034e-05, "loss": 0.4855, "num_tokens": 2428117533.0, "step": 3178 }, { "epoch": 1.1645215957495534, "grad_norm": 0.15651702232014322, "learning_rate": 3.625518877573817e-05, "loss": 0.5176, "num_tokens": 2428914607.0, "step": 3179 }, { "epoch": 1.1648880135574589, "grad_norm": 0.16073671422654404, "learning_rate": 3.625258056026075e-05, "loss": 0.5152, "num_tokens": 2429695767.0, "step": 3180 }, { "epoch": 1.1652544313653643, "grad_norm": 0.17944566121135713, "learning_rate": 3.624997154233464e-05, "loss": 0.4914, "num_tokens": 2430441286.0, "step": 3181 }, { "epoch": 1.1656208491732698, "grad_norm": 0.16351204448486284, "learning_rate": 3.624736172210672e-05, "loss": 0.48, "num_tokens": 2431280291.0, "step": 3182 }, { "epoch": 1.1659872669811753, "grad_norm": 0.18589913022121396, "learning_rate": 3.6244751099723935e-05, "loss": 0.54, "num_tokens": 2431959087.0, "step": 3183 }, { "epoch": 1.1663536847890807, "grad_norm": 0.17691880502023263, "learning_rate": 3.624213967533325e-05, "loss": 0.5107, "num_tokens": 2432797257.0, "step": 3184 }, { "epoch": 1.1667201025969862, "grad_norm": 0.19141275416807893, "learning_rate": 3.623952744908172e-05, "loss": 0.5458, "num_tokens": 2433543856.0, "step": 3185 }, { "epoch": 1.1670865204048917, "grad_norm": 0.17589402485940522, "learning_rate": 3.6236914421116406e-05, "loss": 0.504, "num_tokens": 2434362949.0, "step": 3186 }, { "epoch": 1.1674529382127972, "grad_norm": 0.14622161392276692, "learning_rate": 3.6234300591584425e-05, "loss": 0.4688, "num_tokens": 2435098491.0, "step": 3187 }, { "epoch": 1.1678193560207026, "grad_norm": 0.19241115665068242, "learning_rate": 3.623168596063295e-05, "loss": 0.5158, "num_tokens": 2435798937.0, "step": 3188 }, { "epoch": 1.168185773828608, "grad_norm": 0.16788138620235166, "learning_rate": 3.622907052840917e-05, "loss": 0.4924, "num_tokens": 2436696747.0, "step": 3189 }, { "epoch": 1.1685521916365136, "grad_norm": 0.17102674485842187, "learning_rate": 3.6226454295060367e-05, "loss": 0.5088, "num_tokens": 2437456425.0, "step": 3190 }, { "epoch": 1.168918609444419, "grad_norm": 0.1689156468879589, "learning_rate": 3.6223837260733816e-05, "loss": 0.5253, "num_tokens": 2438232371.0, "step": 3191 }, { "epoch": 1.1692850272523245, "grad_norm": 0.17312759174983264, "learning_rate": 3.6221219425576875e-05, "loss": 0.4937, "num_tokens": 2438997060.0, "step": 3192 }, { "epoch": 1.16965144506023, "grad_norm": 0.1484085148208272, "learning_rate": 3.621860078973692e-05, "loss": 0.4927, "num_tokens": 2439728688.0, "step": 3193 }, { "epoch": 1.1700178628681355, "grad_norm": 0.17436909075826865, "learning_rate": 3.62159813533614e-05, "loss": 0.4749, "num_tokens": 2440413602.0, "step": 3194 }, { "epoch": 1.170384280676041, "grad_norm": 0.16129438811429198, "learning_rate": 3.6213361116597786e-05, "loss": 0.4959, "num_tokens": 2441058744.0, "step": 3195 }, { "epoch": 1.1707506984839462, "grad_norm": 0.1722061075492763, "learning_rate": 3.621074007959361e-05, "loss": 0.5397, "num_tokens": 2441858676.0, "step": 3196 }, { "epoch": 1.171117116291852, "grad_norm": 0.16924229880574473, "learning_rate": 3.620811824249643e-05, "loss": 0.4891, "num_tokens": 2442588763.0, "step": 3197 }, { "epoch": 1.1714835340997571, "grad_norm": 0.18320698554246895, "learning_rate": 3.6205495605453876e-05, "loss": 0.5328, "num_tokens": 2443327128.0, "step": 3198 }, { "epoch": 1.1718499519076628, "grad_norm": 0.1816339107538734, "learning_rate": 3.620287216861359e-05, "loss": 0.5374, "num_tokens": 2444027860.0, "step": 3199 }, { "epoch": 1.172216369715568, "grad_norm": 0.1816728957910666, "learning_rate": 3.62002479321233e-05, "loss": 0.5157, "num_tokens": 2444819809.0, "step": 3200 }, { "epoch": 1.1725827875234736, "grad_norm": 0.1559898072115348, "learning_rate": 3.6197622896130725e-05, "loss": 0.471, "num_tokens": 2445485246.0, "step": 3201 }, { "epoch": 1.172949205331379, "grad_norm": 0.1906473500963141, "learning_rate": 3.619499706078369e-05, "loss": 0.5042, "num_tokens": 2446179962.0, "step": 3202 }, { "epoch": 1.1733156231392845, "grad_norm": 0.1744090413330248, "learning_rate": 3.6192370426230015e-05, "loss": 0.4891, "num_tokens": 2446888649.0, "step": 3203 }, { "epoch": 1.17368204094719, "grad_norm": 0.1648649627180224, "learning_rate": 3.618974299261759e-05, "loss": 0.5043, "num_tokens": 2447638748.0, "step": 3204 }, { "epoch": 1.1740484587550954, "grad_norm": 0.16918743274865397, "learning_rate": 3.618711476009435e-05, "loss": 0.5218, "num_tokens": 2448409244.0, "step": 3205 }, { "epoch": 1.174414876563001, "grad_norm": 0.1786376094065329, "learning_rate": 3.618448572880826e-05, "loss": 0.5335, "num_tokens": 2449107579.0, "step": 3206 }, { "epoch": 1.1747812943709064, "grad_norm": 0.1605317488308044, "learning_rate": 3.618185589890735e-05, "loss": 0.5126, "num_tokens": 2449966251.0, "step": 3207 }, { "epoch": 1.1751477121788119, "grad_norm": 0.18623193549190684, "learning_rate": 3.6179225270539676e-05, "loss": 0.5079, "num_tokens": 2450863856.0, "step": 3208 }, { "epoch": 1.1755141299867173, "grad_norm": 0.16207497217236252, "learning_rate": 3.617659384385335e-05, "loss": 0.5225, "num_tokens": 2451624482.0, "step": 3209 }, { "epoch": 1.1758805477946228, "grad_norm": 0.18962998013940127, "learning_rate": 3.6173961618996536e-05, "loss": 0.4972, "num_tokens": 2452320767.0, "step": 3210 }, { "epoch": 1.1762469656025283, "grad_norm": 0.17893691930744554, "learning_rate": 3.617132859611742e-05, "loss": 0.459, "num_tokens": 2453040959.0, "step": 3211 }, { "epoch": 1.1766133834104338, "grad_norm": 0.15799504052478244, "learning_rate": 3.616869477536425e-05, "loss": 0.5181, "num_tokens": 2453757713.0, "step": 3212 }, { "epoch": 1.1769798012183392, "grad_norm": 0.17877119536032535, "learning_rate": 3.616606015688532e-05, "loss": 0.4718, "num_tokens": 2454631907.0, "step": 3213 }, { "epoch": 1.1773462190262447, "grad_norm": 0.170058593511498, "learning_rate": 3.616342474082896e-05, "loss": 0.5326, "num_tokens": 2455474869.0, "step": 3214 }, { "epoch": 1.1777126368341502, "grad_norm": 0.16887159006488425, "learning_rate": 3.616078852734356e-05, "loss": 0.5419, "num_tokens": 2456297213.0, "step": 3215 }, { "epoch": 1.1780790546420556, "grad_norm": 0.17026385132384883, "learning_rate": 3.615815151657752e-05, "loss": 0.5194, "num_tokens": 2456953190.0, "step": 3216 }, { "epoch": 1.1784454724499611, "grad_norm": 0.16817337878237354, "learning_rate": 3.615551370867933e-05, "loss": 0.5149, "num_tokens": 2457684206.0, "step": 3217 }, { "epoch": 1.1788118902578666, "grad_norm": 0.16623336436071742, "learning_rate": 3.61528751037975e-05, "loss": 0.5224, "num_tokens": 2458406353.0, "step": 3218 }, { "epoch": 1.179178308065772, "grad_norm": 0.1614840410745612, "learning_rate": 3.615023570208059e-05, "loss": 0.5072, "num_tokens": 2459160252.0, "step": 3219 }, { "epoch": 1.1795447258736775, "grad_norm": 0.15400061371456336, "learning_rate": 3.614759550367719e-05, "loss": 0.4862, "num_tokens": 2459889238.0, "step": 3220 }, { "epoch": 1.179911143681583, "grad_norm": 0.1429469444996067, "learning_rate": 3.6144954508735965e-05, "loss": 0.4843, "num_tokens": 2460600404.0, "step": 3221 }, { "epoch": 1.1802775614894885, "grad_norm": 0.156023340689515, "learning_rate": 3.61423127174056e-05, "loss": 0.4793, "num_tokens": 2461336560.0, "step": 3222 }, { "epoch": 1.1806439792973937, "grad_norm": 0.1371193518163066, "learning_rate": 3.613967012983484e-05, "loss": 0.4762, "num_tokens": 2462143779.0, "step": 3223 }, { "epoch": 1.1810103971052994, "grad_norm": 0.16661701147953958, "learning_rate": 3.613702674617245e-05, "loss": 0.5151, "num_tokens": 2462891501.0, "step": 3224 }, { "epoch": 1.1813768149132047, "grad_norm": 0.1600105994503114, "learning_rate": 3.613438256656728e-05, "loss": 0.5531, "num_tokens": 2463699851.0, "step": 3225 }, { "epoch": 1.1817432327211104, "grad_norm": 0.16488101966374882, "learning_rate": 3.613173759116819e-05, "loss": 0.5174, "num_tokens": 2464450512.0, "step": 3226 }, { "epoch": 1.1821096505290156, "grad_norm": 0.1755644819558573, "learning_rate": 3.612909182012411e-05, "loss": 0.5035, "num_tokens": 2465127478.0, "step": 3227 }, { "epoch": 1.182476068336921, "grad_norm": 0.14602767504927616, "learning_rate": 3.6126445253583975e-05, "loss": 0.5137, "num_tokens": 2466017858.0, "step": 3228 }, { "epoch": 1.1828424861448266, "grad_norm": 0.16425099527764642, "learning_rate": 3.612379789169682e-05, "loss": 0.5145, "num_tokens": 2466734802.0, "step": 3229 }, { "epoch": 1.183208903952732, "grad_norm": 0.1451945454489232, "learning_rate": 3.612114973461169e-05, "loss": 0.4866, "num_tokens": 2467545066.0, "step": 3230 }, { "epoch": 1.1835753217606375, "grad_norm": 0.14515602203378034, "learning_rate": 3.611850078247767e-05, "loss": 0.4973, "num_tokens": 2468334122.0, "step": 3231 }, { "epoch": 1.183941739568543, "grad_norm": 0.15123350809475156, "learning_rate": 3.611585103544391e-05, "loss": 0.5087, "num_tokens": 2469046662.0, "step": 3232 }, { "epoch": 1.1843081573764485, "grad_norm": 0.15038855632300438, "learning_rate": 3.61132004936596e-05, "loss": 0.4926, "num_tokens": 2469748367.0, "step": 3233 }, { "epoch": 1.184674575184354, "grad_norm": 0.14441603792099272, "learning_rate": 3.611054915727396e-05, "loss": 0.4998, "num_tokens": 2470477579.0, "step": 3234 }, { "epoch": 1.1850409929922594, "grad_norm": 0.15668378763805288, "learning_rate": 3.6107897026436277e-05, "loss": 0.5301, "num_tokens": 2471154106.0, "step": 3235 }, { "epoch": 1.1854074108001649, "grad_norm": 0.1592220494746894, "learning_rate": 3.6105244101295865e-05, "loss": 0.5292, "num_tokens": 2472024489.0, "step": 3236 }, { "epoch": 1.1857738286080703, "grad_norm": 0.15823244473134793, "learning_rate": 3.610259038200209e-05, "loss": 0.4964, "num_tokens": 2472768925.0, "step": 3237 }, { "epoch": 1.1861402464159758, "grad_norm": 0.15220320396226228, "learning_rate": 3.609993586870436e-05, "loss": 0.5016, "num_tokens": 2473590909.0, "step": 3238 }, { "epoch": 1.1865066642238813, "grad_norm": 0.16027203132642961, "learning_rate": 3.609728056155213e-05, "loss": 0.5387, "num_tokens": 2474391813.0, "step": 3239 }, { "epoch": 1.1868730820317868, "grad_norm": 0.18353730997311077, "learning_rate": 3.6094624460694895e-05, "loss": 0.5288, "num_tokens": 2475173934.0, "step": 3240 }, { "epoch": 1.1872394998396922, "grad_norm": 0.1357923742833776, "learning_rate": 3.609196756628221e-05, "loss": 0.5009, "num_tokens": 2476104595.0, "step": 3241 }, { "epoch": 1.1876059176475977, "grad_norm": 0.17783906610450817, "learning_rate": 3.608930987846366e-05, "loss": 0.4828, "num_tokens": 2476941561.0, "step": 3242 }, { "epoch": 1.1879723354555032, "grad_norm": 0.14618992161865904, "learning_rate": 3.608665139738887e-05, "loss": 0.4963, "num_tokens": 2477662059.0, "step": 3243 }, { "epoch": 1.1883387532634087, "grad_norm": 0.1643809249312429, "learning_rate": 3.608399212320753e-05, "loss": 0.4959, "num_tokens": 2478409758.0, "step": 3244 }, { "epoch": 1.1887051710713141, "grad_norm": 0.15912295640970622, "learning_rate": 3.608133205606935e-05, "loss": 0.4923, "num_tokens": 2479189839.0, "step": 3245 }, { "epoch": 1.1890715888792196, "grad_norm": 0.15402210835688662, "learning_rate": 3.60786711961241e-05, "loss": 0.5051, "num_tokens": 2479926193.0, "step": 3246 }, { "epoch": 1.189438006687125, "grad_norm": 0.15921922111575118, "learning_rate": 3.60760095435216e-05, "loss": 0.5033, "num_tokens": 2480800072.0, "step": 3247 }, { "epoch": 1.1898044244950305, "grad_norm": 0.19273688504358236, "learning_rate": 3.60733470984117e-05, "loss": 0.5119, "num_tokens": 2481444026.0, "step": 3248 }, { "epoch": 1.190170842302936, "grad_norm": 0.15152841582817955, "learning_rate": 3.607068386094429e-05, "loss": 0.5369, "num_tokens": 2482201359.0, "step": 3249 }, { "epoch": 1.1905372601108413, "grad_norm": 0.1659248509871374, "learning_rate": 3.6068019831269345e-05, "loss": 0.4989, "num_tokens": 2482945600.0, "step": 3250 }, { "epoch": 1.190903677918747, "grad_norm": 0.18108673383870344, "learning_rate": 3.606535500953683e-05, "loss": 0.5163, "num_tokens": 2483667774.0, "step": 3251 }, { "epoch": 1.1912700957266522, "grad_norm": 0.14601167623851116, "learning_rate": 3.606268939589678e-05, "loss": 0.4897, "num_tokens": 2484562201.0, "step": 3252 }, { "epoch": 1.1916365135345577, "grad_norm": 0.19679685688710474, "learning_rate": 3.606002299049929e-05, "loss": 0.51, "num_tokens": 2485271748.0, "step": 3253 }, { "epoch": 1.1920029313424632, "grad_norm": 0.16558000930604835, "learning_rate": 3.6057355793494476e-05, "loss": 0.4879, "num_tokens": 2486067721.0, "step": 3254 }, { "epoch": 1.1923693491503686, "grad_norm": 0.15501218373423195, "learning_rate": 3.60546878050325e-05, "loss": 0.5415, "num_tokens": 2486773941.0, "step": 3255 }, { "epoch": 1.192735766958274, "grad_norm": 0.18120606220836252, "learning_rate": 3.605201902526358e-05, "loss": 0.5121, "num_tokens": 2487584743.0, "step": 3256 }, { "epoch": 1.1931021847661796, "grad_norm": 0.16926794121744543, "learning_rate": 3.6049349454337985e-05, "loss": 0.5357, "num_tokens": 2488354997.0, "step": 3257 }, { "epoch": 1.193468602574085, "grad_norm": 0.17101418196727228, "learning_rate": 3.6046679092406007e-05, "loss": 0.5488, "num_tokens": 2489058378.0, "step": 3258 }, { "epoch": 1.1938350203819905, "grad_norm": 0.16167050514353182, "learning_rate": 3.604400793961798e-05, "loss": 0.5046, "num_tokens": 2489779492.0, "step": 3259 }, { "epoch": 1.194201438189896, "grad_norm": 0.162479632498331, "learning_rate": 3.604133599612431e-05, "loss": 0.5016, "num_tokens": 2490488031.0, "step": 3260 }, { "epoch": 1.1945678559978015, "grad_norm": 0.1607130799435705, "learning_rate": 3.603866326207543e-05, "loss": 0.52, "num_tokens": 2491223415.0, "step": 3261 }, { "epoch": 1.194934273805707, "grad_norm": 0.16577007633066612, "learning_rate": 3.603598973762183e-05, "loss": 0.51, "num_tokens": 2491982914.0, "step": 3262 }, { "epoch": 1.1953006916136124, "grad_norm": 0.1676297594866388, "learning_rate": 3.6033315422914024e-05, "loss": 0.5238, "num_tokens": 2492775113.0, "step": 3263 }, { "epoch": 1.1956671094215179, "grad_norm": 0.1631291894647977, "learning_rate": 3.6030640318102576e-05, "loss": 0.5121, "num_tokens": 2493652053.0, "step": 3264 }, { "epoch": 1.1960335272294234, "grad_norm": 0.14340902833048666, "learning_rate": 3.602796442333811e-05, "loss": 0.5009, "num_tokens": 2494535034.0, "step": 3265 }, { "epoch": 1.1963999450373288, "grad_norm": 0.19520828838567258, "learning_rate": 3.602528773877129e-05, "loss": 0.5114, "num_tokens": 2495199863.0, "step": 3266 }, { "epoch": 1.1967663628452343, "grad_norm": 0.20334167959046212, "learning_rate": 3.602261026455279e-05, "loss": 0.5182, "num_tokens": 2495983893.0, "step": 3267 }, { "epoch": 1.1971327806531398, "grad_norm": 0.14458248105831759, "learning_rate": 3.601993200083339e-05, "loss": 0.5027, "num_tokens": 2496900435.0, "step": 3268 }, { "epoch": 1.1974991984610452, "grad_norm": 0.17436937339553793, "learning_rate": 3.601725294776386e-05, "loss": 0.5202, "num_tokens": 2497705462.0, "step": 3269 }, { "epoch": 1.1978656162689507, "grad_norm": 0.17689453060568294, "learning_rate": 3.601457310549505e-05, "loss": 0.4971, "num_tokens": 2498470719.0, "step": 3270 }, { "epoch": 1.1982320340768562, "grad_norm": 0.14725145568842626, "learning_rate": 3.601189247417784e-05, "loss": 0.5129, "num_tokens": 2499170039.0, "step": 3271 }, { "epoch": 1.1985984518847617, "grad_norm": 0.16713265812790454, "learning_rate": 3.6009211053963145e-05, "loss": 0.5326, "num_tokens": 2499925654.0, "step": 3272 }, { "epoch": 1.1989648696926671, "grad_norm": 0.17656728640296496, "learning_rate": 3.6006528845001937e-05, "loss": 0.5425, "num_tokens": 2500592898.0, "step": 3273 }, { "epoch": 1.1993312875005726, "grad_norm": 0.15406472775801736, "learning_rate": 3.600384584744523e-05, "loss": 0.5134, "num_tokens": 2501330075.0, "step": 3274 }, { "epoch": 1.199697705308478, "grad_norm": 0.17509361387930042, "learning_rate": 3.600116206144408e-05, "loss": 0.5239, "num_tokens": 2502109431.0, "step": 3275 }, { "epoch": 1.2000641231163836, "grad_norm": 0.16212113764953393, "learning_rate": 3.59984774871496e-05, "loss": 0.5084, "num_tokens": 2502859803.0, "step": 3276 }, { "epoch": 1.2004305409242888, "grad_norm": 0.16864961655579708, "learning_rate": 3.5995792124712926e-05, "loss": 0.478, "num_tokens": 2503608599.0, "step": 3277 }, { "epoch": 1.2007969587321945, "grad_norm": 0.1403343922893296, "learning_rate": 3.5993105974285254e-05, "loss": 0.519, "num_tokens": 2504390376.0, "step": 3278 }, { "epoch": 1.2011633765400997, "grad_norm": 0.16003925300370264, "learning_rate": 3.599041903601782e-05, "loss": 0.5083, "num_tokens": 2505249673.0, "step": 3279 }, { "epoch": 1.2015297943480052, "grad_norm": 0.17492117287186726, "learning_rate": 3.5987731310061905e-05, "loss": 0.5439, "num_tokens": 2505935940.0, "step": 3280 }, { "epoch": 1.2018962121559107, "grad_norm": 0.15569355714459884, "learning_rate": 3.598504279656883e-05, "loss": 0.5135, "num_tokens": 2506725229.0, "step": 3281 }, { "epoch": 1.2022626299638162, "grad_norm": 0.18444872742260185, "learning_rate": 3.598235349568996e-05, "loss": 0.4927, "num_tokens": 2507495417.0, "step": 3282 }, { "epoch": 1.2026290477717216, "grad_norm": 0.14465000510057252, "learning_rate": 3.597966340757672e-05, "loss": 0.4836, "num_tokens": 2508352615.0, "step": 3283 }, { "epoch": 1.202995465579627, "grad_norm": 0.17607964344817792, "learning_rate": 3.597697253238055e-05, "loss": 0.4944, "num_tokens": 2509087294.0, "step": 3284 }, { "epoch": 1.2033618833875326, "grad_norm": 0.17810849619910432, "learning_rate": 3.597428087025297e-05, "loss": 0.5263, "num_tokens": 2509762982.0, "step": 3285 }, { "epoch": 1.203728301195438, "grad_norm": 0.17574623353985305, "learning_rate": 3.597158842134552e-05, "loss": 0.5157, "num_tokens": 2510426032.0, "step": 3286 }, { "epoch": 1.2040947190033435, "grad_norm": 0.20446719487878284, "learning_rate": 3.596889518580978e-05, "loss": 0.5165, "num_tokens": 2511087296.0, "step": 3287 }, { "epoch": 1.204461136811249, "grad_norm": 0.15678876084536916, "learning_rate": 3.59662011637974e-05, "loss": 0.5188, "num_tokens": 2511829728.0, "step": 3288 }, { "epoch": 1.2048275546191545, "grad_norm": 0.21997044059589965, "learning_rate": 3.5963506355460045e-05, "loss": 0.5424, "num_tokens": 2512553767.0, "step": 3289 }, { "epoch": 1.20519397242706, "grad_norm": 0.1827812220369593, "learning_rate": 3.596081076094945e-05, "loss": 0.5109, "num_tokens": 2513288054.0, "step": 3290 }, { "epoch": 1.2055603902349654, "grad_norm": 0.1622342538151089, "learning_rate": 3.595811438041738e-05, "loss": 0.4937, "num_tokens": 2513993928.0, "step": 3291 }, { "epoch": 1.205926808042871, "grad_norm": 0.190586472352596, "learning_rate": 3.595541721401564e-05, "loss": 0.5021, "num_tokens": 2514810008.0, "step": 3292 }, { "epoch": 1.2062932258507764, "grad_norm": 0.1795357008210429, "learning_rate": 3.5952719261896086e-05, "loss": 0.511, "num_tokens": 2515617647.0, "step": 3293 }, { "epoch": 1.2066596436586818, "grad_norm": 0.15412867067858577, "learning_rate": 3.595002052421062e-05, "loss": 0.497, "num_tokens": 2516477692.0, "step": 3294 }, { "epoch": 1.2070260614665873, "grad_norm": 0.15927112760582066, "learning_rate": 3.594732100111119e-05, "loss": 0.4912, "num_tokens": 2517304534.0, "step": 3295 }, { "epoch": 1.2073924792744928, "grad_norm": 0.16719701548556015, "learning_rate": 3.594462069274979e-05, "loss": 0.5073, "num_tokens": 2518013369.0, "step": 3296 }, { "epoch": 1.2077588970823983, "grad_norm": 0.17216862686487044, "learning_rate": 3.594191959927843e-05, "loss": 0.5106, "num_tokens": 2518652458.0, "step": 3297 }, { "epoch": 1.2081253148903037, "grad_norm": 0.17862018997912962, "learning_rate": 3.5939217720849205e-05, "loss": 0.5451, "num_tokens": 2519353690.0, "step": 3298 }, { "epoch": 1.2084917326982092, "grad_norm": 0.1817971846018965, "learning_rate": 3.593651505761424e-05, "loss": 0.5543, "num_tokens": 2520050004.0, "step": 3299 }, { "epoch": 1.2088581505061147, "grad_norm": 0.16893518876255473, "learning_rate": 3.593381160972569e-05, "loss": 0.4999, "num_tokens": 2520789249.0, "step": 3300 }, { "epoch": 1.2092245683140201, "grad_norm": 0.15133999157214667, "learning_rate": 3.593110737733577e-05, "loss": 0.5281, "num_tokens": 2521547507.0, "step": 3301 }, { "epoch": 1.2095909861219256, "grad_norm": 0.18302972160593536, "learning_rate": 3.5928402360596724e-05, "loss": 0.5141, "num_tokens": 2522248334.0, "step": 3302 }, { "epoch": 1.209957403929831, "grad_norm": 0.1714692569528523, "learning_rate": 3.592569655966086e-05, "loss": 0.5019, "num_tokens": 2523039741.0, "step": 3303 }, { "epoch": 1.2103238217377363, "grad_norm": 0.16167401845026425, "learning_rate": 3.5922989974680526e-05, "loss": 0.5158, "num_tokens": 2523833614.0, "step": 3304 }, { "epoch": 1.210690239545642, "grad_norm": 0.16163734722426581, "learning_rate": 3.5920282605808094e-05, "loss": 0.4993, "num_tokens": 2524585893.0, "step": 3305 }, { "epoch": 1.2110566573535473, "grad_norm": 0.13897282798770552, "learning_rate": 3.5917574453196e-05, "loss": 0.5053, "num_tokens": 2525422354.0, "step": 3306 }, { "epoch": 1.2114230751614528, "grad_norm": 0.16102952124003447, "learning_rate": 3.591486551699671e-05, "loss": 0.4894, "num_tokens": 2526219022.0, "step": 3307 }, { "epoch": 1.2117894929693582, "grad_norm": 0.14236184779245292, "learning_rate": 3.591215579736276e-05, "loss": 0.505, "num_tokens": 2526958902.0, "step": 3308 }, { "epoch": 1.2121559107772637, "grad_norm": 0.143817178756103, "learning_rate": 3.5909445294446706e-05, "loss": 0.5039, "num_tokens": 2527726360.0, "step": 3309 }, { "epoch": 1.2125223285851692, "grad_norm": 0.15240609307961067, "learning_rate": 3.590673400840114e-05, "loss": 0.4926, "num_tokens": 2528423193.0, "step": 3310 }, { "epoch": 1.2128887463930746, "grad_norm": 0.13822381767161915, "learning_rate": 3.590402193937874e-05, "loss": 0.4934, "num_tokens": 2529291917.0, "step": 3311 }, { "epoch": 1.2132551642009801, "grad_norm": 0.15540489130301405, "learning_rate": 3.5901309087532174e-05, "loss": 0.5282, "num_tokens": 2530028105.0, "step": 3312 }, { "epoch": 1.2136215820088856, "grad_norm": 0.14685306181383323, "learning_rate": 3.5898595453014195e-05, "loss": 0.4884, "num_tokens": 2530933584.0, "step": 3313 }, { "epoch": 1.213987999816791, "grad_norm": 0.14416734895642785, "learning_rate": 3.5895881035977587e-05, "loss": 0.4872, "num_tokens": 2531763377.0, "step": 3314 }, { "epoch": 1.2143544176246965, "grad_norm": 0.15572483071367746, "learning_rate": 3.589316583657517e-05, "loss": 0.5049, "num_tokens": 2532561351.0, "step": 3315 }, { "epoch": 1.214720835432602, "grad_norm": 0.1474931584128372, "learning_rate": 3.589044985495982e-05, "loss": 0.4897, "num_tokens": 2533378296.0, "step": 3316 }, { "epoch": 1.2150872532405075, "grad_norm": 0.1619982383994857, "learning_rate": 3.588773309128445e-05, "loss": 0.5378, "num_tokens": 2534152037.0, "step": 3317 }, { "epoch": 1.215453671048413, "grad_norm": 0.15600364503252365, "learning_rate": 3.588501554570202e-05, "loss": 0.5154, "num_tokens": 2535000173.0, "step": 3318 }, { "epoch": 1.2158200888563184, "grad_norm": 0.1377838053401006, "learning_rate": 3.588229721836554e-05, "loss": 0.4955, "num_tokens": 2535881268.0, "step": 3319 }, { "epoch": 1.216186506664224, "grad_norm": 0.15756432839565315, "learning_rate": 3.5879578109428045e-05, "loss": 0.5131, "num_tokens": 2536703193.0, "step": 3320 }, { "epoch": 1.2165529244721294, "grad_norm": 0.15819047557577723, "learning_rate": 3.587685821904263e-05, "loss": 0.5151, "num_tokens": 2537496793.0, "step": 3321 }, { "epoch": 1.2169193422800348, "grad_norm": 0.15172392999857023, "learning_rate": 3.587413754736243e-05, "loss": 0.5002, "num_tokens": 2538308379.0, "step": 3322 }, { "epoch": 1.2172857600879403, "grad_norm": 0.1404363849524943, "learning_rate": 3.587141609454064e-05, "loss": 0.4597, "num_tokens": 2538917327.0, "step": 3323 }, { "epoch": 1.2176521778958458, "grad_norm": 0.18121533569843795, "learning_rate": 3.586869386073046e-05, "loss": 0.5334, "num_tokens": 2539748907.0, "step": 3324 }, { "epoch": 1.2180185957037513, "grad_norm": 0.1634177340164503, "learning_rate": 3.586597084608517e-05, "loss": 0.5224, "num_tokens": 2540551839.0, "step": 3325 }, { "epoch": 1.2183850135116567, "grad_norm": 0.16356235852102866, "learning_rate": 3.586324705075807e-05, "loss": 0.522, "num_tokens": 2541409968.0, "step": 3326 }, { "epoch": 1.2187514313195622, "grad_norm": 0.15670278400942483, "learning_rate": 3.5860522474902534e-05, "loss": 0.4949, "num_tokens": 2542224113.0, "step": 3327 }, { "epoch": 1.2191178491274677, "grad_norm": 0.1518830223359398, "learning_rate": 3.5857797118671944e-05, "loss": 0.5106, "num_tokens": 2542971786.0, "step": 3328 }, { "epoch": 1.2194842669353732, "grad_norm": 0.18769108944000484, "learning_rate": 3.585507098221975e-05, "loss": 0.5329, "num_tokens": 2543741572.0, "step": 3329 }, { "epoch": 1.2198506847432786, "grad_norm": 0.16588943379816037, "learning_rate": 3.585234406569944e-05, "loss": 0.4775, "num_tokens": 2544537627.0, "step": 3330 }, { "epoch": 1.2202171025511839, "grad_norm": 0.1564421983388034, "learning_rate": 3.584961636926454e-05, "loss": 0.4883, "num_tokens": 2545222735.0, "step": 3331 }, { "epoch": 1.2205835203590896, "grad_norm": 0.1679610227961509, "learning_rate": 3.5846887893068634e-05, "loss": 0.4905, "num_tokens": 2545961430.0, "step": 3332 }, { "epoch": 1.2209499381669948, "grad_norm": 0.1563343057167998, "learning_rate": 3.584415863726533e-05, "loss": 0.4698, "num_tokens": 2546620912.0, "step": 3333 }, { "epoch": 1.2213163559749003, "grad_norm": 0.16342766807012116, "learning_rate": 3.584142860200829e-05, "loss": 0.5002, "num_tokens": 2547379320.0, "step": 3334 }, { "epoch": 1.2216827737828058, "grad_norm": 0.14597185759695092, "learning_rate": 3.583869778745123e-05, "loss": 0.486, "num_tokens": 2548225594.0, "step": 3335 }, { "epoch": 1.2220491915907112, "grad_norm": 0.18582170614760926, "learning_rate": 3.58359661937479e-05, "loss": 0.5278, "num_tokens": 2549129908.0, "step": 3336 }, { "epoch": 1.2224156093986167, "grad_norm": 0.14638742472484484, "learning_rate": 3.583323382105208e-05, "loss": 0.4668, "num_tokens": 2550000152.0, "step": 3337 }, { "epoch": 1.2227820272065222, "grad_norm": 0.14802640709879256, "learning_rate": 3.5830500669517625e-05, "loss": 0.4904, "num_tokens": 2550801592.0, "step": 3338 }, { "epoch": 1.2231484450144277, "grad_norm": 0.14199381010398496, "learning_rate": 3.582776673929841e-05, "loss": 0.5033, "num_tokens": 2551487474.0, "step": 3339 }, { "epoch": 1.2235148628223331, "grad_norm": 0.14932178931508128, "learning_rate": 3.5825032030548356e-05, "loss": 0.5254, "num_tokens": 2552244344.0, "step": 3340 }, { "epoch": 1.2238812806302386, "grad_norm": 0.15088941050540638, "learning_rate": 3.582229654342144e-05, "loss": 0.5067, "num_tokens": 2553009598.0, "step": 3341 }, { "epoch": 1.224247698438144, "grad_norm": 0.14123707176435069, "learning_rate": 3.581956027807167e-05, "loss": 0.5208, "num_tokens": 2553847297.0, "step": 3342 }, { "epoch": 1.2246141162460495, "grad_norm": 0.13432728296228483, "learning_rate": 3.581682323465311e-05, "loss": 0.4906, "num_tokens": 2554637516.0, "step": 3343 }, { "epoch": 1.224980534053955, "grad_norm": 0.15669611268018965, "learning_rate": 3.5814085413319856e-05, "loss": 0.5333, "num_tokens": 2555392790.0, "step": 3344 }, { "epoch": 1.2253469518618605, "grad_norm": 0.14618560390482058, "learning_rate": 3.581134681422605e-05, "loss": 0.5431, "num_tokens": 2556133661.0, "step": 3345 }, { "epoch": 1.225713369669766, "grad_norm": 0.14006813548537683, "learning_rate": 3.5808607437525884e-05, "loss": 0.4808, "num_tokens": 2557007683.0, "step": 3346 }, { "epoch": 1.2260797874776714, "grad_norm": 0.1515264163103856, "learning_rate": 3.58058672833736e-05, "loss": 0.509, "num_tokens": 2557809001.0, "step": 3347 }, { "epoch": 1.226446205285577, "grad_norm": 0.14962416147909388, "learning_rate": 3.580312635192345e-05, "loss": 0.4898, "num_tokens": 2558527139.0, "step": 3348 }, { "epoch": 1.2268126230934824, "grad_norm": 0.15262738133812676, "learning_rate": 3.580038464332977e-05, "loss": 0.5027, "num_tokens": 2559362001.0, "step": 3349 }, { "epoch": 1.2271790409013879, "grad_norm": 0.14811661604167664, "learning_rate": 3.579764215774692e-05, "loss": 0.5148, "num_tokens": 2560267166.0, "step": 3350 }, { "epoch": 1.2275454587092933, "grad_norm": 0.1550139290257887, "learning_rate": 3.579489889532933e-05, "loss": 0.4874, "num_tokens": 2561115022.0, "step": 3351 }, { "epoch": 1.2279118765171988, "grad_norm": 0.14636642790955962, "learning_rate": 3.5792154856231413e-05, "loss": 0.4983, "num_tokens": 2561945644.0, "step": 3352 }, { "epoch": 1.2282782943251043, "grad_norm": 0.16325526022332637, "learning_rate": 3.578941004060768e-05, "loss": 0.4717, "num_tokens": 2562758072.0, "step": 3353 }, { "epoch": 1.2286447121330097, "grad_norm": 0.16293357237414977, "learning_rate": 3.578666444861268e-05, "loss": 0.554, "num_tokens": 2563460705.0, "step": 3354 }, { "epoch": 1.2290111299409152, "grad_norm": 0.16702494294435968, "learning_rate": 3.5783918080400974e-05, "loss": 0.5405, "num_tokens": 2564178491.0, "step": 3355 }, { "epoch": 1.2293775477488207, "grad_norm": 0.16499146447095703, "learning_rate": 3.5781170936127214e-05, "loss": 0.482, "num_tokens": 2565025996.0, "step": 3356 }, { "epoch": 1.2297439655567262, "grad_norm": 0.15666077151310553, "learning_rate": 3.577842301594605e-05, "loss": 0.5138, "num_tokens": 2565723273.0, "step": 3357 }, { "epoch": 1.2301103833646314, "grad_norm": 0.16959498863457156, "learning_rate": 3.5775674320012204e-05, "loss": 0.5496, "num_tokens": 2566515278.0, "step": 3358 }, { "epoch": 1.230476801172537, "grad_norm": 0.15433913101757724, "learning_rate": 3.577292484848042e-05, "loss": 0.4986, "num_tokens": 2567221317.0, "step": 3359 }, { "epoch": 1.2308432189804424, "grad_norm": 0.1622073205329248, "learning_rate": 3.577017460150552e-05, "loss": 0.5192, "num_tokens": 2568009211.0, "step": 3360 }, { "epoch": 1.2312096367883478, "grad_norm": 0.14049398658102658, "learning_rate": 3.576742357924234e-05, "loss": 0.509, "num_tokens": 2568845414.0, "step": 3361 }, { "epoch": 1.2315760545962533, "grad_norm": 0.14633051672808126, "learning_rate": 3.576467178184576e-05, "loss": 0.4662, "num_tokens": 2569664164.0, "step": 3362 }, { "epoch": 1.2319424724041588, "grad_norm": 0.15944254275837447, "learning_rate": 3.576191920947072e-05, "loss": 0.5217, "num_tokens": 2570333778.0, "step": 3363 }, { "epoch": 1.2323088902120642, "grad_norm": 0.15217868610949445, "learning_rate": 3.575916586227219e-05, "loss": 0.4981, "num_tokens": 2571166027.0, "step": 3364 }, { "epoch": 1.2326753080199697, "grad_norm": 0.1548006555270365, "learning_rate": 3.5756411740405186e-05, "loss": 0.4921, "num_tokens": 2571983265.0, "step": 3365 }, { "epoch": 1.2330417258278752, "grad_norm": 0.13731954170077226, "learning_rate": 3.5753656844024783e-05, "loss": 0.4627, "num_tokens": 2572788938.0, "step": 3366 }, { "epoch": 1.2334081436357807, "grad_norm": 0.1706457208254276, "learning_rate": 3.575090117328608e-05, "loss": 0.5277, "num_tokens": 2573520266.0, "step": 3367 }, { "epoch": 1.2337745614436861, "grad_norm": 0.1578109966158417, "learning_rate": 3.574814472834423e-05, "loss": 0.5429, "num_tokens": 2574409943.0, "step": 3368 }, { "epoch": 1.2341409792515916, "grad_norm": 0.13759482705215442, "learning_rate": 3.574538750935442e-05, "loss": 0.4879, "num_tokens": 2575199983.0, "step": 3369 }, { "epoch": 1.234507397059497, "grad_norm": 0.17824348032642867, "learning_rate": 3.5742629516471894e-05, "loss": 0.5342, "num_tokens": 2575947460.0, "step": 3370 }, { "epoch": 1.2348738148674026, "grad_norm": 0.16761349144301216, "learning_rate": 3.573987074985192e-05, "loss": 0.5269, "num_tokens": 2576664778.0, "step": 3371 }, { "epoch": 1.235240232675308, "grad_norm": 0.145553402361127, "learning_rate": 3.573711120964984e-05, "loss": 0.4985, "num_tokens": 2577458455.0, "step": 3372 }, { "epoch": 1.2356066504832135, "grad_norm": 0.17455670002140064, "learning_rate": 3.573435089602101e-05, "loss": 0.5051, "num_tokens": 2578159574.0, "step": 3373 }, { "epoch": 1.235973068291119, "grad_norm": 0.14930783130530362, "learning_rate": 3.5731589809120835e-05, "loss": 0.4731, "num_tokens": 2578993792.0, "step": 3374 }, { "epoch": 1.2363394860990244, "grad_norm": 0.1806967905432253, "learning_rate": 3.572882794910479e-05, "loss": 0.5437, "num_tokens": 2579755100.0, "step": 3375 }, { "epoch": 1.23670590390693, "grad_norm": 0.16353732954946262, "learning_rate": 3.572606531612835e-05, "loss": 0.5483, "num_tokens": 2580490576.0, "step": 3376 }, { "epoch": 1.2370723217148354, "grad_norm": 0.15709098482247374, "learning_rate": 3.572330191034708e-05, "loss": 0.4997, "num_tokens": 2581268108.0, "step": 3377 }, { "epoch": 1.2374387395227409, "grad_norm": 0.14975594013473262, "learning_rate": 3.5720537731916554e-05, "loss": 0.5211, "num_tokens": 2582004029.0, "step": 3378 }, { "epoch": 1.2378051573306463, "grad_norm": 0.15491331315989956, "learning_rate": 3.571777278099239e-05, "loss": 0.5404, "num_tokens": 2582775463.0, "step": 3379 }, { "epoch": 1.2381715751385518, "grad_norm": 0.1508638271396448, "learning_rate": 3.5715007057730274e-05, "loss": 0.5042, "num_tokens": 2583499272.0, "step": 3380 }, { "epoch": 1.2385379929464573, "grad_norm": 0.16490871279509325, "learning_rate": 3.5712240562285925e-05, "loss": 0.484, "num_tokens": 2584215415.0, "step": 3381 }, { "epoch": 1.2389044107543628, "grad_norm": 0.141186250435601, "learning_rate": 3.570947329481509e-05, "loss": 0.4944, "num_tokens": 2585013119.0, "step": 3382 }, { "epoch": 1.2392708285622682, "grad_norm": 0.16810570844560122, "learning_rate": 3.5706705255473575e-05, "loss": 0.514, "num_tokens": 2585720071.0, "step": 3383 }, { "epoch": 1.2396372463701737, "grad_norm": 0.15606218433663227, "learning_rate": 3.570393644441723e-05, "loss": 0.5157, "num_tokens": 2586546558.0, "step": 3384 }, { "epoch": 1.240003664178079, "grad_norm": 0.17273526957743524, "learning_rate": 3.5701166861801946e-05, "loss": 0.5445, "num_tokens": 2587246603.0, "step": 3385 }, { "epoch": 1.2403700819859846, "grad_norm": 0.144898214813831, "learning_rate": 3.5698396507783654e-05, "loss": 0.5024, "num_tokens": 2588074372.0, "step": 3386 }, { "epoch": 1.24073649979389, "grad_norm": 0.14627928714771665, "learning_rate": 3.569562538251832e-05, "loss": 0.4917, "num_tokens": 2588907779.0, "step": 3387 }, { "epoch": 1.2411029176017954, "grad_norm": 0.17556363123940608, "learning_rate": 3.569285348616198e-05, "loss": 0.5064, "num_tokens": 2589537479.0, "step": 3388 }, { "epoch": 1.2414693354097008, "grad_norm": 0.14084121012916923, "learning_rate": 3.569008081887069e-05, "loss": 0.4959, "num_tokens": 2590376030.0, "step": 3389 }, { "epoch": 1.2418357532176063, "grad_norm": 0.16167397509532766, "learning_rate": 3.568730738080056e-05, "loss": 0.5073, "num_tokens": 2591247978.0, "step": 3390 }, { "epoch": 1.2422021710255118, "grad_norm": 0.14277209026190904, "learning_rate": 3.5684533172107734e-05, "loss": 0.4814, "num_tokens": 2592023079.0, "step": 3391 }, { "epoch": 1.2425685888334173, "grad_norm": 0.14211304243904316, "learning_rate": 3.5681758192948406e-05, "loss": 0.4988, "num_tokens": 2592755364.0, "step": 3392 }, { "epoch": 1.2429350066413227, "grad_norm": 0.15214577703013069, "learning_rate": 3.5678982443478824e-05, "loss": 0.5306, "num_tokens": 2593540537.0, "step": 3393 }, { "epoch": 1.2433014244492282, "grad_norm": 0.14496957460471616, "learning_rate": 3.5676205923855254e-05, "loss": 0.5205, "num_tokens": 2594377530.0, "step": 3394 }, { "epoch": 1.2436678422571337, "grad_norm": 0.15456776647404064, "learning_rate": 3.567342863423403e-05, "loss": 0.5096, "num_tokens": 2595058922.0, "step": 3395 }, { "epoch": 1.2440342600650391, "grad_norm": 0.14066218737250866, "learning_rate": 3.5670650574771507e-05, "loss": 0.5187, "num_tokens": 2595794150.0, "step": 3396 }, { "epoch": 1.2444006778729446, "grad_norm": 0.15530454601710295, "learning_rate": 3.56678717456241e-05, "loss": 0.5057, "num_tokens": 2596568663.0, "step": 3397 }, { "epoch": 1.24476709568085, "grad_norm": 0.16793003686143482, "learning_rate": 3.566509214694827e-05, "loss": 0.4879, "num_tokens": 2597276571.0, "step": 3398 }, { "epoch": 1.2451335134887556, "grad_norm": 0.14805575576588395, "learning_rate": 3.566231177890052e-05, "loss": 0.4682, "num_tokens": 2598064952.0, "step": 3399 }, { "epoch": 1.245499931296661, "grad_norm": 0.16838104704487453, "learning_rate": 3.565953064163737e-05, "loss": 0.5269, "num_tokens": 2598805047.0, "step": 3400 }, { "epoch": 1.2458663491045665, "grad_norm": 0.14404529617522382, "learning_rate": 3.565674873531541e-05, "loss": 0.5019, "num_tokens": 2599580387.0, "step": 3401 }, { "epoch": 1.246232766912472, "grad_norm": 0.17278312301481827, "learning_rate": 3.5653966060091284e-05, "loss": 0.4807, "num_tokens": 2600272441.0, "step": 3402 }, { "epoch": 1.2465991847203775, "grad_norm": 0.1469354310811713, "learning_rate": 3.5651182616121635e-05, "loss": 0.5143, "num_tokens": 2601089791.0, "step": 3403 }, { "epoch": 1.246965602528283, "grad_norm": 0.1476269272545798, "learning_rate": 3.564839840356319e-05, "loss": 0.5141, "num_tokens": 2601911019.0, "step": 3404 }, { "epoch": 1.2473320203361884, "grad_norm": 0.15155482594789, "learning_rate": 3.564561342257271e-05, "loss": 0.5255, "num_tokens": 2602725015.0, "step": 3405 }, { "epoch": 1.2476984381440939, "grad_norm": 0.15095156051024422, "learning_rate": 3.564282767330698e-05, "loss": 0.4907, "num_tokens": 2603634706.0, "step": 3406 }, { "epoch": 1.2480648559519993, "grad_norm": 0.1526348514982657, "learning_rate": 3.564004115592287e-05, "loss": 0.5305, "num_tokens": 2604339850.0, "step": 3407 }, { "epoch": 1.2484312737599048, "grad_norm": 0.14333403390928354, "learning_rate": 3.5637253870577234e-05, "loss": 0.4979, "num_tokens": 2605017576.0, "step": 3408 }, { "epoch": 1.2487976915678103, "grad_norm": 0.15839431565782955, "learning_rate": 3.563446581742703e-05, "loss": 0.5135, "num_tokens": 2605665793.0, "step": 3409 }, { "epoch": 1.2491641093757155, "grad_norm": 0.14718895508353444, "learning_rate": 3.563167699662922e-05, "loss": 0.521, "num_tokens": 2606480270.0, "step": 3410 }, { "epoch": 1.2495305271836212, "grad_norm": 0.1475022275203564, "learning_rate": 3.5628887408340806e-05, "loss": 0.5087, "num_tokens": 2607261346.0, "step": 3411 }, { "epoch": 1.2498969449915265, "grad_norm": 0.16183344141152486, "learning_rate": 3.562609705271887e-05, "loss": 0.517, "num_tokens": 2607899433.0, "step": 3412 }, { "epoch": 1.2502633627994322, "grad_norm": 0.15465903645421417, "learning_rate": 3.5623305929920503e-05, "loss": 0.4861, "num_tokens": 2608752789.0, "step": 3413 }, { "epoch": 1.2506297806073374, "grad_norm": 0.16582717342725595, "learning_rate": 3.562051404010285e-05, "loss": 0.5264, "num_tokens": 2609500835.0, "step": 3414 }, { "epoch": 1.250996198415243, "grad_norm": 0.15885275594823248, "learning_rate": 3.5617721383423104e-05, "loss": 0.5003, "num_tokens": 2610244006.0, "step": 3415 }, { "epoch": 1.2513626162231484, "grad_norm": 0.15786171351585468, "learning_rate": 3.56149279600385e-05, "loss": 0.5013, "num_tokens": 2610980432.0, "step": 3416 }, { "epoch": 1.2517290340310538, "grad_norm": 0.17015672343388308, "learning_rate": 3.5612133770106304e-05, "loss": 0.5194, "num_tokens": 2611725379.0, "step": 3417 }, { "epoch": 1.2520954518389593, "grad_norm": 0.16190098882207787, "learning_rate": 3.5609338813783836e-05, "loss": 0.5459, "num_tokens": 2612391540.0, "step": 3418 }, { "epoch": 1.2524618696468648, "grad_norm": 0.178273983016718, "learning_rate": 3.5606543091228474e-05, "loss": 0.4883, "num_tokens": 2613014407.0, "step": 3419 }, { "epoch": 1.2528282874547703, "grad_norm": 0.16024386076436378, "learning_rate": 3.5603746602597594e-05, "loss": 0.5161, "num_tokens": 2613792332.0, "step": 3420 }, { "epoch": 1.2531947052626757, "grad_norm": 0.15419943810368586, "learning_rate": 3.560094934804867e-05, "loss": 0.521, "num_tokens": 2614549123.0, "step": 3421 }, { "epoch": 1.2535611230705812, "grad_norm": 0.15197231921624585, "learning_rate": 3.559815132773918e-05, "loss": 0.4842, "num_tokens": 2615330304.0, "step": 3422 }, { "epoch": 1.2539275408784867, "grad_norm": 0.1512456492043916, "learning_rate": 3.5595352541826665e-05, "loss": 0.5102, "num_tokens": 2616115678.0, "step": 3423 }, { "epoch": 1.2542939586863922, "grad_norm": 0.16998435194323913, "learning_rate": 3.559255299046869e-05, "loss": 0.5103, "num_tokens": 2616831465.0, "step": 3424 }, { "epoch": 1.2546603764942976, "grad_norm": 0.15124306706498553, "learning_rate": 3.5589752673822886e-05, "loss": 0.5326, "num_tokens": 2617595415.0, "step": 3425 }, { "epoch": 1.255026794302203, "grad_norm": 0.16994233078157106, "learning_rate": 3.5586951592046915e-05, "loss": 0.515, "num_tokens": 2618190014.0, "step": 3426 }, { "epoch": 1.2553932121101086, "grad_norm": 0.17456147679758693, "learning_rate": 3.558414974529848e-05, "loss": 0.5277, "num_tokens": 2618858879.0, "step": 3427 }, { "epoch": 1.255759629918014, "grad_norm": 0.1631990482837241, "learning_rate": 3.5581347133735335e-05, "loss": 0.5021, "num_tokens": 2619697967.0, "step": 3428 }, { "epoch": 1.2561260477259195, "grad_norm": 0.1448261263412612, "learning_rate": 3.5578543757515276e-05, "loss": 0.4664, "num_tokens": 2620398664.0, "step": 3429 }, { "epoch": 1.256492465533825, "grad_norm": 0.14695138609061803, "learning_rate": 3.557573961679613e-05, "loss": 0.5087, "num_tokens": 2621241921.0, "step": 3430 }, { "epoch": 1.2568588833417305, "grad_norm": 0.16803003712740053, "learning_rate": 3.557293471173577e-05, "loss": 0.5079, "num_tokens": 2622005485.0, "step": 3431 }, { "epoch": 1.257225301149636, "grad_norm": 0.15358177270341652, "learning_rate": 3.557012904249214e-05, "loss": 0.5257, "num_tokens": 2622805606.0, "step": 3432 }, { "epoch": 1.2575917189575414, "grad_norm": 0.15042045812637816, "learning_rate": 3.556732260922318e-05, "loss": 0.5176, "num_tokens": 2623668770.0, "step": 3433 }, { "epoch": 1.2579581367654469, "grad_norm": 0.15191060186768476, "learning_rate": 3.556451541208691e-05, "loss": 0.4601, "num_tokens": 2624447292.0, "step": 3434 }, { "epoch": 1.2583245545733521, "grad_norm": 0.18404180236522122, "learning_rate": 3.556170745124139e-05, "loss": 0.4942, "num_tokens": 2625177118.0, "step": 3435 }, { "epoch": 1.2586909723812578, "grad_norm": 0.14118373425317135, "learning_rate": 3.5558898726844695e-05, "loss": 0.5395, "num_tokens": 2625878672.0, "step": 3436 }, { "epoch": 1.259057390189163, "grad_norm": 0.18848262994400083, "learning_rate": 3.555608923905497e-05, "loss": 0.5646, "num_tokens": 2626608577.0, "step": 3437 }, { "epoch": 1.2594238079970688, "grad_norm": 0.161783579863651, "learning_rate": 3.55532789880304e-05, "loss": 0.5011, "num_tokens": 2627319085.0, "step": 3438 }, { "epoch": 1.259790225804974, "grad_norm": 0.1472574551543152, "learning_rate": 3.5550467973929204e-05, "loss": 0.5082, "num_tokens": 2628217006.0, "step": 3439 }, { "epoch": 1.2601566436128797, "grad_norm": 0.16669091977702885, "learning_rate": 3.5547656196909646e-05, "loss": 0.5108, "num_tokens": 2628966770.0, "step": 3440 }, { "epoch": 1.260523061420785, "grad_norm": 0.13780033790356766, "learning_rate": 3.5544843657130034e-05, "loss": 0.4738, "num_tokens": 2629791767.0, "step": 3441 }, { "epoch": 1.2608894792286904, "grad_norm": 0.147686672509395, "learning_rate": 3.554203035474872e-05, "loss": 0.5114, "num_tokens": 2630617949.0, "step": 3442 }, { "epoch": 1.261255897036596, "grad_norm": 0.1424876971561124, "learning_rate": 3.55392162899241e-05, "loss": 0.5188, "num_tokens": 2631499189.0, "step": 3443 }, { "epoch": 1.2616223148445014, "grad_norm": 0.1682604321505213, "learning_rate": 3.553640146281461e-05, "loss": 0.5031, "num_tokens": 2632253431.0, "step": 3444 }, { "epoch": 1.2619887326524069, "grad_norm": 0.15156929009038164, "learning_rate": 3.5533585873578737e-05, "loss": 0.5252, "num_tokens": 2633009150.0, "step": 3445 }, { "epoch": 1.2623551504603123, "grad_norm": 0.1555417405214695, "learning_rate": 3.553076952237499e-05, "loss": 0.4926, "num_tokens": 2633800120.0, "step": 3446 }, { "epoch": 1.2627215682682178, "grad_norm": 0.15925361912211627, "learning_rate": 3.552795240936195e-05, "loss": 0.5567, "num_tokens": 2634642169.0, "step": 3447 }, { "epoch": 1.2630879860761233, "grad_norm": 0.15871167187336838, "learning_rate": 3.552513453469822e-05, "loss": 0.5039, "num_tokens": 2635410053.0, "step": 3448 }, { "epoch": 1.2634544038840287, "grad_norm": 0.14793621795846587, "learning_rate": 3.552231589854245e-05, "loss": 0.4897, "num_tokens": 2636101564.0, "step": 3449 }, { "epoch": 1.2638208216919342, "grad_norm": 0.15781560485390764, "learning_rate": 3.5519496501053334e-05, "loss": 0.5163, "num_tokens": 2636883143.0, "step": 3450 }, { "epoch": 1.2641872394998397, "grad_norm": 0.15369907614193035, "learning_rate": 3.5516676342389616e-05, "loss": 0.4956, "num_tokens": 2637532474.0, "step": 3451 }, { "epoch": 1.2645536573077452, "grad_norm": 0.1702089654684562, "learning_rate": 3.551385542271007e-05, "loss": 0.5217, "num_tokens": 2638350583.0, "step": 3452 }, { "epoch": 1.2649200751156506, "grad_norm": 0.17273374196550365, "learning_rate": 3.5511033742173524e-05, "loss": 0.5184, "num_tokens": 2639032646.0, "step": 3453 }, { "epoch": 1.265286492923556, "grad_norm": 0.17105989489867943, "learning_rate": 3.550821130093884e-05, "loss": 0.5218, "num_tokens": 2639857478.0, "step": 3454 }, { "epoch": 1.2656529107314616, "grad_norm": 0.15423039693329868, "learning_rate": 3.550538809916494e-05, "loss": 0.5132, "num_tokens": 2640618455.0, "step": 3455 }, { "epoch": 1.266019328539367, "grad_norm": 0.1681152107774221, "learning_rate": 3.550256413701075e-05, "loss": 0.498, "num_tokens": 2641313487.0, "step": 3456 }, { "epoch": 1.2663857463472725, "grad_norm": 0.1586489816870931, "learning_rate": 3.5499739414635294e-05, "loss": 0.4683, "num_tokens": 2641965692.0, "step": 3457 }, { "epoch": 1.266752164155178, "grad_norm": 0.1410173258874271, "learning_rate": 3.549691393219759e-05, "loss": 0.5169, "num_tokens": 2642719884.0, "step": 3458 }, { "epoch": 1.2671185819630835, "grad_norm": 0.16245960691083167, "learning_rate": 3.549408768985672e-05, "loss": 0.4506, "num_tokens": 2643410695.0, "step": 3459 }, { "epoch": 1.267484999770989, "grad_norm": 0.16515374582797984, "learning_rate": 3.549126068777182e-05, "loss": 0.5218, "num_tokens": 2643996549.0, "step": 3460 }, { "epoch": 1.2678514175788944, "grad_norm": 0.1655431843093726, "learning_rate": 3.548843292610204e-05, "loss": 0.5035, "num_tokens": 2644805273.0, "step": 3461 }, { "epoch": 1.2682178353867997, "grad_norm": 0.16901834306259217, "learning_rate": 3.54856044050066e-05, "loss": 0.493, "num_tokens": 2645619651.0, "step": 3462 }, { "epoch": 1.2685842531947054, "grad_norm": 0.15487120085855294, "learning_rate": 3.5482775124644744e-05, "loss": 0.5214, "num_tokens": 2646397344.0, "step": 3463 }, { "epoch": 1.2689506710026106, "grad_norm": 0.16390519258163222, "learning_rate": 3.547994508517577e-05, "loss": 0.521, "num_tokens": 2647119459.0, "step": 3464 }, { "epoch": 1.2693170888105163, "grad_norm": 0.16191697122508206, "learning_rate": 3.547711428675901e-05, "loss": 0.489, "num_tokens": 2647964039.0, "step": 3465 }, { "epoch": 1.2696835066184216, "grad_norm": 0.16121542101287323, "learning_rate": 3.547428272955386e-05, "loss": 0.4831, "num_tokens": 2648782922.0, "step": 3466 }, { "epoch": 1.2700499244263272, "grad_norm": 0.16176387597449893, "learning_rate": 3.547145041371972e-05, "loss": 0.5017, "num_tokens": 2649633793.0, "step": 3467 }, { "epoch": 1.2704163422342325, "grad_norm": 0.15204172651506093, "learning_rate": 3.546861733941606e-05, "loss": 0.5362, "num_tokens": 2650363927.0, "step": 3468 }, { "epoch": 1.270782760042138, "grad_norm": 0.1509390278532228, "learning_rate": 3.5465783506802406e-05, "loss": 0.5137, "num_tokens": 2651198620.0, "step": 3469 }, { "epoch": 1.2711491778500434, "grad_norm": 0.13974709395745144, "learning_rate": 3.546294891603828e-05, "loss": 0.4558, "num_tokens": 2652035075.0, "step": 3470 }, { "epoch": 1.271515595657949, "grad_norm": 0.15254337849986344, "learning_rate": 3.54601135672833e-05, "loss": 0.5076, "num_tokens": 2652884553.0, "step": 3471 }, { "epoch": 1.2718820134658544, "grad_norm": 0.15119692598702117, "learning_rate": 3.5457277460697094e-05, "loss": 0.5308, "num_tokens": 2653633566.0, "step": 3472 }, { "epoch": 1.2722484312737599, "grad_norm": 0.1789329625415184, "learning_rate": 3.545444059643933e-05, "loss": 0.5123, "num_tokens": 2654438138.0, "step": 3473 }, { "epoch": 1.2726148490816653, "grad_norm": 0.14219556785439127, "learning_rate": 3.545160297466974e-05, "loss": 0.5259, "num_tokens": 2655177748.0, "step": 3474 }, { "epoch": 1.2729812668895708, "grad_norm": 0.15212222256092098, "learning_rate": 3.544876459554809e-05, "loss": 0.5152, "num_tokens": 2655937019.0, "step": 3475 }, { "epoch": 1.2733476846974763, "grad_norm": 0.1417040779418409, "learning_rate": 3.544592545923418e-05, "loss": 0.508, "num_tokens": 2656639853.0, "step": 3476 }, { "epoch": 1.2737141025053818, "grad_norm": 0.17752801866968201, "learning_rate": 3.544308556588786e-05, "loss": 0.5451, "num_tokens": 2657371336.0, "step": 3477 }, { "epoch": 1.2740805203132872, "grad_norm": 0.14845427611779366, "learning_rate": 3.544024491566902e-05, "loss": 0.4687, "num_tokens": 2658041175.0, "step": 3478 }, { "epoch": 1.2744469381211927, "grad_norm": 0.15790989788245527, "learning_rate": 3.54374035087376e-05, "loss": 0.4962, "num_tokens": 2658838817.0, "step": 3479 }, { "epoch": 1.2748133559290982, "grad_norm": 0.14996556831400445, "learning_rate": 3.543456134525357e-05, "loss": 0.533, "num_tokens": 2659543218.0, "step": 3480 }, { "epoch": 1.2751797737370036, "grad_norm": 0.16350010620045172, "learning_rate": 3.543171842537696e-05, "loss": 0.5017, "num_tokens": 2660263548.0, "step": 3481 }, { "epoch": 1.2755461915449091, "grad_norm": 0.13980742306319646, "learning_rate": 3.542887474926782e-05, "loss": 0.4945, "num_tokens": 2661058580.0, "step": 3482 }, { "epoch": 1.2759126093528146, "grad_norm": 0.15748234201735775, "learning_rate": 3.542603031708627e-05, "loss": 0.4994, "num_tokens": 2661784332.0, "step": 3483 }, { "epoch": 1.27627902716072, "grad_norm": 0.1412591939561983, "learning_rate": 3.542318512899243e-05, "loss": 0.5118, "num_tokens": 2662587496.0, "step": 3484 }, { "epoch": 1.2766454449686255, "grad_norm": 0.14501908484149134, "learning_rate": 3.542033918514651e-05, "loss": 0.5227, "num_tokens": 2663374787.0, "step": 3485 }, { "epoch": 1.277011862776531, "grad_norm": 0.14776178765007864, "learning_rate": 3.5417492485708745e-05, "loss": 0.5329, "num_tokens": 2664180328.0, "step": 3486 }, { "epoch": 1.2773782805844365, "grad_norm": 0.1571321285556791, "learning_rate": 3.54146450308394e-05, "loss": 0.5271, "num_tokens": 2664907835.0, "step": 3487 }, { "epoch": 1.277744698392342, "grad_norm": 0.13877756166099312, "learning_rate": 3.5411796820698796e-05, "loss": 0.5227, "num_tokens": 2665696350.0, "step": 3488 }, { "epoch": 1.2781111162002472, "grad_norm": 0.16623181026435027, "learning_rate": 3.54089478554473e-05, "loss": 0.5318, "num_tokens": 2666501105.0, "step": 3489 }, { "epoch": 1.278477534008153, "grad_norm": 0.13648160396026443, "learning_rate": 3.5406098135245304e-05, "loss": 0.508, "num_tokens": 2667249759.0, "step": 3490 }, { "epoch": 1.2788439518160581, "grad_norm": 0.1583873176156642, "learning_rate": 3.5403247660253247e-05, "loss": 0.5065, "num_tokens": 2668092257.0, "step": 3491 }, { "epoch": 1.2792103696239638, "grad_norm": 0.14648262821540645, "learning_rate": 3.5400396430631635e-05, "loss": 0.5201, "num_tokens": 2668893577.0, "step": 3492 }, { "epoch": 1.279576787431869, "grad_norm": 0.15130558291449314, "learning_rate": 3.539754444654099e-05, "loss": 0.5021, "num_tokens": 2669768356.0, "step": 3493 }, { "epoch": 1.2799432052397748, "grad_norm": 0.14829809583410702, "learning_rate": 3.539469170814188e-05, "loss": 0.5136, "num_tokens": 2670506085.0, "step": 3494 }, { "epoch": 1.28030962304768, "grad_norm": 0.15992827508981167, "learning_rate": 3.539183821559493e-05, "loss": 0.5174, "num_tokens": 2671216665.0, "step": 3495 }, { "epoch": 1.2806760408555855, "grad_norm": 0.15776646142007977, "learning_rate": 3.538898396906078e-05, "loss": 0.5112, "num_tokens": 2672005526.0, "step": 3496 }, { "epoch": 1.281042458663491, "grad_norm": 0.16141756297983423, "learning_rate": 3.538612896870014e-05, "loss": 0.5394, "num_tokens": 2672679024.0, "step": 3497 }, { "epoch": 1.2814088764713965, "grad_norm": 0.13788112688339252, "learning_rate": 3.538327321467376e-05, "loss": 0.4981, "num_tokens": 2673391452.0, "step": 3498 }, { "epoch": 1.281775294279302, "grad_norm": 0.16579345443308674, "learning_rate": 3.538041670714241e-05, "loss": 0.5183, "num_tokens": 2674148301.0, "step": 3499 }, { "epoch": 1.2821417120872074, "grad_norm": 0.1694578973114395, "learning_rate": 3.537755944626693e-05, "loss": 0.5202, "num_tokens": 2674835763.0, "step": 3500 }, { "epoch": 1.2825081298951129, "grad_norm": 0.14689910767462586, "learning_rate": 3.537470143220818e-05, "loss": 0.4638, "num_tokens": 2675596095.0, "step": 3501 }, { "epoch": 1.2828745477030183, "grad_norm": 0.1682671786341766, "learning_rate": 3.537184266512708e-05, "loss": 0.5107, "num_tokens": 2676351739.0, "step": 3502 }, { "epoch": 1.2832409655109238, "grad_norm": 0.16365261767765057, "learning_rate": 3.536898314518458e-05, "loss": 0.5116, "num_tokens": 2677155784.0, "step": 3503 }, { "epoch": 1.2836073833188293, "grad_norm": 0.1507344127774948, "learning_rate": 3.536612287254167e-05, "loss": 0.5075, "num_tokens": 2677928675.0, "step": 3504 }, { "epoch": 1.2839738011267348, "grad_norm": 0.1753030891646235, "learning_rate": 3.5363261847359394e-05, "loss": 0.5055, "num_tokens": 2678649535.0, "step": 3505 }, { "epoch": 1.2843402189346402, "grad_norm": 0.16309724023433106, "learning_rate": 3.536040006979884e-05, "loss": 0.5333, "num_tokens": 2679486682.0, "step": 3506 }, { "epoch": 1.2847066367425457, "grad_norm": 0.1465532123646945, "learning_rate": 3.535753754002113e-05, "loss": 0.5036, "num_tokens": 2680262888.0, "step": 3507 }, { "epoch": 1.2850730545504512, "grad_norm": 0.17939986557013235, "learning_rate": 3.5354674258187424e-05, "loss": 0.5424, "num_tokens": 2680965982.0, "step": 3508 }, { "epoch": 1.2854394723583566, "grad_norm": 0.17569070229837946, "learning_rate": 3.535181022445893e-05, "loss": 0.5084, "num_tokens": 2681705729.0, "step": 3509 }, { "epoch": 1.2858058901662621, "grad_norm": 0.13707609221911812, "learning_rate": 3.53489454389969e-05, "loss": 0.4933, "num_tokens": 2682555306.0, "step": 3510 }, { "epoch": 1.2861723079741676, "grad_norm": 0.17356074921781975, "learning_rate": 3.534607990196264e-05, "loss": 0.514, "num_tokens": 2683321848.0, "step": 3511 }, { "epoch": 1.286538725782073, "grad_norm": 0.15947407347648038, "learning_rate": 3.534321361351746e-05, "loss": 0.5043, "num_tokens": 2684216078.0, "step": 3512 }, { "epoch": 1.2869051435899785, "grad_norm": 0.15425002330112086, "learning_rate": 3.5340346573822765e-05, "loss": 0.4899, "num_tokens": 2684946159.0, "step": 3513 }, { "epoch": 1.287271561397884, "grad_norm": 0.17133453386131287, "learning_rate": 3.533747878303996e-05, "loss": 0.4974, "num_tokens": 2685752598.0, "step": 3514 }, { "epoch": 1.2876379792057895, "grad_norm": 0.15429174146992738, "learning_rate": 3.533461024133051e-05, "loss": 0.4954, "num_tokens": 2686423247.0, "step": 3515 }, { "epoch": 1.2880043970136947, "grad_norm": 0.16354139384772282, "learning_rate": 3.533174094885592e-05, "loss": 0.4997, "num_tokens": 2687300171.0, "step": 3516 }, { "epoch": 1.2883708148216004, "grad_norm": 0.16958329191410995, "learning_rate": 3.532887090577773e-05, "loss": 0.5287, "num_tokens": 2688109734.0, "step": 3517 }, { "epoch": 1.2887372326295057, "grad_norm": 0.18646463309820835, "learning_rate": 3.532600011225754e-05, "loss": 0.4886, "num_tokens": 2688922151.0, "step": 3518 }, { "epoch": 1.2891036504374114, "grad_norm": 0.14410569970449247, "learning_rate": 3.532312856845697e-05, "loss": 0.5062, "num_tokens": 2689646989.0, "step": 3519 }, { "epoch": 1.2894700682453166, "grad_norm": 0.15939097728795065, "learning_rate": 3.532025627453771e-05, "loss": 0.5135, "num_tokens": 2690407078.0, "step": 3520 }, { "epoch": 1.2898364860532223, "grad_norm": 0.20077535535530797, "learning_rate": 3.5317383230661466e-05, "loss": 0.5379, "num_tokens": 2691242939.0, "step": 3521 }, { "epoch": 1.2902029038611276, "grad_norm": 0.1417116397259209, "learning_rate": 3.531450943698999e-05, "loss": 0.4961, "num_tokens": 2692115366.0, "step": 3522 }, { "epoch": 1.290569321669033, "grad_norm": 0.16806066869478392, "learning_rate": 3.531163489368509e-05, "loss": 0.4959, "num_tokens": 2692908021.0, "step": 3523 }, { "epoch": 1.2909357394769385, "grad_norm": 0.17013382889710849, "learning_rate": 3.530875960090861e-05, "loss": 0.5184, "num_tokens": 2693752014.0, "step": 3524 }, { "epoch": 1.291302157284844, "grad_norm": 0.15449819504029733, "learning_rate": 3.530588355882243e-05, "loss": 0.5334, "num_tokens": 2694600475.0, "step": 3525 }, { "epoch": 1.2916685750927495, "grad_norm": 0.1847876165950768, "learning_rate": 3.530300676758848e-05, "loss": 0.5147, "num_tokens": 2695338184.0, "step": 3526 }, { "epoch": 1.292034992900655, "grad_norm": 0.15328404965450892, "learning_rate": 3.530012922736873e-05, "loss": 0.5109, "num_tokens": 2696020542.0, "step": 3527 }, { "epoch": 1.2924014107085604, "grad_norm": 0.1538091336344037, "learning_rate": 3.5297250938325184e-05, "loss": 0.5199, "num_tokens": 2696868988.0, "step": 3528 }, { "epoch": 1.2927678285164659, "grad_norm": 0.13614281517171872, "learning_rate": 3.52943719006199e-05, "loss": 0.4679, "num_tokens": 2697640292.0, "step": 3529 }, { "epoch": 1.2931342463243714, "grad_norm": 0.14449660842787498, "learning_rate": 3.5291492114414985e-05, "loss": 0.5031, "num_tokens": 2698367785.0, "step": 3530 }, { "epoch": 1.2935006641322768, "grad_norm": 0.15039331216499732, "learning_rate": 3.528861157987256e-05, "loss": 0.4603, "num_tokens": 2699190508.0, "step": 3531 }, { "epoch": 1.2938670819401823, "grad_norm": 0.1530984751887579, "learning_rate": 3.5285730297154806e-05, "loss": 0.5018, "num_tokens": 2700022653.0, "step": 3532 }, { "epoch": 1.2942334997480878, "grad_norm": 0.13992983318225236, "learning_rate": 3.528284826642395e-05, "loss": 0.5022, "num_tokens": 2700695460.0, "step": 3533 }, { "epoch": 1.2945999175559932, "grad_norm": 0.1552248559101091, "learning_rate": 3.527996548784226e-05, "loss": 0.479, "num_tokens": 2701341707.0, "step": 3534 }, { "epoch": 1.2949663353638987, "grad_norm": 0.20252372914641226, "learning_rate": 3.527708196157204e-05, "loss": 0.541, "num_tokens": 2702140823.0, "step": 3535 }, { "epoch": 1.2953327531718042, "grad_norm": 0.13747593544647913, "learning_rate": 3.5274197687775636e-05, "loss": 0.5104, "num_tokens": 2702937981.0, "step": 3536 }, { "epoch": 1.2956991709797097, "grad_norm": 0.18621603197140266, "learning_rate": 3.527131266661543e-05, "loss": 0.5317, "num_tokens": 2703794098.0, "step": 3537 }, { "epoch": 1.2960655887876151, "grad_norm": 0.1526660018049594, "learning_rate": 3.526842689825388e-05, "loss": 0.521, "num_tokens": 2704577841.0, "step": 3538 }, { "epoch": 1.2964320065955206, "grad_norm": 0.14718349638560743, "learning_rate": 3.526554038285343e-05, "loss": 0.5091, "num_tokens": 2705330177.0, "step": 3539 }, { "epoch": 1.296798424403426, "grad_norm": 0.16588195589515486, "learning_rate": 3.526265312057661e-05, "loss": 0.4754, "num_tokens": 2706072858.0, "step": 3540 }, { "epoch": 1.2971648422113315, "grad_norm": 0.1580432007999362, "learning_rate": 3.525976511158598e-05, "loss": 0.5017, "num_tokens": 2706880160.0, "step": 3541 }, { "epoch": 1.297531260019237, "grad_norm": 0.17980414065633318, "learning_rate": 3.5256876356044145e-05, "loss": 0.512, "num_tokens": 2707625667.0, "step": 3542 }, { "epoch": 1.2978976778271423, "grad_norm": 0.14550416420406465, "learning_rate": 3.5253986854113736e-05, "loss": 0.4859, "num_tokens": 2708439998.0, "step": 3543 }, { "epoch": 1.298264095635048, "grad_norm": 0.1586891712699407, "learning_rate": 3.525109660595745e-05, "loss": 0.5094, "num_tokens": 2709222709.0, "step": 3544 }, { "epoch": 1.2986305134429532, "grad_norm": 0.14505134736933958, "learning_rate": 3.5248205611738e-05, "loss": 0.4908, "num_tokens": 2709986463.0, "step": 3545 }, { "epoch": 1.298996931250859, "grad_norm": 0.13976164275365305, "learning_rate": 3.5245313871618175e-05, "loss": 0.4726, "num_tokens": 2710771964.0, "step": 3546 }, { "epoch": 1.2993633490587642, "grad_norm": 0.15838303579702534, "learning_rate": 3.5242421385760757e-05, "loss": 0.5341, "num_tokens": 2711469955.0, "step": 3547 }, { "epoch": 1.2997297668666699, "grad_norm": 0.1577512235492401, "learning_rate": 3.523952815432862e-05, "loss": 0.5444, "num_tokens": 2712246658.0, "step": 3548 }, { "epoch": 1.300096184674575, "grad_norm": 0.16003584234744758, "learning_rate": 3.523663417748466e-05, "loss": 0.5193, "num_tokens": 2713024119.0, "step": 3549 }, { "epoch": 1.3004626024824806, "grad_norm": 0.1471054703116972, "learning_rate": 3.52337394553918e-05, "loss": 0.4775, "num_tokens": 2713767382.0, "step": 3550 }, { "epoch": 1.300829020290386, "grad_norm": 0.14323713410718444, "learning_rate": 3.523084398821303e-05, "loss": 0.5114, "num_tokens": 2714424356.0, "step": 3551 }, { "epoch": 1.3011954380982915, "grad_norm": 0.17201422834523533, "learning_rate": 3.5227947776111364e-05, "loss": 0.4878, "num_tokens": 2715164158.0, "step": 3552 }, { "epoch": 1.301561855906197, "grad_norm": 0.15103409101700732, "learning_rate": 3.5225050819249866e-05, "loss": 0.5344, "num_tokens": 2715872570.0, "step": 3553 }, { "epoch": 1.3019282737141025, "grad_norm": 0.15453837492917957, "learning_rate": 3.522215311779165e-05, "loss": 0.5049, "num_tokens": 2716750625.0, "step": 3554 }, { "epoch": 1.302294691522008, "grad_norm": 0.15567270524349802, "learning_rate": 3.5219254671899843e-05, "loss": 0.4687, "num_tokens": 2717427586.0, "step": 3555 }, { "epoch": 1.3026611093299134, "grad_norm": 0.14134855192335527, "learning_rate": 3.521635548173765e-05, "loss": 0.4815, "num_tokens": 2718215036.0, "step": 3556 }, { "epoch": 1.3030275271378189, "grad_norm": 0.16583874898879308, "learning_rate": 3.521345554746829e-05, "loss": 0.4937, "num_tokens": 2719084406.0, "step": 3557 }, { "epoch": 1.3033939449457244, "grad_norm": 0.13861431331056073, "learning_rate": 3.5210554869255045e-05, "loss": 0.4994, "num_tokens": 2719825049.0, "step": 3558 }, { "epoch": 1.3037603627536298, "grad_norm": 0.156180269024903, "learning_rate": 3.5207653447261216e-05, "loss": 0.4875, "num_tokens": 2720647840.0, "step": 3559 }, { "epoch": 1.3041267805615353, "grad_norm": 0.1444476018810939, "learning_rate": 3.5204751281650176e-05, "loss": 0.4926, "num_tokens": 2721399518.0, "step": 3560 }, { "epoch": 1.3044931983694408, "grad_norm": 0.16910280497721464, "learning_rate": 3.5201848372585315e-05, "loss": 0.5241, "num_tokens": 2722120445.0, "step": 3561 }, { "epoch": 1.3048596161773462, "grad_norm": 0.14404464399818287, "learning_rate": 3.519894472023006e-05, "loss": 0.4785, "num_tokens": 2722930337.0, "step": 3562 }, { "epoch": 1.3052260339852517, "grad_norm": 0.15824266116614596, "learning_rate": 3.5196040324747915e-05, "loss": 0.4994, "num_tokens": 2723701475.0, "step": 3563 }, { "epoch": 1.3055924517931572, "grad_norm": 0.16175666537253125, "learning_rate": 3.519313518630239e-05, "loss": 0.5119, "num_tokens": 2724474148.0, "step": 3564 }, { "epoch": 1.3059588696010627, "grad_norm": 0.1613146061724975, "learning_rate": 3.5190229305057046e-05, "loss": 0.4902, "num_tokens": 2725200045.0, "step": 3565 }, { "epoch": 1.3063252874089681, "grad_norm": 0.15836796228102296, "learning_rate": 3.5187322681175495e-05, "loss": 0.5206, "num_tokens": 2725880941.0, "step": 3566 }, { "epoch": 1.3066917052168736, "grad_norm": 0.1726694294367409, "learning_rate": 3.51844153148214e-05, "loss": 0.5073, "num_tokens": 2726595996.0, "step": 3567 }, { "epoch": 1.307058123024779, "grad_norm": 0.17906239513003214, "learning_rate": 3.518150720615843e-05, "loss": 0.5476, "num_tokens": 2727277281.0, "step": 3568 }, { "epoch": 1.3074245408326846, "grad_norm": 0.1753619996713945, "learning_rate": 3.517859835535033e-05, "loss": 0.5004, "num_tokens": 2728175158.0, "step": 3569 }, { "epoch": 1.3077909586405898, "grad_norm": 0.157754509246456, "learning_rate": 3.5175688762560855e-05, "loss": 0.4897, "num_tokens": 2729017884.0, "step": 3570 }, { "epoch": 1.3081573764484955, "grad_norm": 0.15507048440087393, "learning_rate": 3.517277842795385e-05, "loss": 0.4767, "num_tokens": 2729766860.0, "step": 3571 }, { "epoch": 1.3085237942564008, "grad_norm": 0.15151107206746048, "learning_rate": 3.5169867351693155e-05, "loss": 0.4916, "num_tokens": 2730464288.0, "step": 3572 }, { "epoch": 1.3088902120643064, "grad_norm": 0.1584578997585996, "learning_rate": 3.516695553394267e-05, "loss": 0.5521, "num_tokens": 2731142572.0, "step": 3573 }, { "epoch": 1.3092566298722117, "grad_norm": 0.1692841819237072, "learning_rate": 3.516404297486634e-05, "loss": 0.5152, "num_tokens": 2731925954.0, "step": 3574 }, { "epoch": 1.3096230476801174, "grad_norm": 0.14096715437102367, "learning_rate": 3.516112967462814e-05, "loss": 0.496, "num_tokens": 2732818339.0, "step": 3575 }, { "epoch": 1.3099894654880226, "grad_norm": 0.15331320351876374, "learning_rate": 3.515821563339211e-05, "loss": 0.5479, "num_tokens": 2733593093.0, "step": 3576 }, { "epoch": 1.3103558832959281, "grad_norm": 0.16456903819087673, "learning_rate": 3.515530085132231e-05, "loss": 0.5024, "num_tokens": 2734415494.0, "step": 3577 }, { "epoch": 1.3107223011038336, "grad_norm": 0.15095563101015103, "learning_rate": 3.5152385328582835e-05, "loss": 0.5386, "num_tokens": 2735174098.0, "step": 3578 }, { "epoch": 1.311088718911739, "grad_norm": 0.1922854720943091, "learning_rate": 3.514946906533786e-05, "loss": 0.541, "num_tokens": 2735990019.0, "step": 3579 }, { "epoch": 1.3114551367196445, "grad_norm": 0.17245341788901622, "learning_rate": 3.514655206175155e-05, "loss": 0.4957, "num_tokens": 2736636129.0, "step": 3580 }, { "epoch": 1.31182155452755, "grad_norm": 0.1632008608030115, "learning_rate": 3.514363431798815e-05, "loss": 0.4646, "num_tokens": 2737322868.0, "step": 3581 }, { "epoch": 1.3121879723354555, "grad_norm": 0.186555314149293, "learning_rate": 3.514071583421194e-05, "loss": 0.5167, "num_tokens": 2738047267.0, "step": 3582 }, { "epoch": 1.312554390143361, "grad_norm": 0.1561954847875452, "learning_rate": 3.513779661058723e-05, "loss": 0.524, "num_tokens": 2738851563.0, "step": 3583 }, { "epoch": 1.3129208079512664, "grad_norm": 0.1635340782400703, "learning_rate": 3.513487664727837e-05, "loss": 0.5148, "num_tokens": 2739694901.0, "step": 3584 }, { "epoch": 1.313287225759172, "grad_norm": 0.15789930237758948, "learning_rate": 3.513195594444978e-05, "loss": 0.4925, "num_tokens": 2740481356.0, "step": 3585 }, { "epoch": 1.3136536435670774, "grad_norm": 0.14658816575636435, "learning_rate": 3.512903450226589e-05, "loss": 0.4869, "num_tokens": 2741187272.0, "step": 3586 }, { "epoch": 1.3140200613749828, "grad_norm": 0.1766301268794343, "learning_rate": 3.5126112320891177e-05, "loss": 0.4708, "num_tokens": 2741873206.0, "step": 3587 }, { "epoch": 1.3143864791828883, "grad_norm": 0.13710200919263207, "learning_rate": 3.5123189400490174e-05, "loss": 0.5245, "num_tokens": 2742697563.0, "step": 3588 }, { "epoch": 1.3147528969907938, "grad_norm": 0.17249041062306097, "learning_rate": 3.512026574122745e-05, "loss": 0.5101, "num_tokens": 2743413276.0, "step": 3589 }, { "epoch": 1.3151193147986993, "grad_norm": 0.14087412996704268, "learning_rate": 3.51173413432676e-05, "loss": 0.4897, "num_tokens": 2744164715.0, "step": 3590 }, { "epoch": 1.3154857326066047, "grad_norm": 0.16655706233382994, "learning_rate": 3.5114416206775285e-05, "loss": 0.4958, "num_tokens": 2744775825.0, "step": 3591 }, { "epoch": 1.3158521504145102, "grad_norm": 0.15402119326381056, "learning_rate": 3.51114903319152e-05, "loss": 0.4967, "num_tokens": 2745576787.0, "step": 3592 }, { "epoch": 1.3162185682224157, "grad_norm": 0.15764566694419294, "learning_rate": 3.510856371885206e-05, "loss": 0.4978, "num_tokens": 2746476323.0, "step": 3593 }, { "epoch": 1.3165849860303211, "grad_norm": 0.16355983015895748, "learning_rate": 3.5105636367750653e-05, "loss": 0.4793, "num_tokens": 2747257573.0, "step": 3594 }, { "epoch": 1.3169514038382266, "grad_norm": 0.15358993964434992, "learning_rate": 3.510270827877579e-05, "loss": 0.5371, "num_tokens": 2747956496.0, "step": 3595 }, { "epoch": 1.317317821646132, "grad_norm": 0.1768405134462522, "learning_rate": 3.509977945209233e-05, "loss": 0.4937, "num_tokens": 2748759248.0, "step": 3596 }, { "epoch": 1.3176842394540373, "grad_norm": 0.15334587945580358, "learning_rate": 3.509684988786517e-05, "loss": 0.4988, "num_tokens": 2749571128.0, "step": 3597 }, { "epoch": 1.318050657261943, "grad_norm": 0.1828203717594286, "learning_rate": 3.5093919586259257e-05, "loss": 0.5188, "num_tokens": 2750362647.0, "step": 3598 }, { "epoch": 1.3184170750698483, "grad_norm": 0.14755586998708156, "learning_rate": 3.5090988547439556e-05, "loss": 0.5139, "num_tokens": 2751141855.0, "step": 3599 }, { "epoch": 1.318783492877754, "grad_norm": 0.14567156222627428, "learning_rate": 3.5088056771571114e-05, "loss": 0.5272, "num_tokens": 2751886184.0, "step": 3600 }, { "epoch": 1.3191499106856592, "grad_norm": 0.1438766114042005, "learning_rate": 3.5085124258818974e-05, "loss": 0.4946, "num_tokens": 2752667676.0, "step": 3601 }, { "epoch": 1.3195163284935647, "grad_norm": 0.14027170811713208, "learning_rate": 3.508219100934826e-05, "loss": 0.5019, "num_tokens": 2753557112.0, "step": 3602 }, { "epoch": 1.3198827463014702, "grad_norm": 0.16556857272379574, "learning_rate": 3.50792570233241e-05, "loss": 0.5065, "num_tokens": 2754249708.0, "step": 3603 }, { "epoch": 1.3202491641093757, "grad_norm": 0.15013960355241274, "learning_rate": 3.50763223009117e-05, "loss": 0.5229, "num_tokens": 2754935959.0, "step": 3604 }, { "epoch": 1.3206155819172811, "grad_norm": 0.13386944600277403, "learning_rate": 3.507338684227629e-05, "loss": 0.5087, "num_tokens": 2755841719.0, "step": 3605 }, { "epoch": 1.3209819997251866, "grad_norm": 0.1474679918402736, "learning_rate": 3.507045064758314e-05, "loss": 0.4957, "num_tokens": 2756443726.0, "step": 3606 }, { "epoch": 1.321348417533092, "grad_norm": 0.15316432201229224, "learning_rate": 3.5067513716997545e-05, "loss": 0.5096, "num_tokens": 2757220820.0, "step": 3607 }, { "epoch": 1.3217148353409975, "grad_norm": 0.13696786166236005, "learning_rate": 3.506457605068489e-05, "loss": 0.4808, "num_tokens": 2757994508.0, "step": 3608 }, { "epoch": 1.322081253148903, "grad_norm": 0.1497132810938408, "learning_rate": 3.506163764881056e-05, "loss": 0.5166, "num_tokens": 2758672617.0, "step": 3609 }, { "epoch": 1.3224476709568085, "grad_norm": 0.1642733147394917, "learning_rate": 3.505869851153998e-05, "loss": 0.5177, "num_tokens": 2759310317.0, "step": 3610 }, { "epoch": 1.322814088764714, "grad_norm": 0.13359258641541083, "learning_rate": 3.505575863903864e-05, "loss": 0.4835, "num_tokens": 2760064621.0, "step": 3611 }, { "epoch": 1.3231805065726194, "grad_norm": 0.15242714358704063, "learning_rate": 3.505281803147207e-05, "loss": 0.5262, "num_tokens": 2760796965.0, "step": 3612 }, { "epoch": 1.323546924380525, "grad_norm": 0.1529640286069327, "learning_rate": 3.5049876689005824e-05, "loss": 0.4921, "num_tokens": 2761595111.0, "step": 3613 }, { "epoch": 1.3239133421884304, "grad_norm": 0.14457644806106454, "learning_rate": 3.504693461180549e-05, "loss": 0.5168, "num_tokens": 2762423196.0, "step": 3614 }, { "epoch": 1.3242797599963358, "grad_norm": 0.14565071347312136, "learning_rate": 3.504399180003674e-05, "loss": 0.4748, "num_tokens": 2763168746.0, "step": 3615 }, { "epoch": 1.3246461778042413, "grad_norm": 0.1604715875387356, "learning_rate": 3.5041048253865244e-05, "loss": 0.5495, "num_tokens": 2763966113.0, "step": 3616 }, { "epoch": 1.3250125956121468, "grad_norm": 0.14675982533902715, "learning_rate": 3.503810397345673e-05, "loss": 0.4825, "num_tokens": 2764699148.0, "step": 3617 }, { "epoch": 1.3253790134200523, "grad_norm": 0.1582165043378403, "learning_rate": 3.503515895897697e-05, "loss": 0.522, "num_tokens": 2765586357.0, "step": 3618 }, { "epoch": 1.3257454312279577, "grad_norm": 0.15332829773119946, "learning_rate": 3.5032213210591766e-05, "loss": 0.5157, "num_tokens": 2766289847.0, "step": 3619 }, { "epoch": 1.3261118490358632, "grad_norm": 0.15319383753614793, "learning_rate": 3.5029266728466983e-05, "loss": 0.4914, "num_tokens": 2767016187.0, "step": 3620 }, { "epoch": 1.3264782668437687, "grad_norm": 0.14944662615811352, "learning_rate": 3.502631951276852e-05, "loss": 0.5146, "num_tokens": 2767897031.0, "step": 3621 }, { "epoch": 1.326844684651674, "grad_norm": 0.16680888855662254, "learning_rate": 3.502337156366228e-05, "loss": 0.5327, "num_tokens": 2768614388.0, "step": 3622 }, { "epoch": 1.3272111024595796, "grad_norm": 0.13784208496780123, "learning_rate": 3.502042288131427e-05, "loss": 0.504, "num_tokens": 2769388997.0, "step": 3623 }, { "epoch": 1.3275775202674849, "grad_norm": 0.1630233592172565, "learning_rate": 3.501747346589049e-05, "loss": 0.511, "num_tokens": 2770022138.0, "step": 3624 }, { "epoch": 1.3279439380753906, "grad_norm": 0.1664290259151949, "learning_rate": 3.5014523317557e-05, "loss": 0.5333, "num_tokens": 2770687810.0, "step": 3625 }, { "epoch": 1.3283103558832958, "grad_norm": 0.14169420695398885, "learning_rate": 3.50115724364799e-05, "loss": 0.5167, "num_tokens": 2771452386.0, "step": 3626 }, { "epoch": 1.3286767736912015, "grad_norm": 0.19813441328748516, "learning_rate": 3.500862082282534e-05, "loss": 0.5539, "num_tokens": 2772189998.0, "step": 3627 }, { "epoch": 1.3290431914991068, "grad_norm": 0.14680208966207095, "learning_rate": 3.500566847675949e-05, "loss": 0.5143, "num_tokens": 2772965393.0, "step": 3628 }, { "epoch": 1.3294096093070122, "grad_norm": 0.155313175689423, "learning_rate": 3.500271539844857e-05, "loss": 0.4956, "num_tokens": 2773804863.0, "step": 3629 }, { "epoch": 1.3297760271149177, "grad_norm": 0.16499314459261197, "learning_rate": 3.499976158805886e-05, "loss": 0.5272, "num_tokens": 2774452632.0, "step": 3630 }, { "epoch": 1.3301424449228232, "grad_norm": 0.14911755482471142, "learning_rate": 3.499680704575665e-05, "loss": 0.5467, "num_tokens": 2775102736.0, "step": 3631 }, { "epoch": 1.3305088627307287, "grad_norm": 0.16436686561117927, "learning_rate": 3.49938517717083e-05, "loss": 0.5041, "num_tokens": 2775904605.0, "step": 3632 }, { "epoch": 1.3308752805386341, "grad_norm": 0.1579478693105057, "learning_rate": 3.499089576608019e-05, "loss": 0.4912, "num_tokens": 2776638156.0, "step": 3633 }, { "epoch": 1.3312416983465396, "grad_norm": 0.16977117243911316, "learning_rate": 3.498793902903874e-05, "loss": 0.5137, "num_tokens": 2777371227.0, "step": 3634 }, { "epoch": 1.331608116154445, "grad_norm": 0.16957466681112238, "learning_rate": 3.4984981560750434e-05, "loss": 0.5146, "num_tokens": 2778172703.0, "step": 3635 }, { "epoch": 1.3319745339623505, "grad_norm": 0.1374574524059484, "learning_rate": 3.498202336138178e-05, "loss": 0.5049, "num_tokens": 2778935413.0, "step": 3636 }, { "epoch": 1.332340951770256, "grad_norm": 0.17404191961238993, "learning_rate": 3.497906443109933e-05, "loss": 0.5389, "num_tokens": 2779561038.0, "step": 3637 }, { "epoch": 1.3327073695781615, "grad_norm": 0.14138131825895986, "learning_rate": 3.497610477006967e-05, "loss": 0.4952, "num_tokens": 2780235144.0, "step": 3638 }, { "epoch": 1.333073787386067, "grad_norm": 0.16905747724708436, "learning_rate": 3.4973144378459457e-05, "loss": 0.4895, "num_tokens": 2780963113.0, "step": 3639 }, { "epoch": 1.3334402051939724, "grad_norm": 0.13978958489486418, "learning_rate": 3.497018325643535e-05, "loss": 0.5231, "num_tokens": 2781760574.0, "step": 3640 }, { "epoch": 1.333806623001878, "grad_norm": 0.14186881173479146, "learning_rate": 3.496722140416406e-05, "loss": 0.5041, "num_tokens": 2782557020.0, "step": 3641 }, { "epoch": 1.3341730408097834, "grad_norm": 0.15135522742077082, "learning_rate": 3.496425882181236e-05, "loss": 0.501, "num_tokens": 2783395081.0, "step": 3642 }, { "epoch": 1.3345394586176889, "grad_norm": 0.15738525120140176, "learning_rate": 3.4961295509547035e-05, "loss": 0.5212, "num_tokens": 2784242382.0, "step": 3643 }, { "epoch": 1.3349058764255943, "grad_norm": 0.14963282849336898, "learning_rate": 3.495833146753494e-05, "loss": 0.4963, "num_tokens": 2785054643.0, "step": 3644 }, { "epoch": 1.3352722942334998, "grad_norm": 0.16869807703301037, "learning_rate": 3.4955366695942945e-05, "loss": 0.5228, "num_tokens": 2785844350.0, "step": 3645 }, { "epoch": 1.3356387120414053, "grad_norm": 0.15173169482249796, "learning_rate": 3.4952401194937976e-05, "loss": 0.4712, "num_tokens": 2786535659.0, "step": 3646 }, { "epoch": 1.3360051298493107, "grad_norm": 0.18860608850296642, "learning_rate": 3.4949434964687004e-05, "loss": 0.5098, "num_tokens": 2787225029.0, "step": 3647 }, { "epoch": 1.3363715476572162, "grad_norm": 0.15978662895907894, "learning_rate": 3.494646800535702e-05, "loss": 0.4966, "num_tokens": 2788013803.0, "step": 3648 }, { "epoch": 1.3367379654651215, "grad_norm": 0.1529531815382985, "learning_rate": 3.494350031711508e-05, "loss": 0.4993, "num_tokens": 2788861187.0, "step": 3649 }, { "epoch": 1.3371043832730272, "grad_norm": 0.16366809991766312, "learning_rate": 3.4940531900128266e-05, "loss": 0.4974, "num_tokens": 2789517003.0, "step": 3650 }, { "epoch": 1.3374708010809324, "grad_norm": 0.1605251059652733, "learning_rate": 3.4937562754563714e-05, "loss": 0.4863, "num_tokens": 2790181298.0, "step": 3651 }, { "epoch": 1.337837218888838, "grad_norm": 0.16293977673374666, "learning_rate": 3.4934592880588585e-05, "loss": 0.5175, "num_tokens": 2790872086.0, "step": 3652 }, { "epoch": 1.3382036366967434, "grad_norm": 0.14913455509802306, "learning_rate": 3.493162227837009e-05, "loss": 0.4734, "num_tokens": 2791607611.0, "step": 3653 }, { "epoch": 1.338570054504649, "grad_norm": 0.15604870380872926, "learning_rate": 3.492865094807547e-05, "loss": 0.5539, "num_tokens": 2792293175.0, "step": 3654 }, { "epoch": 1.3389364723125543, "grad_norm": 0.14898855021591773, "learning_rate": 3.492567888987204e-05, "loss": 0.476, "num_tokens": 2792998302.0, "step": 3655 }, { "epoch": 1.3393028901204598, "grad_norm": 0.16423411801940066, "learning_rate": 3.492270610392712e-05, "loss": 0.5027, "num_tokens": 2793747383.0, "step": 3656 }, { "epoch": 1.3396693079283652, "grad_norm": 0.15353650597067156, "learning_rate": 3.491973259040808e-05, "loss": 0.5013, "num_tokens": 2794493306.0, "step": 3657 }, { "epoch": 1.3400357257362707, "grad_norm": 0.14806189640458364, "learning_rate": 3.4916758349482334e-05, "loss": 0.5134, "num_tokens": 2795341578.0, "step": 3658 }, { "epoch": 1.3404021435441762, "grad_norm": 0.14761171979698617, "learning_rate": 3.4913783381317344e-05, "loss": 0.4907, "num_tokens": 2796093831.0, "step": 3659 }, { "epoch": 1.3407685613520817, "grad_norm": 0.14920511221068777, "learning_rate": 3.4910807686080604e-05, "loss": 0.505, "num_tokens": 2796937733.0, "step": 3660 }, { "epoch": 1.3411349791599871, "grad_norm": 0.15692885642532223, "learning_rate": 3.490783126393965e-05, "loss": 0.5123, "num_tokens": 2797675752.0, "step": 3661 }, { "epoch": 1.3415013969678926, "grad_norm": 0.14593405759154401, "learning_rate": 3.4904854115062066e-05, "loss": 0.4941, "num_tokens": 2798469030.0, "step": 3662 }, { "epoch": 1.341867814775798, "grad_norm": 0.17034112131927406, "learning_rate": 3.490187623961547e-05, "loss": 0.5248, "num_tokens": 2799212197.0, "step": 3663 }, { "epoch": 1.3422342325837036, "grad_norm": 0.1655869149824736, "learning_rate": 3.489889763776752e-05, "loss": 0.5142, "num_tokens": 2799965763.0, "step": 3664 }, { "epoch": 1.342600650391609, "grad_norm": 0.16015688913412932, "learning_rate": 3.489591830968592e-05, "loss": 0.5048, "num_tokens": 2800738830.0, "step": 3665 }, { "epoch": 1.3429670681995145, "grad_norm": 0.13845299494072896, "learning_rate": 3.4892938255538405e-05, "loss": 0.4942, "num_tokens": 2801561033.0, "step": 3666 }, { "epoch": 1.34333348600742, "grad_norm": 0.14796176357102175, "learning_rate": 3.488995747549276e-05, "loss": 0.481, "num_tokens": 2802414694.0, "step": 3667 }, { "epoch": 1.3436999038153254, "grad_norm": 0.14823312654135148, "learning_rate": 3.488697596971682e-05, "loss": 0.489, "num_tokens": 2803152188.0, "step": 3668 }, { "epoch": 1.344066321623231, "grad_norm": 0.142370207091505, "learning_rate": 3.4883993738378435e-05, "loss": 0.4946, "num_tokens": 2803939739.0, "step": 3669 }, { "epoch": 1.3444327394311364, "grad_norm": 0.17296833992211796, "learning_rate": 3.488101078164552e-05, "loss": 0.4983, "num_tokens": 2804651334.0, "step": 3670 }, { "epoch": 1.3447991572390419, "grad_norm": 0.15373534021392732, "learning_rate": 3.4878027099686014e-05, "loss": 0.5262, "num_tokens": 2805464393.0, "step": 3671 }, { "epoch": 1.3451655750469473, "grad_norm": 0.1597897529779758, "learning_rate": 3.487504269266791e-05, "loss": 0.5251, "num_tokens": 2806259909.0, "step": 3672 }, { "epoch": 1.3455319928548528, "grad_norm": 0.1547344659435027, "learning_rate": 3.487205756075924e-05, "loss": 0.4989, "num_tokens": 2806993024.0, "step": 3673 }, { "epoch": 1.3458984106627583, "grad_norm": 0.16521426805407133, "learning_rate": 3.486907170412806e-05, "loss": 0.5443, "num_tokens": 2807611913.0, "step": 3674 }, { "epoch": 1.3462648284706638, "grad_norm": 0.15216380388871886, "learning_rate": 3.4866085122942496e-05, "loss": 0.5005, "num_tokens": 2808390073.0, "step": 3675 }, { "epoch": 1.346631246278569, "grad_norm": 0.14471037988916488, "learning_rate": 3.486309781737069e-05, "loss": 0.4851, "num_tokens": 2809194035.0, "step": 3676 }, { "epoch": 1.3469976640864747, "grad_norm": 0.14111579287534748, "learning_rate": 3.486010978758082e-05, "loss": 0.4786, "num_tokens": 2810090252.0, "step": 3677 }, { "epoch": 1.34736408189438, "grad_norm": 0.14987654753828472, "learning_rate": 3.485712103374114e-05, "loss": 0.4905, "num_tokens": 2810929929.0, "step": 3678 }, { "epoch": 1.3477304997022856, "grad_norm": 0.14058136255615525, "learning_rate": 3.485413155601991e-05, "loss": 0.5025, "num_tokens": 2811688192.0, "step": 3679 }, { "epoch": 1.348096917510191, "grad_norm": 0.1699540898035099, "learning_rate": 3.485114135458545e-05, "loss": 0.526, "num_tokens": 2812518554.0, "step": 3680 }, { "epoch": 1.3484633353180966, "grad_norm": 0.14337024467063048, "learning_rate": 3.484815042960611e-05, "loss": 0.5089, "num_tokens": 2813270023.0, "step": 3681 }, { "epoch": 1.3488297531260018, "grad_norm": 0.16640726013524898, "learning_rate": 3.484515878125029e-05, "loss": 0.5597, "num_tokens": 2813933611.0, "step": 3682 }, { "epoch": 1.3491961709339073, "grad_norm": 0.14889780739215738, "learning_rate": 3.4842166409686415e-05, "loss": 0.4725, "num_tokens": 2814705748.0, "step": 3683 }, { "epoch": 1.3495625887418128, "grad_norm": 0.15925276490289353, "learning_rate": 3.483917331508298e-05, "loss": 0.5218, "num_tokens": 2815591740.0, "step": 3684 }, { "epoch": 1.3499290065497183, "grad_norm": 0.15654555538453327, "learning_rate": 3.4836179497608485e-05, "loss": 0.4949, "num_tokens": 2816309920.0, "step": 3685 }, { "epoch": 1.3502954243576237, "grad_norm": 0.15144778476777793, "learning_rate": 3.483318495743149e-05, "loss": 0.506, "num_tokens": 2817158165.0, "step": 3686 }, { "epoch": 1.3506618421655292, "grad_norm": 0.15343956903680914, "learning_rate": 3.483018969472061e-05, "loss": 0.4894, "num_tokens": 2817879233.0, "step": 3687 }, { "epoch": 1.3510282599734347, "grad_norm": 0.17009170288248854, "learning_rate": 3.482719370964445e-05, "loss": 0.5228, "num_tokens": 2818679505.0, "step": 3688 }, { "epoch": 1.3513946777813401, "grad_norm": 0.15788643516530476, "learning_rate": 3.482419700237173e-05, "loss": 0.5033, "num_tokens": 2819516850.0, "step": 3689 }, { "epoch": 1.3517610955892456, "grad_norm": 0.14521860576325415, "learning_rate": 3.482119957307115e-05, "loss": 0.4731, "num_tokens": 2820458409.0, "step": 3690 }, { "epoch": 1.352127513397151, "grad_norm": 0.1618633504958628, "learning_rate": 3.481820142191147e-05, "loss": 0.5336, "num_tokens": 2821160777.0, "step": 3691 }, { "epoch": 1.3524939312050566, "grad_norm": 0.16839933452489783, "learning_rate": 3.48152025490615e-05, "loss": 0.5197, "num_tokens": 2821941868.0, "step": 3692 }, { "epoch": 1.352860349012962, "grad_norm": 0.166424450909004, "learning_rate": 3.4812202954690066e-05, "loss": 0.4935, "num_tokens": 2822785355.0, "step": 3693 }, { "epoch": 1.3532267668208675, "grad_norm": 0.17942091567752852, "learning_rate": 3.480920263896608e-05, "loss": 0.5487, "num_tokens": 2823478847.0, "step": 3694 }, { "epoch": 1.353593184628773, "grad_norm": 0.14793948443994662, "learning_rate": 3.480620160205844e-05, "loss": 0.4802, "num_tokens": 2824293315.0, "step": 3695 }, { "epoch": 1.3539596024366785, "grad_norm": 0.14788325528354754, "learning_rate": 3.480319984413612e-05, "loss": 0.5018, "num_tokens": 2825148850.0, "step": 3696 }, { "epoch": 1.354326020244584, "grad_norm": 0.1637618716025276, "learning_rate": 3.480019736536812e-05, "loss": 0.5091, "num_tokens": 2825943055.0, "step": 3697 }, { "epoch": 1.3546924380524894, "grad_norm": 0.13934099094519706, "learning_rate": 3.479719416592349e-05, "loss": 0.5052, "num_tokens": 2826737903.0, "step": 3698 }, { "epoch": 1.3550588558603949, "grad_norm": 0.16988292176533723, "learning_rate": 3.4794190245971326e-05, "loss": 0.5067, "num_tokens": 2827533806.0, "step": 3699 }, { "epoch": 1.3554252736683003, "grad_norm": 0.16960703960703546, "learning_rate": 3.479118560568073e-05, "loss": 0.5369, "num_tokens": 2828197664.0, "step": 3700 }, { "epoch": 1.3557916914762058, "grad_norm": 0.15917645062787317, "learning_rate": 3.47881802452209e-05, "loss": 0.5231, "num_tokens": 2828865079.0, "step": 3701 }, { "epoch": 1.3561581092841113, "grad_norm": 0.17713808105616374, "learning_rate": 3.4785174164761016e-05, "loss": 0.5069, "num_tokens": 2829622008.0, "step": 3702 }, { "epoch": 1.3565245270920165, "grad_norm": 0.18364204089639233, "learning_rate": 3.478216736447034e-05, "loss": 0.5066, "num_tokens": 2830352794.0, "step": 3703 }, { "epoch": 1.3568909448999222, "grad_norm": 0.13428895247888645, "learning_rate": 3.477915984451816e-05, "loss": 0.4616, "num_tokens": 2831142491.0, "step": 3704 }, { "epoch": 1.3572573627078275, "grad_norm": 0.21291506937397223, "learning_rate": 3.4776151605073805e-05, "loss": 0.5167, "num_tokens": 2831833685.0, "step": 3705 }, { "epoch": 1.3576237805157332, "grad_norm": 0.16145478726033555, "learning_rate": 3.477314264630664e-05, "loss": 0.5174, "num_tokens": 2832505205.0, "step": 3706 }, { "epoch": 1.3579901983236384, "grad_norm": 0.1541073671451682, "learning_rate": 3.4770132968386086e-05, "loss": 0.5041, "num_tokens": 2833328397.0, "step": 3707 }, { "epoch": 1.3583566161315441, "grad_norm": 0.17629602569798278, "learning_rate": 3.4767122571481584e-05, "loss": 0.5149, "num_tokens": 2834043067.0, "step": 3708 }, { "epoch": 1.3587230339394494, "grad_norm": 0.167716465738794, "learning_rate": 3.476411145576263e-05, "loss": 0.4766, "num_tokens": 2834821779.0, "step": 3709 }, { "epoch": 1.3590894517473548, "grad_norm": 0.15153318369486027, "learning_rate": 3.476109962139875e-05, "loss": 0.5007, "num_tokens": 2835488240.0, "step": 3710 }, { "epoch": 1.3594558695552603, "grad_norm": 0.17451481728163634, "learning_rate": 3.4758087068559524e-05, "loss": 0.5278, "num_tokens": 2836167626.0, "step": 3711 }, { "epoch": 1.3598222873631658, "grad_norm": 0.14723566299173654, "learning_rate": 3.4755073797414565e-05, "loss": 0.4774, "num_tokens": 2837059332.0, "step": 3712 }, { "epoch": 1.3601887051710713, "grad_norm": 0.17063352480764601, "learning_rate": 3.475205980813352e-05, "loss": 0.5511, "num_tokens": 2837763271.0, "step": 3713 }, { "epoch": 1.3605551229789767, "grad_norm": 0.16374136225924757, "learning_rate": 3.474904510088608e-05, "loss": 0.5052, "num_tokens": 2838476714.0, "step": 3714 }, { "epoch": 1.3609215407868822, "grad_norm": 0.15889024224497697, "learning_rate": 3.4746029675841986e-05, "loss": 0.4999, "num_tokens": 2839243544.0, "step": 3715 }, { "epoch": 1.3612879585947877, "grad_norm": 0.15888212645727923, "learning_rate": 3.474301353317102e-05, "loss": 0.5323, "num_tokens": 2839988132.0, "step": 3716 }, { "epoch": 1.3616543764026932, "grad_norm": 0.14984688080153005, "learning_rate": 3.473999667304297e-05, "loss": 0.4927, "num_tokens": 2840765368.0, "step": 3717 }, { "epoch": 1.3620207942105986, "grad_norm": 0.14910210553068792, "learning_rate": 3.473697909562772e-05, "loss": 0.5131, "num_tokens": 2841602499.0, "step": 3718 }, { "epoch": 1.362387212018504, "grad_norm": 0.18332340850379658, "learning_rate": 3.473396080109515e-05, "loss": 0.5068, "num_tokens": 2842385256.0, "step": 3719 }, { "epoch": 1.3627536298264096, "grad_norm": 0.1838660991265003, "learning_rate": 3.473094178961521e-05, "loss": 0.4736, "num_tokens": 2843055216.0, "step": 3720 }, { "epoch": 1.363120047634315, "grad_norm": 0.14083006352014812, "learning_rate": 3.472792206135786e-05, "loss": 0.471, "num_tokens": 2843871603.0, "step": 3721 }, { "epoch": 1.3634864654422205, "grad_norm": 0.17547935944909485, "learning_rate": 3.472490161649312e-05, "loss": 0.5222, "num_tokens": 2844610104.0, "step": 3722 }, { "epoch": 1.363852883250126, "grad_norm": 0.14449310180635325, "learning_rate": 3.4721880455191056e-05, "loss": 0.5266, "num_tokens": 2845378805.0, "step": 3723 }, { "epoch": 1.3642193010580315, "grad_norm": 0.21831038387385973, "learning_rate": 3.4718858577621754e-05, "loss": 0.5007, "num_tokens": 2846033927.0, "step": 3724 }, { "epoch": 1.364585718865937, "grad_norm": 0.14474544342536977, "learning_rate": 3.471583598395536e-05, "loss": 0.522, "num_tokens": 2846783926.0, "step": 3725 }, { "epoch": 1.3649521366738424, "grad_norm": 0.16807398294380757, "learning_rate": 3.471281267436206e-05, "loss": 0.4689, "num_tokens": 2847587328.0, "step": 3726 }, { "epoch": 1.3653185544817479, "grad_norm": 0.1651280130636703, "learning_rate": 3.470978864901205e-05, "loss": 0.5022, "num_tokens": 2848398297.0, "step": 3727 }, { "epoch": 1.3656849722896534, "grad_norm": 0.15194869296340877, "learning_rate": 3.4706763908075606e-05, "loss": 0.498, "num_tokens": 2849167414.0, "step": 3728 }, { "epoch": 1.3660513900975588, "grad_norm": 0.1578002624767528, "learning_rate": 3.470373845172302e-05, "loss": 0.5273, "num_tokens": 2850028870.0, "step": 3729 }, { "epoch": 1.366417807905464, "grad_norm": 0.1471267638531427, "learning_rate": 3.4700712280124635e-05, "loss": 0.5062, "num_tokens": 2850997924.0, "step": 3730 }, { "epoch": 1.3667842257133698, "grad_norm": 0.1631899248505247, "learning_rate": 3.469768539345082e-05, "loss": 0.4959, "num_tokens": 2851712552.0, "step": 3731 }, { "epoch": 1.367150643521275, "grad_norm": 0.14924493173135764, "learning_rate": 3.469465779187201e-05, "loss": 0.4904, "num_tokens": 2852618891.0, "step": 3732 }, { "epoch": 1.3675170613291807, "grad_norm": 0.15295919743304948, "learning_rate": 3.469162947555866e-05, "loss": 0.5103, "num_tokens": 2853432125.0, "step": 3733 }, { "epoch": 1.367883479137086, "grad_norm": 0.17541479125562265, "learning_rate": 3.4688600444681264e-05, "loss": 0.4831, "num_tokens": 2854233146.0, "step": 3734 }, { "epoch": 1.3682498969449917, "grad_norm": 0.1369515221971352, "learning_rate": 3.4685570699410375e-05, "loss": 0.4706, "num_tokens": 2855048276.0, "step": 3735 }, { "epoch": 1.368616314752897, "grad_norm": 0.19110410138000838, "learning_rate": 3.4682540239916554e-05, "loss": 0.5126, "num_tokens": 2855726126.0, "step": 3736 }, { "epoch": 1.3689827325608024, "grad_norm": 0.14209123475787933, "learning_rate": 3.4679509066370436e-05, "loss": 0.5019, "num_tokens": 2856474366.0, "step": 3737 }, { "epoch": 1.3693491503687079, "grad_norm": 0.1784897897861061, "learning_rate": 3.4676477178942686e-05, "loss": 0.5151, "num_tokens": 2857253702.0, "step": 3738 }, { "epoch": 1.3697155681766133, "grad_norm": 0.15528841356486217, "learning_rate": 3.467344457780399e-05, "loss": 0.4939, "num_tokens": 2858122193.0, "step": 3739 }, { "epoch": 1.3700819859845188, "grad_norm": 0.14865490973882511, "learning_rate": 3.467041126312511e-05, "loss": 0.5134, "num_tokens": 2858890994.0, "step": 3740 }, { "epoch": 1.3704484037924243, "grad_norm": 0.1768548605155422, "learning_rate": 3.4667377235076806e-05, "loss": 0.5187, "num_tokens": 2859602284.0, "step": 3741 }, { "epoch": 1.3708148216003297, "grad_norm": 0.15426049462058064, "learning_rate": 3.466434249382991e-05, "loss": 0.4962, "num_tokens": 2860299236.0, "step": 3742 }, { "epoch": 1.3711812394082352, "grad_norm": 0.16275711064944978, "learning_rate": 3.466130703955529e-05, "loss": 0.5297, "num_tokens": 2861142205.0, "step": 3743 }, { "epoch": 1.3715476572161407, "grad_norm": 0.1551777491041445, "learning_rate": 3.465827087242384e-05, "loss": 0.5263, "num_tokens": 2861898282.0, "step": 3744 }, { "epoch": 1.3719140750240462, "grad_norm": 0.12883400657176286, "learning_rate": 3.4655233992606506e-05, "loss": 0.4881, "num_tokens": 2862732333.0, "step": 3745 }, { "epoch": 1.3722804928319516, "grad_norm": 0.14772760440965854, "learning_rate": 3.4652196400274265e-05, "loss": 0.5056, "num_tokens": 2863514791.0, "step": 3746 }, { "epoch": 1.372646910639857, "grad_norm": 0.13197758647495755, "learning_rate": 3.464915809559815e-05, "loss": 0.5417, "num_tokens": 2864317138.0, "step": 3747 }, { "epoch": 1.3730133284477626, "grad_norm": 0.15100594891163596, "learning_rate": 3.464611907874921e-05, "loss": 0.5291, "num_tokens": 2865067056.0, "step": 3748 }, { "epoch": 1.373379746255668, "grad_norm": 0.1361443565567257, "learning_rate": 3.464307934989855e-05, "loss": 0.4816, "num_tokens": 2865833071.0, "step": 3749 }, { "epoch": 1.3737461640635735, "grad_norm": 0.13921588860293607, "learning_rate": 3.4640038909217327e-05, "loss": 0.4726, "num_tokens": 2866492463.0, "step": 3750 }, { "epoch": 1.374112581871479, "grad_norm": 0.1356732760893563, "learning_rate": 3.4636997756876704e-05, "loss": 0.5085, "num_tokens": 2867251943.0, "step": 3751 }, { "epoch": 1.3744789996793845, "grad_norm": 0.1385868916997724, "learning_rate": 3.463395589304792e-05, "loss": 0.5023, "num_tokens": 2868014370.0, "step": 3752 }, { "epoch": 1.37484541748729, "grad_norm": 0.13954443828360355, "learning_rate": 3.463091331790224e-05, "loss": 0.5329, "num_tokens": 2868761797.0, "step": 3753 }, { "epoch": 1.3752118352951954, "grad_norm": 0.16498072289964685, "learning_rate": 3.462787003161095e-05, "loss": 0.4862, "num_tokens": 2869634651.0, "step": 3754 }, { "epoch": 1.375578253103101, "grad_norm": 0.13830161737255126, "learning_rate": 3.46248260343454e-05, "loss": 0.5316, "num_tokens": 2870464488.0, "step": 3755 }, { "epoch": 1.3759446709110064, "grad_norm": 0.1583606730965047, "learning_rate": 3.462178132627698e-05, "loss": 0.4902, "num_tokens": 2871270602.0, "step": 3756 }, { "epoch": 1.3763110887189116, "grad_norm": 0.1364581519573788, "learning_rate": 3.4618735907577104e-05, "loss": 0.4902, "num_tokens": 2872177604.0, "step": 3757 }, { "epoch": 1.3766775065268173, "grad_norm": 0.14798947829908934, "learning_rate": 3.4615689778417245e-05, "loss": 0.5037, "num_tokens": 2872955810.0, "step": 3758 }, { "epoch": 1.3770439243347226, "grad_norm": 0.14768278089100675, "learning_rate": 3.4612642938968897e-05, "loss": 0.5392, "num_tokens": 2873564983.0, "step": 3759 }, { "epoch": 1.3774103421426283, "grad_norm": 0.14763577163635153, "learning_rate": 3.460959538940361e-05, "loss": 0.5136, "num_tokens": 2874238822.0, "step": 3760 }, { "epoch": 1.3777767599505335, "grad_norm": 0.15055713874427504, "learning_rate": 3.460654712989297e-05, "loss": 0.493, "num_tokens": 2874911299.0, "step": 3761 }, { "epoch": 1.3781431777584392, "grad_norm": 0.1482281961770041, "learning_rate": 3.4603498160608576e-05, "loss": 0.5022, "num_tokens": 2875630044.0, "step": 3762 }, { "epoch": 1.3785095955663444, "grad_norm": 0.1568354016272335, "learning_rate": 3.4600448481722124e-05, "loss": 0.5106, "num_tokens": 2876341810.0, "step": 3763 }, { "epoch": 1.37887601337425, "grad_norm": 0.13901497773842628, "learning_rate": 3.459739809340529e-05, "loss": 0.4798, "num_tokens": 2877141707.0, "step": 3764 }, { "epoch": 1.3792424311821554, "grad_norm": 0.14951970627568412, "learning_rate": 3.4594346995829844e-05, "loss": 0.4821, "num_tokens": 2877922696.0, "step": 3765 }, { "epoch": 1.3796088489900609, "grad_norm": 0.14207448889802682, "learning_rate": 3.4591295189167545e-05, "loss": 0.5063, "num_tokens": 2878721376.0, "step": 3766 }, { "epoch": 1.3799752667979663, "grad_norm": 0.17152821395491513, "learning_rate": 3.458824267359022e-05, "loss": 0.5634, "num_tokens": 2879463030.0, "step": 3767 }, { "epoch": 1.3803416846058718, "grad_norm": 0.14630265153461036, "learning_rate": 3.458518944926975e-05, "loss": 0.5187, "num_tokens": 2880284820.0, "step": 3768 }, { "epoch": 1.3807081024137773, "grad_norm": 0.160061081264409, "learning_rate": 3.4582135516378015e-05, "loss": 0.4798, "num_tokens": 2881212999.0, "step": 3769 }, { "epoch": 1.3810745202216828, "grad_norm": 0.14527406537221668, "learning_rate": 3.457908087508697e-05, "loss": 0.514, "num_tokens": 2881961631.0, "step": 3770 }, { "epoch": 1.3814409380295882, "grad_norm": 0.16944562387092652, "learning_rate": 3.457602552556859e-05, "loss": 0.5111, "num_tokens": 2882717873.0, "step": 3771 }, { "epoch": 1.3818073558374937, "grad_norm": 0.16708762366326477, "learning_rate": 3.45729694679949e-05, "loss": 0.4941, "num_tokens": 2883471015.0, "step": 3772 }, { "epoch": 1.3821737736453992, "grad_norm": 0.14508019267615319, "learning_rate": 3.456991270253797e-05, "loss": 0.5187, "num_tokens": 2884387021.0, "step": 3773 }, { "epoch": 1.3825401914533046, "grad_norm": 0.15137733002010834, "learning_rate": 3.4566855229369895e-05, "loss": 0.4913, "num_tokens": 2885164347.0, "step": 3774 }, { "epoch": 1.3829066092612101, "grad_norm": 0.1632264907527016, "learning_rate": 3.456379704866281e-05, "loss": 0.4705, "num_tokens": 2885915048.0, "step": 3775 }, { "epoch": 1.3832730270691156, "grad_norm": 0.13007855431694565, "learning_rate": 3.456073816058891e-05, "loss": 0.4824, "num_tokens": 2886655780.0, "step": 3776 }, { "epoch": 1.383639444877021, "grad_norm": 0.16467355535563696, "learning_rate": 3.455767856532041e-05, "loss": 0.478, "num_tokens": 2887385282.0, "step": 3777 }, { "epoch": 1.3840058626849265, "grad_norm": 0.17179646109442456, "learning_rate": 3.455461826302957e-05, "loss": 0.5181, "num_tokens": 2888178437.0, "step": 3778 }, { "epoch": 1.384372280492832, "grad_norm": 0.14582228307213949, "learning_rate": 3.455155725388869e-05, "loss": 0.4954, "num_tokens": 2889062261.0, "step": 3779 }, { "epoch": 1.3847386983007375, "grad_norm": 0.16960723934252142, "learning_rate": 3.454849553807013e-05, "loss": 0.5142, "num_tokens": 2889686597.0, "step": 3780 }, { "epoch": 1.385105116108643, "grad_norm": 0.1631773511548311, "learning_rate": 3.454543311574623e-05, "loss": 0.5131, "num_tokens": 2890531728.0, "step": 3781 }, { "epoch": 1.3854715339165484, "grad_norm": 0.14597887737233942, "learning_rate": 3.4542369987089444e-05, "loss": 0.4812, "num_tokens": 2891300754.0, "step": 3782 }, { "epoch": 1.385837951724454, "grad_norm": 0.15721918376049798, "learning_rate": 3.453930615227223e-05, "loss": 0.5118, "num_tokens": 2891995001.0, "step": 3783 }, { "epoch": 1.3862043695323591, "grad_norm": 0.15951625448171275, "learning_rate": 3.4536241611467076e-05, "loss": 0.4869, "num_tokens": 2892707326.0, "step": 3784 }, { "epoch": 1.3865707873402648, "grad_norm": 0.15957041648665193, "learning_rate": 3.453317636484653e-05, "loss": 0.5368, "num_tokens": 2893429536.0, "step": 3785 }, { "epoch": 1.38693720514817, "grad_norm": 0.16951270655138653, "learning_rate": 3.4530110412583166e-05, "loss": 0.5322, "num_tokens": 2894178757.0, "step": 3786 }, { "epoch": 1.3873036229560758, "grad_norm": 0.14323651167487944, "learning_rate": 3.45270437548496e-05, "loss": 0.4756, "num_tokens": 2894996498.0, "step": 3787 }, { "epoch": 1.387670040763981, "grad_norm": 0.1544380546024527, "learning_rate": 3.4523976391818506e-05, "loss": 0.5029, "num_tokens": 2895843171.0, "step": 3788 }, { "epoch": 1.3880364585718865, "grad_norm": 0.15883389941424073, "learning_rate": 3.452090832366257e-05, "loss": 0.4876, "num_tokens": 2896555478.0, "step": 3789 }, { "epoch": 1.388402876379792, "grad_norm": 0.15678801932208372, "learning_rate": 3.451783955055453e-05, "loss": 0.5006, "num_tokens": 2897396217.0, "step": 3790 }, { "epoch": 1.3887692941876975, "grad_norm": 0.19278583274597216, "learning_rate": 3.451477007266717e-05, "loss": 0.5191, "num_tokens": 2898192747.0, "step": 3791 }, { "epoch": 1.389135711995603, "grad_norm": 0.1546217562364059, "learning_rate": 3.4511699890173305e-05, "loss": 0.5264, "num_tokens": 2898882258.0, "step": 3792 }, { "epoch": 1.3895021298035084, "grad_norm": 0.15572032692913756, "learning_rate": 3.450862900324579e-05, "loss": 0.5276, "num_tokens": 2899608016.0, "step": 3793 }, { "epoch": 1.3898685476114139, "grad_norm": 0.1687595879990906, "learning_rate": 3.4505557412057534e-05, "loss": 0.5112, "num_tokens": 2900323287.0, "step": 3794 }, { "epoch": 1.3902349654193193, "grad_norm": 0.1554684879368446, "learning_rate": 3.450248511678146e-05, "loss": 0.504, "num_tokens": 2901101198.0, "step": 3795 }, { "epoch": 1.3906013832272248, "grad_norm": 0.14128807757713663, "learning_rate": 3.449941211759055e-05, "loss": 0.4868, "num_tokens": 2902026830.0, "step": 3796 }, { "epoch": 1.3909678010351303, "grad_norm": 0.13475238246930857, "learning_rate": 3.449633841465782e-05, "loss": 0.4967, "num_tokens": 2902856563.0, "step": 3797 }, { "epoch": 1.3913342188430358, "grad_norm": 0.13634881060132759, "learning_rate": 3.4493264008156316e-05, "loss": 0.5024, "num_tokens": 2903715821.0, "step": 3798 }, { "epoch": 1.3917006366509412, "grad_norm": 0.17333363141505242, "learning_rate": 3.4490188898259146e-05, "loss": 0.5158, "num_tokens": 2904530333.0, "step": 3799 }, { "epoch": 1.3920670544588467, "grad_norm": 0.14452210243072686, "learning_rate": 3.448711308513944e-05, "loss": 0.4674, "num_tokens": 2905204466.0, "step": 3800 }, { "epoch": 1.3924334722667522, "grad_norm": 0.14011699165628727, "learning_rate": 3.4484036568970376e-05, "loss": 0.4685, "num_tokens": 2905936173.0, "step": 3801 }, { "epoch": 1.3927998900746577, "grad_norm": 0.14840840763243543, "learning_rate": 3.448095934992517e-05, "loss": 0.4947, "num_tokens": 2906696286.0, "step": 3802 }, { "epoch": 1.3931663078825631, "grad_norm": 0.16603503991764845, "learning_rate": 3.447788142817706e-05, "loss": 0.5002, "num_tokens": 2907502183.0, "step": 3803 }, { "epoch": 1.3935327256904686, "grad_norm": 0.13822391423752894, "learning_rate": 3.447480280389935e-05, "loss": 0.5391, "num_tokens": 2908254575.0, "step": 3804 }, { "epoch": 1.393899143498374, "grad_norm": 0.15568374113155695, "learning_rate": 3.4471723477265376e-05, "loss": 0.5028, "num_tokens": 2908982834.0, "step": 3805 }, { "epoch": 1.3942655613062795, "grad_norm": 0.16238845430118864, "learning_rate": 3.4468643448448504e-05, "loss": 0.4788, "num_tokens": 2909805876.0, "step": 3806 }, { "epoch": 1.394631979114185, "grad_norm": 0.1439730296351035, "learning_rate": 3.446556271762215e-05, "loss": 0.554, "num_tokens": 2910478352.0, "step": 3807 }, { "epoch": 1.3949983969220905, "grad_norm": 0.15960157290780558, "learning_rate": 3.4462481284959765e-05, "loss": 0.521, "num_tokens": 2911240719.0, "step": 3808 }, { "epoch": 1.3953648147299957, "grad_norm": 0.15323448679123447, "learning_rate": 3.4459399150634836e-05, "loss": 0.5096, "num_tokens": 2912008617.0, "step": 3809 }, { "epoch": 1.3957312325379014, "grad_norm": 0.15561447434675976, "learning_rate": 3.44563163148209e-05, "loss": 0.5342, "num_tokens": 2912738777.0, "step": 3810 }, { "epoch": 1.3960976503458067, "grad_norm": 0.14346992583196705, "learning_rate": 3.4453232777691515e-05, "loss": 0.4843, "num_tokens": 2913661538.0, "step": 3811 }, { "epoch": 1.3964640681537124, "grad_norm": 0.15836515559010586, "learning_rate": 3.445014853942031e-05, "loss": 0.5291, "num_tokens": 2914373670.0, "step": 3812 }, { "epoch": 1.3968304859616176, "grad_norm": 0.1410016479358086, "learning_rate": 3.4447063600180905e-05, "loss": 0.5129, "num_tokens": 2915211367.0, "step": 3813 }, { "epoch": 1.3971969037695233, "grad_norm": 0.14782231812932964, "learning_rate": 3.4443977960147016e-05, "loss": 0.5191, "num_tokens": 2915953655.0, "step": 3814 }, { "epoch": 1.3975633215774286, "grad_norm": 0.1534095238751017, "learning_rate": 3.444089161949236e-05, "loss": 0.5393, "num_tokens": 2916615850.0, "step": 3815 }, { "epoch": 1.397929739385334, "grad_norm": 0.16248165572649145, "learning_rate": 3.44378045783907e-05, "loss": 0.5217, "num_tokens": 2917388373.0, "step": 3816 }, { "epoch": 1.3982961571932395, "grad_norm": 0.13310810279131272, "learning_rate": 3.4434716837015835e-05, "loss": 0.4984, "num_tokens": 2918279305.0, "step": 3817 }, { "epoch": 1.398662575001145, "grad_norm": 0.15103277938376009, "learning_rate": 3.4431628395541646e-05, "loss": 0.5025, "num_tokens": 2919049390.0, "step": 3818 }, { "epoch": 1.3990289928090505, "grad_norm": 0.1339821830581923, "learning_rate": 3.442853925414198e-05, "loss": 0.4912, "num_tokens": 2919839952.0, "step": 3819 }, { "epoch": 1.399395410616956, "grad_norm": 0.15412561192059762, "learning_rate": 3.442544941299077e-05, "loss": 0.54, "num_tokens": 2920423136.0, "step": 3820 }, { "epoch": 1.3997618284248614, "grad_norm": 0.14397077025422167, "learning_rate": 3.4422358872262e-05, "loss": 0.5061, "num_tokens": 2921290883.0, "step": 3821 }, { "epoch": 1.4001282462327669, "grad_norm": 0.13479046840501607, "learning_rate": 3.441926763212966e-05, "loss": 0.496, "num_tokens": 2922019982.0, "step": 3822 }, { "epoch": 1.4004946640406724, "grad_norm": 0.15598165750820664, "learning_rate": 3.4416175692767786e-05, "loss": 0.4911, "num_tokens": 2922703854.0, "step": 3823 }, { "epoch": 1.4008610818485778, "grad_norm": 0.14720829269742783, "learning_rate": 3.441308305435047e-05, "loss": 0.5213, "num_tokens": 2923483275.0, "step": 3824 }, { "epoch": 1.4012274996564833, "grad_norm": 0.1400735272318251, "learning_rate": 3.4409989717051826e-05, "loss": 0.4807, "num_tokens": 2924248135.0, "step": 3825 }, { "epoch": 1.4015939174643888, "grad_norm": 0.1576233149508766, "learning_rate": 3.4406895681046023e-05, "loss": 0.516, "num_tokens": 2924996359.0, "step": 3826 }, { "epoch": 1.4019603352722942, "grad_norm": 0.158799956067262, "learning_rate": 3.440380094650727e-05, "loss": 0.517, "num_tokens": 2925722228.0, "step": 3827 }, { "epoch": 1.4023267530801997, "grad_norm": 0.1526433887108333, "learning_rate": 3.440070551360978e-05, "loss": 0.5086, "num_tokens": 2926640271.0, "step": 3828 }, { "epoch": 1.4026931708881052, "grad_norm": 0.15645586212929635, "learning_rate": 3.4397609382527853e-05, "loss": 0.5046, "num_tokens": 2927458321.0, "step": 3829 }, { "epoch": 1.4030595886960107, "grad_norm": 0.1548227082805302, "learning_rate": 3.439451255343581e-05, "loss": 0.5035, "num_tokens": 2928181735.0, "step": 3830 }, { "epoch": 1.4034260065039161, "grad_norm": 0.1550717941505594, "learning_rate": 3.439141502650799e-05, "loss": 0.5097, "num_tokens": 2928950342.0, "step": 3831 }, { "epoch": 1.4037924243118216, "grad_norm": 0.15463880483411843, "learning_rate": 3.43883168019188e-05, "loss": 0.4567, "num_tokens": 2929661787.0, "step": 3832 }, { "epoch": 1.404158842119727, "grad_norm": 0.1437059397587305, "learning_rate": 3.4385217879842684e-05, "loss": 0.4948, "num_tokens": 2930518169.0, "step": 3833 }, { "epoch": 1.4045252599276326, "grad_norm": 0.16154987869885426, "learning_rate": 3.43821182604541e-05, "loss": 0.5329, "num_tokens": 2931237498.0, "step": 3834 }, { "epoch": 1.404891677735538, "grad_norm": 0.1358111920063333, "learning_rate": 3.4379017943927584e-05, "loss": 0.5125, "num_tokens": 2931917501.0, "step": 3835 }, { "epoch": 1.4052580955434433, "grad_norm": 0.1491655935755044, "learning_rate": 3.437591693043767e-05, "loss": 0.4791, "num_tokens": 2932756213.0, "step": 3836 }, { "epoch": 1.405624513351349, "grad_norm": 0.15700365243051626, "learning_rate": 3.437281522015896e-05, "loss": 0.5125, "num_tokens": 2933486416.0, "step": 3837 }, { "epoch": 1.4059909311592542, "grad_norm": 0.1549793540281788, "learning_rate": 3.43697128132661e-05, "loss": 0.4812, "num_tokens": 2934238647.0, "step": 3838 }, { "epoch": 1.40635734896716, "grad_norm": 0.1724462300973283, "learning_rate": 3.4366609709933735e-05, "loss": 0.5016, "num_tokens": 2935106237.0, "step": 3839 }, { "epoch": 1.4067237667750652, "grad_norm": 0.14304902639201209, "learning_rate": 3.436350591033659e-05, "loss": 0.5368, "num_tokens": 2935926843.0, "step": 3840 }, { "epoch": 1.4070901845829709, "grad_norm": 0.15274197851932636, "learning_rate": 3.4360401414649414e-05, "loss": 0.5403, "num_tokens": 2936662706.0, "step": 3841 }, { "epoch": 1.407456602390876, "grad_norm": 0.17133328067218304, "learning_rate": 3.4357296223047e-05, "loss": 0.5302, "num_tokens": 2937428104.0, "step": 3842 }, { "epoch": 1.4078230201987816, "grad_norm": 0.17240414926643705, "learning_rate": 3.435419033570417e-05, "loss": 0.5186, "num_tokens": 2938225665.0, "step": 3843 }, { "epoch": 1.408189438006687, "grad_norm": 0.16664713670660972, "learning_rate": 3.43510837527958e-05, "loss": 0.514, "num_tokens": 2938921441.0, "step": 3844 }, { "epoch": 1.4085558558145925, "grad_norm": 0.19630664620552934, "learning_rate": 3.434797647449679e-05, "loss": 0.5079, "num_tokens": 2939738739.0, "step": 3845 }, { "epoch": 1.408922273622498, "grad_norm": 0.14775229047258617, "learning_rate": 3.4344868500982085e-05, "loss": 0.4774, "num_tokens": 2940579653.0, "step": 3846 }, { "epoch": 1.4092886914304035, "grad_norm": 0.175395455904007, "learning_rate": 3.4341759832426675e-05, "loss": 0.487, "num_tokens": 2941396861.0, "step": 3847 }, { "epoch": 1.409655109238309, "grad_norm": 0.16693471262164972, "learning_rate": 3.433865046900558e-05, "loss": 0.5346, "num_tokens": 2942073547.0, "step": 3848 }, { "epoch": 1.4100215270462144, "grad_norm": 0.16114508071372746, "learning_rate": 3.4335540410893866e-05, "loss": 0.497, "num_tokens": 2942682574.0, "step": 3849 }, { "epoch": 1.41038794485412, "grad_norm": 0.154710499743971, "learning_rate": 3.433242965826663e-05, "loss": 0.5108, "num_tokens": 2943435015.0, "step": 3850 }, { "epoch": 1.4107543626620254, "grad_norm": 0.154054827431256, "learning_rate": 3.432931821129903e-05, "loss": 0.481, "num_tokens": 2944203866.0, "step": 3851 }, { "epoch": 1.4111207804699308, "grad_norm": 0.15408257047799664, "learning_rate": 3.432620607016623e-05, "loss": 0.5038, "num_tokens": 2945003896.0, "step": 3852 }, { "epoch": 1.4114871982778363, "grad_norm": 0.16044235015767352, "learning_rate": 3.432309323504345e-05, "loss": 0.4854, "num_tokens": 2945688676.0, "step": 3853 }, { "epoch": 1.4118536160857418, "grad_norm": 0.15583543511996412, "learning_rate": 3.431997970610595e-05, "loss": 0.5322, "num_tokens": 2946512365.0, "step": 3854 }, { "epoch": 1.4122200338936473, "grad_norm": 0.1631325810935644, "learning_rate": 3.431686548352904e-05, "loss": 0.4823, "num_tokens": 2947372555.0, "step": 3855 }, { "epoch": 1.4125864517015527, "grad_norm": 0.13819333653004617, "learning_rate": 3.4313750567488046e-05, "loss": 0.4802, "num_tokens": 2948226581.0, "step": 3856 }, { "epoch": 1.4129528695094582, "grad_norm": 0.13682517696354238, "learning_rate": 3.431063495815835e-05, "loss": 0.5173, "num_tokens": 2949108163.0, "step": 3857 }, { "epoch": 1.4133192873173637, "grad_norm": 0.15148035803920096, "learning_rate": 3.430751865571535e-05, "loss": 0.5018, "num_tokens": 2949906251.0, "step": 3858 }, { "epoch": 1.4136857051252691, "grad_norm": 0.16824452290707415, "learning_rate": 3.430440166033452e-05, "loss": 0.5469, "num_tokens": 2950679168.0, "step": 3859 }, { "epoch": 1.4140521229331746, "grad_norm": 0.12633107412593045, "learning_rate": 3.430128397219135e-05, "loss": 0.5061, "num_tokens": 2951567540.0, "step": 3860 }, { "epoch": 1.41441854074108, "grad_norm": 0.15988582975093807, "learning_rate": 3.429816559146137e-05, "loss": 0.5361, "num_tokens": 2952376922.0, "step": 3861 }, { "epoch": 1.4147849585489856, "grad_norm": 0.1668288294543584, "learning_rate": 3.4295046518320144e-05, "loss": 0.5039, "num_tokens": 2953061389.0, "step": 3862 }, { "epoch": 1.4151513763568908, "grad_norm": 0.17251589883683868, "learning_rate": 3.4291926752943295e-05, "loss": 0.5152, "num_tokens": 2953798074.0, "step": 3863 }, { "epoch": 1.4155177941647965, "grad_norm": 0.1436722345980381, "learning_rate": 3.428880629550645e-05, "loss": 0.477, "num_tokens": 2954616955.0, "step": 3864 }, { "epoch": 1.4158842119727018, "grad_norm": 0.16291807378195644, "learning_rate": 3.4285685146185325e-05, "loss": 0.5295, "num_tokens": 2955273950.0, "step": 3865 }, { "epoch": 1.4162506297806075, "grad_norm": 0.16357277354087596, "learning_rate": 3.428256330515563e-05, "loss": 0.559, "num_tokens": 2956080116.0, "step": 3866 }, { "epoch": 1.4166170475885127, "grad_norm": 0.15200877435368726, "learning_rate": 3.4279440772593136e-05, "loss": 0.4813, "num_tokens": 2956819206.0, "step": 3867 }, { "epoch": 1.4169834653964184, "grad_norm": 0.1674458761641908, "learning_rate": 3.427631754867364e-05, "loss": 0.52, "num_tokens": 2957505684.0, "step": 3868 }, { "epoch": 1.4173498832043236, "grad_norm": 0.17171437680549334, "learning_rate": 3.4273193633573e-05, "loss": 0.506, "num_tokens": 2958261103.0, "step": 3869 }, { "epoch": 1.4177163010122291, "grad_norm": 0.14309743471411157, "learning_rate": 3.427006902746708e-05, "loss": 0.5455, "num_tokens": 2959160529.0, "step": 3870 }, { "epoch": 1.4180827188201346, "grad_norm": 0.16592707585571637, "learning_rate": 3.426694373053182e-05, "loss": 0.4834, "num_tokens": 2959873417.0, "step": 3871 }, { "epoch": 1.41844913662804, "grad_norm": 0.1552193589711904, "learning_rate": 3.4263817742943164e-05, "loss": 0.5054, "num_tokens": 2960642084.0, "step": 3872 }, { "epoch": 1.4188155544359455, "grad_norm": 0.14890347447765317, "learning_rate": 3.426069106487713e-05, "loss": 0.5051, "num_tokens": 2961373723.0, "step": 3873 }, { "epoch": 1.419181972243851, "grad_norm": 0.16720040366335542, "learning_rate": 3.4257563696509745e-05, "loss": 0.538, "num_tokens": 2962171773.0, "step": 3874 }, { "epoch": 1.4195483900517565, "grad_norm": 0.13806965629877174, "learning_rate": 3.425443563801708e-05, "loss": 0.4786, "num_tokens": 2962999067.0, "step": 3875 }, { "epoch": 1.419914807859662, "grad_norm": 0.1643368545418961, "learning_rate": 3.425130688957526e-05, "loss": 0.5125, "num_tokens": 2963699388.0, "step": 3876 }, { "epoch": 1.4202812256675674, "grad_norm": 0.1557418987357225, "learning_rate": 3.4248177451360444e-05, "loss": 0.5341, "num_tokens": 2964473138.0, "step": 3877 }, { "epoch": 1.420647643475473, "grad_norm": 0.16595455004296997, "learning_rate": 3.4245047323548815e-05, "loss": 0.506, "num_tokens": 2965101098.0, "step": 3878 }, { "epoch": 1.4210140612833784, "grad_norm": 0.14773602349724013, "learning_rate": 3.424191650631661e-05, "loss": 0.4691, "num_tokens": 2965780333.0, "step": 3879 }, { "epoch": 1.4213804790912838, "grad_norm": 0.12914029914412536, "learning_rate": 3.42387849998401e-05, "loss": 0.4953, "num_tokens": 2966619830.0, "step": 3880 }, { "epoch": 1.4217468968991893, "grad_norm": 0.15494406813542838, "learning_rate": 3.42356528042956e-05, "loss": 0.4822, "num_tokens": 2967371616.0, "step": 3881 }, { "epoch": 1.4221133147070948, "grad_norm": 0.1415806321063255, "learning_rate": 3.423251991985945e-05, "loss": 0.518, "num_tokens": 2968187822.0, "step": 3882 }, { "epoch": 1.4224797325150003, "grad_norm": 0.13664023696679195, "learning_rate": 3.422938634670804e-05, "loss": 0.504, "num_tokens": 2969024200.0, "step": 3883 }, { "epoch": 1.4228461503229057, "grad_norm": 0.12830075915797434, "learning_rate": 3.42262520850178e-05, "loss": 0.5119, "num_tokens": 2969890822.0, "step": 3884 }, { "epoch": 1.4232125681308112, "grad_norm": 0.13513300425604763, "learning_rate": 3.4223117134965194e-05, "loss": 0.5046, "num_tokens": 2970866026.0, "step": 3885 }, { "epoch": 1.4235789859387167, "grad_norm": 0.1475210677039471, "learning_rate": 3.421998149672672e-05, "loss": 0.5101, "num_tokens": 2971607891.0, "step": 3886 }, { "epoch": 1.4239454037466222, "grad_norm": 0.13942659688498624, "learning_rate": 3.421684517047893e-05, "loss": 0.4936, "num_tokens": 2972412813.0, "step": 3887 }, { "epoch": 1.4243118215545276, "grad_norm": 0.1404845139078567, "learning_rate": 3.42137081563984e-05, "loss": 0.4773, "num_tokens": 2973259284.0, "step": 3888 }, { "epoch": 1.424678239362433, "grad_norm": 0.14472547655476437, "learning_rate": 3.421057045466175e-05, "loss": 0.5239, "num_tokens": 2974134772.0, "step": 3889 }, { "epoch": 1.4250446571703383, "grad_norm": 0.16446502921886075, "learning_rate": 3.420743206544565e-05, "loss": 0.5187, "num_tokens": 2974914048.0, "step": 3890 }, { "epoch": 1.425411074978244, "grad_norm": 0.15631977531379146, "learning_rate": 3.420429298892678e-05, "loss": 0.4917, "num_tokens": 2975592295.0, "step": 3891 }, { "epoch": 1.4257774927861493, "grad_norm": 0.15160156652647938, "learning_rate": 3.420115322528188e-05, "loss": 0.5125, "num_tokens": 2976349755.0, "step": 3892 }, { "epoch": 1.426143910594055, "grad_norm": 0.15946600818559112, "learning_rate": 3.4198012774687734e-05, "loss": 0.5236, "num_tokens": 2977109424.0, "step": 3893 }, { "epoch": 1.4265103284019602, "grad_norm": 0.14397769402865274, "learning_rate": 3.4194871637321156e-05, "loss": 0.5389, "num_tokens": 2977838422.0, "step": 3894 }, { "epoch": 1.426876746209866, "grad_norm": 0.16300246283850897, "learning_rate": 3.419172981335898e-05, "loss": 0.5013, "num_tokens": 2978651582.0, "step": 3895 }, { "epoch": 1.4272431640177712, "grad_norm": 0.14492225775875436, "learning_rate": 3.4188587302978126e-05, "loss": 0.5001, "num_tokens": 2979350807.0, "step": 3896 }, { "epoch": 1.4276095818256767, "grad_norm": 0.14765065948799172, "learning_rate": 3.4185444106355497e-05, "loss": 0.4524, "num_tokens": 2980181777.0, "step": 3897 }, { "epoch": 1.4279759996335821, "grad_norm": 0.13989785514628364, "learning_rate": 3.4182300223668075e-05, "loss": 0.5233, "num_tokens": 2980869640.0, "step": 3898 }, { "epoch": 1.4283424174414876, "grad_norm": 0.15017268203698697, "learning_rate": 3.417915565509286e-05, "loss": 0.519, "num_tokens": 2981639416.0, "step": 3899 }, { "epoch": 1.428708835249393, "grad_norm": 0.12812028136545675, "learning_rate": 3.4176010400806904e-05, "loss": 0.4804, "num_tokens": 2982364469.0, "step": 3900 }, { "epoch": 1.4290752530572985, "grad_norm": 0.1556959096367061, "learning_rate": 3.417286446098729e-05, "loss": 0.5534, "num_tokens": 2983075864.0, "step": 3901 }, { "epoch": 1.429441670865204, "grad_norm": 0.15079900764310586, "learning_rate": 3.4169717835811135e-05, "loss": 0.5089, "num_tokens": 2983988735.0, "step": 3902 }, { "epoch": 1.4298080886731095, "grad_norm": 0.15634024183152143, "learning_rate": 3.4166570525455605e-05, "loss": 0.5219, "num_tokens": 2984761619.0, "step": 3903 }, { "epoch": 1.430174506481015, "grad_norm": 0.1538957795285396, "learning_rate": 3.41634225300979e-05, "loss": 0.5277, "num_tokens": 2985500637.0, "step": 3904 }, { "epoch": 1.4305409242889204, "grad_norm": 0.15414016208209352, "learning_rate": 3.4160273849915256e-05, "loss": 0.5258, "num_tokens": 2986254000.0, "step": 3905 }, { "epoch": 1.430907342096826, "grad_norm": 0.16157755007457886, "learning_rate": 3.415712448508495e-05, "loss": 0.5265, "num_tokens": 2986943041.0, "step": 3906 }, { "epoch": 1.4312737599047314, "grad_norm": 0.14236776811652596, "learning_rate": 3.415397443578431e-05, "loss": 0.5022, "num_tokens": 2987715616.0, "step": 3907 }, { "epoch": 1.4316401777126369, "grad_norm": 0.1696166611860589, "learning_rate": 3.415082370219066e-05, "loss": 0.5394, "num_tokens": 2988485583.0, "step": 3908 }, { "epoch": 1.4320065955205423, "grad_norm": 0.17206355770258247, "learning_rate": 3.414767228448143e-05, "loss": 0.523, "num_tokens": 2989194518.0, "step": 3909 }, { "epoch": 1.4323730133284478, "grad_norm": 0.1497524509835754, "learning_rate": 3.4144520182834026e-05, "loss": 0.4813, "num_tokens": 2989900593.0, "step": 3910 }, { "epoch": 1.4327394311363533, "grad_norm": 0.17903048694441195, "learning_rate": 3.414136739742592e-05, "loss": 0.5225, "num_tokens": 2990906439.0, "step": 3911 }, { "epoch": 1.4331058489442587, "grad_norm": 0.14831827463659522, "learning_rate": 3.4138213928434636e-05, "loss": 0.489, "num_tokens": 2991663300.0, "step": 3912 }, { "epoch": 1.4334722667521642, "grad_norm": 0.18542189052302846, "learning_rate": 3.4135059776037694e-05, "loss": 0.5498, "num_tokens": 2992391823.0, "step": 3913 }, { "epoch": 1.4338386845600697, "grad_norm": 0.1991565489131086, "learning_rate": 3.413190494041271e-05, "loss": 0.5384, "num_tokens": 2993096161.0, "step": 3914 }, { "epoch": 1.4342051023679752, "grad_norm": 0.1607800248915898, "learning_rate": 3.4128749421737276e-05, "loss": 0.5182, "num_tokens": 2993889748.0, "step": 3915 }, { "epoch": 1.4345715201758806, "grad_norm": 0.17662468045179788, "learning_rate": 3.412559322018907e-05, "loss": 0.5117, "num_tokens": 2994557172.0, "step": 3916 }, { "epoch": 1.4349379379837859, "grad_norm": 0.180330636575246, "learning_rate": 3.4122436335945805e-05, "loss": 0.51, "num_tokens": 2995227566.0, "step": 3917 }, { "epoch": 1.4353043557916916, "grad_norm": 0.16608372257809786, "learning_rate": 3.4119278769185195e-05, "loss": 0.4983, "num_tokens": 2995965391.0, "step": 3918 }, { "epoch": 1.4356707735995968, "grad_norm": 0.1787363090585465, "learning_rate": 3.411612052008503e-05, "loss": 0.5167, "num_tokens": 2996727378.0, "step": 3919 }, { "epoch": 1.4360371914075025, "grad_norm": 0.16258191779694509, "learning_rate": 3.4112961588823136e-05, "loss": 0.4718, "num_tokens": 2997394018.0, "step": 3920 }, { "epoch": 1.4364036092154078, "grad_norm": 0.16402510536426887, "learning_rate": 3.4109801975577345e-05, "loss": 0.5099, "num_tokens": 2998225131.0, "step": 3921 }, { "epoch": 1.4367700270233135, "grad_norm": 0.14887045205810734, "learning_rate": 3.410664168052557e-05, "loss": 0.4909, "num_tokens": 2999052655.0, "step": 3922 }, { "epoch": 1.4371364448312187, "grad_norm": 0.15064328830934196, "learning_rate": 3.410348070384573e-05, "loss": 0.505, "num_tokens": 2999784531.0, "step": 3923 }, { "epoch": 1.4375028626391242, "grad_norm": 0.15786533635180372, "learning_rate": 3.4100319045715786e-05, "loss": 0.5006, "num_tokens": 3000442936.0, "step": 3924 }, { "epoch": 1.4378692804470297, "grad_norm": 0.15638020666946978, "learning_rate": 3.409715670631377e-05, "loss": 0.5501, "num_tokens": 3001120058.0, "step": 3925 }, { "epoch": 1.4382356982549351, "grad_norm": 0.14695940832961762, "learning_rate": 3.4093993685817705e-05, "loss": 0.4802, "num_tokens": 3002036708.0, "step": 3926 }, { "epoch": 1.4386021160628406, "grad_norm": 0.14184523563196388, "learning_rate": 3.409082998440569e-05, "loss": 0.4897, "num_tokens": 3002883432.0, "step": 3927 }, { "epoch": 1.438968533870746, "grad_norm": 0.15194112081472613, "learning_rate": 3.408766560225584e-05, "loss": 0.4948, "num_tokens": 3003590315.0, "step": 3928 }, { "epoch": 1.4393349516786516, "grad_norm": 0.1584242577396095, "learning_rate": 3.408450053954632e-05, "loss": 0.494, "num_tokens": 3004436393.0, "step": 3929 }, { "epoch": 1.439701369486557, "grad_norm": 0.1568350519449572, "learning_rate": 3.408133479645533e-05, "loss": 0.4989, "num_tokens": 3005156071.0, "step": 3930 }, { "epoch": 1.4400677872944625, "grad_norm": 0.16276534262467832, "learning_rate": 3.40781683731611e-05, "loss": 0.5134, "num_tokens": 3005938781.0, "step": 3931 }, { "epoch": 1.440434205102368, "grad_norm": 0.14761201355798595, "learning_rate": 3.407500126984192e-05, "loss": 0.4762, "num_tokens": 3006736521.0, "step": 3932 }, { "epoch": 1.4408006229102734, "grad_norm": 0.1449468594799039, "learning_rate": 3.407183348667609e-05, "loss": 0.5065, "num_tokens": 3007585817.0, "step": 3933 }, { "epoch": 1.441167040718179, "grad_norm": 0.1538301683438605, "learning_rate": 3.4068665023841964e-05, "loss": 0.5018, "num_tokens": 3008405160.0, "step": 3934 }, { "epoch": 1.4415334585260844, "grad_norm": 0.16438743784713572, "learning_rate": 3.406549588151795e-05, "loss": 0.5272, "num_tokens": 3009125557.0, "step": 3935 }, { "epoch": 1.4418998763339899, "grad_norm": 0.13292029918983253, "learning_rate": 3.406232605988245e-05, "loss": 0.5145, "num_tokens": 3009843759.0, "step": 3936 }, { "epoch": 1.4422662941418953, "grad_norm": 0.15555572122408637, "learning_rate": 3.405915555911395e-05, "loss": 0.4802, "num_tokens": 3010633661.0, "step": 3937 }, { "epoch": 1.4426327119498008, "grad_norm": 0.1424651139774461, "learning_rate": 3.405598437939095e-05, "loss": 0.4839, "num_tokens": 3011401693.0, "step": 3938 }, { "epoch": 1.4429991297577063, "grad_norm": 0.1525945319683441, "learning_rate": 3.4052812520892e-05, "loss": 0.5125, "num_tokens": 3012202387.0, "step": 3939 }, { "epoch": 1.4433655475656118, "grad_norm": 0.14123906533287278, "learning_rate": 3.4049639983795666e-05, "loss": 0.4926, "num_tokens": 3012974573.0, "step": 3940 }, { "epoch": 1.4437319653735172, "grad_norm": 0.15060315278466135, "learning_rate": 3.404646676828059e-05, "loss": 0.5056, "num_tokens": 3013723991.0, "step": 3941 }, { "epoch": 1.4440983831814227, "grad_norm": 0.1596447638395713, "learning_rate": 3.404329287452541e-05, "loss": 0.4794, "num_tokens": 3014464747.0, "step": 3942 }, { "epoch": 1.4444648009893282, "grad_norm": 0.1965693039998371, "learning_rate": 3.4040118302708835e-05, "loss": 0.5163, "num_tokens": 3015288386.0, "step": 3943 }, { "epoch": 1.4448312187972334, "grad_norm": 0.1659825724353482, "learning_rate": 3.40369430530096e-05, "loss": 0.5053, "num_tokens": 3016035154.0, "step": 3944 }, { "epoch": 1.4451976366051391, "grad_norm": 0.18320057510540483, "learning_rate": 3.403376712560647e-05, "loss": 0.5471, "num_tokens": 3016690314.0, "step": 3945 }, { "epoch": 1.4455640544130444, "grad_norm": 0.1588723215806109, "learning_rate": 3.4030590520678266e-05, "loss": 0.5127, "num_tokens": 3017495632.0, "step": 3946 }, { "epoch": 1.44593047222095, "grad_norm": 0.17050766783727853, "learning_rate": 3.402741323840383e-05, "loss": 0.5012, "num_tokens": 3018204888.0, "step": 3947 }, { "epoch": 1.4462968900288553, "grad_norm": 0.18467197775164776, "learning_rate": 3.4024235278962035e-05, "loss": 0.498, "num_tokens": 3018918445.0, "step": 3948 }, { "epoch": 1.446663307836761, "grad_norm": 0.14841446951937076, "learning_rate": 3.402105664253184e-05, "loss": 0.488, "num_tokens": 3019697365.0, "step": 3949 }, { "epoch": 1.4470297256446663, "grad_norm": 0.168123003986023, "learning_rate": 3.401787732929218e-05, "loss": 0.5063, "num_tokens": 3020414465.0, "step": 3950 }, { "epoch": 1.4473961434525717, "grad_norm": 0.1655384296873625, "learning_rate": 3.401469733942208e-05, "loss": 0.4941, "num_tokens": 3021164470.0, "step": 3951 }, { "epoch": 1.4477625612604772, "grad_norm": 0.14661608124677383, "learning_rate": 3.401151667310056e-05, "loss": 0.4817, "num_tokens": 3021979052.0, "step": 3952 }, { "epoch": 1.4481289790683827, "grad_norm": 0.16992176643433157, "learning_rate": 3.40083353305067e-05, "loss": 0.507, "num_tokens": 3022750161.0, "step": 3953 }, { "epoch": 1.4484953968762881, "grad_norm": 0.13777079586473914, "learning_rate": 3.400515331181963e-05, "loss": 0.4808, "num_tokens": 3023449734.0, "step": 3954 }, { "epoch": 1.4488618146841936, "grad_norm": 0.1631253112903345, "learning_rate": 3.4001970617218484e-05, "loss": 0.4966, "num_tokens": 3024181054.0, "step": 3955 }, { "epoch": 1.449228232492099, "grad_norm": 0.16864592923972674, "learning_rate": 3.399878724688248e-05, "loss": 0.5178, "num_tokens": 3024912763.0, "step": 3956 }, { "epoch": 1.4495946503000046, "grad_norm": 0.17052741009359648, "learning_rate": 3.399560320099082e-05, "loss": 0.5124, "num_tokens": 3025678806.0, "step": 3957 }, { "epoch": 1.44996106810791, "grad_norm": 0.17692026735203648, "learning_rate": 3.399241847972279e-05, "loss": 0.5164, "num_tokens": 3026316065.0, "step": 3958 }, { "epoch": 1.4503274859158155, "grad_norm": 0.15490358489988548, "learning_rate": 3.3989233083257694e-05, "loss": 0.4843, "num_tokens": 3027143107.0, "step": 3959 }, { "epoch": 1.450693903723721, "grad_norm": 0.17486553729145846, "learning_rate": 3.3986047011774874e-05, "loss": 0.5123, "num_tokens": 3027796814.0, "step": 3960 }, { "epoch": 1.4510603215316265, "grad_norm": 0.16344165740829347, "learning_rate": 3.398286026545371e-05, "loss": 0.4787, "num_tokens": 3028611950.0, "step": 3961 }, { "epoch": 1.451426739339532, "grad_norm": 0.16110429712623503, "learning_rate": 3.397967284447362e-05, "loss": 0.5046, "num_tokens": 3029383776.0, "step": 3962 }, { "epoch": 1.4517931571474374, "grad_norm": 0.15286126417828408, "learning_rate": 3.397648474901407e-05, "loss": 0.5261, "num_tokens": 3030109612.0, "step": 3963 }, { "epoch": 1.4521595749553429, "grad_norm": 0.16202017478962022, "learning_rate": 3.3973295979254555e-05, "loss": 0.4966, "num_tokens": 3030791029.0, "step": 3964 }, { "epoch": 1.4525259927632483, "grad_norm": 0.1739724401847526, "learning_rate": 3.39701065353746e-05, "loss": 0.5153, "num_tokens": 3031587851.0, "step": 3965 }, { "epoch": 1.4528924105711538, "grad_norm": 0.15106833540777606, "learning_rate": 3.396691641755378e-05, "loss": 0.5235, "num_tokens": 3032319281.0, "step": 3966 }, { "epoch": 1.4532588283790593, "grad_norm": 0.13975400770148472, "learning_rate": 3.396372562597171e-05, "loss": 0.493, "num_tokens": 3033124426.0, "step": 3967 }, { "epoch": 1.4536252461869648, "grad_norm": 0.15920110310167104, "learning_rate": 3.396053416080805e-05, "loss": 0.4961, "num_tokens": 3033915076.0, "step": 3968 }, { "epoch": 1.4539916639948702, "grad_norm": 0.13215755878290275, "learning_rate": 3.3957342022242464e-05, "loss": 0.4973, "num_tokens": 3034722799.0, "step": 3969 }, { "epoch": 1.4543580818027757, "grad_norm": 0.16242274554955313, "learning_rate": 3.395414921045468e-05, "loss": 0.5371, "num_tokens": 3035487978.0, "step": 3970 }, { "epoch": 1.454724499610681, "grad_norm": 0.14530188927804047, "learning_rate": 3.395095572562447e-05, "loss": 0.4967, "num_tokens": 3036169712.0, "step": 3971 }, { "epoch": 1.4550909174185866, "grad_norm": 0.13125828780834975, "learning_rate": 3.3947761567931614e-05, "loss": 0.4906, "num_tokens": 3036989952.0, "step": 3972 }, { "epoch": 1.455457335226492, "grad_norm": 0.14506942037973442, "learning_rate": 3.394456673755597e-05, "loss": 0.5038, "num_tokens": 3037741643.0, "step": 3973 }, { "epoch": 1.4558237530343976, "grad_norm": 0.1541971236788273, "learning_rate": 3.394137123467741e-05, "loss": 0.5112, "num_tokens": 3038497073.0, "step": 3974 }, { "epoch": 1.4561901708423028, "grad_norm": 0.1546067381953342, "learning_rate": 3.3938175059475846e-05, "loss": 0.4899, "num_tokens": 3039153604.0, "step": 3975 }, { "epoch": 1.4565565886502083, "grad_norm": 0.15443796380895924, "learning_rate": 3.3934978212131215e-05, "loss": 0.5383, "num_tokens": 3040000506.0, "step": 3976 }, { "epoch": 1.4569230064581138, "grad_norm": 0.14256028174921972, "learning_rate": 3.393178069282353e-05, "loss": 0.5015, "num_tokens": 3040675672.0, "step": 3977 }, { "epoch": 1.4572894242660193, "grad_norm": 0.1836895243507708, "learning_rate": 3.3928582501732793e-05, "loss": 0.5315, "num_tokens": 3041344836.0, "step": 3978 }, { "epoch": 1.4576558420739247, "grad_norm": 0.14606979805275574, "learning_rate": 3.3925383639039086e-05, "loss": 0.5082, "num_tokens": 3042106717.0, "step": 3979 }, { "epoch": 1.4580222598818302, "grad_norm": 0.15952464090523852, "learning_rate": 3.39221841049225e-05, "loss": 0.5239, "num_tokens": 3042767816.0, "step": 3980 }, { "epoch": 1.4583886776897357, "grad_norm": 0.15534567594444154, "learning_rate": 3.391898389956319e-05, "loss": 0.5122, "num_tokens": 3043497875.0, "step": 3981 }, { "epoch": 1.4587550954976412, "grad_norm": 0.15448372447281636, "learning_rate": 3.3915783023141325e-05, "loss": 0.5129, "num_tokens": 3044154999.0, "step": 3982 }, { "epoch": 1.4591215133055466, "grad_norm": 0.14840650986851311, "learning_rate": 3.391258147583711e-05, "loss": 0.5033, "num_tokens": 3044943611.0, "step": 3983 }, { "epoch": 1.459487931113452, "grad_norm": 0.1662507883749798, "learning_rate": 3.390937925783082e-05, "loss": 0.5279, "num_tokens": 3045673623.0, "step": 3984 }, { "epoch": 1.4598543489213576, "grad_norm": 0.15548092144788708, "learning_rate": 3.3906176369302725e-05, "loss": 0.5322, "num_tokens": 3046351249.0, "step": 3985 }, { "epoch": 1.460220766729263, "grad_norm": 0.1718364956585426, "learning_rate": 3.390297281043317e-05, "loss": 0.4977, "num_tokens": 3047100901.0, "step": 3986 }, { "epoch": 1.4605871845371685, "grad_norm": 0.13517558119013834, "learning_rate": 3.38997685814025e-05, "loss": 0.547, "num_tokens": 3047875715.0, "step": 3987 }, { "epoch": 1.460953602345074, "grad_norm": 0.15329542037593072, "learning_rate": 3.3896563682391164e-05, "loss": 0.4834, "num_tokens": 3048567376.0, "step": 3988 }, { "epoch": 1.4613200201529795, "grad_norm": 0.1586806317509821, "learning_rate": 3.389335811357955e-05, "loss": 0.4958, "num_tokens": 3049354272.0, "step": 3989 }, { "epoch": 1.461686437960885, "grad_norm": 0.15958370099706923, "learning_rate": 3.3890151875148175e-05, "loss": 0.504, "num_tokens": 3050033427.0, "step": 3990 }, { "epoch": 1.4620528557687904, "grad_norm": 0.14934569418390714, "learning_rate": 3.388694496727755e-05, "loss": 0.5149, "num_tokens": 3050823507.0, "step": 3991 }, { "epoch": 1.4624192735766959, "grad_norm": 0.1616343972418588, "learning_rate": 3.3883737390148224e-05, "loss": 0.5232, "num_tokens": 3051614160.0, "step": 3992 }, { "epoch": 1.4627856913846013, "grad_norm": 0.14757640714410641, "learning_rate": 3.388052914394078e-05, "loss": 0.4972, "num_tokens": 3052484170.0, "step": 3993 }, { "epoch": 1.4631521091925068, "grad_norm": 0.16424116324394644, "learning_rate": 3.3877320228835874e-05, "loss": 0.5052, "num_tokens": 3053203426.0, "step": 3994 }, { "epoch": 1.4635185270004123, "grad_norm": 0.14014668695482518, "learning_rate": 3.3874110645014155e-05, "loss": 0.5187, "num_tokens": 3054022112.0, "step": 3995 }, { "epoch": 1.4638849448083178, "grad_norm": 0.14777088382442516, "learning_rate": 3.3870900392656334e-05, "loss": 0.497, "num_tokens": 3054867189.0, "step": 3996 }, { "epoch": 1.4642513626162232, "grad_norm": 0.15023782991517556, "learning_rate": 3.386768947194315e-05, "loss": 0.5095, "num_tokens": 3055663662.0, "step": 3997 }, { "epoch": 1.4646177804241285, "grad_norm": 0.17417119678468204, "learning_rate": 3.38644778830554e-05, "loss": 0.4989, "num_tokens": 3056436799.0, "step": 3998 }, { "epoch": 1.4649841982320342, "grad_norm": 0.1425677916875921, "learning_rate": 3.386126562617388e-05, "loss": 0.4921, "num_tokens": 3057150123.0, "step": 3999 }, { "epoch": 1.4653506160399394, "grad_norm": 0.16130004503163814, "learning_rate": 3.3858052701479465e-05, "loss": 0.4886, "num_tokens": 3057973545.0, "step": 4000 }, { "epoch": 1.4657170338478451, "grad_norm": 0.16836719175754303, "learning_rate": 3.385483910915304e-05, "loss": 0.5129, "num_tokens": 3058704879.0, "step": 4001 }, { "epoch": 1.4660834516557504, "grad_norm": 0.16499246761062414, "learning_rate": 3.385162484937555e-05, "loss": 0.5456, "num_tokens": 3059301496.0, "step": 4002 }, { "epoch": 1.4664498694636559, "grad_norm": 0.14229688252418907, "learning_rate": 3.384840992232794e-05, "loss": 0.4708, "num_tokens": 3060069863.0, "step": 4003 }, { "epoch": 1.4668162872715613, "grad_norm": 0.15863869941998865, "learning_rate": 3.384519432819123e-05, "loss": 0.4797, "num_tokens": 3060913453.0, "step": 4004 }, { "epoch": 1.4671827050794668, "grad_norm": 0.14826028525304263, "learning_rate": 3.3841978067146476e-05, "loss": 0.4967, "num_tokens": 3061697928.0, "step": 4005 }, { "epoch": 1.4675491228873723, "grad_norm": 0.14740315470199855, "learning_rate": 3.383876113937474e-05, "loss": 0.5074, "num_tokens": 3062477230.0, "step": 4006 }, { "epoch": 1.4679155406952777, "grad_norm": 0.16327397482051234, "learning_rate": 3.383554354505715e-05, "loss": 0.5715, "num_tokens": 3063240722.0, "step": 4007 }, { "epoch": 1.4682819585031832, "grad_norm": 0.14720224160859216, "learning_rate": 3.383232528437486e-05, "loss": 0.4878, "num_tokens": 3064054091.0, "step": 4008 }, { "epoch": 1.4686483763110887, "grad_norm": 0.14427491931571068, "learning_rate": 3.382910635750908e-05, "loss": 0.5009, "num_tokens": 3064820388.0, "step": 4009 }, { "epoch": 1.4690147941189942, "grad_norm": 0.13998764062158703, "learning_rate": 3.3825886764641015e-05, "loss": 0.4936, "num_tokens": 3065524238.0, "step": 4010 }, { "epoch": 1.4693812119268996, "grad_norm": 0.15763386386656222, "learning_rate": 3.382266650595196e-05, "loss": 0.5073, "num_tokens": 3066352821.0, "step": 4011 }, { "epoch": 1.469747629734805, "grad_norm": 0.1604207163731857, "learning_rate": 3.38194455816232e-05, "loss": 0.5226, "num_tokens": 3067000650.0, "step": 4012 }, { "epoch": 1.4701140475427106, "grad_norm": 0.15749525593750052, "learning_rate": 3.381622399183611e-05, "loss": 0.5158, "num_tokens": 3067636770.0, "step": 4013 }, { "epoch": 1.470480465350616, "grad_norm": 0.15430422034773525, "learning_rate": 3.381300173677203e-05, "loss": 0.5443, "num_tokens": 3068316373.0, "step": 4014 }, { "epoch": 1.4708468831585215, "grad_norm": 0.14225369838446467, "learning_rate": 3.380977881661241e-05, "loss": 0.5237, "num_tokens": 3069059327.0, "step": 4015 }, { "epoch": 1.471213300966427, "grad_norm": 0.14569638798916226, "learning_rate": 3.3806555231538705e-05, "loss": 0.4829, "num_tokens": 3069824607.0, "step": 4016 }, { "epoch": 1.4715797187743325, "grad_norm": 0.15092628312979528, "learning_rate": 3.38033309817324e-05, "loss": 0.5298, "num_tokens": 3070501714.0, "step": 4017 }, { "epoch": 1.471946136582238, "grad_norm": 0.15377185429634266, "learning_rate": 3.3800106067375025e-05, "loss": 0.4862, "num_tokens": 3071293577.0, "step": 4018 }, { "epoch": 1.4723125543901434, "grad_norm": 0.14911105724698837, "learning_rate": 3.379688048864815e-05, "loss": 0.509, "num_tokens": 3072119813.0, "step": 4019 }, { "epoch": 1.4726789721980489, "grad_norm": 0.16087179635905052, "learning_rate": 3.3793654245733395e-05, "loss": 0.514, "num_tokens": 3072856214.0, "step": 4020 }, { "epoch": 1.4730453900059544, "grad_norm": 0.15518025547594436, "learning_rate": 3.3790427338812394e-05, "loss": 0.5114, "num_tokens": 3073719475.0, "step": 4021 }, { "epoch": 1.4734118078138598, "grad_norm": 0.15817513615762946, "learning_rate": 3.378719976806682e-05, "loss": 0.5169, "num_tokens": 3074536698.0, "step": 4022 }, { "epoch": 1.473778225621765, "grad_norm": 0.15042831840682339, "learning_rate": 3.378397153367841e-05, "loss": 0.4899, "num_tokens": 3075371436.0, "step": 4023 }, { "epoch": 1.4741446434296708, "grad_norm": 0.1445975469699548, "learning_rate": 3.37807426358289e-05, "loss": 0.5203, "num_tokens": 3076165965.0, "step": 4024 }, { "epoch": 1.474511061237576, "grad_norm": 0.14677772290467347, "learning_rate": 3.3777513074700095e-05, "loss": 0.4968, "num_tokens": 3076975064.0, "step": 4025 }, { "epoch": 1.4748774790454817, "grad_norm": 0.14966064097147722, "learning_rate": 3.377428285047383e-05, "loss": 0.4879, "num_tokens": 3077696686.0, "step": 4026 }, { "epoch": 1.475243896853387, "grad_norm": 0.12821312866520987, "learning_rate": 3.377105196333196e-05, "loss": 0.4538, "num_tokens": 3078547598.0, "step": 4027 }, { "epoch": 1.4756103146612927, "grad_norm": 0.16849539120741222, "learning_rate": 3.3767820413456406e-05, "loss": 0.5383, "num_tokens": 3079238245.0, "step": 4028 }, { "epoch": 1.475976732469198, "grad_norm": 0.15080284133012545, "learning_rate": 3.37645882010291e-05, "loss": 0.5156, "num_tokens": 3079971235.0, "step": 4029 }, { "epoch": 1.4763431502771034, "grad_norm": 0.1458712236106835, "learning_rate": 3.376135532623202e-05, "loss": 0.5154, "num_tokens": 3080687097.0, "step": 4030 }, { "epoch": 1.4767095680850089, "grad_norm": 0.15367635304661564, "learning_rate": 3.375812178924719e-05, "loss": 0.5043, "num_tokens": 3081474556.0, "step": 4031 }, { "epoch": 1.4770759858929143, "grad_norm": 0.1351408410555556, "learning_rate": 3.3754887590256666e-05, "loss": 0.4611, "num_tokens": 3082276952.0, "step": 4032 }, { "epoch": 1.4774424037008198, "grad_norm": 0.13205772198855806, "learning_rate": 3.375165272944253e-05, "loss": 0.4959, "num_tokens": 3083059173.0, "step": 4033 }, { "epoch": 1.4778088215087253, "grad_norm": 0.13987418178570113, "learning_rate": 3.374841720698691e-05, "loss": 0.522, "num_tokens": 3083907190.0, "step": 4034 }, { "epoch": 1.4781752393166308, "grad_norm": 0.16025251082596578, "learning_rate": 3.3745181023071996e-05, "loss": 0.5357, "num_tokens": 3084669627.0, "step": 4035 }, { "epoch": 1.4785416571245362, "grad_norm": 0.1409627807890483, "learning_rate": 3.3741944177879965e-05, "loss": 0.4991, "num_tokens": 3085422195.0, "step": 4036 }, { "epoch": 1.4789080749324417, "grad_norm": 0.14430211104786358, "learning_rate": 3.373870667159307e-05, "loss": 0.499, "num_tokens": 3086177604.0, "step": 4037 }, { "epoch": 1.4792744927403472, "grad_norm": 0.15030056498354105, "learning_rate": 3.373546850439359e-05, "loss": 0.4811, "num_tokens": 3086958532.0, "step": 4038 }, { "epoch": 1.4796409105482526, "grad_norm": 0.14593821398881374, "learning_rate": 3.373222967646384e-05, "loss": 0.5069, "num_tokens": 3087784951.0, "step": 4039 }, { "epoch": 1.4800073283561581, "grad_norm": 0.16989587151571967, "learning_rate": 3.372899018798616e-05, "loss": 0.5541, "num_tokens": 3088539564.0, "step": 4040 }, { "epoch": 1.4803737461640636, "grad_norm": 0.16065963088955748, "learning_rate": 3.3725750039142956e-05, "loss": 0.4981, "num_tokens": 3089258011.0, "step": 4041 }, { "epoch": 1.480740163971969, "grad_norm": 0.1612676410846482, "learning_rate": 3.3722509230116645e-05, "loss": 0.5463, "num_tokens": 3089956092.0, "step": 4042 }, { "epoch": 1.4811065817798745, "grad_norm": 0.1680184797224272, "learning_rate": 3.3719267761089695e-05, "loss": 0.5216, "num_tokens": 3090726439.0, "step": 4043 }, { "epoch": 1.48147299958778, "grad_norm": 0.14842833585176357, "learning_rate": 3.371602563224461e-05, "loss": 0.5333, "num_tokens": 3091550084.0, "step": 4044 }, { "epoch": 1.4818394173956855, "grad_norm": 0.1562013574250633, "learning_rate": 3.371278284376392e-05, "loss": 0.5008, "num_tokens": 3092280562.0, "step": 4045 }, { "epoch": 1.482205835203591, "grad_norm": 0.14854736228119902, "learning_rate": 3.3709539395830205e-05, "loss": 0.4845, "num_tokens": 3092931157.0, "step": 4046 }, { "epoch": 1.4825722530114964, "grad_norm": 0.15795030429774456, "learning_rate": 3.3706295288626076e-05, "loss": 0.4715, "num_tokens": 3093772879.0, "step": 4047 }, { "epoch": 1.482938670819402, "grad_norm": 0.15026940643647937, "learning_rate": 3.370305052233419e-05, "loss": 0.5437, "num_tokens": 3094558822.0, "step": 4048 }, { "epoch": 1.4833050886273074, "grad_norm": 0.14958646256963462, "learning_rate": 3.369980509713723e-05, "loss": 0.494, "num_tokens": 3095476973.0, "step": 4049 }, { "epoch": 1.4836715064352126, "grad_norm": 0.14574049975527698, "learning_rate": 3.3696559013217915e-05, "loss": 0.4806, "num_tokens": 3096349232.0, "step": 4050 }, { "epoch": 1.4840379242431183, "grad_norm": 0.14079119746327323, "learning_rate": 3.3693312270759005e-05, "loss": 0.4974, "num_tokens": 3097140035.0, "step": 4051 }, { "epoch": 1.4844043420510236, "grad_norm": 0.14586410562656507, "learning_rate": 3.3690064869943304e-05, "loss": 0.5088, "num_tokens": 3097943429.0, "step": 4052 }, { "epoch": 1.4847707598589293, "grad_norm": 0.13032722924653453, "learning_rate": 3.3686816810953655e-05, "loss": 0.4957, "num_tokens": 3098777952.0, "step": 4053 }, { "epoch": 1.4851371776668345, "grad_norm": 0.1621014768480567, "learning_rate": 3.3683568093972915e-05, "loss": 0.5457, "num_tokens": 3099555459.0, "step": 4054 }, { "epoch": 1.4855035954747402, "grad_norm": 0.13241867853806863, "learning_rate": 3.3680318719183996e-05, "loss": 0.4914, "num_tokens": 3100394553.0, "step": 4055 }, { "epoch": 1.4858700132826455, "grad_norm": 0.1509502719465555, "learning_rate": 3.3677068686769844e-05, "loss": 0.5207, "num_tokens": 3101197946.0, "step": 4056 }, { "epoch": 1.486236431090551, "grad_norm": 0.1442729515305591, "learning_rate": 3.367381799691345e-05, "loss": 0.4668, "num_tokens": 3102012671.0, "step": 4057 }, { "epoch": 1.4866028488984564, "grad_norm": 0.1494649401605923, "learning_rate": 3.367056664979783e-05, "loss": 0.5102, "num_tokens": 3102703260.0, "step": 4058 }, { "epoch": 1.4869692667063619, "grad_norm": 0.16327723485736328, "learning_rate": 3.366731464560604e-05, "loss": 0.5319, "num_tokens": 3103463582.0, "step": 4059 }, { "epoch": 1.4873356845142673, "grad_norm": 0.15137520410306146, "learning_rate": 3.3664061984521175e-05, "loss": 0.4952, "num_tokens": 3104256564.0, "step": 4060 }, { "epoch": 1.4877021023221728, "grad_norm": 0.15654740221128757, "learning_rate": 3.366080866672637e-05, "loss": 0.5296, "num_tokens": 3104971091.0, "step": 4061 }, { "epoch": 1.4880685201300783, "grad_norm": 0.16654426952663934, "learning_rate": 3.365755469240479e-05, "loss": 0.4783, "num_tokens": 3105643908.0, "step": 4062 }, { "epoch": 1.4884349379379838, "grad_norm": 0.17042280759791056, "learning_rate": 3.3654300061739636e-05, "loss": 0.5058, "num_tokens": 3106432364.0, "step": 4063 }, { "epoch": 1.4888013557458892, "grad_norm": 0.18911644488558282, "learning_rate": 3.365104477491415e-05, "loss": 0.5037, "num_tokens": 3107244819.0, "step": 4064 }, { "epoch": 1.4891677735537947, "grad_norm": 0.17413153133104214, "learning_rate": 3.364778883211163e-05, "loss": 0.5066, "num_tokens": 3108045477.0, "step": 4065 }, { "epoch": 1.4895341913617002, "grad_norm": 0.13709344046966673, "learning_rate": 3.364453223351537e-05, "loss": 0.4687, "num_tokens": 3108879148.0, "step": 4066 }, { "epoch": 1.4899006091696056, "grad_norm": 0.1703328385869015, "learning_rate": 3.3641274979308724e-05, "loss": 0.5282, "num_tokens": 3109697927.0, "step": 4067 }, { "epoch": 1.4902670269775111, "grad_norm": 0.15589596882557627, "learning_rate": 3.36380170696751e-05, "loss": 0.4805, "num_tokens": 3110565317.0, "step": 4068 }, { "epoch": 1.4906334447854166, "grad_norm": 0.1504851511439896, "learning_rate": 3.36347585047979e-05, "loss": 0.5108, "num_tokens": 3111204434.0, "step": 4069 }, { "epoch": 1.490999862593322, "grad_norm": 0.13489964010020478, "learning_rate": 3.363149928486061e-05, "loss": 0.5108, "num_tokens": 3112047515.0, "step": 4070 }, { "epoch": 1.4913662804012275, "grad_norm": 0.1512409210765939, "learning_rate": 3.362823941004672e-05, "loss": 0.4915, "num_tokens": 3112847264.0, "step": 4071 }, { "epoch": 1.491732698209133, "grad_norm": 0.14985921841564437, "learning_rate": 3.362497888053977e-05, "loss": 0.4983, "num_tokens": 3113669162.0, "step": 4072 }, { "epoch": 1.4920991160170385, "grad_norm": 0.1479717446602151, "learning_rate": 3.3621717696523334e-05, "loss": 0.5033, "num_tokens": 3114413491.0, "step": 4073 }, { "epoch": 1.492465533824944, "grad_norm": 0.14644722007712352, "learning_rate": 3.361845585818102e-05, "loss": 0.5328, "num_tokens": 3115123220.0, "step": 4074 }, { "epoch": 1.4928319516328494, "grad_norm": 0.15438656805133752, "learning_rate": 3.3615193365696484e-05, "loss": 0.5022, "num_tokens": 3115834860.0, "step": 4075 }, { "epoch": 1.493198369440755, "grad_norm": 0.15100097228525455, "learning_rate": 3.36119302192534e-05, "loss": 0.4755, "num_tokens": 3116650564.0, "step": 4076 }, { "epoch": 1.4935647872486602, "grad_norm": 0.17361279034680943, "learning_rate": 3.3608666419035496e-05, "loss": 0.5571, "num_tokens": 3117339858.0, "step": 4077 }, { "epoch": 1.4939312050565658, "grad_norm": 0.14631610234649242, "learning_rate": 3.360540196522653e-05, "loss": 0.482, "num_tokens": 3118267111.0, "step": 4078 }, { "epoch": 1.494297622864471, "grad_norm": 0.14983657870864442, "learning_rate": 3.36021368580103e-05, "loss": 0.4816, "num_tokens": 3119088704.0, "step": 4079 }, { "epoch": 1.4946640406723768, "grad_norm": 0.14407603494919377, "learning_rate": 3.359887109757062e-05, "loss": 0.5074, "num_tokens": 3119850025.0, "step": 4080 }, { "epoch": 1.495030458480282, "grad_norm": 0.15544883561819603, "learning_rate": 3.359560468409139e-05, "loss": 0.5262, "num_tokens": 3120647207.0, "step": 4081 }, { "epoch": 1.4953968762881877, "grad_norm": 0.1420629652844402, "learning_rate": 3.35923376177565e-05, "loss": 0.5122, "num_tokens": 3121353964.0, "step": 4082 }, { "epoch": 1.495763294096093, "grad_norm": 0.15565767324514485, "learning_rate": 3.358906989874988e-05, "loss": 0.503, "num_tokens": 3122069262.0, "step": 4083 }, { "epoch": 1.4961297119039985, "grad_norm": 0.15313024836721592, "learning_rate": 3.358580152725553e-05, "loss": 0.5197, "num_tokens": 3122870381.0, "step": 4084 }, { "epoch": 1.496496129711904, "grad_norm": 0.14820078457484695, "learning_rate": 3.358253250345745e-05, "loss": 0.5185, "num_tokens": 3123722704.0, "step": 4085 }, { "epoch": 1.4968625475198094, "grad_norm": 0.15001684688023825, "learning_rate": 3.35792628275397e-05, "loss": 0.5114, "num_tokens": 3124632487.0, "step": 4086 }, { "epoch": 1.4972289653277149, "grad_norm": 0.15598684830255807, "learning_rate": 3.357599249968637e-05, "loss": 0.5294, "num_tokens": 3125349372.0, "step": 4087 }, { "epoch": 1.4975953831356204, "grad_norm": 0.1476570073444768, "learning_rate": 3.3572721520081586e-05, "loss": 0.4951, "num_tokens": 3126157683.0, "step": 4088 }, { "epoch": 1.4979618009435258, "grad_norm": 0.18752469490782978, "learning_rate": 3.356944988890951e-05, "loss": 0.5216, "num_tokens": 3126858929.0, "step": 4089 }, { "epoch": 1.4983282187514313, "grad_norm": 0.1469217338144418, "learning_rate": 3.356617760635434e-05, "loss": 0.5083, "num_tokens": 3127660163.0, "step": 4090 }, { "epoch": 1.4986946365593368, "grad_norm": 0.16984775333189805, "learning_rate": 3.35629046726003e-05, "loss": 0.5016, "num_tokens": 3128391722.0, "step": 4091 }, { "epoch": 1.4990610543672422, "grad_norm": 0.1570409217318143, "learning_rate": 3.3559631087831694e-05, "loss": 0.4801, "num_tokens": 3129148382.0, "step": 4092 }, { "epoch": 1.4994274721751477, "grad_norm": 0.16501983811848822, "learning_rate": 3.3556356852232804e-05, "loss": 0.4989, "num_tokens": 3129897448.0, "step": 4093 }, { "epoch": 1.4997938899830532, "grad_norm": 0.1437237196903454, "learning_rate": 3.355308196598798e-05, "loss": 0.4737, "num_tokens": 3130647137.0, "step": 4094 }, { "epoch": 1.5001603077909587, "grad_norm": 0.16463450169498345, "learning_rate": 3.354980642928161e-05, "loss": 0.5105, "num_tokens": 3131491573.0, "step": 4095 }, { "epoch": 1.5005267255988641, "grad_norm": 0.167480965644783, "learning_rate": 3.3546530242298115e-05, "loss": 0.4905, "num_tokens": 3132270204.0, "step": 4096 }, { "epoch": 1.5008931434067696, "grad_norm": 0.1654044260231503, "learning_rate": 3.354325340522195e-05, "loss": 0.4789, "num_tokens": 3133075227.0, "step": 4097 }, { "epoch": 1.501259561214675, "grad_norm": 0.1547805149459672, "learning_rate": 3.35399759182376e-05, "loss": 0.4764, "num_tokens": 3133800454.0, "step": 4098 }, { "epoch": 1.5016259790225805, "grad_norm": 0.16486257100510263, "learning_rate": 3.3536697781529605e-05, "loss": 0.4898, "num_tokens": 3134577510.0, "step": 4099 }, { "epoch": 1.5019923968304858, "grad_norm": 0.1511639571834604, "learning_rate": 3.3533418995282526e-05, "loss": 0.4595, "num_tokens": 3135328452.0, "step": 4100 }, { "epoch": 1.5023588146383915, "grad_norm": 0.18900846748897543, "learning_rate": 3.353013955968096e-05, "loss": 0.5093, "num_tokens": 3136147438.0, "step": 4101 }, { "epoch": 1.5027252324462967, "grad_norm": 0.15191641921738874, "learning_rate": 3.352685947490955e-05, "loss": 0.5143, "num_tokens": 3136779860.0, "step": 4102 }, { "epoch": 1.5030916502542024, "grad_norm": 0.1552670635378131, "learning_rate": 3.352357874115298e-05, "loss": 0.5114, "num_tokens": 3137487233.0, "step": 4103 }, { "epoch": 1.5034580680621077, "grad_norm": 0.18160635950971898, "learning_rate": 3.352029735859594e-05, "loss": 0.4979, "num_tokens": 3138095185.0, "step": 4104 }, { "epoch": 1.5038244858700134, "grad_norm": 0.1736894739789436, "learning_rate": 3.35170153274232e-05, "loss": 0.4678, "num_tokens": 3138797509.0, "step": 4105 }, { "epoch": 1.5041909036779186, "grad_norm": 0.1448019662295789, "learning_rate": 3.3513732647819544e-05, "loss": 0.5033, "num_tokens": 3139616448.0, "step": 4106 }, { "epoch": 1.5045573214858243, "grad_norm": 0.16726691465868607, "learning_rate": 3.351044931996977e-05, "loss": 0.5118, "num_tokens": 3140305209.0, "step": 4107 }, { "epoch": 1.5049237392937296, "grad_norm": 0.1653171769240875, "learning_rate": 3.350716534405876e-05, "loss": 0.5195, "num_tokens": 3141080065.0, "step": 4108 }, { "epoch": 1.5052901571016353, "grad_norm": 0.14873706930440173, "learning_rate": 3.35038807202714e-05, "loss": 0.4893, "num_tokens": 3141783599.0, "step": 4109 }, { "epoch": 1.5056565749095405, "grad_norm": 0.16102164174629685, "learning_rate": 3.3500595448792616e-05, "loss": 0.5084, "num_tokens": 3142598715.0, "step": 4110 }, { "epoch": 1.5060229927174462, "grad_norm": 0.13946481328451846, "learning_rate": 3.349730952980739e-05, "loss": 0.4919, "num_tokens": 3143316991.0, "step": 4111 }, { "epoch": 1.5063894105253515, "grad_norm": 0.14952537679434696, "learning_rate": 3.349402296350071e-05, "loss": 0.4677, "num_tokens": 3144175776.0, "step": 4112 }, { "epoch": 1.506755828333257, "grad_norm": 0.18083002143085175, "learning_rate": 3.349073575005762e-05, "loss": 0.5204, "num_tokens": 3144950479.0, "step": 4113 }, { "epoch": 1.5071222461411624, "grad_norm": 0.16246215321799848, "learning_rate": 3.348744788966321e-05, "loss": 0.4859, "num_tokens": 3145795077.0, "step": 4114 }, { "epoch": 1.5074886639490679, "grad_norm": 0.16475171631936317, "learning_rate": 3.348415938250258e-05, "loss": 0.5158, "num_tokens": 3146520950.0, "step": 4115 }, { "epoch": 1.5078550817569734, "grad_norm": 0.15760334824044206, "learning_rate": 3.3480870228760876e-05, "loss": 0.509, "num_tokens": 3147256317.0, "step": 4116 }, { "epoch": 1.5082214995648788, "grad_norm": 0.15828162982159674, "learning_rate": 3.347758042862329e-05, "loss": 0.5312, "num_tokens": 3147990107.0, "step": 4117 }, { "epoch": 1.5085879173727843, "grad_norm": 0.1551757176596603, "learning_rate": 3.347428998227505e-05, "loss": 0.5068, "num_tokens": 3148803751.0, "step": 4118 }, { "epoch": 1.5089543351806898, "grad_norm": 0.1469495648268884, "learning_rate": 3.34709988899014e-05, "loss": 0.527, "num_tokens": 3149636103.0, "step": 4119 }, { "epoch": 1.5093207529885952, "grad_norm": 0.15197450930733297, "learning_rate": 3.346770715168765e-05, "loss": 0.5238, "num_tokens": 3150382740.0, "step": 4120 }, { "epoch": 1.5096871707965007, "grad_norm": 0.15332109651856146, "learning_rate": 3.346441476781912e-05, "loss": 0.5132, "num_tokens": 3151106250.0, "step": 4121 }, { "epoch": 1.5100535886044062, "grad_norm": 0.13675554760963582, "learning_rate": 3.3461121738481184e-05, "loss": 0.4615, "num_tokens": 3151873152.0, "step": 4122 }, { "epoch": 1.5104200064123117, "grad_norm": 0.15942393402057017, "learning_rate": 3.345782806385925e-05, "loss": 0.4945, "num_tokens": 3152676273.0, "step": 4123 }, { "epoch": 1.5107864242202171, "grad_norm": 0.14178148516546923, "learning_rate": 3.3454533744138746e-05, "loss": 0.5291, "num_tokens": 3153522115.0, "step": 4124 }, { "epoch": 1.5111528420281226, "grad_norm": 0.14916162699232333, "learning_rate": 3.345123877950517e-05, "loss": 0.5093, "num_tokens": 3154393385.0, "step": 4125 }, { "epoch": 1.511519259836028, "grad_norm": 0.14206637251167556, "learning_rate": 3.344794317014401e-05, "loss": 0.5106, "num_tokens": 3155146189.0, "step": 4126 }, { "epoch": 1.5118856776439333, "grad_norm": 0.14925114258992994, "learning_rate": 3.3444646916240825e-05, "loss": 0.5199, "num_tokens": 3155875159.0, "step": 4127 }, { "epoch": 1.512252095451839, "grad_norm": 0.1501810086146472, "learning_rate": 3.344135001798121e-05, "loss": 0.4854, "num_tokens": 3156661088.0, "step": 4128 }, { "epoch": 1.5126185132597443, "grad_norm": 0.1339587372480815, "learning_rate": 3.343805247555078e-05, "loss": 0.5054, "num_tokens": 3157367787.0, "step": 4129 }, { "epoch": 1.51298493106765, "grad_norm": 0.14674875806331464, "learning_rate": 3.3434754289135186e-05, "loss": 0.4916, "num_tokens": 3158184647.0, "step": 4130 }, { "epoch": 1.5133513488755552, "grad_norm": 0.14695932584171303, "learning_rate": 3.343145545892014e-05, "loss": 0.5, "num_tokens": 3158929615.0, "step": 4131 }, { "epoch": 1.513717766683461, "grad_norm": 0.12858141671849577, "learning_rate": 3.3428155985091345e-05, "loss": 0.456, "num_tokens": 3159692229.0, "step": 4132 }, { "epoch": 1.5140841844913662, "grad_norm": 0.14836825041223806, "learning_rate": 3.3424855867834595e-05, "loss": 0.5111, "num_tokens": 3160407818.0, "step": 4133 }, { "epoch": 1.5144506022992719, "grad_norm": 0.14799674317809802, "learning_rate": 3.3421555107335685e-05, "loss": 0.5207, "num_tokens": 3161087036.0, "step": 4134 }, { "epoch": 1.5148170201071771, "grad_norm": 0.13178201796447817, "learning_rate": 3.341825370378045e-05, "loss": 0.4865, "num_tokens": 3161968631.0, "step": 4135 }, { "epoch": 1.5151834379150828, "grad_norm": 0.1438768190101168, "learning_rate": 3.341495165735476e-05, "loss": 0.5003, "num_tokens": 3162711376.0, "step": 4136 }, { "epoch": 1.515549855722988, "grad_norm": 0.1385908414587348, "learning_rate": 3.341164896824454e-05, "loss": 0.5217, "num_tokens": 3163419833.0, "step": 4137 }, { "epoch": 1.5159162735308938, "grad_norm": 0.14877729600966344, "learning_rate": 3.3408345636635736e-05, "loss": 0.5311, "num_tokens": 3164150152.0, "step": 4138 }, { "epoch": 1.516282691338799, "grad_norm": 0.14362907029102165, "learning_rate": 3.340504166271434e-05, "loss": 0.4888, "num_tokens": 3164845171.0, "step": 4139 }, { "epoch": 1.5166491091467045, "grad_norm": 0.14146254369816974, "learning_rate": 3.3401737046666335e-05, "loss": 0.4829, "num_tokens": 3165665933.0, "step": 4140 }, { "epoch": 1.51701552695461, "grad_norm": 0.15257797735628106, "learning_rate": 3.3398431788677825e-05, "loss": 0.5264, "num_tokens": 3166398241.0, "step": 4141 }, { "epoch": 1.5173819447625154, "grad_norm": 0.13486543606725815, "learning_rate": 3.339512588893487e-05, "loss": 0.5223, "num_tokens": 3167176840.0, "step": 4142 }, { "epoch": 1.517748362570421, "grad_norm": 0.1602928760203969, "learning_rate": 3.339181934762362e-05, "loss": 0.5441, "num_tokens": 3167901978.0, "step": 4143 }, { "epoch": 1.5181147803783264, "grad_norm": 0.14549466289818017, "learning_rate": 3.338851216493022e-05, "loss": 0.5082, "num_tokens": 3168608244.0, "step": 4144 }, { "epoch": 1.5184811981862318, "grad_norm": 0.1512355558524844, "learning_rate": 3.338520434104089e-05, "loss": 0.5599, "num_tokens": 3169320206.0, "step": 4145 }, { "epoch": 1.5188476159941373, "grad_norm": 0.1575558993219361, "learning_rate": 3.3381895876141844e-05, "loss": 0.5216, "num_tokens": 3170092402.0, "step": 4146 }, { "epoch": 1.5192140338020428, "grad_norm": 0.13966598235396202, "learning_rate": 3.337858677041938e-05, "loss": 0.5052, "num_tokens": 3170878967.0, "step": 4147 }, { "epoch": 1.5195804516099483, "grad_norm": 0.14320821386459137, "learning_rate": 3.33752770240598e-05, "loss": 0.4796, "num_tokens": 3171706131.0, "step": 4148 }, { "epoch": 1.5199468694178537, "grad_norm": 0.14208939246966595, "learning_rate": 3.3371966637249434e-05, "loss": 0.4827, "num_tokens": 3172490399.0, "step": 4149 }, { "epoch": 1.5203132872257592, "grad_norm": 0.13905590668684636, "learning_rate": 3.336865561017468e-05, "loss": 0.4776, "num_tokens": 3173325394.0, "step": 4150 }, { "epoch": 1.5206797050336647, "grad_norm": 0.1475110890264382, "learning_rate": 3.336534394302195e-05, "loss": 0.4974, "num_tokens": 3174194321.0, "step": 4151 }, { "epoch": 1.5210461228415701, "grad_norm": 0.14209084702133692, "learning_rate": 3.33620316359777e-05, "loss": 0.4892, "num_tokens": 3174905120.0, "step": 4152 }, { "epoch": 1.5214125406494756, "grad_norm": 0.15210850119996772, "learning_rate": 3.335871868922842e-05, "loss": 0.4691, "num_tokens": 3175717464.0, "step": 4153 }, { "epoch": 1.5217789584573809, "grad_norm": 0.1582171474256731, "learning_rate": 3.335540510296063e-05, "loss": 0.5392, "num_tokens": 3176438745.0, "step": 4154 }, { "epoch": 1.5221453762652866, "grad_norm": 0.17128047290763249, "learning_rate": 3.335209087736089e-05, "loss": 0.5161, "num_tokens": 3177171070.0, "step": 4155 }, { "epoch": 1.5225117940731918, "grad_norm": 0.15858346658935074, "learning_rate": 3.3348776012615806e-05, "loss": 0.4919, "num_tokens": 3177868034.0, "step": 4156 }, { "epoch": 1.5228782118810975, "grad_norm": 0.1682790292899685, "learning_rate": 3.3345460508912e-05, "loss": 0.5601, "num_tokens": 3178567236.0, "step": 4157 }, { "epoch": 1.5232446296890028, "grad_norm": 0.1525531087609941, "learning_rate": 3.3342144366436164e-05, "loss": 0.5261, "num_tokens": 3179271220.0, "step": 4158 }, { "epoch": 1.5236110474969085, "grad_norm": 0.15203418554535506, "learning_rate": 3.3338827585374984e-05, "loss": 0.5188, "num_tokens": 3179956539.0, "step": 4159 }, { "epoch": 1.5239774653048137, "grad_norm": 0.13210273133820996, "learning_rate": 3.33355101659152e-05, "loss": 0.4467, "num_tokens": 3180784743.0, "step": 4160 }, { "epoch": 1.5243438831127194, "grad_norm": 0.1708233956124924, "learning_rate": 3.33321921082436e-05, "loss": 0.5264, "num_tokens": 3181606679.0, "step": 4161 }, { "epoch": 1.5247103009206247, "grad_norm": 0.1420363565334576, "learning_rate": 3.332887341254699e-05, "loss": 0.5525, "num_tokens": 3182392828.0, "step": 4162 }, { "epoch": 1.5250767187285303, "grad_norm": 0.1428704777087673, "learning_rate": 3.332555407901222e-05, "loss": 0.5099, "num_tokens": 3183135686.0, "step": 4163 }, { "epoch": 1.5254431365364356, "grad_norm": 0.1445346678406367, "learning_rate": 3.332223410782618e-05, "loss": 0.4926, "num_tokens": 3183929382.0, "step": 4164 }, { "epoch": 1.5258095543443413, "grad_norm": 0.13838643463747283, "learning_rate": 3.331891349917579e-05, "loss": 0.4971, "num_tokens": 3184600354.0, "step": 4165 }, { "epoch": 1.5261759721522465, "grad_norm": 0.15264516006375808, "learning_rate": 3.331559225324799e-05, "loss": 0.5104, "num_tokens": 3185359464.0, "step": 4166 }, { "epoch": 1.526542389960152, "grad_norm": 0.13608243255697738, "learning_rate": 3.33122703702298e-05, "loss": 0.4849, "num_tokens": 3186053721.0, "step": 4167 }, { "epoch": 1.5269088077680575, "grad_norm": 0.13173354775221735, "learning_rate": 3.330894785030823e-05, "loss": 0.4839, "num_tokens": 3186894813.0, "step": 4168 }, { "epoch": 1.527275225575963, "grad_norm": 0.1476112222363443, "learning_rate": 3.3305624693670355e-05, "loss": 0.5013, "num_tokens": 3187727806.0, "step": 4169 }, { "epoch": 1.5276416433838684, "grad_norm": 0.14613516048731343, "learning_rate": 3.330230090050327e-05, "loss": 0.5215, "num_tokens": 3188477287.0, "step": 4170 }, { "epoch": 1.528008061191774, "grad_norm": 0.14822404779463136, "learning_rate": 3.3298976470994116e-05, "loss": 0.515, "num_tokens": 3189223301.0, "step": 4171 }, { "epoch": 1.5283744789996794, "grad_norm": 0.16193751325851882, "learning_rate": 3.329565140533006e-05, "loss": 0.5281, "num_tokens": 3189927454.0, "step": 4172 }, { "epoch": 1.5287408968075848, "grad_norm": 0.15091100945793598, "learning_rate": 3.3292325703698304e-05, "loss": 0.5085, "num_tokens": 3190698268.0, "step": 4173 }, { "epoch": 1.5291073146154903, "grad_norm": 0.14988441474139008, "learning_rate": 3.32889993662861e-05, "loss": 0.4968, "num_tokens": 3191369137.0, "step": 4174 }, { "epoch": 1.5294737324233958, "grad_norm": 0.15384607515170512, "learning_rate": 3.328567239328073e-05, "loss": 0.4925, "num_tokens": 3192049618.0, "step": 4175 }, { "epoch": 1.5298401502313013, "grad_norm": 0.1386564513942813, "learning_rate": 3.32823447848695e-05, "loss": 0.4601, "num_tokens": 3192800450.0, "step": 4176 }, { "epoch": 1.5302065680392067, "grad_norm": 0.15510125372479872, "learning_rate": 3.327901654123977e-05, "loss": 0.507, "num_tokens": 3193500758.0, "step": 4177 }, { "epoch": 1.5305729858471122, "grad_norm": 0.15304729257306587, "learning_rate": 3.327568766257892e-05, "loss": 0.4616, "num_tokens": 3194331642.0, "step": 4178 }, { "epoch": 1.5309394036550177, "grad_norm": 0.15085981377678462, "learning_rate": 3.327235814907438e-05, "loss": 0.475, "num_tokens": 3195002695.0, "step": 4179 }, { "epoch": 1.5313058214629232, "grad_norm": 0.1583007905732204, "learning_rate": 3.3269028000913596e-05, "loss": 0.5229, "num_tokens": 3195673161.0, "step": 4180 }, { "epoch": 1.5316722392708284, "grad_norm": 0.14113826475019886, "learning_rate": 3.326569721828408e-05, "loss": 0.4772, "num_tokens": 3196501244.0, "step": 4181 }, { "epoch": 1.532038657078734, "grad_norm": 0.1412520465894273, "learning_rate": 3.3262365801373344e-05, "loss": 0.5171, "num_tokens": 3197266680.0, "step": 4182 }, { "epoch": 1.5324050748866394, "grad_norm": 0.1524503165535072, "learning_rate": 3.325903375036896e-05, "loss": 0.4927, "num_tokens": 3197913479.0, "step": 4183 }, { "epoch": 1.532771492694545, "grad_norm": 0.14791946288321334, "learning_rate": 3.325570106545853e-05, "loss": 0.4719, "num_tokens": 3198730148.0, "step": 4184 }, { "epoch": 1.5331379105024503, "grad_norm": 0.1564541256542883, "learning_rate": 3.325236774682969e-05, "loss": 0.4988, "num_tokens": 3199492389.0, "step": 4185 }, { "epoch": 1.533504328310356, "grad_norm": 0.14709455908295246, "learning_rate": 3.324903379467012e-05, "loss": 0.5108, "num_tokens": 3200320583.0, "step": 4186 }, { "epoch": 1.5338707461182612, "grad_norm": 0.15417493521525374, "learning_rate": 3.324569920916751e-05, "loss": 0.5275, "num_tokens": 3201128985.0, "step": 4187 }, { "epoch": 1.534237163926167, "grad_norm": 0.1468850289959018, "learning_rate": 3.3242363990509626e-05, "loss": 0.4783, "num_tokens": 3201804848.0, "step": 4188 }, { "epoch": 1.5346035817340722, "grad_norm": 0.15452905038648385, "learning_rate": 3.323902813888423e-05, "loss": 0.4878, "num_tokens": 3202505841.0, "step": 4189 }, { "epoch": 1.5349699995419779, "grad_norm": 0.15935624468873918, "learning_rate": 3.323569165447915e-05, "loss": 0.5079, "num_tokens": 3203372080.0, "step": 4190 }, { "epoch": 1.5353364173498831, "grad_norm": 0.15722378705273168, "learning_rate": 3.323235453748222e-05, "loss": 0.4995, "num_tokens": 3204157119.0, "step": 4191 }, { "epoch": 1.5357028351577888, "grad_norm": 0.1604277774509009, "learning_rate": 3.3229016788081345e-05, "loss": 0.5407, "num_tokens": 3204856745.0, "step": 4192 }, { "epoch": 1.536069252965694, "grad_norm": 0.1462578322335373, "learning_rate": 3.3225678406464436e-05, "loss": 0.5076, "num_tokens": 3205641174.0, "step": 4193 }, { "epoch": 1.5364356707735995, "grad_norm": 0.1557563682726126, "learning_rate": 3.322233939281945e-05, "loss": 0.4946, "num_tokens": 3206443543.0, "step": 4194 }, { "epoch": 1.536802088581505, "grad_norm": 0.1441782641531196, "learning_rate": 3.321899974733439e-05, "loss": 0.5026, "num_tokens": 3207274243.0, "step": 4195 }, { "epoch": 1.5371685063894105, "grad_norm": 0.1457393714468038, "learning_rate": 3.321565947019727e-05, "loss": 0.5212, "num_tokens": 3208055493.0, "step": 4196 }, { "epoch": 1.537534924197316, "grad_norm": 0.14205715110599756, "learning_rate": 3.321231856159617e-05, "loss": 0.5029, "num_tokens": 3208941571.0, "step": 4197 }, { "epoch": 1.5379013420052214, "grad_norm": 0.15519300157382485, "learning_rate": 3.3208977021719175e-05, "loss": 0.4952, "num_tokens": 3209792486.0, "step": 4198 }, { "epoch": 1.538267759813127, "grad_norm": 0.14453161645150175, "learning_rate": 3.3205634850754436e-05, "loss": 0.5092, "num_tokens": 3210665368.0, "step": 4199 }, { "epoch": 1.5386341776210324, "grad_norm": 0.15068495955181663, "learning_rate": 3.32022920488901e-05, "loss": 0.5246, "num_tokens": 3211388023.0, "step": 4200 }, { "epoch": 1.5390005954289379, "grad_norm": 0.15785540646230295, "learning_rate": 3.31989486163144e-05, "loss": 0.5096, "num_tokens": 3212078281.0, "step": 4201 }, { "epoch": 1.5393670132368433, "grad_norm": 0.13666476802275665, "learning_rate": 3.319560455321557e-05, "loss": 0.4737, "num_tokens": 3212842278.0, "step": 4202 }, { "epoch": 1.5397334310447488, "grad_norm": 0.14735391503122383, "learning_rate": 3.3192259859781876e-05, "loss": 0.4919, "num_tokens": 3213632542.0, "step": 4203 }, { "epoch": 1.5400998488526543, "grad_norm": 0.5947034395879982, "learning_rate": 3.3188914536201646e-05, "loss": 0.5391, "num_tokens": 3214349592.0, "step": 4204 }, { "epoch": 1.5404662666605597, "grad_norm": 0.1544530952908619, "learning_rate": 3.318556858266321e-05, "loss": 0.4754, "num_tokens": 3215287634.0, "step": 4205 }, { "epoch": 1.5408326844684652, "grad_norm": 0.1501052975503874, "learning_rate": 3.318222199935498e-05, "loss": 0.5108, "num_tokens": 3215960748.0, "step": 4206 }, { "epoch": 1.5411991022763707, "grad_norm": 0.13255877585669304, "learning_rate": 3.3178874786465345e-05, "loss": 0.4704, "num_tokens": 3216783999.0, "step": 4207 }, { "epoch": 1.541565520084276, "grad_norm": 0.14366949516480615, "learning_rate": 3.317552694418277e-05, "loss": 0.5126, "num_tokens": 3217518827.0, "step": 4208 }, { "epoch": 1.5419319378921816, "grad_norm": 0.18421979808836922, "learning_rate": 3.317217847269576e-05, "loss": 0.5092, "num_tokens": 3218328279.0, "step": 4209 }, { "epoch": 1.5422983557000869, "grad_norm": 0.1278680299621883, "learning_rate": 3.316882937219283e-05, "loss": 0.4877, "num_tokens": 3219139769.0, "step": 4210 }, { "epoch": 1.5426647735079926, "grad_norm": 0.1383679886335034, "learning_rate": 3.3165479642862535e-05, "loss": 0.48, "num_tokens": 3219915983.0, "step": 4211 }, { "epoch": 1.5430311913158978, "grad_norm": 0.14064933808814373, "learning_rate": 3.316212928489347e-05, "loss": 0.4667, "num_tokens": 3220690908.0, "step": 4212 }, { "epoch": 1.5433976091238035, "grad_norm": 0.14721069285428057, "learning_rate": 3.315877829847428e-05, "loss": 0.5189, "num_tokens": 3221429747.0, "step": 4213 }, { "epoch": 1.5437640269317088, "grad_norm": 0.15297234382679142, "learning_rate": 3.315542668379363e-05, "loss": 0.5017, "num_tokens": 3222189665.0, "step": 4214 }, { "epoch": 1.5441304447396145, "grad_norm": 0.1491688100538282, "learning_rate": 3.3152074441040216e-05, "loss": 0.5013, "num_tokens": 3222900717.0, "step": 4215 }, { "epoch": 1.5444968625475197, "grad_norm": 0.1471246508498669, "learning_rate": 3.314872157040277e-05, "loss": 0.4955, "num_tokens": 3223769540.0, "step": 4216 }, { "epoch": 1.5448632803554254, "grad_norm": 0.14085985046033667, "learning_rate": 3.314536807207008e-05, "loss": 0.4929, "num_tokens": 3224503399.0, "step": 4217 }, { "epoch": 1.5452296981633307, "grad_norm": 0.15773982332312497, "learning_rate": 3.3142013946230945e-05, "loss": 0.5036, "num_tokens": 3225223232.0, "step": 4218 }, { "epoch": 1.5455961159712364, "grad_norm": 0.15731624481006692, "learning_rate": 3.313865919307421e-05, "loss": 0.483, "num_tokens": 3225936619.0, "step": 4219 }, { "epoch": 1.5459625337791416, "grad_norm": 0.15902793270378598, "learning_rate": 3.313530381278876e-05, "loss": 0.5393, "num_tokens": 3226694493.0, "step": 4220 }, { "epoch": 1.546328951587047, "grad_norm": 0.14848748400942233, "learning_rate": 3.3131947805563504e-05, "loss": 0.493, "num_tokens": 3227519503.0, "step": 4221 }, { "epoch": 1.5466953693949526, "grad_norm": 0.17970779743794718, "learning_rate": 3.31285911715874e-05, "loss": 0.4883, "num_tokens": 3228316912.0, "step": 4222 }, { "epoch": 1.547061787202858, "grad_norm": 0.13800694647469008, "learning_rate": 3.3125233911049423e-05, "loss": 0.4964, "num_tokens": 3229083152.0, "step": 4223 }, { "epoch": 1.5474282050107635, "grad_norm": 0.1596755086224817, "learning_rate": 3.3121876024138596e-05, "loss": 0.5213, "num_tokens": 3229796426.0, "step": 4224 }, { "epoch": 1.547794622818669, "grad_norm": 0.139550498848675, "learning_rate": 3.311851751104398e-05, "loss": 0.4821, "num_tokens": 3230602238.0, "step": 4225 }, { "epoch": 1.5481610406265744, "grad_norm": 0.14924760066968804, "learning_rate": 3.311515837195466e-05, "loss": 0.5249, "num_tokens": 3231411436.0, "step": 4226 }, { "epoch": 1.54852745843448, "grad_norm": 0.13777424214583048, "learning_rate": 3.311179860705976e-05, "loss": 0.4861, "num_tokens": 3232222167.0, "step": 4227 }, { "epoch": 1.5488938762423854, "grad_norm": 0.15516574968302316, "learning_rate": 3.310843821654845e-05, "loss": 0.5139, "num_tokens": 3232922896.0, "step": 4228 }, { "epoch": 1.5492602940502909, "grad_norm": 0.15279123862502547, "learning_rate": 3.310507720060992e-05, "loss": 0.5478, "num_tokens": 3233620595.0, "step": 4229 }, { "epoch": 1.5496267118581963, "grad_norm": 0.1575325523569801, "learning_rate": 3.3101715559433416e-05, "loss": 0.5521, "num_tokens": 3234416775.0, "step": 4230 }, { "epoch": 1.5499931296661018, "grad_norm": 0.1306765274907837, "learning_rate": 3.309835329320818e-05, "loss": 0.4886, "num_tokens": 3235183683.0, "step": 4231 }, { "epoch": 1.5503595474740073, "grad_norm": 0.17133063419221567, "learning_rate": 3.309499040212353e-05, "loss": 0.4992, "num_tokens": 3235853691.0, "step": 4232 }, { "epoch": 1.5507259652819128, "grad_norm": 0.17674234113551507, "learning_rate": 3.309162688636881e-05, "loss": 0.527, "num_tokens": 3236613910.0, "step": 4233 }, { "epoch": 1.5510923830898182, "grad_norm": 0.13562788472950185, "learning_rate": 3.3088262746133375e-05, "loss": 0.4955, "num_tokens": 3237309207.0, "step": 4234 }, { "epoch": 1.5514588008977235, "grad_norm": 0.15378307865143281, "learning_rate": 3.3084897981606646e-05, "loss": 0.4951, "num_tokens": 3238020354.0, "step": 4235 }, { "epoch": 1.5518252187056292, "grad_norm": 0.15330009877128017, "learning_rate": 3.308153259297806e-05, "loss": 0.5144, "num_tokens": 3238751816.0, "step": 4236 }, { "epoch": 1.5521916365135344, "grad_norm": 0.14714555729271625, "learning_rate": 3.30781665804371e-05, "loss": 0.5084, "num_tokens": 3239495009.0, "step": 4237 }, { "epoch": 1.5525580543214401, "grad_norm": 0.16735895801618056, "learning_rate": 3.307479994417326e-05, "loss": 0.4781, "num_tokens": 3240152759.0, "step": 4238 }, { "epoch": 1.5529244721293454, "grad_norm": 0.14417018316699517, "learning_rate": 3.307143268437612e-05, "loss": 0.4666, "num_tokens": 3240913855.0, "step": 4239 }, { "epoch": 1.553290889937251, "grad_norm": 0.13814827043258987, "learning_rate": 3.306806480123524e-05, "loss": 0.4966, "num_tokens": 3241692804.0, "step": 4240 }, { "epoch": 1.5536573077451563, "grad_norm": 0.16642154579675786, "learning_rate": 3.306469629494026e-05, "loss": 0.4922, "num_tokens": 3242392117.0, "step": 4241 }, { "epoch": 1.554023725553062, "grad_norm": 0.1599234423214198, "learning_rate": 3.3061327165680805e-05, "loss": 0.5564, "num_tokens": 3243221907.0, "step": 4242 }, { "epoch": 1.5543901433609673, "grad_norm": 0.16455617928835362, "learning_rate": 3.3057957413646585e-05, "loss": 0.5404, "num_tokens": 3243948085.0, "step": 4243 }, { "epoch": 1.554756561168873, "grad_norm": 0.16361558980055582, "learning_rate": 3.305458703902732e-05, "loss": 0.4909, "num_tokens": 3244762760.0, "step": 4244 }, { "epoch": 1.5551229789767782, "grad_norm": 0.147100116874851, "learning_rate": 3.305121604201276e-05, "loss": 0.4808, "num_tokens": 3245435330.0, "step": 4245 }, { "epoch": 1.555489396784684, "grad_norm": 0.15403289969514902, "learning_rate": 3.304784442279271e-05, "loss": 0.472, "num_tokens": 3246232454.0, "step": 4246 }, { "epoch": 1.5558558145925891, "grad_norm": 0.13201254569689, "learning_rate": 3.3044472181556994e-05, "loss": 0.5011, "num_tokens": 3247033548.0, "step": 4247 }, { "epoch": 1.5562222324004946, "grad_norm": 0.1532740325663987, "learning_rate": 3.304109931849547e-05, "loss": 0.4721, "num_tokens": 3247839039.0, "step": 4248 }, { "epoch": 1.5565886502084, "grad_norm": 0.1374675557975588, "learning_rate": 3.303772583379805e-05, "loss": 0.4988, "num_tokens": 3248603259.0, "step": 4249 }, { "epoch": 1.5569550680163056, "grad_norm": 0.1347102855180176, "learning_rate": 3.3034351727654654e-05, "loss": 0.5007, "num_tokens": 3249371703.0, "step": 4250 }, { "epoch": 1.557321485824211, "grad_norm": 0.1553902093075201, "learning_rate": 3.303097700025526e-05, "loss": 0.4997, "num_tokens": 3250173177.0, "step": 4251 }, { "epoch": 1.5576879036321165, "grad_norm": 0.13382575378814857, "learning_rate": 3.302760165178987e-05, "loss": 0.4897, "num_tokens": 3250951317.0, "step": 4252 }, { "epoch": 1.558054321440022, "grad_norm": 0.15261477933515058, "learning_rate": 3.3024225682448526e-05, "loss": 0.5078, "num_tokens": 3251794907.0, "step": 4253 }, { "epoch": 1.5584207392479275, "grad_norm": 0.13635261974423124, "learning_rate": 3.302084909242129e-05, "loss": 0.5149, "num_tokens": 3252638882.0, "step": 4254 }, { "epoch": 1.558787157055833, "grad_norm": 0.1315080555799724, "learning_rate": 3.301747188189829e-05, "loss": 0.5001, "num_tokens": 3253401233.0, "step": 4255 }, { "epoch": 1.5591535748637384, "grad_norm": 0.147054718683001, "learning_rate": 3.301409405106965e-05, "loss": 0.4568, "num_tokens": 3254175666.0, "step": 4256 }, { "epoch": 1.5595199926716439, "grad_norm": 0.1375935667794501, "learning_rate": 3.3010715600125554e-05, "loss": 0.4916, "num_tokens": 3254986439.0, "step": 4257 }, { "epoch": 1.5598864104795493, "grad_norm": 0.15639155120749135, "learning_rate": 3.300733652925623e-05, "loss": 0.4894, "num_tokens": 3255719868.0, "step": 4258 }, { "epoch": 1.5602528282874548, "grad_norm": 0.1316723005931063, "learning_rate": 3.3003956838651906e-05, "loss": 0.5015, "num_tokens": 3256512788.0, "step": 4259 }, { "epoch": 1.56061924609536, "grad_norm": 0.1340037587234119, "learning_rate": 3.300057652850288e-05, "loss": 0.4776, "num_tokens": 3257289691.0, "step": 4260 }, { "epoch": 1.5609856639032658, "grad_norm": 0.13295604582417253, "learning_rate": 3.2997195598999463e-05, "loss": 0.5143, "num_tokens": 3258026135.0, "step": 4261 }, { "epoch": 1.561352081711171, "grad_norm": 0.1489047517666512, "learning_rate": 3.2993814050332006e-05, "loss": 0.4996, "num_tokens": 3258732965.0, "step": 4262 }, { "epoch": 1.5617184995190767, "grad_norm": 0.13329594273544423, "learning_rate": 3.29904318826909e-05, "loss": 0.5015, "num_tokens": 3259535208.0, "step": 4263 }, { "epoch": 1.562084917326982, "grad_norm": 0.16759992625473238, "learning_rate": 3.298704909626657e-05, "loss": 0.53, "num_tokens": 3260327416.0, "step": 4264 }, { "epoch": 1.5624513351348877, "grad_norm": 0.1408767930791493, "learning_rate": 3.298366569124947e-05, "loss": 0.4976, "num_tokens": 3261136103.0, "step": 4265 }, { "epoch": 1.562817752942793, "grad_norm": 0.15247405981149906, "learning_rate": 3.2980281667830105e-05, "loss": 0.5264, "num_tokens": 3261909295.0, "step": 4266 }, { "epoch": 1.5631841707506986, "grad_norm": 0.14140207137436342, "learning_rate": 3.2976897026198984e-05, "loss": 0.515, "num_tokens": 3262698694.0, "step": 4267 }, { "epoch": 1.5635505885586038, "grad_norm": 0.1519315127912977, "learning_rate": 3.297351176654668e-05, "loss": 0.4981, "num_tokens": 3263391224.0, "step": 4268 }, { "epoch": 1.5639170063665095, "grad_norm": 0.1556176559671261, "learning_rate": 3.297012588906378e-05, "loss": 0.5506, "num_tokens": 3264199088.0, "step": 4269 }, { "epoch": 1.5642834241744148, "grad_norm": 0.1486597134835669, "learning_rate": 3.2966739393940926e-05, "loss": 0.5078, "num_tokens": 3265017029.0, "step": 4270 }, { "epoch": 1.5646498419823205, "grad_norm": 0.13358508732388996, "learning_rate": 3.296335228136878e-05, "loss": 0.5051, "num_tokens": 3265854648.0, "step": 4271 }, { "epoch": 1.5650162597902257, "grad_norm": 0.15702897366395563, "learning_rate": 3.295996455153805e-05, "loss": 0.4883, "num_tokens": 3266677225.0, "step": 4272 }, { "epoch": 1.5653826775981314, "grad_norm": 0.1425917329753126, "learning_rate": 3.2956576204639463e-05, "loss": 0.5114, "num_tokens": 3267425230.0, "step": 4273 }, { "epoch": 1.5657490954060367, "grad_norm": 0.1375070293001893, "learning_rate": 3.2953187240863786e-05, "loss": 0.4757, "num_tokens": 3268095502.0, "step": 4274 }, { "epoch": 1.5661155132139422, "grad_norm": 0.14989987073979213, "learning_rate": 3.2949797660401834e-05, "loss": 0.5152, "num_tokens": 3268849519.0, "step": 4275 }, { "epoch": 1.5664819310218476, "grad_norm": 0.14055088489318945, "learning_rate": 3.294640746344445e-05, "loss": 0.5003, "num_tokens": 3269608370.0, "step": 4276 }, { "epoch": 1.566848348829753, "grad_norm": 0.13478571946980203, "learning_rate": 3.2943016650182494e-05, "loss": 0.4783, "num_tokens": 3270372841.0, "step": 4277 }, { "epoch": 1.5672147666376586, "grad_norm": 0.14372131086558285, "learning_rate": 3.2939625220806894e-05, "loss": 0.5028, "num_tokens": 3271116695.0, "step": 4278 }, { "epoch": 1.567581184445564, "grad_norm": 0.13891981438052922, "learning_rate": 3.293623317550857e-05, "loss": 0.4863, "num_tokens": 3271887655.0, "step": 4279 }, { "epoch": 1.5679476022534695, "grad_norm": 0.13865758572020986, "learning_rate": 3.2932840514478536e-05, "loss": 0.516, "num_tokens": 3272714503.0, "step": 4280 }, { "epoch": 1.568314020061375, "grad_norm": 0.12919250772090932, "learning_rate": 3.292944723790777e-05, "loss": 0.5086, "num_tokens": 3273455170.0, "step": 4281 }, { "epoch": 1.5686804378692805, "grad_norm": 0.16203758477366983, "learning_rate": 3.292605334598734e-05, "loss": 0.546, "num_tokens": 3274182846.0, "step": 4282 }, { "epoch": 1.569046855677186, "grad_norm": 0.1463442263929876, "learning_rate": 3.292265883890832e-05, "loss": 0.514, "num_tokens": 3274910955.0, "step": 4283 }, { "epoch": 1.5694132734850914, "grad_norm": 0.12513905297708625, "learning_rate": 3.2919263716861846e-05, "loss": 0.4935, "num_tokens": 3275672323.0, "step": 4284 }, { "epoch": 1.5697796912929969, "grad_norm": 0.15251165353643667, "learning_rate": 3.2915867980039045e-05, "loss": 0.4968, "num_tokens": 3276426330.0, "step": 4285 }, { "epoch": 1.5701461091009024, "grad_norm": 0.13999615410883892, "learning_rate": 3.2912471628631114e-05, "loss": 0.5057, "num_tokens": 3277110175.0, "step": 4286 }, { "epoch": 1.5705125269088076, "grad_norm": 0.16280044817835382, "learning_rate": 3.290907466282928e-05, "loss": 0.5286, "num_tokens": 3277844700.0, "step": 4287 }, { "epoch": 1.5708789447167133, "grad_norm": 0.16425448977601986, "learning_rate": 3.290567708282479e-05, "loss": 0.5279, "num_tokens": 3278582608.0, "step": 4288 }, { "epoch": 1.5712453625246185, "grad_norm": 0.15043580768313233, "learning_rate": 3.2902278888808936e-05, "loss": 0.5087, "num_tokens": 3279316346.0, "step": 4289 }, { "epoch": 1.5716117803325242, "grad_norm": 0.1446717784338314, "learning_rate": 3.2898880080973046e-05, "loss": 0.4946, "num_tokens": 3280070612.0, "step": 4290 }, { "epoch": 1.5719781981404295, "grad_norm": 0.13762976773687763, "learning_rate": 3.289548065950849e-05, "loss": 0.4754, "num_tokens": 3280883119.0, "step": 4291 }, { "epoch": 1.5723446159483352, "grad_norm": 0.1400691436446441, "learning_rate": 3.2892080624606644e-05, "loss": 0.5225, "num_tokens": 3281809623.0, "step": 4292 }, { "epoch": 1.5727110337562404, "grad_norm": 0.14590895559216283, "learning_rate": 3.2888679976458936e-05, "loss": 0.5159, "num_tokens": 3282523142.0, "step": 4293 }, { "epoch": 1.5730774515641461, "grad_norm": 0.13598657732899647, "learning_rate": 3.288527871525685e-05, "loss": 0.5277, "num_tokens": 3283356700.0, "step": 4294 }, { "epoch": 1.5734438693720514, "grad_norm": 0.14186906814240705, "learning_rate": 3.288187684119187e-05, "loss": 0.4823, "num_tokens": 3284174415.0, "step": 4295 }, { "epoch": 1.573810287179957, "grad_norm": 0.13853410682111772, "learning_rate": 3.287847435445552e-05, "loss": 0.5015, "num_tokens": 3284960925.0, "step": 4296 }, { "epoch": 1.5741767049878623, "grad_norm": 0.14108863729292334, "learning_rate": 3.2875071255239386e-05, "loss": 0.4927, "num_tokens": 3285824231.0, "step": 4297 }, { "epoch": 1.574543122795768, "grad_norm": 0.14073881982563374, "learning_rate": 3.287166754373506e-05, "loss": 0.4939, "num_tokens": 3286604316.0, "step": 4298 }, { "epoch": 1.5749095406036733, "grad_norm": 0.15292918066936909, "learning_rate": 3.286826322013418e-05, "loss": 0.49, "num_tokens": 3287371785.0, "step": 4299 }, { "epoch": 1.5752759584115787, "grad_norm": 0.1380348489468509, "learning_rate": 3.286485828462841e-05, "loss": 0.4889, "num_tokens": 3288228865.0, "step": 4300 }, { "epoch": 1.5756423762194842, "grad_norm": 0.1354116686838669, "learning_rate": 3.2861452737409454e-05, "loss": 0.4621, "num_tokens": 3289085556.0, "step": 4301 }, { "epoch": 1.5760087940273897, "grad_norm": 0.1491700234599458, "learning_rate": 3.285804657866906e-05, "loss": 0.5099, "num_tokens": 3289904583.0, "step": 4302 }, { "epoch": 1.5763752118352952, "grad_norm": 0.15042902625494273, "learning_rate": 3.285463980859899e-05, "loss": 0.5348, "num_tokens": 3290642186.0, "step": 4303 }, { "epoch": 1.5767416296432006, "grad_norm": 0.1435223585318462, "learning_rate": 3.285123242739106e-05, "loss": 0.5342, "num_tokens": 3291400937.0, "step": 4304 }, { "epoch": 1.577108047451106, "grad_norm": 0.1386213371192968, "learning_rate": 3.284782443523712e-05, "loss": 0.485, "num_tokens": 3292085024.0, "step": 4305 }, { "epoch": 1.5774744652590116, "grad_norm": 0.1466756780618705, "learning_rate": 3.284441583232903e-05, "loss": 0.4973, "num_tokens": 3292941829.0, "step": 4306 }, { "epoch": 1.577840883066917, "grad_norm": 0.149876313731616, "learning_rate": 3.284100661885872e-05, "loss": 0.4901, "num_tokens": 3293771335.0, "step": 4307 }, { "epoch": 1.5782073008748225, "grad_norm": 0.15149110554722522, "learning_rate": 3.283759679501811e-05, "loss": 0.5241, "num_tokens": 3294575471.0, "step": 4308 }, { "epoch": 1.578573718682728, "grad_norm": 0.15079057514776428, "learning_rate": 3.28341863609992e-05, "loss": 0.4849, "num_tokens": 3295251228.0, "step": 4309 }, { "epoch": 1.5789401364906335, "grad_norm": 0.1778272362499432, "learning_rate": 3.2830775316994006e-05, "loss": 0.5293, "num_tokens": 3295873465.0, "step": 4310 }, { "epoch": 1.579306554298539, "grad_norm": 0.14809824086480636, "learning_rate": 3.2827363663194565e-05, "loss": 0.506, "num_tokens": 3296650140.0, "step": 4311 }, { "epoch": 1.5796729721064444, "grad_norm": 0.15625424128009036, "learning_rate": 3.282395139979296e-05, "loss": 0.5292, "num_tokens": 3297350573.0, "step": 4312 }, { "epoch": 1.58003938991435, "grad_norm": 0.15259447146707875, "learning_rate": 3.282053852698132e-05, "loss": 0.4739, "num_tokens": 3298101752.0, "step": 4313 }, { "epoch": 1.5804058077222551, "grad_norm": 0.13226444166490745, "learning_rate": 3.2817125044951784e-05, "loss": 0.4672, "num_tokens": 3298858240.0, "step": 4314 }, { "epoch": 1.5807722255301608, "grad_norm": 0.15040276506135458, "learning_rate": 3.2813710953896536e-05, "loss": 0.4877, "num_tokens": 3299611004.0, "step": 4315 }, { "epoch": 1.581138643338066, "grad_norm": 0.13911531809107208, "learning_rate": 3.281029625400781e-05, "loss": 0.499, "num_tokens": 3300372816.0, "step": 4316 }, { "epoch": 1.5815050611459718, "grad_norm": 0.13951360951979783, "learning_rate": 3.280688094547786e-05, "loss": 0.4721, "num_tokens": 3301152977.0, "step": 4317 }, { "epoch": 1.581871478953877, "grad_norm": 0.14486317958636505, "learning_rate": 3.2803465028498955e-05, "loss": 0.4965, "num_tokens": 3301864381.0, "step": 4318 }, { "epoch": 1.5822378967617827, "grad_norm": 0.13810583078387462, "learning_rate": 3.280004850326344e-05, "loss": 0.5072, "num_tokens": 3302763576.0, "step": 4319 }, { "epoch": 1.582604314569688, "grad_norm": 0.13389783149700082, "learning_rate": 3.2796631369963664e-05, "loss": 0.4844, "num_tokens": 3303529581.0, "step": 4320 }, { "epoch": 1.5829707323775937, "grad_norm": 0.1380761786972188, "learning_rate": 3.2793213628792014e-05, "loss": 0.4855, "num_tokens": 3304338067.0, "step": 4321 }, { "epoch": 1.583337150185499, "grad_norm": 0.143026705678834, "learning_rate": 3.278979527994092e-05, "loss": 0.5244, "num_tokens": 3305127483.0, "step": 4322 }, { "epoch": 1.5837035679934046, "grad_norm": 0.14612245374179553, "learning_rate": 3.2786376323602835e-05, "loss": 0.5259, "num_tokens": 3305837069.0, "step": 4323 }, { "epoch": 1.5840699858013099, "grad_norm": 0.16312658578187128, "learning_rate": 3.278295675997027e-05, "loss": 0.5028, "num_tokens": 3306482060.0, "step": 4324 }, { "epoch": 1.5844364036092156, "grad_norm": 0.15119791574555588, "learning_rate": 3.277953658923574e-05, "loss": 0.5221, "num_tokens": 3307377534.0, "step": 4325 }, { "epoch": 1.5848028214171208, "grad_norm": 0.16182702334476798, "learning_rate": 3.27761158115918e-05, "loss": 0.5216, "num_tokens": 3308084850.0, "step": 4326 }, { "epoch": 1.5851692392250263, "grad_norm": 0.1619373800455756, "learning_rate": 3.277269442723107e-05, "loss": 0.5026, "num_tokens": 3308779893.0, "step": 4327 }, { "epoch": 1.5855356570329318, "grad_norm": 0.16132954692126703, "learning_rate": 3.276927243634616e-05, "loss": 0.5001, "num_tokens": 3309404796.0, "step": 4328 }, { "epoch": 1.5859020748408372, "grad_norm": 0.17748478241591978, "learning_rate": 3.2765849839129735e-05, "loss": 0.4937, "num_tokens": 3310204200.0, "step": 4329 }, { "epoch": 1.5862684926487427, "grad_norm": 0.14466255129307876, "learning_rate": 3.2762426635774515e-05, "loss": 0.5134, "num_tokens": 3310995905.0, "step": 4330 }, { "epoch": 1.5866349104566482, "grad_norm": 0.17791036490978746, "learning_rate": 3.2759002826473216e-05, "loss": 0.5032, "num_tokens": 3311733771.0, "step": 4331 }, { "epoch": 1.5870013282645536, "grad_norm": 0.15233139331554554, "learning_rate": 3.27555784114186e-05, "loss": 0.4734, "num_tokens": 3312538116.0, "step": 4332 }, { "epoch": 1.5873677460724591, "grad_norm": 0.13916589020213277, "learning_rate": 3.2752153390803485e-05, "loss": 0.5122, "num_tokens": 3313452171.0, "step": 4333 }, { "epoch": 1.5877341638803646, "grad_norm": 0.17155407090795863, "learning_rate": 3.2748727764820696e-05, "loss": 0.524, "num_tokens": 3314150687.0, "step": 4334 }, { "epoch": 1.58810058168827, "grad_norm": 0.14965814680298947, "learning_rate": 3.27453015336631e-05, "loss": 0.5263, "num_tokens": 3314898028.0, "step": 4335 }, { "epoch": 1.5884669994961755, "grad_norm": 0.15055752366916442, "learning_rate": 3.2741874697523624e-05, "loss": 0.5202, "num_tokens": 3315657490.0, "step": 4336 }, { "epoch": 1.588833417304081, "grad_norm": 0.13777337222901587, "learning_rate": 3.273844725659517e-05, "loss": 0.4948, "num_tokens": 3316493663.0, "step": 4337 }, { "epoch": 1.5891998351119865, "grad_norm": 0.14124542647963145, "learning_rate": 3.273501921107073e-05, "loss": 0.4997, "num_tokens": 3317236567.0, "step": 4338 }, { "epoch": 1.589566252919892, "grad_norm": 0.146927169290822, "learning_rate": 3.273159056114331e-05, "loss": 0.5081, "num_tokens": 3317954105.0, "step": 4339 }, { "epoch": 1.5899326707277974, "grad_norm": 0.14901586110090959, "learning_rate": 3.272816130700594e-05, "loss": 0.5069, "num_tokens": 3318598806.0, "step": 4340 }, { "epoch": 1.5902990885357027, "grad_norm": 0.1543085347962422, "learning_rate": 3.272473144885172e-05, "loss": 0.4889, "num_tokens": 3319351411.0, "step": 4341 }, { "epoch": 1.5906655063436084, "grad_norm": 0.16003002664978921, "learning_rate": 3.272130098687372e-05, "loss": 0.503, "num_tokens": 3320076066.0, "step": 4342 }, { "epoch": 1.5910319241515136, "grad_norm": 0.1334562028329969, "learning_rate": 3.27178699212651e-05, "loss": 0.5239, "num_tokens": 3320867037.0, "step": 4343 }, { "epoch": 1.5913983419594193, "grad_norm": 0.15820625292850668, "learning_rate": 3.2714438252219046e-05, "loss": 0.4994, "num_tokens": 3321595367.0, "step": 4344 }, { "epoch": 1.5917647597673246, "grad_norm": 0.14753743407313283, "learning_rate": 3.271100597992875e-05, "loss": 0.4722, "num_tokens": 3322347281.0, "step": 4345 }, { "epoch": 1.5921311775752303, "grad_norm": 0.1581250262693043, "learning_rate": 3.270757310458747e-05, "loss": 0.4941, "num_tokens": 3323081817.0, "step": 4346 }, { "epoch": 1.5924975953831355, "grad_norm": 0.15472518092522036, "learning_rate": 3.270413962638848e-05, "loss": 0.479, "num_tokens": 3323865078.0, "step": 4347 }, { "epoch": 1.5928640131910412, "grad_norm": 0.14408956856914262, "learning_rate": 3.270070554552508e-05, "loss": 0.4807, "num_tokens": 3324668011.0, "step": 4348 }, { "epoch": 1.5932304309989465, "grad_norm": 0.1386024707997803, "learning_rate": 3.269727086219063e-05, "loss": 0.5157, "num_tokens": 3325420251.0, "step": 4349 }, { "epoch": 1.5935968488068522, "grad_norm": 0.15032422872238033, "learning_rate": 3.26938355765785e-05, "loss": 0.4709, "num_tokens": 3326090591.0, "step": 4350 }, { "epoch": 1.5939632666147574, "grad_norm": 0.13101924373400822, "learning_rate": 3.269039968888211e-05, "loss": 0.4917, "num_tokens": 3326971980.0, "step": 4351 }, { "epoch": 1.594329684422663, "grad_norm": 0.14271339980359243, "learning_rate": 3.268696319929491e-05, "loss": 0.5052, "num_tokens": 3327774240.0, "step": 4352 }, { "epoch": 1.5946961022305683, "grad_norm": 0.16655972309452124, "learning_rate": 3.2683526108010366e-05, "loss": 0.5334, "num_tokens": 3328545689.0, "step": 4353 }, { "epoch": 1.5950625200384738, "grad_norm": 0.1565727907188055, "learning_rate": 3.268008841522201e-05, "loss": 0.499, "num_tokens": 3329362192.0, "step": 4354 }, { "epoch": 1.5954289378463793, "grad_norm": 0.15733724765160725, "learning_rate": 3.267665012112339e-05, "loss": 0.4948, "num_tokens": 3330152639.0, "step": 4355 }, { "epoch": 1.5957953556542848, "grad_norm": 0.1429352081270241, "learning_rate": 3.267321122590807e-05, "loss": 0.483, "num_tokens": 3330821913.0, "step": 4356 }, { "epoch": 1.5961617734621902, "grad_norm": 0.14211472118435528, "learning_rate": 3.266977172976969e-05, "loss": 0.4932, "num_tokens": 3331601629.0, "step": 4357 }, { "epoch": 1.5965281912700957, "grad_norm": 0.15956124621979356, "learning_rate": 3.266633163290189e-05, "loss": 0.4809, "num_tokens": 3332314603.0, "step": 4358 }, { "epoch": 1.5968946090780012, "grad_norm": 0.1448893805892276, "learning_rate": 3.266289093549835e-05, "loss": 0.4976, "num_tokens": 3333120741.0, "step": 4359 }, { "epoch": 1.5972610268859067, "grad_norm": 0.12246971534841083, "learning_rate": 3.265944963775279e-05, "loss": 0.4959, "num_tokens": 3333959205.0, "step": 4360 }, { "epoch": 1.5976274446938121, "grad_norm": 0.15438620361424085, "learning_rate": 3.265600773985897e-05, "loss": 0.4872, "num_tokens": 3334735736.0, "step": 4361 }, { "epoch": 1.5979938625017176, "grad_norm": 0.12596950795226008, "learning_rate": 3.265256524201067e-05, "loss": 0.4751, "num_tokens": 3335566447.0, "step": 4362 }, { "epoch": 1.598360280309623, "grad_norm": 0.14912270570517283, "learning_rate": 3.264912214440171e-05, "loss": 0.4725, "num_tokens": 3336488307.0, "step": 4363 }, { "epoch": 1.5987266981175285, "grad_norm": 0.1479723420699969, "learning_rate": 3.264567844722594e-05, "loss": 0.5305, "num_tokens": 3337282656.0, "step": 4364 }, { "epoch": 1.599093115925434, "grad_norm": 0.15864945272267117, "learning_rate": 3.264223415067726e-05, "loss": 0.5239, "num_tokens": 3338088016.0, "step": 4365 }, { "epoch": 1.5994595337333395, "grad_norm": 0.13729252523074534, "learning_rate": 3.263878925494957e-05, "loss": 0.5188, "num_tokens": 3338923573.0, "step": 4366 }, { "epoch": 1.599825951541245, "grad_norm": 0.15978256785055236, "learning_rate": 3.263534376023684e-05, "loss": 0.5061, "num_tokens": 3339656622.0, "step": 4367 }, { "epoch": 1.6001923693491502, "grad_norm": 0.16066429526228557, "learning_rate": 3.2631897666733054e-05, "loss": 0.549, "num_tokens": 3340296212.0, "step": 4368 }, { "epoch": 1.600558787157056, "grad_norm": 0.13877421076003565, "learning_rate": 3.2628450974632224e-05, "loss": 0.5048, "num_tokens": 3341108660.0, "step": 4369 }, { "epoch": 1.6009252049649612, "grad_norm": 0.15998179262099535, "learning_rate": 3.262500368412842e-05, "loss": 0.5171, "num_tokens": 3341923812.0, "step": 4370 }, { "epoch": 1.6012916227728669, "grad_norm": 0.1390429306025436, "learning_rate": 3.262155579541573e-05, "loss": 0.4866, "num_tokens": 3342699625.0, "step": 4371 }, { "epoch": 1.601658040580772, "grad_norm": 0.15983000574358694, "learning_rate": 3.2618107308688274e-05, "loss": 0.5411, "num_tokens": 3343468747.0, "step": 4372 }, { "epoch": 1.6020244583886778, "grad_norm": 0.1498141256874587, "learning_rate": 3.261465822414021e-05, "loss": 0.4854, "num_tokens": 3344296798.0, "step": 4373 }, { "epoch": 1.602390876196583, "grad_norm": 0.14377729986419086, "learning_rate": 3.261120854196572e-05, "loss": 0.495, "num_tokens": 3345028086.0, "step": 4374 }, { "epoch": 1.6027572940044887, "grad_norm": 0.14757046726239204, "learning_rate": 3.2607758262359045e-05, "loss": 0.4814, "num_tokens": 3345857787.0, "step": 4375 }, { "epoch": 1.603123711812394, "grad_norm": 0.14966629662201375, "learning_rate": 3.260430738551442e-05, "loss": 0.4937, "num_tokens": 3346551575.0, "step": 4376 }, { "epoch": 1.6034901296202997, "grad_norm": 0.1414315765788821, "learning_rate": 3.2600855911626154e-05, "loss": 0.4897, "num_tokens": 3347363503.0, "step": 4377 }, { "epoch": 1.603856547428205, "grad_norm": 0.15637422568971251, "learning_rate": 3.2597403840888566e-05, "loss": 0.4995, "num_tokens": 3348061284.0, "step": 4378 }, { "epoch": 1.6042229652361106, "grad_norm": 0.14169234099097927, "learning_rate": 3.2593951173496016e-05, "loss": 0.4683, "num_tokens": 3348846897.0, "step": 4379 }, { "epoch": 1.6045893830440159, "grad_norm": 0.16023408027731714, "learning_rate": 3.259049790964289e-05, "loss": 0.5004, "num_tokens": 3349479649.0, "step": 4380 }, { "epoch": 1.6049558008519214, "grad_norm": 0.14195114799414962, "learning_rate": 3.258704404952362e-05, "loss": 0.5086, "num_tokens": 3350308291.0, "step": 4381 }, { "epoch": 1.6053222186598268, "grad_norm": 0.1673241508197866, "learning_rate": 3.258358959333267e-05, "loss": 0.5192, "num_tokens": 3351075594.0, "step": 4382 }, { "epoch": 1.6056886364677323, "grad_norm": 0.17959199141041887, "learning_rate": 3.258013454126452e-05, "loss": 0.5333, "num_tokens": 3351886670.0, "step": 4383 }, { "epoch": 1.6060550542756378, "grad_norm": 0.15235471302015338, "learning_rate": 3.25766788935137e-05, "loss": 0.5308, "num_tokens": 3352639030.0, "step": 4384 }, { "epoch": 1.6064214720835432, "grad_norm": 0.16905420995311674, "learning_rate": 3.2573222650274785e-05, "loss": 0.5201, "num_tokens": 3353340409.0, "step": 4385 }, { "epoch": 1.6067878898914487, "grad_norm": 0.16625613897704655, "learning_rate": 3.256976581174234e-05, "loss": 0.4998, "num_tokens": 3354208661.0, "step": 4386 }, { "epoch": 1.6071543076993542, "grad_norm": 0.15145200670166717, "learning_rate": 3.256630837811102e-05, "loss": 0.506, "num_tokens": 3354905094.0, "step": 4387 }, { "epoch": 1.6075207255072597, "grad_norm": 0.1586280504259752, "learning_rate": 3.256285034957547e-05, "loss": 0.5272, "num_tokens": 3355764542.0, "step": 4388 }, { "epoch": 1.6078871433151651, "grad_norm": 0.16493802777982902, "learning_rate": 3.255939172633038e-05, "loss": 0.5186, "num_tokens": 3356439242.0, "step": 4389 }, { "epoch": 1.6082535611230706, "grad_norm": 0.15328620139295948, "learning_rate": 3.2555932508570486e-05, "loss": 0.5385, "num_tokens": 3357199576.0, "step": 4390 }, { "epoch": 1.608619978930976, "grad_norm": 0.13461678638362382, "learning_rate": 3.255247269649056e-05, "loss": 0.4976, "num_tokens": 3358112073.0, "step": 4391 }, { "epoch": 1.6089863967388816, "grad_norm": 0.15293206930520795, "learning_rate": 3.254901229028537e-05, "loss": 0.4901, "num_tokens": 3358734244.0, "step": 4392 }, { "epoch": 1.609352814546787, "grad_norm": 0.14447292392314104, "learning_rate": 3.254555129014977e-05, "loss": 0.5038, "num_tokens": 3359491845.0, "step": 4393 }, { "epoch": 1.6097192323546925, "grad_norm": 0.15139282265757462, "learning_rate": 3.25420896962786e-05, "loss": 0.4999, "num_tokens": 3360241228.0, "step": 4394 }, { "epoch": 1.6100856501625977, "grad_norm": 0.14035828846840936, "learning_rate": 3.253862750886677e-05, "loss": 0.5056, "num_tokens": 3361059063.0, "step": 4395 }, { "epoch": 1.6104520679705034, "grad_norm": 0.15178719681045094, "learning_rate": 3.253516472810919e-05, "loss": 0.5086, "num_tokens": 3361871377.0, "step": 4396 }, { "epoch": 1.6108184857784087, "grad_norm": 0.14696554418815852, "learning_rate": 3.2531701354200835e-05, "loss": 0.5075, "num_tokens": 3362556300.0, "step": 4397 }, { "epoch": 1.6111849035863144, "grad_norm": 0.15818167568000843, "learning_rate": 3.25282373873367e-05, "loss": 0.5075, "num_tokens": 3363268803.0, "step": 4398 }, { "epoch": 1.6115513213942196, "grad_norm": 0.15444426428945957, "learning_rate": 3.2524772827711815e-05, "loss": 0.5297, "num_tokens": 3363989319.0, "step": 4399 }, { "epoch": 1.6119177392021253, "grad_norm": 0.1702030778071286, "learning_rate": 3.252130767552124e-05, "loss": 0.5155, "num_tokens": 3364751795.0, "step": 4400 }, { "epoch": 1.6122841570100306, "grad_norm": 0.13806051255415902, "learning_rate": 3.2517841930960056e-05, "loss": 0.4884, "num_tokens": 3365541605.0, "step": 4401 }, { "epoch": 1.6126505748179363, "grad_norm": 0.15208779202045578, "learning_rate": 3.2514375594223414e-05, "loss": 0.5156, "num_tokens": 3366274180.0, "step": 4402 }, { "epoch": 1.6130169926258415, "grad_norm": 0.13954221918869347, "learning_rate": 3.2510908665506464e-05, "loss": 0.5284, "num_tokens": 3367103532.0, "step": 4403 }, { "epoch": 1.6133834104337472, "grad_norm": 0.16558148142137655, "learning_rate": 3.250744114500439e-05, "loss": 0.5628, "num_tokens": 3367867700.0, "step": 4404 }, { "epoch": 1.6137498282416525, "grad_norm": 0.15159809652981784, "learning_rate": 3.250397303291244e-05, "loss": 0.5492, "num_tokens": 3368600836.0, "step": 4405 }, { "epoch": 1.6141162460495582, "grad_norm": 0.16680672744256472, "learning_rate": 3.250050432942587e-05, "loss": 0.5444, "num_tokens": 3369434667.0, "step": 4406 }, { "epoch": 1.6144826638574634, "grad_norm": 0.1407373163879932, "learning_rate": 3.2497035034739965e-05, "loss": 0.5127, "num_tokens": 3370121915.0, "step": 4407 }, { "epoch": 1.614849081665369, "grad_norm": 0.1703178222799337, "learning_rate": 3.2493565149050064e-05, "loss": 0.5258, "num_tokens": 3370910733.0, "step": 4408 }, { "epoch": 1.6152154994732744, "grad_norm": 0.16889648939157928, "learning_rate": 3.2490094672551525e-05, "loss": 0.557, "num_tokens": 3371588352.0, "step": 4409 }, { "epoch": 1.6155819172811798, "grad_norm": 0.1709670621599938, "learning_rate": 3.248662360543974e-05, "loss": 0.542, "num_tokens": 3372337027.0, "step": 4410 }, { "epoch": 1.6159483350890853, "grad_norm": 0.16131542997977943, "learning_rate": 3.248315194791015e-05, "loss": 0.4974, "num_tokens": 3373176881.0, "step": 4411 }, { "epoch": 1.6163147528969908, "grad_norm": 0.1384041885133431, "learning_rate": 3.2479679700158196e-05, "loss": 0.4822, "num_tokens": 3373865770.0, "step": 4412 }, { "epoch": 1.6166811707048963, "grad_norm": 0.15814225971578574, "learning_rate": 3.2476206862379384e-05, "loss": 0.5295, "num_tokens": 3374587674.0, "step": 4413 }, { "epoch": 1.6170475885128017, "grad_norm": 0.14665709308307315, "learning_rate": 3.247273343476924e-05, "loss": 0.4945, "num_tokens": 3375283021.0, "step": 4414 }, { "epoch": 1.6174140063207072, "grad_norm": 0.1425173915876836, "learning_rate": 3.246925941752333e-05, "loss": 0.4896, "num_tokens": 3376147884.0, "step": 4415 }, { "epoch": 1.6177804241286127, "grad_norm": 0.1427524675080243, "learning_rate": 3.2465784810837244e-05, "loss": 0.5125, "num_tokens": 3376858566.0, "step": 4416 }, { "epoch": 1.6181468419365181, "grad_norm": 0.14840493001065705, "learning_rate": 3.2462309614906604e-05, "loss": 0.578, "num_tokens": 3377577029.0, "step": 4417 }, { "epoch": 1.6185132597444236, "grad_norm": 0.1550923973407355, "learning_rate": 3.2458833829927075e-05, "loss": 0.5082, "num_tokens": 3378293256.0, "step": 4418 }, { "epoch": 1.618879677552329, "grad_norm": 0.15058210185873414, "learning_rate": 3.245535745609435e-05, "loss": 0.5077, "num_tokens": 3379010277.0, "step": 4419 }, { "epoch": 1.6192460953602346, "grad_norm": 0.13934183652240065, "learning_rate": 3.245188049360415e-05, "loss": 0.5027, "num_tokens": 3379923527.0, "step": 4420 }, { "epoch": 1.61961251316814, "grad_norm": 0.15885501855122208, "learning_rate": 3.244840294265225e-05, "loss": 0.5606, "num_tokens": 3380646468.0, "step": 4421 }, { "epoch": 1.6199789309760453, "grad_norm": 0.13457259310399425, "learning_rate": 3.244492480343444e-05, "loss": 0.4989, "num_tokens": 3381500289.0, "step": 4422 }, { "epoch": 1.620345348783951, "grad_norm": 0.13704183217065244, "learning_rate": 3.2441446076146524e-05, "loss": 0.4785, "num_tokens": 3382284553.0, "step": 4423 }, { "epoch": 1.6207117665918562, "grad_norm": 0.14240030875465784, "learning_rate": 3.243796676098439e-05, "loss": 0.5358, "num_tokens": 3383036058.0, "step": 4424 }, { "epoch": 1.621078184399762, "grad_norm": 0.14124511232724893, "learning_rate": 3.243448685814392e-05, "loss": 0.5445, "num_tokens": 3383766311.0, "step": 4425 }, { "epoch": 1.6214446022076672, "grad_norm": 0.16162218940079148, "learning_rate": 3.2431006367821026e-05, "loss": 0.5339, "num_tokens": 3384586397.0, "step": 4426 }, { "epoch": 1.6218110200155729, "grad_norm": 0.1452026815276393, "learning_rate": 3.24275252902117e-05, "loss": 0.5067, "num_tokens": 3385322457.0, "step": 4427 }, { "epoch": 1.6221774378234781, "grad_norm": 0.14426480612564674, "learning_rate": 3.242404362551189e-05, "loss": 0.5164, "num_tokens": 3386066889.0, "step": 4428 }, { "epoch": 1.6225438556313838, "grad_norm": 0.15984125143553865, "learning_rate": 3.2420561373917645e-05, "loss": 0.4936, "num_tokens": 3386765531.0, "step": 4429 }, { "epoch": 1.622910273439289, "grad_norm": 0.1481877011259301, "learning_rate": 3.241707853562502e-05, "loss": 0.4745, "num_tokens": 3387543925.0, "step": 4430 }, { "epoch": 1.6232766912471948, "grad_norm": 0.14912669362838327, "learning_rate": 3.241359511083012e-05, "loss": 0.5315, "num_tokens": 3388234924.0, "step": 4431 }, { "epoch": 1.6236431090551, "grad_norm": 0.15168766932491912, "learning_rate": 3.2410111099729036e-05, "loss": 0.4863, "num_tokens": 3388964684.0, "step": 4432 }, { "epoch": 1.6240095268630057, "grad_norm": 0.1610327117583183, "learning_rate": 3.2406626502517956e-05, "loss": 0.552, "num_tokens": 3389658707.0, "step": 4433 }, { "epoch": 1.624375944670911, "grad_norm": 0.147350196415342, "learning_rate": 3.240314131939305e-05, "loss": 0.477, "num_tokens": 3390316781.0, "step": 4434 }, { "epoch": 1.6247423624788164, "grad_norm": 0.14672708440234458, "learning_rate": 3.2399655550550556e-05, "loss": 0.4968, "num_tokens": 3391191088.0, "step": 4435 }, { "epoch": 1.625108780286722, "grad_norm": 0.13643088258715602, "learning_rate": 3.239616919618672e-05, "loss": 0.4633, "num_tokens": 3392004060.0, "step": 4436 }, { "epoch": 1.6254751980946274, "grad_norm": 0.14795600564571748, "learning_rate": 3.239268225649783e-05, "loss": 0.5089, "num_tokens": 3392810618.0, "step": 4437 }, { "epoch": 1.6258416159025328, "grad_norm": 0.13119859701627648, "learning_rate": 3.238919473168021e-05, "loss": 0.5025, "num_tokens": 3393587927.0, "step": 4438 }, { "epoch": 1.6262080337104383, "grad_norm": 0.15507302614677457, "learning_rate": 3.2385706621930215e-05, "loss": 0.5211, "num_tokens": 3394414260.0, "step": 4439 }, { "epoch": 1.6265744515183438, "grad_norm": 0.1440616821975284, "learning_rate": 3.238221792744423e-05, "loss": 0.4852, "num_tokens": 3395152959.0, "step": 4440 }, { "epoch": 1.6269408693262493, "grad_norm": 0.14211402661036032, "learning_rate": 3.237872864841868e-05, "loss": 0.5393, "num_tokens": 3395996512.0, "step": 4441 }, { "epoch": 1.6273072871341547, "grad_norm": 0.1558742397887297, "learning_rate": 3.237523878505e-05, "loss": 0.5111, "num_tokens": 3396847212.0, "step": 4442 }, { "epoch": 1.6276737049420602, "grad_norm": 0.15332974040236697, "learning_rate": 3.237174833753472e-05, "loss": 0.5177, "num_tokens": 3397554737.0, "step": 4443 }, { "epoch": 1.6280401227499657, "grad_norm": 0.14244365527246028, "learning_rate": 3.236825730606932e-05, "loss": 0.5192, "num_tokens": 3398327756.0, "step": 4444 }, { "epoch": 1.6284065405578712, "grad_norm": 0.15229163170611612, "learning_rate": 3.236476569085036e-05, "loss": 0.5006, "num_tokens": 3399029898.0, "step": 4445 }, { "epoch": 1.6287729583657766, "grad_norm": 0.1492977203131761, "learning_rate": 3.236127349207443e-05, "loss": 0.4879, "num_tokens": 3399714010.0, "step": 4446 }, { "epoch": 1.6291393761736819, "grad_norm": 0.14290124802261012, "learning_rate": 3.2357780709938146e-05, "loss": 0.5022, "num_tokens": 3400507267.0, "step": 4447 }, { "epoch": 1.6295057939815876, "grad_norm": 0.14447657171403072, "learning_rate": 3.235428734463816e-05, "loss": 0.4995, "num_tokens": 3401272487.0, "step": 4448 }, { "epoch": 1.6298722117894928, "grad_norm": 0.1381659758222347, "learning_rate": 3.235079339637115e-05, "loss": 0.5234, "num_tokens": 3402061590.0, "step": 4449 }, { "epoch": 1.6302386295973985, "grad_norm": 0.15057598856535784, "learning_rate": 3.234729886533384e-05, "loss": 0.5301, "num_tokens": 3402804841.0, "step": 4450 }, { "epoch": 1.6306050474053038, "grad_norm": 0.1526704600274286, "learning_rate": 3.234380375172297e-05, "loss": 0.5107, "num_tokens": 3403640605.0, "step": 4451 }, { "epoch": 1.6309714652132095, "grad_norm": 0.12599187458094, "learning_rate": 3.234030805573534e-05, "loss": 0.4778, "num_tokens": 3404544316.0, "step": 4452 }, { "epoch": 1.6313378830211147, "grad_norm": 0.12503987263911964, "learning_rate": 3.233681177756774e-05, "loss": 0.5127, "num_tokens": 3405420008.0, "step": 4453 }, { "epoch": 1.6317043008290204, "grad_norm": 0.14913290866002274, "learning_rate": 3.233331491741703e-05, "loss": 0.4988, "num_tokens": 3406140863.0, "step": 4454 }, { "epoch": 1.6320707186369257, "grad_norm": 0.1459312849674501, "learning_rate": 3.23298174754801e-05, "loss": 0.4858, "num_tokens": 3406941464.0, "step": 4455 }, { "epoch": 1.6324371364448313, "grad_norm": 0.1377003646507554, "learning_rate": 3.232631945195384e-05, "loss": 0.4991, "num_tokens": 3407685366.0, "step": 4456 }, { "epoch": 1.6328035542527366, "grad_norm": 0.15211036992676213, "learning_rate": 3.2322820847035214e-05, "loss": 0.5333, "num_tokens": 3408394693.0, "step": 4457 }, { "epoch": 1.6331699720606423, "grad_norm": 0.14159942071917017, "learning_rate": 3.23193216609212e-05, "loss": 0.4889, "num_tokens": 3409257095.0, "step": 4458 }, { "epoch": 1.6335363898685475, "grad_norm": 0.164998160155723, "learning_rate": 3.2315821893808794e-05, "loss": 0.5191, "num_tokens": 3410027762.0, "step": 4459 }, { "epoch": 1.6339028076764532, "grad_norm": 0.14648292969947838, "learning_rate": 3.231232154589505e-05, "loss": 0.5266, "num_tokens": 3410729740.0, "step": 4460 }, { "epoch": 1.6342692254843585, "grad_norm": 0.1539632866500565, "learning_rate": 3.230882061737705e-05, "loss": 0.511, "num_tokens": 3411545073.0, "step": 4461 }, { "epoch": 1.634635643292264, "grad_norm": 0.15378293295703208, "learning_rate": 3.230531910845189e-05, "loss": 0.4967, "num_tokens": 3412266415.0, "step": 4462 }, { "epoch": 1.6350020611001694, "grad_norm": 0.15525295535026712, "learning_rate": 3.230181701931673e-05, "loss": 0.5001, "num_tokens": 3413097840.0, "step": 4463 }, { "epoch": 1.635368478908075, "grad_norm": 0.1783422869574624, "learning_rate": 3.229831435016873e-05, "loss": 0.5221, "num_tokens": 3413823974.0, "step": 4464 }, { "epoch": 1.6357348967159804, "grad_norm": 0.16564376243000345, "learning_rate": 3.22948111012051e-05, "loss": 0.5565, "num_tokens": 3414463980.0, "step": 4465 }, { "epoch": 1.6361013145238859, "grad_norm": 0.18044783529858796, "learning_rate": 3.229130727262308e-05, "loss": 0.5046, "num_tokens": 3415220246.0, "step": 4466 }, { "epoch": 1.6364677323317913, "grad_norm": 0.18320279112978555, "learning_rate": 3.228780286461995e-05, "loss": 0.4727, "num_tokens": 3415884736.0, "step": 4467 }, { "epoch": 1.6368341501396968, "grad_norm": 0.14963689108532485, "learning_rate": 3.2284297877393004e-05, "loss": 0.5368, "num_tokens": 3416697587.0, "step": 4468 }, { "epoch": 1.6372005679476023, "grad_norm": 0.16561424889491458, "learning_rate": 3.228079231113958e-05, "loss": 0.5091, "num_tokens": 3417377155.0, "step": 4469 }, { "epoch": 1.6375669857555077, "grad_norm": 0.19449918728409873, "learning_rate": 3.227728616605706e-05, "loss": 0.5411, "num_tokens": 3418139314.0, "step": 4470 }, { "epoch": 1.6379334035634132, "grad_norm": 0.15250596211658593, "learning_rate": 3.2273779442342836e-05, "loss": 0.4997, "num_tokens": 3418851654.0, "step": 4471 }, { "epoch": 1.6382998213713187, "grad_norm": 0.14385900364406143, "learning_rate": 3.227027214019435e-05, "loss": 0.4785, "num_tokens": 3419648959.0, "step": 4472 }, { "epoch": 1.6386662391792242, "grad_norm": 0.18194418959090694, "learning_rate": 3.226676425980907e-05, "loss": 0.5447, "num_tokens": 3420432596.0, "step": 4473 }, { "epoch": 1.6390326569871294, "grad_norm": 0.1493627279153959, "learning_rate": 3.226325580138449e-05, "loss": 0.4895, "num_tokens": 3421166521.0, "step": 4474 }, { "epoch": 1.639399074795035, "grad_norm": 0.16067617246743085, "learning_rate": 3.225974676511815e-05, "loss": 0.4794, "num_tokens": 3422036428.0, "step": 4475 }, { "epoch": 1.6397654926029404, "grad_norm": 0.13968984134381712, "learning_rate": 3.2256237151207605e-05, "loss": 0.4834, "num_tokens": 3422809829.0, "step": 4476 }, { "epoch": 1.640131910410846, "grad_norm": 0.16833904781056447, "learning_rate": 3.225272695985047e-05, "loss": 0.5095, "num_tokens": 3423557813.0, "step": 4477 }, { "epoch": 1.6404983282187513, "grad_norm": 0.1574586805453428, "learning_rate": 3.2249216191244366e-05, "loss": 0.4917, "num_tokens": 3424266102.0, "step": 4478 }, { "epoch": 1.640864746026657, "grad_norm": 0.1379983255009471, "learning_rate": 3.224570484558695e-05, "loss": 0.4993, "num_tokens": 3425030747.0, "step": 4479 }, { "epoch": 1.6412311638345622, "grad_norm": 0.20320305922201737, "learning_rate": 3.224219292307594e-05, "loss": 0.5462, "num_tokens": 3425686804.0, "step": 4480 }, { "epoch": 1.641597581642468, "grad_norm": 0.1528610570179523, "learning_rate": 3.223868042390904e-05, "loss": 0.5112, "num_tokens": 3426472392.0, "step": 4481 }, { "epoch": 1.6419639994503732, "grad_norm": 0.15891843054890029, "learning_rate": 3.2235167348284026e-05, "loss": 0.5632, "num_tokens": 3427178813.0, "step": 4482 }, { "epoch": 1.6423304172582789, "grad_norm": 0.15153715301095425, "learning_rate": 3.223165369639868e-05, "loss": 0.4759, "num_tokens": 3427856192.0, "step": 4483 }, { "epoch": 1.6426968350661841, "grad_norm": 0.13679807231604224, "learning_rate": 3.222813946845084e-05, "loss": 0.498, "num_tokens": 3428689363.0, "step": 4484 }, { "epoch": 1.6430632528740898, "grad_norm": 0.1830730675809651, "learning_rate": 3.222462466463835e-05, "loss": 0.5411, "num_tokens": 3429321267.0, "step": 4485 }, { "epoch": 1.643429670681995, "grad_norm": 0.14447726184361664, "learning_rate": 3.2221109285159104e-05, "loss": 0.508, "num_tokens": 3430092792.0, "step": 4486 }, { "epoch": 1.6437960884899008, "grad_norm": 0.14272635597088504, "learning_rate": 3.221759333021103e-05, "loss": 0.5292, "num_tokens": 3430801243.0, "step": 4487 }, { "epoch": 1.644162506297806, "grad_norm": 0.1581183244353413, "learning_rate": 3.2214076799992086e-05, "loss": 0.5367, "num_tokens": 3431624554.0, "step": 4488 }, { "epoch": 1.6445289241057115, "grad_norm": 0.13168844196481197, "learning_rate": 3.221055969470025e-05, "loss": 0.4744, "num_tokens": 3432405812.0, "step": 4489 }, { "epoch": 1.644895341913617, "grad_norm": 0.1452556082230822, "learning_rate": 3.2207042014533554e-05, "loss": 0.4841, "num_tokens": 3433083595.0, "step": 4490 }, { "epoch": 1.6452617597215224, "grad_norm": 0.15394519368253004, "learning_rate": 3.220352375969004e-05, "loss": 0.5201, "num_tokens": 3433804226.0, "step": 4491 }, { "epoch": 1.645628177529428, "grad_norm": 0.15208158865777158, "learning_rate": 3.22000049303678e-05, "loss": 0.5004, "num_tokens": 3434552813.0, "step": 4492 }, { "epoch": 1.6459945953373334, "grad_norm": 0.15207574535821494, "learning_rate": 3.2196485526764946e-05, "loss": 0.4792, "num_tokens": 3435360722.0, "step": 4493 }, { "epoch": 1.6463610131452389, "grad_norm": 0.14932491211403637, "learning_rate": 3.219296554907963e-05, "loss": 0.4685, "num_tokens": 3436111127.0, "step": 4494 }, { "epoch": 1.6467274309531443, "grad_norm": 0.15793948340576353, "learning_rate": 3.2189444997510025e-05, "loss": 0.4857, "num_tokens": 3437004214.0, "step": 4495 }, { "epoch": 1.6470938487610498, "grad_norm": 0.14918020723429856, "learning_rate": 3.218592387225436e-05, "loss": 0.5363, "num_tokens": 3437763999.0, "step": 4496 }, { "epoch": 1.6474602665689553, "grad_norm": 0.14838819609118348, "learning_rate": 3.2182402173510875e-05, "loss": 0.4953, "num_tokens": 3438595869.0, "step": 4497 }, { "epoch": 1.6478266843768608, "grad_norm": 0.15518017782644203, "learning_rate": 3.217887990147785e-05, "loss": 0.5231, "num_tokens": 3439302712.0, "step": 4498 }, { "epoch": 1.6481931021847662, "grad_norm": 0.14224088143246952, "learning_rate": 3.217535705635359e-05, "loss": 0.4739, "num_tokens": 3440077795.0, "step": 4499 }, { "epoch": 1.6485595199926717, "grad_norm": 0.143136025117355, "learning_rate": 3.217183363833644e-05, "loss": 0.4697, "num_tokens": 3440830319.0, "step": 4500 }, { "epoch": 1.648925937800577, "grad_norm": 0.13460803616793215, "learning_rate": 3.2168309647624776e-05, "loss": 0.4936, "num_tokens": 3441630989.0, "step": 4501 }, { "epoch": 1.6492923556084826, "grad_norm": 0.15963228152530332, "learning_rate": 3.216478508441701e-05, "loss": 0.5206, "num_tokens": 3442422054.0, "step": 4502 }, { "epoch": 1.649658773416388, "grad_norm": 0.1379813749028845, "learning_rate": 3.216125994891158e-05, "loss": 0.5017, "num_tokens": 3443261552.0, "step": 4503 }, { "epoch": 1.6500251912242936, "grad_norm": 0.13074097773145998, "learning_rate": 3.215773424130696e-05, "loss": 0.4998, "num_tokens": 3444117523.0, "step": 4504 }, { "epoch": 1.6503916090321988, "grad_norm": 0.13707077681486912, "learning_rate": 3.2154207961801646e-05, "loss": 0.4765, "num_tokens": 3444923447.0, "step": 4505 }, { "epoch": 1.6507580268401045, "grad_norm": 0.15109050224827805, "learning_rate": 3.215068111059419e-05, "loss": 0.5362, "num_tokens": 3445753166.0, "step": 4506 }, { "epoch": 1.6511244446480098, "grad_norm": 0.13576517006072047, "learning_rate": 3.214715368788314e-05, "loss": 0.5046, "num_tokens": 3446575781.0, "step": 4507 }, { "epoch": 1.6514908624559155, "grad_norm": 0.13300539207667203, "learning_rate": 3.214362569386711e-05, "loss": 0.5151, "num_tokens": 3447363033.0, "step": 4508 }, { "epoch": 1.6518572802638207, "grad_norm": 0.12964071614011144, "learning_rate": 3.214009712874473e-05, "loss": 0.5148, "num_tokens": 3448131541.0, "step": 4509 }, { "epoch": 1.6522236980717264, "grad_norm": 0.13770845468723072, "learning_rate": 3.2136567992714675e-05, "loss": 0.4987, "num_tokens": 3448926484.0, "step": 4510 }, { "epoch": 1.6525901158796317, "grad_norm": 0.13800054118695002, "learning_rate": 3.213303828597563e-05, "loss": 0.5094, "num_tokens": 3449784619.0, "step": 4511 }, { "epoch": 1.6529565336875374, "grad_norm": 0.13996610300541057, "learning_rate": 3.212950800872632e-05, "loss": 0.514, "num_tokens": 3450500634.0, "step": 4512 }, { "epoch": 1.6533229514954426, "grad_norm": 0.13628758225582444, "learning_rate": 3.212597716116552e-05, "loss": 0.4586, "num_tokens": 3451254152.0, "step": 4513 }, { "epoch": 1.653689369303348, "grad_norm": 0.13542311891641468, "learning_rate": 3.212244574349202e-05, "loss": 0.527, "num_tokens": 3452034560.0, "step": 4514 }, { "epoch": 1.6540557871112536, "grad_norm": 0.14871842153088458, "learning_rate": 3.211891375590464e-05, "loss": 0.534, "num_tokens": 3452730910.0, "step": 4515 }, { "epoch": 1.654422204919159, "grad_norm": 0.15181011020660384, "learning_rate": 3.211538119860225e-05, "loss": 0.5123, "num_tokens": 3453479252.0, "step": 4516 }, { "epoch": 1.6547886227270645, "grad_norm": 0.1603430261828335, "learning_rate": 3.2111848071783736e-05, "loss": 0.4953, "num_tokens": 3454204895.0, "step": 4517 }, { "epoch": 1.65515504053497, "grad_norm": 0.1372357510646379, "learning_rate": 3.2108314375648e-05, "loss": 0.4798, "num_tokens": 3454994172.0, "step": 4518 }, { "epoch": 1.6555214583428755, "grad_norm": 0.17316328686815297, "learning_rate": 3.2104780110394024e-05, "loss": 0.4745, "num_tokens": 3455796748.0, "step": 4519 }, { "epoch": 1.655887876150781, "grad_norm": 0.16570183714072478, "learning_rate": 3.2101245276220785e-05, "loss": 0.4978, "num_tokens": 3456568147.0, "step": 4520 }, { "epoch": 1.6562542939586864, "grad_norm": 0.14384976092652385, "learning_rate": 3.209770987332729e-05, "loss": 0.5081, "num_tokens": 3457337378.0, "step": 4521 }, { "epoch": 1.6566207117665919, "grad_norm": 0.14169588552837187, "learning_rate": 3.2094173901912606e-05, "loss": 0.4876, "num_tokens": 3458118447.0, "step": 4522 }, { "epoch": 1.6569871295744973, "grad_norm": 0.1685935137199497, "learning_rate": 3.20906373621758e-05, "loss": 0.5202, "num_tokens": 3458791433.0, "step": 4523 }, { "epoch": 1.6573535473824028, "grad_norm": 0.14293512461877098, "learning_rate": 3.2087100254316004e-05, "loss": 0.4837, "num_tokens": 3459523514.0, "step": 4524 }, { "epoch": 1.6577199651903083, "grad_norm": 0.14303868420676982, "learning_rate": 3.208356257853234e-05, "loss": 0.4821, "num_tokens": 3460255197.0, "step": 4525 }, { "epoch": 1.6580863829982138, "grad_norm": 0.12955780567240108, "learning_rate": 3.208002433502401e-05, "loss": 0.4973, "num_tokens": 3461019662.0, "step": 4526 }, { "epoch": 1.6584528008061192, "grad_norm": 0.15362567109164327, "learning_rate": 3.207648552399022e-05, "loss": 0.5201, "num_tokens": 3461837395.0, "step": 4527 }, { "epoch": 1.6588192186140245, "grad_norm": 0.15069189314079087, "learning_rate": 3.207294614563019e-05, "loss": 0.5071, "num_tokens": 3462558218.0, "step": 4528 }, { "epoch": 1.6591856364219302, "grad_norm": 0.1462753627399973, "learning_rate": 3.2069406200143224e-05, "loss": 0.5182, "num_tokens": 3463342729.0, "step": 4529 }, { "epoch": 1.6595520542298354, "grad_norm": 0.1380769288923861, "learning_rate": 3.206586568772862e-05, "loss": 0.5061, "num_tokens": 3464118500.0, "step": 4530 }, { "epoch": 1.6599184720377411, "grad_norm": 0.13901294285584065, "learning_rate": 3.206232460858569e-05, "loss": 0.5213, "num_tokens": 3464730236.0, "step": 4531 }, { "epoch": 1.6602848898456464, "grad_norm": 0.15178440087998657, "learning_rate": 3.2058782962913834e-05, "loss": 0.5042, "num_tokens": 3465446873.0, "step": 4532 }, { "epoch": 1.660651307653552, "grad_norm": 0.13869644136243495, "learning_rate": 3.205524075091245e-05, "loss": 0.477, "num_tokens": 3466223496.0, "step": 4533 }, { "epoch": 1.6610177254614573, "grad_norm": 0.14097545248823304, "learning_rate": 3.205169797278095e-05, "loss": 0.4866, "num_tokens": 3467103795.0, "step": 4534 }, { "epoch": 1.661384143269363, "grad_norm": 0.15352950460066614, "learning_rate": 3.2048154628718835e-05, "loss": 0.499, "num_tokens": 3467866013.0, "step": 4535 }, { "epoch": 1.6617505610772683, "grad_norm": 0.16867633796115136, "learning_rate": 3.2044610718925566e-05, "loss": 0.5189, "num_tokens": 3468695818.0, "step": 4536 }, { "epoch": 1.662116978885174, "grad_norm": 0.14192476873097534, "learning_rate": 3.204106624360069e-05, "loss": 0.5049, "num_tokens": 3469396725.0, "step": 4537 }, { "epoch": 1.6624833966930792, "grad_norm": 0.15089152563657193, "learning_rate": 3.203752120294378e-05, "loss": 0.4964, "num_tokens": 3470169674.0, "step": 4538 }, { "epoch": 1.662849814500985, "grad_norm": 0.16186446777072616, "learning_rate": 3.203397559715441e-05, "loss": 0.5177, "num_tokens": 3470879786.0, "step": 4539 }, { "epoch": 1.6632162323088902, "grad_norm": 0.14610024643569466, "learning_rate": 3.20304294264322e-05, "loss": 0.546, "num_tokens": 3471601207.0, "step": 4540 }, { "epoch": 1.6635826501167956, "grad_norm": 0.15346737447345524, "learning_rate": 3.2026882690976824e-05, "loss": 0.5429, "num_tokens": 3472367325.0, "step": 4541 }, { "epoch": 1.663949067924701, "grad_norm": 0.14354408904782381, "learning_rate": 3.202333539098797e-05, "loss": 0.5075, "num_tokens": 3473041706.0, "step": 4542 }, { "epoch": 1.6643154857326066, "grad_norm": 0.1469297668118018, "learning_rate": 3.201978752666533e-05, "loss": 0.5114, "num_tokens": 3473692977.0, "step": 4543 }, { "epoch": 1.664681903540512, "grad_norm": 0.1603670689356502, "learning_rate": 3.20162390982087e-05, "loss": 0.4987, "num_tokens": 3474469554.0, "step": 4544 }, { "epoch": 1.6650483213484175, "grad_norm": 0.15432547489106593, "learning_rate": 3.2012690105817826e-05, "loss": 0.5044, "num_tokens": 3475289425.0, "step": 4545 }, { "epoch": 1.665414739156323, "grad_norm": 0.13821564018740293, "learning_rate": 3.200914054969254e-05, "loss": 0.5238, "num_tokens": 3476082119.0, "step": 4546 }, { "epoch": 1.6657811569642285, "grad_norm": 0.1711237872676208, "learning_rate": 3.2005590430032695e-05, "loss": 0.4859, "num_tokens": 3476737352.0, "step": 4547 }, { "epoch": 1.666147574772134, "grad_norm": 0.1844871432096613, "learning_rate": 3.200203974703815e-05, "loss": 0.5139, "num_tokens": 3477549210.0, "step": 4548 }, { "epoch": 1.6665139925800394, "grad_norm": 0.16810546919234096, "learning_rate": 3.199848850090883e-05, "loss": 0.5214, "num_tokens": 3478320496.0, "step": 4549 }, { "epoch": 1.6668804103879449, "grad_norm": 0.17772586146244138, "learning_rate": 3.199493669184468e-05, "loss": 0.4837, "num_tokens": 3479108455.0, "step": 4550 }, { "epoch": 1.6672468281958503, "grad_norm": 0.17543346065456977, "learning_rate": 3.199138432004566e-05, "loss": 0.5234, "num_tokens": 3479794138.0, "step": 4551 }, { "epoch": 1.6676132460037558, "grad_norm": 0.15187363161690146, "learning_rate": 3.198783138571179e-05, "loss": 0.4914, "num_tokens": 3480515689.0, "step": 4552 }, { "epoch": 1.6679796638116613, "grad_norm": 0.1388871915331687, "learning_rate": 3.19842778890431e-05, "loss": 0.5152, "num_tokens": 3481311954.0, "step": 4553 }, { "epoch": 1.6683460816195668, "grad_norm": 0.19034705133043647, "learning_rate": 3.1980723830239665e-05, "loss": 0.5342, "num_tokens": 3482004120.0, "step": 4554 }, { "epoch": 1.668712499427472, "grad_norm": 0.14854172887071532, "learning_rate": 3.197716920950157e-05, "loss": 0.5126, "num_tokens": 3482688235.0, "step": 4555 }, { "epoch": 1.6690789172353777, "grad_norm": 0.1385378347453981, "learning_rate": 3.1973614027028964e-05, "loss": 0.5147, "num_tokens": 3483534997.0, "step": 4556 }, { "epoch": 1.669445335043283, "grad_norm": 0.155718585816161, "learning_rate": 3.1970058283022005e-05, "loss": 0.4712, "num_tokens": 3484343696.0, "step": 4557 }, { "epoch": 1.6698117528511887, "grad_norm": 0.14939665616792264, "learning_rate": 3.196650197768089e-05, "loss": 0.5142, "num_tokens": 3485105131.0, "step": 4558 }, { "epoch": 1.670178170659094, "grad_norm": 0.13833558749814245, "learning_rate": 3.196294511120584e-05, "loss": 0.5139, "num_tokens": 3485901829.0, "step": 4559 }, { "epoch": 1.6705445884669996, "grad_norm": 0.1554583184199537, "learning_rate": 3.1959387683797124e-05, "loss": 0.5188, "num_tokens": 3486582984.0, "step": 4560 }, { "epoch": 1.6709110062749049, "grad_norm": 0.1396421456952293, "learning_rate": 3.1955829695655016e-05, "loss": 0.4773, "num_tokens": 3487367970.0, "step": 4561 }, { "epoch": 1.6712774240828105, "grad_norm": 0.14107646642968047, "learning_rate": 3.195227114697986e-05, "loss": 0.5022, "num_tokens": 3488146647.0, "step": 4562 }, { "epoch": 1.6716438418907158, "grad_norm": 0.14747341687167648, "learning_rate": 3.1948712037971986e-05, "loss": 0.4779, "num_tokens": 3488965745.0, "step": 4563 }, { "epoch": 1.6720102596986215, "grad_norm": 0.1512905860321543, "learning_rate": 3.19451523688318e-05, "loss": 0.5286, "num_tokens": 3489605519.0, "step": 4564 }, { "epoch": 1.6723766775065267, "grad_norm": 0.1500111045626565, "learning_rate": 3.19415921397597e-05, "loss": 0.4649, "num_tokens": 3490376855.0, "step": 4565 }, { "epoch": 1.6727430953144324, "grad_norm": 0.1373832844920628, "learning_rate": 3.193803135095615e-05, "loss": 0.5058, "num_tokens": 3491131775.0, "step": 4566 }, { "epoch": 1.6731095131223377, "grad_norm": 0.12904611224402415, "learning_rate": 3.193447000262162e-05, "loss": 0.4811, "num_tokens": 3491887490.0, "step": 4567 }, { "epoch": 1.6734759309302432, "grad_norm": 0.14998401659248756, "learning_rate": 3.1930908094956627e-05, "loss": 0.4962, "num_tokens": 3492729823.0, "step": 4568 }, { "epoch": 1.6738423487381486, "grad_norm": 0.15976145083235438, "learning_rate": 3.1927345628161705e-05, "loss": 0.5136, "num_tokens": 3493498443.0, "step": 4569 }, { "epoch": 1.674208766546054, "grad_norm": 0.14001597109706715, "learning_rate": 3.192378260243744e-05, "loss": 0.525, "num_tokens": 3494204770.0, "step": 4570 }, { "epoch": 1.6745751843539596, "grad_norm": 0.15728446973660196, "learning_rate": 3.1920219017984426e-05, "loss": 0.4685, "num_tokens": 3494905476.0, "step": 4571 }, { "epoch": 1.674941602161865, "grad_norm": 0.14721531885151154, "learning_rate": 3.1916654875003296e-05, "loss": 0.5081, "num_tokens": 3495684548.0, "step": 4572 }, { "epoch": 1.6753080199697705, "grad_norm": 0.14872799087032482, "learning_rate": 3.1913090173694735e-05, "loss": 0.5265, "num_tokens": 3496456842.0, "step": 4573 }, { "epoch": 1.675674437777676, "grad_norm": 0.150639772812212, "learning_rate": 3.1909524914259434e-05, "loss": 0.4932, "num_tokens": 3497170663.0, "step": 4574 }, { "epoch": 1.6760408555855815, "grad_norm": 0.157497633290084, "learning_rate": 3.190595909689812e-05, "loss": 0.4992, "num_tokens": 3498067998.0, "step": 4575 }, { "epoch": 1.676407273393487, "grad_norm": 0.1573077585587686, "learning_rate": 3.190239272181157e-05, "loss": 0.4854, "num_tokens": 3498702004.0, "step": 4576 }, { "epoch": 1.6767736912013924, "grad_norm": 0.13951384485557655, "learning_rate": 3.189882578920056e-05, "loss": 0.502, "num_tokens": 3499467639.0, "step": 4577 }, { "epoch": 1.6771401090092979, "grad_norm": 0.14605197680421686, "learning_rate": 3.189525829926593e-05, "loss": 0.5088, "num_tokens": 3500221557.0, "step": 4578 }, { "epoch": 1.6775065268172034, "grad_norm": 0.13938677792062593, "learning_rate": 3.189169025220852e-05, "loss": 0.5009, "num_tokens": 3501161651.0, "step": 4579 }, { "epoch": 1.6778729446251088, "grad_norm": 0.1434114268230285, "learning_rate": 3.1888121648229234e-05, "loss": 0.4631, "num_tokens": 3501986451.0, "step": 4580 }, { "epoch": 1.6782393624330143, "grad_norm": 0.14129784463468, "learning_rate": 3.188455248752899e-05, "loss": 0.5003, "num_tokens": 3502719458.0, "step": 4581 }, { "epoch": 1.6786057802409196, "grad_norm": 0.14478185169642785, "learning_rate": 3.188098277030873e-05, "loss": 0.4969, "num_tokens": 3503497859.0, "step": 4582 }, { "epoch": 1.6789721980488252, "grad_norm": 0.14271258341828788, "learning_rate": 3.187741249676945e-05, "loss": 0.4798, "num_tokens": 3504262432.0, "step": 4583 }, { "epoch": 1.6793386158567305, "grad_norm": 0.1433996087571638, "learning_rate": 3.187384166711214e-05, "loss": 0.5184, "num_tokens": 3505043531.0, "step": 4584 }, { "epoch": 1.6797050336646362, "grad_norm": 0.14334369964744093, "learning_rate": 3.187027028153787e-05, "loss": 0.5017, "num_tokens": 3505718994.0, "step": 4585 }, { "epoch": 1.6800714514725414, "grad_norm": 0.15106809303572952, "learning_rate": 3.18666983402477e-05, "loss": 0.5279, "num_tokens": 3506472514.0, "step": 4586 }, { "epoch": 1.6804378692804471, "grad_norm": 0.15680031829794053, "learning_rate": 3.186312584344275e-05, "loss": 0.5529, "num_tokens": 3507299276.0, "step": 4587 }, { "epoch": 1.6808042870883524, "grad_norm": 0.14183170344633503, "learning_rate": 3.185955279132415e-05, "loss": 0.5041, "num_tokens": 3508042057.0, "step": 4588 }, { "epoch": 1.681170704896258, "grad_norm": 0.14491325456566564, "learning_rate": 3.1855979184093074e-05, "loss": 0.5081, "num_tokens": 3508807245.0, "step": 4589 }, { "epoch": 1.6815371227041633, "grad_norm": 0.1722414831797832, "learning_rate": 3.185240502195072e-05, "loss": 0.4814, "num_tokens": 3509676326.0, "step": 4590 }, { "epoch": 1.681903540512069, "grad_norm": 0.16283555076350834, "learning_rate": 3.184883030509831e-05, "loss": 0.4716, "num_tokens": 3510393089.0, "step": 4591 }, { "epoch": 1.6822699583199743, "grad_norm": 0.14224953654226283, "learning_rate": 3.184525503373713e-05, "loss": 0.5286, "num_tokens": 3511130824.0, "step": 4592 }, { "epoch": 1.68263637612788, "grad_norm": 0.1966896149837293, "learning_rate": 3.1841679208068466e-05, "loss": 0.5161, "num_tokens": 3512009992.0, "step": 4593 }, { "epoch": 1.6830027939357852, "grad_norm": 0.14702898564846698, "learning_rate": 3.183810282829363e-05, "loss": 0.5283, "num_tokens": 3512730460.0, "step": 4594 }, { "epoch": 1.6833692117436907, "grad_norm": 0.1433704830608797, "learning_rate": 3.1834525894614e-05, "loss": 0.4804, "num_tokens": 3513460346.0, "step": 4595 }, { "epoch": 1.6837356295515962, "grad_norm": 0.1641542861783132, "learning_rate": 3.183094840723096e-05, "loss": 0.486, "num_tokens": 3514231052.0, "step": 4596 }, { "epoch": 1.6841020473595016, "grad_norm": 0.15174248935108112, "learning_rate": 3.182737036634592e-05, "loss": 0.5799, "num_tokens": 3514906890.0, "step": 4597 }, { "epoch": 1.6844684651674071, "grad_norm": 0.14545581822046924, "learning_rate": 3.182379177216034e-05, "loss": 0.5021, "num_tokens": 3515781032.0, "step": 4598 }, { "epoch": 1.6848348829753126, "grad_norm": 0.17024468349222818, "learning_rate": 3.18202126248757e-05, "loss": 0.5383, "num_tokens": 3516410430.0, "step": 4599 }, { "epoch": 1.685201300783218, "grad_norm": 0.13802614147430875, "learning_rate": 3.18166329246935e-05, "loss": 0.4866, "num_tokens": 3517101258.0, "step": 4600 }, { "epoch": 1.6855677185911235, "grad_norm": 0.14424538321284128, "learning_rate": 3.18130526718153e-05, "loss": 0.511, "num_tokens": 3517902951.0, "step": 4601 }, { "epoch": 1.685934136399029, "grad_norm": 0.14101382833558224, "learning_rate": 3.1809471866442675e-05, "loss": 0.516, "num_tokens": 3518580776.0, "step": 4602 }, { "epoch": 1.6863005542069345, "grad_norm": 0.14528983814219815, "learning_rate": 3.1805890508777224e-05, "loss": 0.4972, "num_tokens": 3519341824.0, "step": 4603 }, { "epoch": 1.68666697201484, "grad_norm": 0.13177268514936508, "learning_rate": 3.1802308599020596e-05, "loss": 0.5232, "num_tokens": 3520158371.0, "step": 4604 }, { "epoch": 1.6870333898227454, "grad_norm": 0.1351484188587551, "learning_rate": 3.179872613737445e-05, "loss": 0.4864, "num_tokens": 3520909357.0, "step": 4605 }, { "epoch": 1.687399807630651, "grad_norm": 0.14578053926455073, "learning_rate": 3.179514312404048e-05, "loss": 0.5452, "num_tokens": 3521662132.0, "step": 4606 }, { "epoch": 1.6877662254385564, "grad_norm": 0.13889532253259465, "learning_rate": 3.179155955922043e-05, "loss": 0.4883, "num_tokens": 3522381574.0, "step": 4607 }, { "epoch": 1.6881326432464618, "grad_norm": 0.1282729304067802, "learning_rate": 3.1787975443116057e-05, "loss": 0.4842, "num_tokens": 3523279024.0, "step": 4608 }, { "epoch": 1.688499061054367, "grad_norm": 0.13923859755835624, "learning_rate": 3.178439077592915e-05, "loss": 0.5031, "num_tokens": 3524051666.0, "step": 4609 }, { "epoch": 1.6888654788622728, "grad_norm": 0.17866798039985016, "learning_rate": 3.178080555786154e-05, "loss": 0.5071, "num_tokens": 3524938464.0, "step": 4610 }, { "epoch": 1.689231896670178, "grad_norm": 0.14434018235708385, "learning_rate": 3.1777219789115074e-05, "loss": 0.529, "num_tokens": 3525727207.0, "step": 4611 }, { "epoch": 1.6895983144780837, "grad_norm": 0.1584008508303071, "learning_rate": 3.177363346989164e-05, "loss": 0.5182, "num_tokens": 3526550920.0, "step": 4612 }, { "epoch": 1.689964732285989, "grad_norm": 0.1530025439718578, "learning_rate": 3.1770046600393163e-05, "loss": 0.5027, "num_tokens": 3527265970.0, "step": 4613 }, { "epoch": 1.6903311500938947, "grad_norm": 0.15253406151119434, "learning_rate": 3.176645918082158e-05, "loss": 0.4805, "num_tokens": 3528123504.0, "step": 4614 }, { "epoch": 1.6906975679018, "grad_norm": 0.13070735203387593, "learning_rate": 3.176287121137888e-05, "loss": 0.5097, "num_tokens": 3528916255.0, "step": 4615 }, { "epoch": 1.6910639857097056, "grad_norm": 0.15175812204536393, "learning_rate": 3.175928269226707e-05, "loss": 0.506, "num_tokens": 3529710507.0, "step": 4616 }, { "epoch": 1.6914304035176109, "grad_norm": 0.14828394371496173, "learning_rate": 3.175569362368817e-05, "loss": 0.4692, "num_tokens": 3530430454.0, "step": 4617 }, { "epoch": 1.6917968213255166, "grad_norm": 0.1486741790237615, "learning_rate": 3.175210400584429e-05, "loss": 0.5048, "num_tokens": 3531181057.0, "step": 4618 }, { "epoch": 1.6921632391334218, "grad_norm": 0.17569752857020873, "learning_rate": 3.1748513838937506e-05, "loss": 0.5078, "num_tokens": 3531909641.0, "step": 4619 }, { "epoch": 1.6925296569413275, "grad_norm": 0.14148529881056696, "learning_rate": 3.174492312316995e-05, "loss": 0.5081, "num_tokens": 3532697865.0, "step": 4620 }, { "epoch": 1.6928960747492328, "grad_norm": 0.1443479655925576, "learning_rate": 3.1741331858743807e-05, "loss": 0.5084, "num_tokens": 3533430750.0, "step": 4621 }, { "epoch": 1.6932624925571382, "grad_norm": 0.1391756405796402, "learning_rate": 3.173774004586125e-05, "loss": 0.5033, "num_tokens": 3534312302.0, "step": 4622 }, { "epoch": 1.6936289103650437, "grad_norm": 0.13761507004789755, "learning_rate": 3.173414768472453e-05, "loss": 0.4967, "num_tokens": 3535174754.0, "step": 4623 }, { "epoch": 1.6939953281729492, "grad_norm": 0.13207038311374836, "learning_rate": 3.173055477553587e-05, "loss": 0.4797, "num_tokens": 3535921560.0, "step": 4624 }, { "epoch": 1.6943617459808546, "grad_norm": 0.14461818110753036, "learning_rate": 3.172696131849759e-05, "loss": 0.5206, "num_tokens": 3536728132.0, "step": 4625 }, { "epoch": 1.6947281637887601, "grad_norm": 0.1421163957787113, "learning_rate": 3.1723367313812e-05, "loss": 0.5123, "num_tokens": 3537375863.0, "step": 4626 }, { "epoch": 1.6950945815966656, "grad_norm": 0.16439961840843173, "learning_rate": 3.171977276168143e-05, "loss": 0.5443, "num_tokens": 3538018622.0, "step": 4627 }, { "epoch": 1.695460999404571, "grad_norm": 0.14228873518137655, "learning_rate": 3.1716177662308285e-05, "loss": 0.5132, "num_tokens": 3538943518.0, "step": 4628 }, { "epoch": 1.6958274172124765, "grad_norm": 0.13105489549716812, "learning_rate": 3.171258201589497e-05, "loss": 0.5074, "num_tokens": 3539790923.0, "step": 4629 }, { "epoch": 1.696193835020382, "grad_norm": 0.17026835210873034, "learning_rate": 3.1708985822643924e-05, "loss": 0.498, "num_tokens": 3540578993.0, "step": 4630 }, { "epoch": 1.6965602528282875, "grad_norm": 0.1531296982760317, "learning_rate": 3.170538908275761e-05, "loss": 0.5048, "num_tokens": 3541366661.0, "step": 4631 }, { "epoch": 1.696926670636193, "grad_norm": 0.1274915357318977, "learning_rate": 3.1701791796438555e-05, "loss": 0.5012, "num_tokens": 3542152765.0, "step": 4632 }, { "epoch": 1.6972930884440984, "grad_norm": 0.15954869045447437, "learning_rate": 3.169819396388927e-05, "loss": 0.5155, "num_tokens": 3542914138.0, "step": 4633 }, { "epoch": 1.697659506252004, "grad_norm": 0.162283493171628, "learning_rate": 3.1694595585312336e-05, "loss": 0.5235, "num_tokens": 3543698072.0, "step": 4634 }, { "epoch": 1.6980259240599094, "grad_norm": 0.13763360285086568, "learning_rate": 3.169099666091034e-05, "loss": 0.4953, "num_tokens": 3544589849.0, "step": 4635 }, { "epoch": 1.6983923418678146, "grad_norm": 0.3245192974157395, "learning_rate": 3.168739719088592e-05, "loss": 0.4925, "num_tokens": 3545389638.0, "step": 4636 }, { "epoch": 1.6987587596757203, "grad_norm": 0.1749247486058243, "learning_rate": 3.1683797175441714e-05, "loss": 0.5218, "num_tokens": 3546236121.0, "step": 4637 }, { "epoch": 1.6991251774836256, "grad_norm": 0.14168772878157398, "learning_rate": 3.168019661478043e-05, "loss": 0.5081, "num_tokens": 3547099141.0, "step": 4638 }, { "epoch": 1.6994915952915313, "grad_norm": 0.14099039862660867, "learning_rate": 3.1676595509104776e-05, "loss": 0.5055, "num_tokens": 3547971523.0, "step": 4639 }, { "epoch": 1.6998580130994365, "grad_norm": 0.14129506197573666, "learning_rate": 3.16729938586175e-05, "loss": 0.5036, "num_tokens": 3548752512.0, "step": 4640 }, { "epoch": 1.7002244309073422, "grad_norm": 0.13384275185659736, "learning_rate": 3.166939166352139e-05, "loss": 0.5369, "num_tokens": 3549615221.0, "step": 4641 }, { "epoch": 1.7005908487152475, "grad_norm": 0.1416451532177688, "learning_rate": 3.166578892401925e-05, "loss": 0.5082, "num_tokens": 3550295457.0, "step": 4642 }, { "epoch": 1.7009572665231532, "grad_norm": 0.1612875880194324, "learning_rate": 3.1662185640313915e-05, "loss": 0.5287, "num_tokens": 3550982722.0, "step": 4643 }, { "epoch": 1.7013236843310584, "grad_norm": 0.1546203720734934, "learning_rate": 3.165858181260828e-05, "loss": 0.5196, "num_tokens": 3551688100.0, "step": 4644 }, { "epoch": 1.701690102138964, "grad_norm": 0.13799278984171937, "learning_rate": 3.165497744110523e-05, "loss": 0.4852, "num_tokens": 3552436773.0, "step": 4645 }, { "epoch": 1.7020565199468694, "grad_norm": 0.16659388274335352, "learning_rate": 3.165137252600771e-05, "loss": 0.5089, "num_tokens": 3553269280.0, "step": 4646 }, { "epoch": 1.702422937754775, "grad_norm": 0.14672209323086263, "learning_rate": 3.164776706751865e-05, "loss": 0.4985, "num_tokens": 3554009452.0, "step": 4647 }, { "epoch": 1.7027893555626803, "grad_norm": 0.1521994321083184, "learning_rate": 3.1644161065841095e-05, "loss": 0.5045, "num_tokens": 3554853833.0, "step": 4648 }, { "epoch": 1.7031557733705858, "grad_norm": 0.14716126966256182, "learning_rate": 3.1640554521178035e-05, "loss": 0.4905, "num_tokens": 3555599815.0, "step": 4649 }, { "epoch": 1.7035221911784912, "grad_norm": 0.1495972972826585, "learning_rate": 3.1636947433732535e-05, "loss": 0.4839, "num_tokens": 3556276282.0, "step": 4650 }, { "epoch": 1.7038886089863967, "grad_norm": 0.1719945305597925, "learning_rate": 3.163333980370769e-05, "loss": 0.5376, "num_tokens": 3556983933.0, "step": 4651 }, { "epoch": 1.7042550267943022, "grad_norm": 0.15011260625539805, "learning_rate": 3.1629731631306596e-05, "loss": 0.4942, "num_tokens": 3557781286.0, "step": 4652 }, { "epoch": 1.7046214446022077, "grad_norm": 0.14566832785069284, "learning_rate": 3.162612291673243e-05, "loss": 0.5404, "num_tokens": 3558571849.0, "step": 4653 }, { "epoch": 1.7049878624101131, "grad_norm": 0.16323143919592117, "learning_rate": 3.1622513660188334e-05, "loss": 0.499, "num_tokens": 3559227911.0, "step": 4654 }, { "epoch": 1.7053542802180186, "grad_norm": 0.14820005084190943, "learning_rate": 3.161890386187755e-05, "loss": 0.509, "num_tokens": 3560028579.0, "step": 4655 }, { "epoch": 1.705720698025924, "grad_norm": 0.14095472579184593, "learning_rate": 3.16152935220033e-05, "loss": 0.4788, "num_tokens": 3560837622.0, "step": 4656 }, { "epoch": 1.7060871158338295, "grad_norm": 0.15182046787705383, "learning_rate": 3.161168264076885e-05, "loss": 0.494, "num_tokens": 3561557107.0, "step": 4657 }, { "epoch": 1.706453533641735, "grad_norm": 0.15236775308124334, "learning_rate": 3.160807121837751e-05, "loss": 0.4696, "num_tokens": 3562285292.0, "step": 4658 }, { "epoch": 1.7068199514496405, "grad_norm": 0.14712893345251712, "learning_rate": 3.160445925503261e-05, "loss": 0.5175, "num_tokens": 3563038596.0, "step": 4659 }, { "epoch": 1.707186369257546, "grad_norm": 0.1392395773852909, "learning_rate": 3.1600846750937496e-05, "loss": 0.5029, "num_tokens": 3563816889.0, "step": 4660 }, { "epoch": 1.7075527870654512, "grad_norm": 0.15279251313766018, "learning_rate": 3.159723370629557e-05, "loss": 0.4885, "num_tokens": 3564562260.0, "step": 4661 }, { "epoch": 1.707919204873357, "grad_norm": 0.15480280415272557, "learning_rate": 3.159362012131026e-05, "loss": 0.5205, "num_tokens": 3565339506.0, "step": 4662 }, { "epoch": 1.7082856226812622, "grad_norm": 0.12945711993412576, "learning_rate": 3.159000599618501e-05, "loss": 0.4961, "num_tokens": 3566183335.0, "step": 4663 }, { "epoch": 1.7086520404891679, "grad_norm": 0.13675158955668343, "learning_rate": 3.15863913311233e-05, "loss": 0.5137, "num_tokens": 3566932384.0, "step": 4664 }, { "epoch": 1.709018458297073, "grad_norm": 0.15506230087006245, "learning_rate": 3.158277612632865e-05, "loss": 0.486, "num_tokens": 3567651784.0, "step": 4665 }, { "epoch": 1.7093848761049788, "grad_norm": 3.2711779337521913, "learning_rate": 3.15791603820046e-05, "loss": 0.5254, "num_tokens": 3568318241.0, "step": 4666 }, { "epoch": 1.709751293912884, "grad_norm": 0.1609434062213882, "learning_rate": 3.157554409835472e-05, "loss": 0.4983, "num_tokens": 3569053228.0, "step": 4667 }, { "epoch": 1.7101177117207897, "grad_norm": 0.14512084961309252, "learning_rate": 3.157192727558262e-05, "loss": 0.5035, "num_tokens": 3569867998.0, "step": 4668 }, { "epoch": 1.710484129528695, "grad_norm": 0.13435119096232032, "learning_rate": 3.156830991389193e-05, "loss": 0.4877, "num_tokens": 3570639062.0, "step": 4669 }, { "epoch": 1.7108505473366007, "grad_norm": 0.14667795131542855, "learning_rate": 3.156469201348632e-05, "loss": 0.5275, "num_tokens": 3571418269.0, "step": 4670 }, { "epoch": 1.711216965144506, "grad_norm": 0.14832839643947057, "learning_rate": 3.1561073574569477e-05, "loss": 0.4946, "num_tokens": 3572167379.0, "step": 4671 }, { "epoch": 1.7115833829524116, "grad_norm": 0.285696366544635, "learning_rate": 3.1557454597345136e-05, "loss": 0.4876, "num_tokens": 3572892029.0, "step": 4672 }, { "epoch": 1.7119498007603169, "grad_norm": 0.1425845565869046, "learning_rate": 3.155383508201704e-05, "loss": 0.5049, "num_tokens": 3573579111.0, "step": 4673 }, { "epoch": 1.7123162185682226, "grad_norm": 0.14156532040041672, "learning_rate": 3.1550215028788986e-05, "loss": 0.4757, "num_tokens": 3574355379.0, "step": 4674 }, { "epoch": 1.7126826363761278, "grad_norm": 0.15934488314573927, "learning_rate": 3.154659443786479e-05, "loss": 0.5012, "num_tokens": 3575124317.0, "step": 4675 }, { "epoch": 1.7130490541840333, "grad_norm": 0.14850026073828518, "learning_rate": 3.154297330944829e-05, "loss": 0.5162, "num_tokens": 3575882574.0, "step": 4676 }, { "epoch": 1.7134154719919388, "grad_norm": 0.15079041504978397, "learning_rate": 3.153935164374337e-05, "loss": 0.502, "num_tokens": 3576645834.0, "step": 4677 }, { "epoch": 1.7137818897998442, "grad_norm": 0.16677025194915968, "learning_rate": 3.153572944095393e-05, "loss": 0.5196, "num_tokens": 3577465172.0, "step": 4678 }, { "epoch": 1.7141483076077497, "grad_norm": 0.154536772580198, "learning_rate": 3.153210670128392e-05, "loss": 0.5025, "num_tokens": 3578200643.0, "step": 4679 }, { "epoch": 1.7145147254156552, "grad_norm": 0.13570016280954036, "learning_rate": 3.1528483424937295e-05, "loss": 0.4949, "num_tokens": 3578972757.0, "step": 4680 }, { "epoch": 1.7148811432235607, "grad_norm": 0.15810421696453836, "learning_rate": 3.152485961211806e-05, "loss": 0.5067, "num_tokens": 3579685484.0, "step": 4681 }, { "epoch": 1.7152475610314661, "grad_norm": 0.14991050749079712, "learning_rate": 3.1521235263030234e-05, "loss": 0.4924, "num_tokens": 3580360909.0, "step": 4682 }, { "epoch": 1.7156139788393716, "grad_norm": 0.16625302368245937, "learning_rate": 3.151761037787788e-05, "loss": 0.5068, "num_tokens": 3581115591.0, "step": 4683 }, { "epoch": 1.715980396647277, "grad_norm": 0.14378983069706477, "learning_rate": 3.15139849568651e-05, "loss": 0.5102, "num_tokens": 3581891311.0, "step": 4684 }, { "epoch": 1.7163468144551826, "grad_norm": 0.16001252673047625, "learning_rate": 3.151035900019599e-05, "loss": 0.5079, "num_tokens": 3582600138.0, "step": 4685 }, { "epoch": 1.716713232263088, "grad_norm": 0.13782431451092098, "learning_rate": 3.150673250807471e-05, "loss": 0.5056, "num_tokens": 3583386452.0, "step": 4686 }, { "epoch": 1.7170796500709935, "grad_norm": 0.13517095458452744, "learning_rate": 3.150310548070544e-05, "loss": 0.472, "num_tokens": 3584195787.0, "step": 4687 }, { "epoch": 1.7174460678788988, "grad_norm": 0.12909395774401036, "learning_rate": 3.149947791829238e-05, "loss": 0.4676, "num_tokens": 3585059620.0, "step": 4688 }, { "epoch": 1.7178124856868044, "grad_norm": 0.14035363362465517, "learning_rate": 3.149584982103978e-05, "loss": 0.5031, "num_tokens": 3585784668.0, "step": 4689 }, { "epoch": 1.7181789034947097, "grad_norm": 0.146346467548581, "learning_rate": 3.1492221189151904e-05, "loss": 0.4964, "num_tokens": 3586465418.0, "step": 4690 }, { "epoch": 1.7185453213026154, "grad_norm": 0.13413318933402674, "learning_rate": 3.148859202283305e-05, "loss": 0.5075, "num_tokens": 3587231624.0, "step": 4691 }, { "epoch": 1.7189117391105206, "grad_norm": 0.1563939237329672, "learning_rate": 3.148496232228754e-05, "loss": 0.4595, "num_tokens": 3587905851.0, "step": 4692 }, { "epoch": 1.7192781569184263, "grad_norm": 0.1285309257857606, "learning_rate": 3.1481332087719756e-05, "loss": 0.4828, "num_tokens": 3588704060.0, "step": 4693 }, { "epoch": 1.7196445747263316, "grad_norm": 0.1607156133889853, "learning_rate": 3.1477701319334065e-05, "loss": 0.5259, "num_tokens": 3589518213.0, "step": 4694 }, { "epoch": 1.7200109925342373, "grad_norm": 0.15059543481016527, "learning_rate": 3.14740700173349e-05, "loss": 0.5414, "num_tokens": 3590303451.0, "step": 4695 }, { "epoch": 1.7203774103421425, "grad_norm": 0.13539402089322558, "learning_rate": 3.1470438181926705e-05, "loss": 0.4699, "num_tokens": 3591034141.0, "step": 4696 }, { "epoch": 1.7207438281500482, "grad_norm": 0.13032416404840075, "learning_rate": 3.146680581331395e-05, "loss": 0.4734, "num_tokens": 3591874621.0, "step": 4697 }, { "epoch": 1.7211102459579535, "grad_norm": 0.16214742267142382, "learning_rate": 3.1463172911701166e-05, "loss": 0.5438, "num_tokens": 3592579957.0, "step": 4698 }, { "epoch": 1.7214766637658592, "grad_norm": 0.14259442321562207, "learning_rate": 3.145953947729288e-05, "loss": 0.4955, "num_tokens": 3593374887.0, "step": 4699 }, { "epoch": 1.7218430815737644, "grad_norm": 0.1413908199597496, "learning_rate": 3.145590551029366e-05, "loss": 0.5588, "num_tokens": 3594028677.0, "step": 4700 }, { "epoch": 1.72220949938167, "grad_norm": 0.15707583477691575, "learning_rate": 3.145227101090811e-05, "loss": 0.5037, "num_tokens": 3594738963.0, "step": 4701 }, { "epoch": 1.7225759171895754, "grad_norm": 0.16205403666510323, "learning_rate": 3.144863597934086e-05, "loss": 0.4895, "num_tokens": 3595547183.0, "step": 4702 }, { "epoch": 1.7229423349974808, "grad_norm": 0.14679096871045133, "learning_rate": 3.1445000415796556e-05, "loss": 0.5087, "num_tokens": 3596352462.0, "step": 4703 }, { "epoch": 1.7233087528053863, "grad_norm": 0.15032123459921007, "learning_rate": 3.144136432047991e-05, "loss": 0.5255, "num_tokens": 3597058807.0, "step": 4704 }, { "epoch": 1.7236751706132918, "grad_norm": 0.1475705399551413, "learning_rate": 3.1437727693595625e-05, "loss": 0.5038, "num_tokens": 3597896269.0, "step": 4705 }, { "epoch": 1.7240415884211973, "grad_norm": 0.16250578090418902, "learning_rate": 3.1434090535348455e-05, "loss": 0.4928, "num_tokens": 3598738002.0, "step": 4706 }, { "epoch": 1.7244080062291027, "grad_norm": 0.1509481438209039, "learning_rate": 3.143045284594319e-05, "loss": 0.5062, "num_tokens": 3599430172.0, "step": 4707 }, { "epoch": 1.7247744240370082, "grad_norm": 0.1485688482145803, "learning_rate": 3.142681462558463e-05, "loss": 0.5177, "num_tokens": 3600201855.0, "step": 4708 }, { "epoch": 1.7251408418449137, "grad_norm": 0.16945305466050745, "learning_rate": 3.14231758744776e-05, "loss": 0.5054, "num_tokens": 3600970340.0, "step": 4709 }, { "epoch": 1.7255072596528191, "grad_norm": 0.16605749959623362, "learning_rate": 3.141953659282699e-05, "loss": 0.5159, "num_tokens": 3601667041.0, "step": 4710 }, { "epoch": 1.7258736774607246, "grad_norm": 0.1401619286268363, "learning_rate": 3.141589678083768e-05, "loss": 0.5096, "num_tokens": 3602423057.0, "step": 4711 }, { "epoch": 1.72624009526863, "grad_norm": 0.15311715660675945, "learning_rate": 3.141225643871462e-05, "loss": 0.5172, "num_tokens": 3603197192.0, "step": 4712 }, { "epoch": 1.7266065130765356, "grad_norm": 0.15666146784045912, "learning_rate": 3.140861556666276e-05, "loss": 0.5165, "num_tokens": 3603899334.0, "step": 4713 }, { "epoch": 1.726972930884441, "grad_norm": 0.13808521036205232, "learning_rate": 3.140497416488708e-05, "loss": 0.4736, "num_tokens": 3604699594.0, "step": 4714 }, { "epoch": 1.7273393486923463, "grad_norm": 0.16460213260662274, "learning_rate": 3.1401332233592606e-05, "loss": 0.506, "num_tokens": 3605461926.0, "step": 4715 }, { "epoch": 1.727705766500252, "grad_norm": 0.14628103998517, "learning_rate": 3.139768977298439e-05, "loss": 0.5018, "num_tokens": 3606213181.0, "step": 4716 }, { "epoch": 1.7280721843081572, "grad_norm": 0.14912026107640294, "learning_rate": 3.13940467832675e-05, "loss": 0.4527, "num_tokens": 3606949843.0, "step": 4717 }, { "epoch": 1.728438602116063, "grad_norm": 0.13736511622693215, "learning_rate": 3.139040326464705e-05, "loss": 0.4505, "num_tokens": 3607815755.0, "step": 4718 }, { "epoch": 1.7288050199239682, "grad_norm": 0.13638333008752604, "learning_rate": 3.138675921732817e-05, "loss": 0.4724, "num_tokens": 3608533653.0, "step": 4719 }, { "epoch": 1.7291714377318739, "grad_norm": 0.15256144050918108, "learning_rate": 3.138311464151604e-05, "loss": 0.5364, "num_tokens": 3609402950.0, "step": 4720 }, { "epoch": 1.7295378555397791, "grad_norm": 0.14470365542272132, "learning_rate": 3.137946953741585e-05, "loss": 0.5043, "num_tokens": 3610149409.0, "step": 4721 }, { "epoch": 1.7299042733476848, "grad_norm": 0.1394446996687917, "learning_rate": 3.137582390523282e-05, "loss": 0.5113, "num_tokens": 3610909932.0, "step": 4722 }, { "epoch": 1.73027069115559, "grad_norm": 0.15467535970716478, "learning_rate": 3.1372177745172215e-05, "loss": 0.5158, "num_tokens": 3611717227.0, "step": 4723 }, { "epoch": 1.7306371089634958, "grad_norm": 0.15135983680367557, "learning_rate": 3.136853105743932e-05, "loss": 0.4987, "num_tokens": 3612533449.0, "step": 4724 }, { "epoch": 1.731003526771401, "grad_norm": 0.1535066438682913, "learning_rate": 3.136488384223945e-05, "loss": 0.5235, "num_tokens": 3613295656.0, "step": 4725 }, { "epoch": 1.7313699445793067, "grad_norm": 0.16533227106355147, "learning_rate": 3.1361236099777946e-05, "loss": 0.5453, "num_tokens": 3613942478.0, "step": 4726 }, { "epoch": 1.731736362387212, "grad_norm": 0.1322954515348886, "learning_rate": 3.135758783026019e-05, "loss": 0.5055, "num_tokens": 3614720457.0, "step": 4727 }, { "epoch": 1.7321027801951174, "grad_norm": 0.14003826404196854, "learning_rate": 3.1353939033891575e-05, "loss": 0.505, "num_tokens": 3615494256.0, "step": 4728 }, { "epoch": 1.732469198003023, "grad_norm": 0.13710709462293386, "learning_rate": 3.135028971087756e-05, "loss": 0.5323, "num_tokens": 3616214079.0, "step": 4729 }, { "epoch": 1.7328356158109284, "grad_norm": 0.1424399992902563, "learning_rate": 3.134663986142358e-05, "loss": 0.52, "num_tokens": 3616905795.0, "step": 4730 }, { "epoch": 1.7332020336188338, "grad_norm": 0.13977201579323806, "learning_rate": 3.134298948573515e-05, "loss": 0.5213, "num_tokens": 3617711800.0, "step": 4731 }, { "epoch": 1.7335684514267393, "grad_norm": 0.13302579438083995, "learning_rate": 3.1339338584017776e-05, "loss": 0.5032, "num_tokens": 3618464179.0, "step": 4732 }, { "epoch": 1.7339348692346448, "grad_norm": 0.1312370266627796, "learning_rate": 3.133568715647703e-05, "loss": 0.4694, "num_tokens": 3619262483.0, "step": 4733 }, { "epoch": 1.7343012870425503, "grad_norm": 0.15111392469624602, "learning_rate": 3.133203520331848e-05, "loss": 0.5467, "num_tokens": 3619969416.0, "step": 4734 }, { "epoch": 1.7346677048504557, "grad_norm": 0.14984926248452674, "learning_rate": 3.132838272474775e-05, "loss": 0.4746, "num_tokens": 3620603157.0, "step": 4735 }, { "epoch": 1.7350341226583612, "grad_norm": 0.14256161389726438, "learning_rate": 3.132472972097047e-05, "loss": 0.5437, "num_tokens": 3621420185.0, "step": 4736 }, { "epoch": 1.7354005404662667, "grad_norm": 0.15080395670032098, "learning_rate": 3.132107619219232e-05, "loss": 0.5007, "num_tokens": 3622220028.0, "step": 4737 }, { "epoch": 1.7357669582741722, "grad_norm": 0.1317448590172617, "learning_rate": 3.1317422138618995e-05, "loss": 0.4922, "num_tokens": 3623027164.0, "step": 4738 }, { "epoch": 1.7361333760820776, "grad_norm": 0.15961118969245108, "learning_rate": 3.131376756045623e-05, "loss": 0.5362, "num_tokens": 3623801527.0, "step": 4739 }, { "epoch": 1.736499793889983, "grad_norm": 0.13938585168045464, "learning_rate": 3.131011245790979e-05, "loss": 0.5065, "num_tokens": 3624640594.0, "step": 4740 }, { "epoch": 1.7368662116978886, "grad_norm": 0.16998096013177721, "learning_rate": 3.1306456831185444e-05, "loss": 0.5043, "num_tokens": 3625329713.0, "step": 4741 }, { "epoch": 1.7372326295057938, "grad_norm": 0.14749889985758602, "learning_rate": 3.130280068048904e-05, "loss": 0.5229, "num_tokens": 3626127236.0, "step": 4742 }, { "epoch": 1.7375990473136995, "grad_norm": 0.15875314960574086, "learning_rate": 3.1299144006026396e-05, "loss": 0.508, "num_tokens": 3626855692.0, "step": 4743 }, { "epoch": 1.7379654651216048, "grad_norm": 0.135216811956085, "learning_rate": 3.129548680800342e-05, "loss": 0.493, "num_tokens": 3627663289.0, "step": 4744 }, { "epoch": 1.7383318829295105, "grad_norm": 0.1648400568678061, "learning_rate": 3.1291829086625995e-05, "loss": 0.5025, "num_tokens": 3628407793.0, "step": 4745 }, { "epoch": 1.7386983007374157, "grad_norm": 0.15014688192795453, "learning_rate": 3.128817084210007e-05, "loss": 0.511, "num_tokens": 3629128549.0, "step": 4746 }, { "epoch": 1.7390647185453214, "grad_norm": 0.14274547995498468, "learning_rate": 3.1284512074631613e-05, "loss": 0.4773, "num_tokens": 3629875302.0, "step": 4747 }, { "epoch": 1.7394311363532267, "grad_norm": 0.143970150964374, "learning_rate": 3.128085278442662e-05, "loss": 0.4898, "num_tokens": 3630622206.0, "step": 4748 }, { "epoch": 1.7397975541611324, "grad_norm": 0.15248365040044198, "learning_rate": 3.12771929716911e-05, "loss": 0.5389, "num_tokens": 3631422250.0, "step": 4749 }, { "epoch": 1.7401639719690376, "grad_norm": 0.1386620376027203, "learning_rate": 3.127353263663113e-05, "loss": 0.4597, "num_tokens": 3632206103.0, "step": 4750 }, { "epoch": 1.7405303897769433, "grad_norm": 0.1488830269880269, "learning_rate": 3.126987177945279e-05, "loss": 0.5126, "num_tokens": 3632999478.0, "step": 4751 }, { "epoch": 1.7408968075848485, "grad_norm": 0.14833462101148653, "learning_rate": 3.1266210400362174e-05, "loss": 0.5082, "num_tokens": 3633739080.0, "step": 4752 }, { "epoch": 1.7412632253927542, "grad_norm": 0.16392711896256015, "learning_rate": 3.126254849956544e-05, "loss": 0.5247, "num_tokens": 3634471482.0, "step": 4753 }, { "epoch": 1.7416296432006595, "grad_norm": 0.16017733649125623, "learning_rate": 3.125888607726877e-05, "loss": 0.5248, "num_tokens": 3635062590.0, "step": 4754 }, { "epoch": 1.741996061008565, "grad_norm": 0.16583840950708847, "learning_rate": 3.1255223133678355e-05, "loss": 0.5115, "num_tokens": 3635886886.0, "step": 4755 }, { "epoch": 1.7423624788164704, "grad_norm": 0.16044853932684824, "learning_rate": 3.1251559669000424e-05, "loss": 0.5066, "num_tokens": 3636677265.0, "step": 4756 }, { "epoch": 1.742728896624376, "grad_norm": 0.15104094086720155, "learning_rate": 3.124789568344124e-05, "loss": 0.5261, "num_tokens": 3637370470.0, "step": 4757 }, { "epoch": 1.7430953144322814, "grad_norm": 0.15028236251410318, "learning_rate": 3.1244231177207085e-05, "loss": 0.5027, "num_tokens": 3638225967.0, "step": 4758 }, { "epoch": 1.7434617322401869, "grad_norm": 0.15922601405010883, "learning_rate": 3.12405661505043e-05, "loss": 0.4812, "num_tokens": 3639006388.0, "step": 4759 }, { "epoch": 1.7438281500480923, "grad_norm": 0.1393348276898627, "learning_rate": 3.1236900603539204e-05, "loss": 0.5251, "num_tokens": 3639909998.0, "step": 4760 }, { "epoch": 1.7441945678559978, "grad_norm": 0.1448500508252333, "learning_rate": 3.123323453651819e-05, "loss": 0.5277, "num_tokens": 3640626737.0, "step": 4761 }, { "epoch": 1.7445609856639033, "grad_norm": 0.15231971301345296, "learning_rate": 3.122956794964767e-05, "loss": 0.524, "num_tokens": 3641311807.0, "step": 4762 }, { "epoch": 1.7449274034718087, "grad_norm": 0.14121424256786286, "learning_rate": 3.122590084313408e-05, "loss": 0.4806, "num_tokens": 3642074979.0, "step": 4763 }, { "epoch": 1.7452938212797142, "grad_norm": 0.14246315022374342, "learning_rate": 3.122223321718387e-05, "loss": 0.4953, "num_tokens": 3642898869.0, "step": 4764 }, { "epoch": 1.7456602390876197, "grad_norm": 0.18157320658371612, "learning_rate": 3.121856507200355e-05, "loss": 0.539, "num_tokens": 3643539026.0, "step": 4765 }, { "epoch": 1.7460266568955252, "grad_norm": 0.14831973096468945, "learning_rate": 3.121489640779963e-05, "loss": 0.5382, "num_tokens": 3644308292.0, "step": 4766 }, { "epoch": 1.7463930747034306, "grad_norm": 0.17297237381493114, "learning_rate": 3.1211227224778676e-05, "loss": 0.5038, "num_tokens": 3645061926.0, "step": 4767 }, { "epoch": 1.746759492511336, "grad_norm": 0.15255627784746983, "learning_rate": 3.1207557523147265e-05, "loss": 0.4867, "num_tokens": 3645773721.0, "step": 4768 }, { "epoch": 1.7471259103192414, "grad_norm": 0.16537517350980396, "learning_rate": 3.120388730311201e-05, "loss": 0.5095, "num_tokens": 3646429246.0, "step": 4769 }, { "epoch": 1.747492328127147, "grad_norm": 0.14415262898365244, "learning_rate": 3.1200216564879554e-05, "loss": 0.5014, "num_tokens": 3647115593.0, "step": 4770 }, { "epoch": 1.7478587459350523, "grad_norm": 0.15404626133228014, "learning_rate": 3.1196545308656565e-05, "loss": 0.4831, "num_tokens": 3647909618.0, "step": 4771 }, { "epoch": 1.748225163742958, "grad_norm": 0.21580937912920115, "learning_rate": 3.1192873534649734e-05, "loss": 0.5068, "num_tokens": 3648622614.0, "step": 4772 }, { "epoch": 1.7485915815508632, "grad_norm": 0.14184565335744106, "learning_rate": 3.118920124306581e-05, "loss": 0.4648, "num_tokens": 3649344648.0, "step": 4773 }, { "epoch": 1.748957999358769, "grad_norm": 0.17642008856266972, "learning_rate": 3.118552843411152e-05, "loss": 0.5025, "num_tokens": 3650130173.0, "step": 4774 }, { "epoch": 1.7493244171666742, "grad_norm": 0.1794832844282816, "learning_rate": 3.118185510799367e-05, "loss": 0.5099, "num_tokens": 3650849612.0, "step": 4775 }, { "epoch": 1.74969083497458, "grad_norm": 0.15274151830299407, "learning_rate": 3.1178181264919075e-05, "loss": 0.4919, "num_tokens": 3651708890.0, "step": 4776 }, { "epoch": 1.7500572527824851, "grad_norm": 0.16477673337420445, "learning_rate": 3.1174506905094577e-05, "loss": 0.4829, "num_tokens": 3652572182.0, "step": 4777 }, { "epoch": 1.7504236705903908, "grad_norm": 0.14820692290673992, "learning_rate": 3.1170832028727045e-05, "loss": 0.4894, "num_tokens": 3653363499.0, "step": 4778 }, { "epoch": 1.750790088398296, "grad_norm": 0.17169109442663358, "learning_rate": 3.1167156636023394e-05, "loss": 0.5415, "num_tokens": 3654164733.0, "step": 4779 }, { "epoch": 1.7511565062062018, "grad_norm": 0.16488115137123682, "learning_rate": 3.116348072719055e-05, "loss": 0.5479, "num_tokens": 3654957134.0, "step": 4780 }, { "epoch": 1.751522924014107, "grad_norm": 0.16651444277747576, "learning_rate": 3.115980430243547e-05, "loss": 0.5106, "num_tokens": 3655769184.0, "step": 4781 }, { "epoch": 1.7518893418220125, "grad_norm": 0.14005192418503357, "learning_rate": 3.115612736196514e-05, "loss": 0.4775, "num_tokens": 3656413057.0, "step": 4782 }, { "epoch": 1.752255759629918, "grad_norm": 0.15943628117434538, "learning_rate": 3.1152449905986594e-05, "loss": 0.516, "num_tokens": 3657179548.0, "step": 4783 }, { "epoch": 1.7526221774378234, "grad_norm": 0.16137652122653137, "learning_rate": 3.1148771934706864e-05, "loss": 0.5157, "num_tokens": 3657927565.0, "step": 4784 }, { "epoch": 1.752988595245729, "grad_norm": 0.13739695385246106, "learning_rate": 3.114509344833304e-05, "loss": 0.4818, "num_tokens": 3658750614.0, "step": 4785 }, { "epoch": 1.7533550130536344, "grad_norm": 0.17563794548551107, "learning_rate": 3.114141444707223e-05, "loss": 0.4939, "num_tokens": 3659535369.0, "step": 4786 }, { "epoch": 1.7537214308615399, "grad_norm": 0.14288719432496347, "learning_rate": 3.113773493113155e-05, "loss": 0.5078, "num_tokens": 3660241927.0, "step": 4787 }, { "epoch": 1.7540878486694453, "grad_norm": 0.1513786284508035, "learning_rate": 3.1134054900718185e-05, "loss": 0.5095, "num_tokens": 3661019070.0, "step": 4788 }, { "epoch": 1.7544542664773508, "grad_norm": 0.15045883382163455, "learning_rate": 3.113037435603932e-05, "loss": 0.5139, "num_tokens": 3661798646.0, "step": 4789 }, { "epoch": 1.7548206842852563, "grad_norm": 0.1457905012858214, "learning_rate": 3.112669329730217e-05, "loss": 0.5106, "num_tokens": 3662569895.0, "step": 4790 }, { "epoch": 1.7551871020931618, "grad_norm": 0.1419772301481144, "learning_rate": 3.1123011724713993e-05, "loss": 0.493, "num_tokens": 3663381562.0, "step": 4791 }, { "epoch": 1.7555535199010672, "grad_norm": 0.15534430996742282, "learning_rate": 3.111932963848207e-05, "loss": 0.5084, "num_tokens": 3664165446.0, "step": 4792 }, { "epoch": 1.7559199377089727, "grad_norm": 0.14056071342565973, "learning_rate": 3.1115647038813705e-05, "loss": 0.511, "num_tokens": 3664903754.0, "step": 4793 }, { "epoch": 1.7562863555168782, "grad_norm": 0.14083666491884145, "learning_rate": 3.1111963925916246e-05, "loss": 0.5021, "num_tokens": 3665635921.0, "step": 4794 }, { "epoch": 1.7566527733247836, "grad_norm": 0.15136295683881063, "learning_rate": 3.110828029999705e-05, "loss": 0.5031, "num_tokens": 3666409154.0, "step": 4795 }, { "epoch": 1.757019191132689, "grad_norm": 0.17504410259451422, "learning_rate": 3.110459616126351e-05, "loss": 0.5438, "num_tokens": 3667074754.0, "step": 4796 }, { "epoch": 1.7573856089405946, "grad_norm": 0.16378201870028528, "learning_rate": 3.110091150992306e-05, "loss": 0.5138, "num_tokens": 3667816590.0, "step": 4797 }, { "epoch": 1.7577520267484998, "grad_norm": 0.14233497705147158, "learning_rate": 3.109722634618315e-05, "loss": 0.5094, "num_tokens": 3668560248.0, "step": 4798 }, { "epoch": 1.7581184445564055, "grad_norm": 0.15671852050928198, "learning_rate": 3.109354067025125e-05, "loss": 0.488, "num_tokens": 3669368263.0, "step": 4799 }, { "epoch": 1.7584848623643108, "grad_norm": 0.16325680530303271, "learning_rate": 3.108985448233489e-05, "loss": 0.5222, "num_tokens": 3670030164.0, "step": 4800 }, { "epoch": 1.7588512801722165, "grad_norm": 0.12807180529960616, "learning_rate": 3.1086167782641594e-05, "loss": 0.4865, "num_tokens": 3670947998.0, "step": 4801 }, { "epoch": 1.7592176979801217, "grad_norm": 0.16811476989285493, "learning_rate": 3.108248057137894e-05, "loss": 0.485, "num_tokens": 3671723826.0, "step": 4802 }, { "epoch": 1.7595841157880274, "grad_norm": 0.14135398199476398, "learning_rate": 3.1078792848754534e-05, "loss": 0.5023, "num_tokens": 3672504311.0, "step": 4803 }, { "epoch": 1.7599505335959327, "grad_norm": 0.13455189617592603, "learning_rate": 3.107510461497599e-05, "loss": 0.5146, "num_tokens": 3673373784.0, "step": 4804 }, { "epoch": 1.7603169514038384, "grad_norm": 0.17371249005755088, "learning_rate": 3.107141587025094e-05, "loss": 0.4667, "num_tokens": 3674077698.0, "step": 4805 }, { "epoch": 1.7606833692117436, "grad_norm": 0.15382596544619614, "learning_rate": 3.1067726614787116e-05, "loss": 0.4983, "num_tokens": 3674709236.0, "step": 4806 }, { "epoch": 1.7610497870196493, "grad_norm": 0.16115876445722596, "learning_rate": 3.10640368487922e-05, "loss": 0.5031, "num_tokens": 3675407912.0, "step": 4807 }, { "epoch": 1.7614162048275546, "grad_norm": 0.16169634961319232, "learning_rate": 3.106034657247394e-05, "loss": 0.4999, "num_tokens": 3676184663.0, "step": 4808 }, { "epoch": 1.76178262263546, "grad_norm": 0.14296717110443088, "learning_rate": 3.10566557860401e-05, "loss": 0.4945, "num_tokens": 3676939223.0, "step": 4809 }, { "epoch": 1.7621490404433655, "grad_norm": 0.1447470699906271, "learning_rate": 3.105296448969849e-05, "loss": 0.4789, "num_tokens": 3677716321.0, "step": 4810 }, { "epoch": 1.762515458251271, "grad_norm": 0.1419287477449955, "learning_rate": 3.104927268365692e-05, "loss": 0.4999, "num_tokens": 3678571871.0, "step": 4811 }, { "epoch": 1.7628818760591765, "grad_norm": 0.14553557928115615, "learning_rate": 3.104558036812327e-05, "loss": 0.4934, "num_tokens": 3679479793.0, "step": 4812 }, { "epoch": 1.763248293867082, "grad_norm": 0.15219419468332865, "learning_rate": 3.10418875433054e-05, "loss": 0.5069, "num_tokens": 3680198592.0, "step": 4813 }, { "epoch": 1.7636147116749874, "grad_norm": 0.14731255913704167, "learning_rate": 3.103819420941125e-05, "loss": 0.4997, "num_tokens": 3680910395.0, "step": 4814 }, { "epoch": 1.7639811294828929, "grad_norm": 0.15580766457519382, "learning_rate": 3.103450036664874e-05, "loss": 0.4911, "num_tokens": 3681760879.0, "step": 4815 }, { "epoch": 1.7643475472907983, "grad_norm": 0.12794512604903982, "learning_rate": 3.103080601522584e-05, "loss": 0.4765, "num_tokens": 3682667471.0, "step": 4816 }, { "epoch": 1.7647139650987038, "grad_norm": 0.14297425460903315, "learning_rate": 3.102711115535057e-05, "loss": 0.4906, "num_tokens": 3683340100.0, "step": 4817 }, { "epoch": 1.7650803829066093, "grad_norm": 0.15011281084791764, "learning_rate": 3.1023415787230943e-05, "loss": 0.5036, "num_tokens": 3684185129.0, "step": 4818 }, { "epoch": 1.7654468007145148, "grad_norm": 0.14645466911274133, "learning_rate": 3.101971991107501e-05, "loss": 0.5212, "num_tokens": 3684876906.0, "step": 4819 }, { "epoch": 1.7658132185224202, "grad_norm": 0.1503590951396783, "learning_rate": 3.1016023527090875e-05, "loss": 0.4948, "num_tokens": 3685649119.0, "step": 4820 }, { "epoch": 1.7661796363303257, "grad_norm": 0.17318872757005027, "learning_rate": 3.101232663548664e-05, "loss": 0.4943, "num_tokens": 3686383935.0, "step": 4821 }, { "epoch": 1.7665460541382312, "grad_norm": 0.15094420538036507, "learning_rate": 3.1008629236470436e-05, "loss": 0.5016, "num_tokens": 3687065665.0, "step": 4822 }, { "epoch": 1.7669124719461364, "grad_norm": 0.16054889710424441, "learning_rate": 3.100493133025046e-05, "loss": 0.5124, "num_tokens": 3687770181.0, "step": 4823 }, { "epoch": 1.7672788897540421, "grad_norm": 0.18331269618727009, "learning_rate": 3.1001232917034886e-05, "loss": 0.4927, "num_tokens": 3688400251.0, "step": 4824 }, { "epoch": 1.7676453075619474, "grad_norm": 0.15120525970879764, "learning_rate": 3.099753399703196e-05, "loss": 0.4852, "num_tokens": 3689186036.0, "step": 4825 }, { "epoch": 1.768011725369853, "grad_norm": 0.14263397186543977, "learning_rate": 3.099383457044994e-05, "loss": 0.5367, "num_tokens": 3689993295.0, "step": 4826 }, { "epoch": 1.7683781431777583, "grad_norm": 0.1403156115890079, "learning_rate": 3.0990134637497094e-05, "loss": 0.4931, "num_tokens": 3690873057.0, "step": 4827 }, { "epoch": 1.768744560985664, "grad_norm": 0.13942888587345678, "learning_rate": 3.098643419838174e-05, "loss": 0.4933, "num_tokens": 3691612094.0, "step": 4828 }, { "epoch": 1.7691109787935693, "grad_norm": 0.16081071426109558, "learning_rate": 3.098273325331224e-05, "loss": 0.5023, "num_tokens": 3692338850.0, "step": 4829 }, { "epoch": 1.769477396601475, "grad_norm": 0.1568030093002776, "learning_rate": 3.097903180249694e-05, "loss": 0.532, "num_tokens": 3693167051.0, "step": 4830 }, { "epoch": 1.7698438144093802, "grad_norm": 0.1324777808651287, "learning_rate": 3.097532984614425e-05, "loss": 0.5164, "num_tokens": 3694035587.0, "step": 4831 }, { "epoch": 1.770210232217286, "grad_norm": 0.15000839767125765, "learning_rate": 3.09716273844626e-05, "loss": 0.4973, "num_tokens": 3694787391.0, "step": 4832 }, { "epoch": 1.7705766500251912, "grad_norm": 0.1544157674631439, "learning_rate": 3.096792441766044e-05, "loss": 0.51, "num_tokens": 3695506184.0, "step": 4833 }, { "epoch": 1.7709430678330969, "grad_norm": 0.1407364852512417, "learning_rate": 3.0964220945946255e-05, "loss": 0.4861, "num_tokens": 3696175335.0, "step": 4834 }, { "epoch": 1.771309485641002, "grad_norm": 0.15337922094190587, "learning_rate": 3.0960516969528555e-05, "loss": 0.4558, "num_tokens": 3696987354.0, "step": 4835 }, { "epoch": 1.7716759034489076, "grad_norm": 0.15112848627408554, "learning_rate": 3.0956812488615896e-05, "loss": 0.5118, "num_tokens": 3697761713.0, "step": 4836 }, { "epoch": 1.772042321256813, "grad_norm": 0.13714772054392343, "learning_rate": 3.095310750341683e-05, "loss": 0.4989, "num_tokens": 3698379709.0, "step": 4837 }, { "epoch": 1.7724087390647185, "grad_norm": 0.15394114391802732, "learning_rate": 3.094940201413997e-05, "loss": 0.4977, "num_tokens": 3699091217.0, "step": 4838 }, { "epoch": 1.772775156872624, "grad_norm": 0.13829206086865917, "learning_rate": 3.094569602099393e-05, "loss": 0.509, "num_tokens": 3699777755.0, "step": 4839 }, { "epoch": 1.7731415746805295, "grad_norm": 0.1558152283391836, "learning_rate": 3.0941989524187376e-05, "loss": 0.4784, "num_tokens": 3700435322.0, "step": 4840 }, { "epoch": 1.773507992488435, "grad_norm": 0.16866309001517915, "learning_rate": 3.0938282523928976e-05, "loss": 0.5069, "num_tokens": 3701248546.0, "step": 4841 }, { "epoch": 1.7738744102963404, "grad_norm": 0.14311389443653222, "learning_rate": 3.093457502042745e-05, "loss": 0.4902, "num_tokens": 3702051082.0, "step": 4842 }, { "epoch": 1.7742408281042459, "grad_norm": 0.14649384851651603, "learning_rate": 3.0930867013891546e-05, "loss": 0.4775, "num_tokens": 3702940610.0, "step": 4843 }, { "epoch": 1.7746072459121514, "grad_norm": 0.14013087575869804, "learning_rate": 3.092715850453002e-05, "loss": 0.4837, "num_tokens": 3703753255.0, "step": 4844 }, { "epoch": 1.7749736637200568, "grad_norm": 0.15431662214024666, "learning_rate": 3.0923449492551675e-05, "loss": 0.5202, "num_tokens": 3704583347.0, "step": 4845 }, { "epoch": 1.7753400815279623, "grad_norm": 0.13555657122400125, "learning_rate": 3.091973997816534e-05, "loss": 0.4834, "num_tokens": 3705332624.0, "step": 4846 }, { "epoch": 1.7757064993358678, "grad_norm": 0.15311411661538096, "learning_rate": 3.091602996157984e-05, "loss": 0.4941, "num_tokens": 3706189721.0, "step": 4847 }, { "epoch": 1.776072917143773, "grad_norm": 0.13829296851480288, "learning_rate": 3.0912319443004104e-05, "loss": 0.5274, "num_tokens": 3706907918.0, "step": 4848 }, { "epoch": 1.7764393349516787, "grad_norm": 0.1701222263172888, "learning_rate": 3.0908608422647e-05, "loss": 0.5427, "num_tokens": 3707701232.0, "step": 4849 }, { "epoch": 1.776805752759584, "grad_norm": 0.16374766013123365, "learning_rate": 3.090489690071748e-05, "loss": 0.504, "num_tokens": 3708478786.0, "step": 4850 }, { "epoch": 1.7771721705674897, "grad_norm": 0.177305170054796, "learning_rate": 3.090118487742452e-05, "loss": 0.5097, "num_tokens": 3709203260.0, "step": 4851 }, { "epoch": 1.777538588375395, "grad_norm": 0.16256099944501562, "learning_rate": 3.089747235297711e-05, "loss": 0.504, "num_tokens": 3709995290.0, "step": 4852 }, { "epoch": 1.7779050061833006, "grad_norm": 0.14877416607860128, "learning_rate": 3.089375932758425e-05, "loss": 0.5161, "num_tokens": 3710733631.0, "step": 4853 }, { "epoch": 1.7782714239912059, "grad_norm": 0.1654400477554566, "learning_rate": 3.089004580145503e-05, "loss": 0.4866, "num_tokens": 3711480463.0, "step": 4854 }, { "epoch": 1.7786378417991116, "grad_norm": 0.12730823191340998, "learning_rate": 3.08863317747985e-05, "loss": 0.4676, "num_tokens": 3712273943.0, "step": 4855 }, { "epoch": 1.7790042596070168, "grad_norm": 0.15012881918327062, "learning_rate": 3.088261724782377e-05, "loss": 0.5189, "num_tokens": 3713105869.0, "step": 4856 }, { "epoch": 1.7793706774149225, "grad_norm": 0.15683063535932015, "learning_rate": 3.087890222073999e-05, "loss": 0.5247, "num_tokens": 3713845435.0, "step": 4857 }, { "epoch": 1.7797370952228277, "grad_norm": 0.15135988690203656, "learning_rate": 3.0875186693756305e-05, "loss": 0.4883, "num_tokens": 3714537752.0, "step": 4858 }, { "epoch": 1.7801035130307334, "grad_norm": 0.14616964100296226, "learning_rate": 3.087147066708192e-05, "loss": 0.5229, "num_tokens": 3715368186.0, "step": 4859 }, { "epoch": 1.7804699308386387, "grad_norm": 0.15411995988298594, "learning_rate": 3.0867754140926045e-05, "loss": 0.5015, "num_tokens": 3716164325.0, "step": 4860 }, { "epoch": 1.7808363486465444, "grad_norm": 0.13977371091148313, "learning_rate": 3.086403711549794e-05, "loss": 0.5113, "num_tokens": 3716945223.0, "step": 4861 }, { "epoch": 1.7812027664544496, "grad_norm": 0.15541883921710387, "learning_rate": 3.086031959100687e-05, "loss": 0.496, "num_tokens": 3717804953.0, "step": 4862 }, { "epoch": 1.781569184262355, "grad_norm": 0.14197658282970382, "learning_rate": 3.085660156766214e-05, "loss": 0.5038, "num_tokens": 3718599094.0, "step": 4863 }, { "epoch": 1.7819356020702606, "grad_norm": 0.12669980926093613, "learning_rate": 3.08528830456731e-05, "loss": 0.4593, "num_tokens": 3719495942.0, "step": 4864 }, { "epoch": 1.782302019878166, "grad_norm": 0.14178230669869502, "learning_rate": 3.084916402524909e-05, "loss": 0.4711, "num_tokens": 3720190296.0, "step": 4865 }, { "epoch": 1.7826684376860715, "grad_norm": 0.15001039569011518, "learning_rate": 3.084544450659949e-05, "loss": 0.5214, "num_tokens": 3720912431.0, "step": 4866 }, { "epoch": 1.783034855493977, "grad_norm": 0.14082250124885765, "learning_rate": 3.0841724489933735e-05, "loss": 0.5104, "num_tokens": 3721624397.0, "step": 4867 }, { "epoch": 1.7834012733018825, "grad_norm": 0.15692848141283075, "learning_rate": 3.083800397546127e-05, "loss": 0.4965, "num_tokens": 3722428476.0, "step": 4868 }, { "epoch": 1.783767691109788, "grad_norm": 0.14807254720265492, "learning_rate": 3.083428296339156e-05, "loss": 0.4964, "num_tokens": 3723129793.0, "step": 4869 }, { "epoch": 1.7841341089176934, "grad_norm": 0.15345948084780905, "learning_rate": 3.083056145393411e-05, "loss": 0.525, "num_tokens": 3723905186.0, "step": 4870 }, { "epoch": 1.784500526725599, "grad_norm": 0.14640052166327455, "learning_rate": 3.0826839447298436e-05, "loss": 0.4921, "num_tokens": 3724546506.0, "step": 4871 }, { "epoch": 1.7848669445335044, "grad_norm": 0.13779061001211676, "learning_rate": 3.082311694369411e-05, "loss": 0.4954, "num_tokens": 3725366241.0, "step": 4872 }, { "epoch": 1.7852333623414098, "grad_norm": 0.15520572274381492, "learning_rate": 3.0819393943330706e-05, "loss": 0.4993, "num_tokens": 3726014729.0, "step": 4873 }, { "epoch": 1.7855997801493153, "grad_norm": 0.14616592281331378, "learning_rate": 3.081567044641785e-05, "loss": 0.5308, "num_tokens": 3726848483.0, "step": 4874 }, { "epoch": 1.7859661979572206, "grad_norm": 0.15811066471061333, "learning_rate": 3.081194645316516e-05, "loss": 0.4793, "num_tokens": 3727618074.0, "step": 4875 }, { "epoch": 1.7863326157651263, "grad_norm": 0.14600652507372494, "learning_rate": 3.0808221963782336e-05, "loss": 0.5079, "num_tokens": 3728340053.0, "step": 4876 }, { "epoch": 1.7866990335730315, "grad_norm": 0.14858246851619458, "learning_rate": 3.080449697847903e-05, "loss": 0.4893, "num_tokens": 3729108258.0, "step": 4877 }, { "epoch": 1.7870654513809372, "grad_norm": 0.1525927570304923, "learning_rate": 3.080077149746502e-05, "loss": 0.5091, "num_tokens": 3730002284.0, "step": 4878 }, { "epoch": 1.7874318691888424, "grad_norm": 0.13893815570915805, "learning_rate": 3.079704552095001e-05, "loss": 0.5134, "num_tokens": 3730842377.0, "step": 4879 }, { "epoch": 1.7877982869967481, "grad_norm": 0.13561460012125215, "learning_rate": 3.07933190491438e-05, "loss": 0.5224, "num_tokens": 3731570894.0, "step": 4880 }, { "epoch": 1.7881647048046534, "grad_norm": 0.17263782738606795, "learning_rate": 3.07895920822562e-05, "loss": 0.5426, "num_tokens": 3732389202.0, "step": 4881 }, { "epoch": 1.788531122612559, "grad_norm": 0.14719813571438364, "learning_rate": 3.078586462049704e-05, "loss": 0.4868, "num_tokens": 3733073707.0, "step": 4882 }, { "epoch": 1.7888975404204643, "grad_norm": 0.13710013271956767, "learning_rate": 3.07821366640762e-05, "loss": 0.4607, "num_tokens": 3733823436.0, "step": 4883 }, { "epoch": 1.78926395822837, "grad_norm": 0.12973340626494625, "learning_rate": 3.0778408213203545e-05, "loss": 0.5104, "num_tokens": 3734656631.0, "step": 4884 }, { "epoch": 1.7896303760362753, "grad_norm": 0.14512794378076913, "learning_rate": 3.0774679268089e-05, "loss": 0.4947, "num_tokens": 3735456398.0, "step": 4885 }, { "epoch": 1.789996793844181, "grad_norm": 0.1516709207734392, "learning_rate": 3.0770949828942534e-05, "loss": 0.4748, "num_tokens": 3736085419.0, "step": 4886 }, { "epoch": 1.7903632116520862, "grad_norm": 0.13470164358328598, "learning_rate": 3.07672198959741e-05, "loss": 0.5099, "num_tokens": 3737036460.0, "step": 4887 }, { "epoch": 1.7907296294599917, "grad_norm": 0.14533273608623157, "learning_rate": 3.076348946939372e-05, "loss": 0.5403, "num_tokens": 3737716943.0, "step": 4888 }, { "epoch": 1.7910960472678972, "grad_norm": 0.14855201552908934, "learning_rate": 3.0759758549411396e-05, "loss": 0.4881, "num_tokens": 3738442620.0, "step": 4889 }, { "epoch": 1.7914624650758026, "grad_norm": 0.1435843922744059, "learning_rate": 3.07560271362372e-05, "loss": 0.5066, "num_tokens": 3739242366.0, "step": 4890 }, { "epoch": 1.7918288828837081, "grad_norm": 0.1705441259938175, "learning_rate": 3.075229523008123e-05, "loss": 0.5105, "num_tokens": 3739992878.0, "step": 4891 }, { "epoch": 1.7921953006916136, "grad_norm": 0.1500503240967902, "learning_rate": 3.074856283115359e-05, "loss": 0.531, "num_tokens": 3740802195.0, "step": 4892 }, { "epoch": 1.792561718499519, "grad_norm": 0.15118530276025838, "learning_rate": 3.074482993966442e-05, "loss": 0.5251, "num_tokens": 3741521328.0, "step": 4893 }, { "epoch": 1.7929281363074245, "grad_norm": 0.14490043478180598, "learning_rate": 3.074109655582388e-05, "loss": 0.5009, "num_tokens": 3742269600.0, "step": 4894 }, { "epoch": 1.79329455411533, "grad_norm": 0.14674132190768108, "learning_rate": 3.0737362679842194e-05, "loss": 0.5068, "num_tokens": 3743142954.0, "step": 4895 }, { "epoch": 1.7936609719232355, "grad_norm": 0.1620046969296834, "learning_rate": 3.073362831192956e-05, "loss": 0.5201, "num_tokens": 3743852709.0, "step": 4896 }, { "epoch": 1.794027389731141, "grad_norm": 0.16070932573958815, "learning_rate": 3.072989345229625e-05, "loss": 0.5142, "num_tokens": 3744527653.0, "step": 4897 }, { "epoch": 1.7943938075390464, "grad_norm": 0.17544509890369236, "learning_rate": 3.072615810115252e-05, "loss": 0.5343, "num_tokens": 3745184206.0, "step": 4898 }, { "epoch": 1.794760225346952, "grad_norm": 0.14850824820694142, "learning_rate": 3.07224222587087e-05, "loss": 0.5127, "num_tokens": 3746058410.0, "step": 4899 }, { "epoch": 1.7951266431548574, "grad_norm": 0.1396648759851411, "learning_rate": 3.071868592517512e-05, "loss": 0.4909, "num_tokens": 3746856800.0, "step": 4900 }, { "epoch": 1.7954930609627628, "grad_norm": 0.14916320743482708, "learning_rate": 3.071494910076214e-05, "loss": 0.5254, "num_tokens": 3747569402.0, "step": 4901 }, { "epoch": 1.795859478770668, "grad_norm": 0.1713165207356548, "learning_rate": 3.071121178568015e-05, "loss": 0.5086, "num_tokens": 3748206795.0, "step": 4902 }, { "epoch": 1.7962258965785738, "grad_norm": 0.1462043285952255, "learning_rate": 3.070747398013957e-05, "loss": 0.516, "num_tokens": 3748995205.0, "step": 4903 }, { "epoch": 1.796592314386479, "grad_norm": 0.14321855422276494, "learning_rate": 3.070373568435084e-05, "loss": 0.5093, "num_tokens": 3749887848.0, "step": 4904 }, { "epoch": 1.7969587321943847, "grad_norm": 0.1705543404435843, "learning_rate": 3.069999689852445e-05, "loss": 0.5141, "num_tokens": 3750750118.0, "step": 4905 }, { "epoch": 1.79732515000229, "grad_norm": 0.14359469526470273, "learning_rate": 3.0696257622870884e-05, "loss": 0.5335, "num_tokens": 3751402882.0, "step": 4906 }, { "epoch": 1.7976915678101957, "grad_norm": 0.15203860320185164, "learning_rate": 3.069251785760068e-05, "loss": 0.4738, "num_tokens": 3752104516.0, "step": 4907 }, { "epoch": 1.798057985618101, "grad_norm": 0.16404794958311864, "learning_rate": 3.068877760292438e-05, "loss": 0.4971, "num_tokens": 3752868582.0, "step": 4908 }, { "epoch": 1.7984244034260066, "grad_norm": 0.12713958648650633, "learning_rate": 3.0685036859052596e-05, "loss": 0.4724, "num_tokens": 3753690919.0, "step": 4909 }, { "epoch": 1.7987908212339119, "grad_norm": 0.16169659049189966, "learning_rate": 3.068129562619591e-05, "loss": 0.5344, "num_tokens": 3754371534.0, "step": 4910 }, { "epoch": 1.7991572390418176, "grad_norm": 0.16101308430341338, "learning_rate": 3.0677553904564974e-05, "loss": 0.5119, "num_tokens": 3755149436.0, "step": 4911 }, { "epoch": 1.7995236568497228, "grad_norm": 0.15126729751509002, "learning_rate": 3.0673811694370466e-05, "loss": 0.4961, "num_tokens": 3755873097.0, "step": 4912 }, { "epoch": 1.7998900746576285, "grad_norm": 0.13049780537502406, "learning_rate": 3.0670068995823055e-05, "loss": 0.4948, "num_tokens": 3756549423.0, "step": 4913 }, { "epoch": 1.8002564924655338, "grad_norm": 0.16116337723830249, "learning_rate": 3.066632580913348e-05, "loss": 0.4681, "num_tokens": 3757299518.0, "step": 4914 }, { "epoch": 1.8006229102734392, "grad_norm": 0.14427281405172357, "learning_rate": 3.066258213451248e-05, "loss": 0.4858, "num_tokens": 3757987449.0, "step": 4915 }, { "epoch": 1.8009893280813447, "grad_norm": 0.14218436828986117, "learning_rate": 3.065883797217084e-05, "loss": 0.4905, "num_tokens": 3758735957.0, "step": 4916 }, { "epoch": 1.8013557458892502, "grad_norm": 0.14062394177144905, "learning_rate": 3.065509332231936e-05, "loss": 0.5291, "num_tokens": 3759565892.0, "step": 4917 }, { "epoch": 1.8017221636971557, "grad_norm": 0.16200618800345187, "learning_rate": 3.065134818516886e-05, "loss": 0.4705, "num_tokens": 3760307702.0, "step": 4918 }, { "epoch": 1.8020885815050611, "grad_norm": 0.14030122269381887, "learning_rate": 3.064760256093022e-05, "loss": 0.4642, "num_tokens": 3760949730.0, "step": 4919 }, { "epoch": 1.8024549993129666, "grad_norm": 0.14993861929993435, "learning_rate": 3.0643856449814325e-05, "loss": 0.4958, "num_tokens": 3761721115.0, "step": 4920 }, { "epoch": 1.802821417120872, "grad_norm": 0.15077453617247966, "learning_rate": 3.0640109852032064e-05, "loss": 0.4958, "num_tokens": 3762518124.0, "step": 4921 }, { "epoch": 1.8031878349287775, "grad_norm": 0.14961276523047304, "learning_rate": 3.06363627677944e-05, "loss": 0.5322, "num_tokens": 3763233780.0, "step": 4922 }, { "epoch": 1.803554252736683, "grad_norm": 0.15193383802771265, "learning_rate": 3.0632615197312296e-05, "loss": 0.4805, "num_tokens": 3764143366.0, "step": 4923 }, { "epoch": 1.8039206705445885, "grad_norm": 0.14222209279435766, "learning_rate": 3.062886714079674e-05, "loss": 0.5027, "num_tokens": 3764847158.0, "step": 4924 }, { "epoch": 1.804287088352494, "grad_norm": 0.14904342488357086, "learning_rate": 3.0625118598458764e-05, "loss": 0.5129, "num_tokens": 3765551600.0, "step": 4925 }, { "epoch": 1.8046535061603994, "grad_norm": 0.1444298788616528, "learning_rate": 3.0621369570509426e-05, "loss": 0.5026, "num_tokens": 3766385007.0, "step": 4926 }, { "epoch": 1.805019923968305, "grad_norm": 0.14063366915824566, "learning_rate": 3.061762005715978e-05, "loss": 0.4977, "num_tokens": 3767205691.0, "step": 4927 }, { "epoch": 1.8053863417762104, "grad_norm": 0.13299555488377887, "learning_rate": 3.061387005862095e-05, "loss": 0.5065, "num_tokens": 3767910195.0, "step": 4928 }, { "epoch": 1.8057527595841156, "grad_norm": 0.1521143843441064, "learning_rate": 3.0610119575104064e-05, "loss": 0.4938, "num_tokens": 3768671245.0, "step": 4929 }, { "epoch": 1.8061191773920213, "grad_norm": 0.13613796614607024, "learning_rate": 3.0606368606820276e-05, "loss": 0.4993, "num_tokens": 3769449008.0, "step": 4930 }, { "epoch": 1.8064855951999266, "grad_norm": 0.13333823999836253, "learning_rate": 3.060261715398078e-05, "loss": 0.4857, "num_tokens": 3770331036.0, "step": 4931 }, { "epoch": 1.8068520130078323, "grad_norm": 0.1506664705472753, "learning_rate": 3.059886521679678e-05, "loss": 0.4852, "num_tokens": 3771210787.0, "step": 4932 }, { "epoch": 1.8072184308157375, "grad_norm": 0.1381283496515039, "learning_rate": 3.059511279547954e-05, "loss": 0.4792, "num_tokens": 3772003508.0, "step": 4933 }, { "epoch": 1.8075848486236432, "grad_norm": 0.16796321033978132, "learning_rate": 3.0591359890240316e-05, "loss": 0.5266, "num_tokens": 3772647324.0, "step": 4934 }, { "epoch": 1.8079512664315485, "grad_norm": 0.14167145706141349, "learning_rate": 3.058760650129039e-05, "loss": 0.4908, "num_tokens": 3773346703.0, "step": 4935 }, { "epoch": 1.8083176842394542, "grad_norm": 0.15121697608879933, "learning_rate": 3.05838526288411e-05, "loss": 0.5015, "num_tokens": 3774077105.0, "step": 4936 }, { "epoch": 1.8086841020473594, "grad_norm": 0.1447598195447862, "learning_rate": 3.05800982731038e-05, "loss": 0.4853, "num_tokens": 3774886131.0, "step": 4937 }, { "epoch": 1.809050519855265, "grad_norm": 0.1430043421895046, "learning_rate": 3.0576343434289864e-05, "loss": 0.4789, "num_tokens": 3775716227.0, "step": 4938 }, { "epoch": 1.8094169376631704, "grad_norm": 0.13519190045010557, "learning_rate": 3.0572588112610686e-05, "loss": 0.5115, "num_tokens": 3776445012.0, "step": 4939 }, { "epoch": 1.809783355471076, "grad_norm": 0.1634064192951335, "learning_rate": 3.0568832308277714e-05, "loss": 0.5148, "num_tokens": 3777228736.0, "step": 4940 }, { "epoch": 1.8101497732789813, "grad_norm": 0.14695378833435305, "learning_rate": 3.0565076021502396e-05, "loss": 0.4827, "num_tokens": 3778091754.0, "step": 4941 }, { "epoch": 1.8105161910868868, "grad_norm": 0.13976276572771604, "learning_rate": 3.056131925249622e-05, "loss": 0.4692, "num_tokens": 3778824462.0, "step": 4942 }, { "epoch": 1.8108826088947922, "grad_norm": 0.1384053281333236, "learning_rate": 3.05575620014707e-05, "loss": 0.503, "num_tokens": 3779628045.0, "step": 4943 }, { "epoch": 1.8112490267026977, "grad_norm": 0.15503277503374513, "learning_rate": 3.055380426863739e-05, "loss": 0.5031, "num_tokens": 3780403067.0, "step": 4944 }, { "epoch": 1.8116154445106032, "grad_norm": 0.1537682757515496, "learning_rate": 3.0550046054207835e-05, "loss": 0.5265, "num_tokens": 3781116988.0, "step": 4945 }, { "epoch": 1.8119818623185087, "grad_norm": 0.15564592633001506, "learning_rate": 3.054628735839365e-05, "loss": 0.5385, "num_tokens": 3781833900.0, "step": 4946 }, { "epoch": 1.8123482801264141, "grad_norm": 0.14630990757483928, "learning_rate": 3.0542528181406455e-05, "loss": 0.4896, "num_tokens": 3782748812.0, "step": 4947 }, { "epoch": 1.8127146979343196, "grad_norm": 0.15420227104170298, "learning_rate": 3.053876852345788e-05, "loss": 0.5108, "num_tokens": 3783558043.0, "step": 4948 }, { "epoch": 1.813081115742225, "grad_norm": 0.14471512934292302, "learning_rate": 3.0535008384759614e-05, "loss": 0.5106, "num_tokens": 3784368774.0, "step": 4949 }, { "epoch": 1.8134475335501306, "grad_norm": 0.19684560027455467, "learning_rate": 3.053124776552337e-05, "loss": 0.4938, "num_tokens": 3785263479.0, "step": 4950 }, { "epoch": 1.813813951358036, "grad_norm": 0.14614391779386932, "learning_rate": 3.052748666596086e-05, "loss": 0.5349, "num_tokens": 3786053605.0, "step": 4951 }, { "epoch": 1.8141803691659415, "grad_norm": 0.16571761142941585, "learning_rate": 3.052372508628386e-05, "loss": 0.5222, "num_tokens": 3786719961.0, "step": 4952 }, { "epoch": 1.814546786973847, "grad_norm": 0.1754726086384033, "learning_rate": 3.0519963026704135e-05, "loss": 0.475, "num_tokens": 3787413474.0, "step": 4953 }, { "epoch": 1.8149132047817524, "grad_norm": 0.15278844777282224, "learning_rate": 3.0516200487433506e-05, "loss": 0.5298, "num_tokens": 3788185232.0, "step": 4954 }, { "epoch": 1.815279622589658, "grad_norm": 0.1804986161507328, "learning_rate": 3.051243746868382e-05, "loss": 0.5524, "num_tokens": 3788845258.0, "step": 4955 }, { "epoch": 1.8156460403975632, "grad_norm": 0.16244511025088743, "learning_rate": 3.0508673970666928e-05, "loss": 0.5184, "num_tokens": 3789561766.0, "step": 4956 }, { "epoch": 1.8160124582054689, "grad_norm": 0.13664081663666763, "learning_rate": 3.0504909993594727e-05, "loss": 0.4827, "num_tokens": 3790348685.0, "step": 4957 }, { "epoch": 1.816378876013374, "grad_norm": 0.15077117753204833, "learning_rate": 3.0501145537679142e-05, "loss": 0.4731, "num_tokens": 3791109839.0, "step": 4958 }, { "epoch": 1.8167452938212798, "grad_norm": 0.18979350637808026, "learning_rate": 3.0497380603132116e-05, "loss": 0.5197, "num_tokens": 3791859310.0, "step": 4959 }, { "epoch": 1.817111711629185, "grad_norm": 0.1549090019577952, "learning_rate": 3.049361519016562e-05, "loss": 0.4655, "num_tokens": 3792500284.0, "step": 4960 }, { "epoch": 1.8174781294370908, "grad_norm": 0.14983501848471012, "learning_rate": 3.0489849298991658e-05, "loss": 0.5262, "num_tokens": 3793297434.0, "step": 4961 }, { "epoch": 1.817844547244996, "grad_norm": 0.15433672091819864, "learning_rate": 3.0486082929822253e-05, "loss": 0.4583, "num_tokens": 3794054426.0, "step": 4962 }, { "epoch": 1.8182109650529017, "grad_norm": 0.13893709098713544, "learning_rate": 3.0482316082869453e-05, "loss": 0.5107, "num_tokens": 3794722911.0, "step": 4963 }, { "epoch": 1.818577382860807, "grad_norm": 0.13541432823065713, "learning_rate": 3.047854875834536e-05, "loss": 0.4686, "num_tokens": 3795463905.0, "step": 4964 }, { "epoch": 1.8189438006687126, "grad_norm": 0.12623637745870306, "learning_rate": 3.0474780956462066e-05, "loss": 0.522, "num_tokens": 3796347303.0, "step": 4965 }, { "epoch": 1.819310218476618, "grad_norm": 0.13924290043067253, "learning_rate": 3.0471012677431702e-05, "loss": 0.4786, "num_tokens": 3797116697.0, "step": 4966 }, { "epoch": 1.8196766362845236, "grad_norm": 0.14075272503917358, "learning_rate": 3.0467243921466445e-05, "loss": 0.5379, "num_tokens": 3797927632.0, "step": 4967 }, { "epoch": 1.8200430540924288, "grad_norm": 0.1444522807367285, "learning_rate": 3.046347468877847e-05, "loss": 0.5516, "num_tokens": 3798600409.0, "step": 4968 }, { "epoch": 1.8204094719003343, "grad_norm": 0.13946040711376134, "learning_rate": 3.045970497958e-05, "loss": 0.4753, "num_tokens": 3799323107.0, "step": 4969 }, { "epoch": 1.8207758897082398, "grad_norm": 0.15634320677902572, "learning_rate": 3.0455934794083268e-05, "loss": 0.4788, "num_tokens": 3800087268.0, "step": 4970 }, { "epoch": 1.8211423075161453, "grad_norm": 0.14216206431237902, "learning_rate": 3.0452164132500552e-05, "loss": 0.4945, "num_tokens": 3800951794.0, "step": 4971 }, { "epoch": 1.8215087253240507, "grad_norm": 0.14993590075924138, "learning_rate": 3.0448392995044148e-05, "loss": 0.5225, "num_tokens": 3801783593.0, "step": 4972 }, { "epoch": 1.8218751431319562, "grad_norm": 0.19045436444017225, "learning_rate": 3.0444621381926374e-05, "loss": 0.518, "num_tokens": 3802521837.0, "step": 4973 }, { "epoch": 1.8222415609398617, "grad_norm": 0.12634175071762915, "learning_rate": 3.044084929335958e-05, "loss": 0.4843, "num_tokens": 3803367944.0, "step": 4974 }, { "epoch": 1.8226079787477671, "grad_norm": 0.172992650546024, "learning_rate": 3.043707672955615e-05, "loss": 0.5274, "num_tokens": 3804093702.0, "step": 4975 }, { "epoch": 1.8229743965556726, "grad_norm": 0.15424809155298438, "learning_rate": 3.0433303690728467e-05, "loss": 0.5093, "num_tokens": 3804868865.0, "step": 4976 }, { "epoch": 1.823340814363578, "grad_norm": 0.16193660862270723, "learning_rate": 3.042953017708898e-05, "loss": 0.5442, "num_tokens": 3805656660.0, "step": 4977 }, { "epoch": 1.8237072321714836, "grad_norm": 0.14555276155498395, "learning_rate": 3.0425756188850145e-05, "loss": 0.509, "num_tokens": 3806397484.0, "step": 4978 }, { "epoch": 1.824073649979389, "grad_norm": 0.14674275053991565, "learning_rate": 3.0421981726224432e-05, "loss": 0.5193, "num_tokens": 3807076979.0, "step": 4979 }, { "epoch": 1.8244400677872945, "grad_norm": 0.15384181861581478, "learning_rate": 3.041820678942436e-05, "loss": 0.5266, "num_tokens": 3807792795.0, "step": 4980 }, { "epoch": 1.8248064855952, "grad_norm": 0.1508776517763363, "learning_rate": 3.0414431378662463e-05, "loss": 0.5101, "num_tokens": 3808537246.0, "step": 4981 }, { "epoch": 1.8251729034031055, "grad_norm": 0.1332845558292022, "learning_rate": 3.0410655494151304e-05, "loss": 0.5064, "num_tokens": 3809345222.0, "step": 4982 }, { "epoch": 1.8255393212110107, "grad_norm": 0.14937968549529038, "learning_rate": 3.0406879136103468e-05, "loss": 0.508, "num_tokens": 3810122748.0, "step": 4983 }, { "epoch": 1.8259057390189164, "grad_norm": 0.15614117390795149, "learning_rate": 3.0403102304731584e-05, "loss": 0.5133, "num_tokens": 3810933303.0, "step": 4984 }, { "epoch": 1.8262721568268216, "grad_norm": 0.14044865882701552, "learning_rate": 3.0399325000248283e-05, "loss": 0.4988, "num_tokens": 3811711280.0, "step": 4985 }, { "epoch": 1.8266385746347273, "grad_norm": 0.1654286690688694, "learning_rate": 3.0395547222866243e-05, "loss": 0.5563, "num_tokens": 3812342044.0, "step": 4986 }, { "epoch": 1.8270049924426326, "grad_norm": 0.1476481808155469, "learning_rate": 3.0391768972798155e-05, "loss": 0.5007, "num_tokens": 3813046072.0, "step": 4987 }, { "epoch": 1.8273714102505383, "grad_norm": 0.14028219298431396, "learning_rate": 3.038799025025675e-05, "loss": 0.4838, "num_tokens": 3813784269.0, "step": 4988 }, { "epoch": 1.8277378280584435, "grad_norm": 0.14299858875395643, "learning_rate": 3.0384211055454762e-05, "loss": 0.5315, "num_tokens": 3814508100.0, "step": 4989 }, { "epoch": 1.8281042458663492, "grad_norm": 0.1370285475672487, "learning_rate": 3.038043138860498e-05, "loss": 0.4887, "num_tokens": 3815396915.0, "step": 4990 }, { "epoch": 1.8284706636742545, "grad_norm": 0.1428919212180213, "learning_rate": 3.03766512499202e-05, "loss": 0.5161, "num_tokens": 3816200442.0, "step": 4991 }, { "epoch": 1.8288370814821602, "grad_norm": 0.15376404361519552, "learning_rate": 3.0372870639613264e-05, "loss": 0.4845, "num_tokens": 3816949602.0, "step": 4992 }, { "epoch": 1.8292034992900654, "grad_norm": 0.13542987356109157, "learning_rate": 3.0369089557897014e-05, "loss": 0.5015, "num_tokens": 3817713532.0, "step": 4993 }, { "epoch": 1.8295699170979711, "grad_norm": 0.1293998708290112, "learning_rate": 3.0365308004984332e-05, "loss": 0.4811, "num_tokens": 3818444432.0, "step": 4994 }, { "epoch": 1.8299363349058764, "grad_norm": 0.14380180126635617, "learning_rate": 3.036152598108814e-05, "loss": 0.5086, "num_tokens": 3819125778.0, "step": 4995 }, { "epoch": 1.8303027527137818, "grad_norm": 0.1451951757904787, "learning_rate": 3.0357743486421362e-05, "loss": 0.5234, "num_tokens": 3819994305.0, "step": 4996 }, { "epoch": 1.8306691705216873, "grad_norm": 0.14101213907142315, "learning_rate": 3.0353960521196962e-05, "loss": 0.5174, "num_tokens": 3820708037.0, "step": 4997 }, { "epoch": 1.8310355883295928, "grad_norm": 0.13967012628547187, "learning_rate": 3.0350177085627933e-05, "loss": 0.5048, "num_tokens": 3821531516.0, "step": 4998 }, { "epoch": 1.8314020061374983, "grad_norm": 0.15542385467013456, "learning_rate": 3.0346393179927293e-05, "loss": 0.4743, "num_tokens": 3822244532.0, "step": 4999 }, { "epoch": 1.8317684239454037, "grad_norm": 0.14492990689477478, "learning_rate": 3.0342608804308065e-05, "loss": 0.5076, "num_tokens": 3822994259.0, "step": 5000 }, { "epoch": 1.8321348417533092, "grad_norm": 0.1414513421401117, "learning_rate": 3.0338823958983338e-05, "loss": 0.5243, "num_tokens": 3823805192.0, "step": 5001 }, { "epoch": 1.8325012595612147, "grad_norm": 0.16801735780411223, "learning_rate": 3.0335038644166196e-05, "loss": 0.5167, "num_tokens": 3824588012.0, "step": 5002 }, { "epoch": 1.8328676773691202, "grad_norm": 0.16422123304577882, "learning_rate": 3.033125286006976e-05, "loss": 0.4934, "num_tokens": 3825356940.0, "step": 5003 }, { "epoch": 1.8332340951770256, "grad_norm": 0.1528278030317635, "learning_rate": 3.0327466606907175e-05, "loss": 0.529, "num_tokens": 3826094688.0, "step": 5004 }, { "epoch": 1.833600512984931, "grad_norm": 0.15663454948980537, "learning_rate": 3.0323679884891625e-05, "loss": 0.4812, "num_tokens": 3826853546.0, "step": 5005 }, { "epoch": 1.8339669307928366, "grad_norm": 0.17471933852924315, "learning_rate": 3.0319892694236307e-05, "loss": 0.4916, "num_tokens": 3827734302.0, "step": 5006 }, { "epoch": 1.834333348600742, "grad_norm": 0.1714944277707366, "learning_rate": 3.0316105035154435e-05, "loss": 0.524, "num_tokens": 3828396445.0, "step": 5007 }, { "epoch": 1.8346997664086475, "grad_norm": 0.1312010595116706, "learning_rate": 3.031231690785927e-05, "loss": 0.4625, "num_tokens": 3829122825.0, "step": 5008 }, { "epoch": 1.835066184216553, "grad_norm": 0.16479259565531837, "learning_rate": 3.0308528312564095e-05, "loss": 0.4928, "num_tokens": 3829915140.0, "step": 5009 }, { "epoch": 1.8354326020244582, "grad_norm": 0.1676578923189278, "learning_rate": 3.0304739249482216e-05, "loss": 0.4895, "num_tokens": 3830581880.0, "step": 5010 }, { "epoch": 1.835799019832364, "grad_norm": 0.13451999620813543, "learning_rate": 3.0300949718826956e-05, "loss": 0.5143, "num_tokens": 3831388137.0, "step": 5011 }, { "epoch": 1.8361654376402692, "grad_norm": 0.1645170321464875, "learning_rate": 3.0297159720811682e-05, "loss": 0.5079, "num_tokens": 3832096010.0, "step": 5012 }, { "epoch": 1.8365318554481749, "grad_norm": 0.18994895071633205, "learning_rate": 3.029336925564977e-05, "loss": 0.5192, "num_tokens": 3832848067.0, "step": 5013 }, { "epoch": 1.8368982732560801, "grad_norm": 0.15065923773653717, "learning_rate": 3.0289578323554635e-05, "loss": 0.4879, "num_tokens": 3833667027.0, "step": 5014 }, { "epoch": 1.8372646910639858, "grad_norm": 0.14420860575553496, "learning_rate": 3.028578692473971e-05, "loss": 0.4751, "num_tokens": 3834525829.0, "step": 5015 }, { "epoch": 1.837631108871891, "grad_norm": 0.15333675817623135, "learning_rate": 3.0281995059418472e-05, "loss": 0.5023, "num_tokens": 3835242456.0, "step": 5016 }, { "epoch": 1.8379975266797968, "grad_norm": 0.1629329502099158, "learning_rate": 3.0278202727804396e-05, "loss": 0.53, "num_tokens": 3835923468.0, "step": 5017 }, { "epoch": 1.838363944487702, "grad_norm": 0.14067987109216887, "learning_rate": 3.0274409930111003e-05, "loss": 0.5048, "num_tokens": 3836819110.0, "step": 5018 }, { "epoch": 1.8387303622956077, "grad_norm": 0.15381122696130295, "learning_rate": 3.027061666655184e-05, "loss": 0.5274, "num_tokens": 3837592521.0, "step": 5019 }, { "epoch": 1.839096780103513, "grad_norm": 0.13941734697244373, "learning_rate": 3.0266822937340473e-05, "loss": 0.537, "num_tokens": 3838358739.0, "step": 5020 }, { "epoch": 1.8394631979114187, "grad_norm": 0.14604945401042813, "learning_rate": 3.026302874269048e-05, "loss": 0.5134, "num_tokens": 3839127858.0, "step": 5021 }, { "epoch": 1.839829615719324, "grad_norm": 0.14307298798692097, "learning_rate": 3.0259234082815502e-05, "loss": 0.4665, "num_tokens": 3839896236.0, "step": 5022 }, { "epoch": 1.8401960335272294, "grad_norm": 0.13508899216439285, "learning_rate": 3.0255438957929183e-05, "loss": 0.4855, "num_tokens": 3840696438.0, "step": 5023 }, { "epoch": 1.8405624513351349, "grad_norm": 0.13635752721972721, "learning_rate": 3.025164336824519e-05, "loss": 0.4963, "num_tokens": 3841482573.0, "step": 5024 }, { "epoch": 1.8409288691430403, "grad_norm": 0.12824125779733606, "learning_rate": 3.024784731397722e-05, "loss": 0.4705, "num_tokens": 3842262736.0, "step": 5025 }, { "epoch": 1.8412952869509458, "grad_norm": 0.14729679389429692, "learning_rate": 3.0244050795339008e-05, "loss": 0.5118, "num_tokens": 3842966991.0, "step": 5026 }, { "epoch": 1.8416617047588513, "grad_norm": 0.15781352539896829, "learning_rate": 3.0240253812544304e-05, "loss": 0.5194, "num_tokens": 3843684975.0, "step": 5027 }, { "epoch": 1.8420281225667567, "grad_norm": 0.13973019953615265, "learning_rate": 3.0236456365806876e-05, "loss": 0.5226, "num_tokens": 3844418062.0, "step": 5028 }, { "epoch": 1.8423945403746622, "grad_norm": 0.15640180933699563, "learning_rate": 3.023265845534054e-05, "loss": 0.4845, "num_tokens": 3845087989.0, "step": 5029 }, { "epoch": 1.8427609581825677, "grad_norm": 0.14724276873364806, "learning_rate": 3.0228860081359122e-05, "loss": 0.5116, "num_tokens": 3845880450.0, "step": 5030 }, { "epoch": 1.8431273759904732, "grad_norm": 0.13650416393133735, "learning_rate": 3.0225061244076467e-05, "loss": 0.4767, "num_tokens": 3846677250.0, "step": 5031 }, { "epoch": 1.8434937937983786, "grad_norm": 0.152486226063607, "learning_rate": 3.022126194370647e-05, "loss": 0.4774, "num_tokens": 3847451507.0, "step": 5032 }, { "epoch": 1.843860211606284, "grad_norm": 0.14178874082176615, "learning_rate": 3.0217462180463042e-05, "loss": 0.523, "num_tokens": 3848201723.0, "step": 5033 }, { "epoch": 1.8442266294141896, "grad_norm": 0.14815840976606312, "learning_rate": 3.021366195456011e-05, "loss": 0.4941, "num_tokens": 3848966891.0, "step": 5034 }, { "epoch": 1.8445930472220948, "grad_norm": 0.13359203446003518, "learning_rate": 3.0209861266211624e-05, "loss": 0.5078, "num_tokens": 3849675273.0, "step": 5035 }, { "epoch": 1.8449594650300005, "grad_norm": 0.15159895135516538, "learning_rate": 3.020606011563159e-05, "loss": 0.5205, "num_tokens": 3850433863.0, "step": 5036 }, { "epoch": 1.8453258828379058, "grad_norm": 0.15504287595959104, "learning_rate": 3.0202258503034008e-05, "loss": 0.52, "num_tokens": 3851202981.0, "step": 5037 }, { "epoch": 1.8456923006458115, "grad_norm": 0.14845119888057504, "learning_rate": 3.0198456428632924e-05, "loss": 0.4703, "num_tokens": 3851864370.0, "step": 5038 }, { "epoch": 1.8460587184537167, "grad_norm": 0.1335295460097388, "learning_rate": 3.01946538926424e-05, "loss": 0.4822, "num_tokens": 3852614135.0, "step": 5039 }, { "epoch": 1.8464251362616224, "grad_norm": 0.13902214965790918, "learning_rate": 3.019085089527652e-05, "loss": 0.4973, "num_tokens": 3853374666.0, "step": 5040 }, { "epoch": 1.8467915540695277, "grad_norm": 0.15313715551334453, "learning_rate": 3.018704743674941e-05, "loss": 0.5184, "num_tokens": 3854146913.0, "step": 5041 }, { "epoch": 1.8471579718774334, "grad_norm": 0.13410746662352876, "learning_rate": 3.01832435172752e-05, "loss": 0.4968, "num_tokens": 3854883033.0, "step": 5042 }, { "epoch": 1.8475243896853386, "grad_norm": 0.13909327768706248, "learning_rate": 3.0179439137068072e-05, "loss": 0.5132, "num_tokens": 3855637818.0, "step": 5043 }, { "epoch": 1.8478908074932443, "grad_norm": 0.1499012922456244, "learning_rate": 3.0175634296342214e-05, "loss": 0.5293, "num_tokens": 3856364498.0, "step": 5044 }, { "epoch": 1.8482572253011496, "grad_norm": 0.15128408202588708, "learning_rate": 3.0171828995311846e-05, "loss": 0.5248, "num_tokens": 3857023364.0, "step": 5045 }, { "epoch": 1.8486236431090552, "grad_norm": 0.1373593962680953, "learning_rate": 3.016802323419121e-05, "loss": 0.4863, "num_tokens": 3857849251.0, "step": 5046 }, { "epoch": 1.8489900609169605, "grad_norm": 0.1403173713547685, "learning_rate": 3.0164217013194586e-05, "loss": 0.485, "num_tokens": 3858678945.0, "step": 5047 }, { "epoch": 1.8493564787248662, "grad_norm": 0.13193299969486905, "learning_rate": 3.0160410332536266e-05, "loss": 0.4971, "num_tokens": 3859609184.0, "step": 5048 }, { "epoch": 1.8497228965327714, "grad_norm": 0.1317916256319055, "learning_rate": 3.0156603192430575e-05, "loss": 0.5051, "num_tokens": 3860480361.0, "step": 5049 }, { "epoch": 1.850089314340677, "grad_norm": 0.19139392692316992, "learning_rate": 3.0152795593091867e-05, "loss": 0.525, "num_tokens": 3861097228.0, "step": 5050 }, { "epoch": 1.8504557321485824, "grad_norm": 0.15353949352255009, "learning_rate": 3.0148987534734512e-05, "loss": 0.5089, "num_tokens": 3861854371.0, "step": 5051 }, { "epoch": 1.8508221499564879, "grad_norm": 0.14384098764560566, "learning_rate": 3.014517901757291e-05, "loss": 0.5179, "num_tokens": 3862605651.0, "step": 5052 }, { "epoch": 1.8511885677643933, "grad_norm": 0.14281824689033293, "learning_rate": 3.0141370041821497e-05, "loss": 0.4834, "num_tokens": 3863412501.0, "step": 5053 }, { "epoch": 1.8515549855722988, "grad_norm": 0.16337821711579953, "learning_rate": 3.0137560607694713e-05, "loss": 0.5085, "num_tokens": 3864293918.0, "step": 5054 }, { "epoch": 1.8519214033802043, "grad_norm": 0.12694568252151492, "learning_rate": 3.0133750715407045e-05, "loss": 0.4832, "num_tokens": 3865056317.0, "step": 5055 }, { "epoch": 1.8522878211881098, "grad_norm": 0.1385445200257633, "learning_rate": 3.0129940365173e-05, "loss": 0.4856, "num_tokens": 3865855791.0, "step": 5056 }, { "epoch": 1.8526542389960152, "grad_norm": 0.15117000524675966, "learning_rate": 3.0126129557207108e-05, "loss": 0.5033, "num_tokens": 3866610536.0, "step": 5057 }, { "epoch": 1.8530206568039207, "grad_norm": 0.13666159119301496, "learning_rate": 3.0122318291723914e-05, "loss": 0.4847, "num_tokens": 3867301423.0, "step": 5058 }, { "epoch": 1.8533870746118262, "grad_norm": 0.13109993741632012, "learning_rate": 3.011850656893801e-05, "loss": 0.48, "num_tokens": 3868148458.0, "step": 5059 }, { "epoch": 1.8537534924197316, "grad_norm": 0.1535333880148058, "learning_rate": 3.0114694389064e-05, "loss": 0.4938, "num_tokens": 3868822409.0, "step": 5060 }, { "epoch": 1.8541199102276371, "grad_norm": 0.14155254621746305, "learning_rate": 3.011088175231652e-05, "loss": 0.5137, "num_tokens": 3869548026.0, "step": 5061 }, { "epoch": 1.8544863280355424, "grad_norm": 0.1352592746060021, "learning_rate": 3.0107068658910227e-05, "loss": 0.4852, "num_tokens": 3870388592.0, "step": 5062 }, { "epoch": 1.854852745843448, "grad_norm": 0.13907153484846668, "learning_rate": 3.0103255109059802e-05, "loss": 0.5192, "num_tokens": 3871155322.0, "step": 5063 }, { "epoch": 1.8552191636513533, "grad_norm": 0.13190017749799168, "learning_rate": 3.0099441102979966e-05, "loss": 0.4668, "num_tokens": 3871887199.0, "step": 5064 }, { "epoch": 1.855585581459259, "grad_norm": 0.1409211876762508, "learning_rate": 3.0095626640885448e-05, "loss": 0.4913, "num_tokens": 3872662026.0, "step": 5065 }, { "epoch": 1.8559519992671643, "grad_norm": 0.13480187126504706, "learning_rate": 3.0091811722991005e-05, "loss": 0.5009, "num_tokens": 3873339111.0, "step": 5066 }, { "epoch": 1.85631841707507, "grad_norm": 0.1465051593407931, "learning_rate": 3.008799634951144e-05, "loss": 0.4714, "num_tokens": 3874058089.0, "step": 5067 }, { "epoch": 1.8566848348829752, "grad_norm": 0.14821067470058466, "learning_rate": 3.0084180520661545e-05, "loss": 0.4793, "num_tokens": 3874807752.0, "step": 5068 }, { "epoch": 1.857051252690881, "grad_norm": 0.13518469908279912, "learning_rate": 3.0080364236656176e-05, "loss": 0.4707, "num_tokens": 3875536253.0, "step": 5069 }, { "epoch": 1.8574176704987861, "grad_norm": 0.15102597828606715, "learning_rate": 3.0076547497710195e-05, "loss": 0.5104, "num_tokens": 3876266948.0, "step": 5070 }, { "epoch": 1.8577840883066918, "grad_norm": 0.137388826602856, "learning_rate": 3.0072730304038478e-05, "loss": 0.4995, "num_tokens": 3877149398.0, "step": 5071 }, { "epoch": 1.858150506114597, "grad_norm": 0.15293539918336088, "learning_rate": 3.0068912655855957e-05, "loss": 0.5235, "num_tokens": 3877836750.0, "step": 5072 }, { "epoch": 1.8585169239225028, "grad_norm": 0.1455802122183199, "learning_rate": 3.0065094553377566e-05, "loss": 0.5353, "num_tokens": 3878512847.0, "step": 5073 }, { "epoch": 1.858883341730408, "grad_norm": 0.14907860671218873, "learning_rate": 3.0061275996818274e-05, "loss": 0.5113, "num_tokens": 3879343874.0, "step": 5074 }, { "epoch": 1.8592497595383135, "grad_norm": 0.13165849120181222, "learning_rate": 3.0057456986393077e-05, "loss": 0.4647, "num_tokens": 3880206136.0, "step": 5075 }, { "epoch": 1.859616177346219, "grad_norm": 0.16960718807804523, "learning_rate": 3.005363752231698e-05, "loss": 0.5019, "num_tokens": 3881041792.0, "step": 5076 }, { "epoch": 1.8599825951541245, "grad_norm": 0.12775743890414526, "learning_rate": 3.0049817604805036e-05, "loss": 0.4556, "num_tokens": 3881815183.0, "step": 5077 }, { "epoch": 1.86034901296203, "grad_norm": 0.1828150357269809, "learning_rate": 3.0045997234072315e-05, "loss": 0.5072, "num_tokens": 3882512818.0, "step": 5078 }, { "epoch": 1.8607154307699354, "grad_norm": 0.14343843753610883, "learning_rate": 3.0042176410333913e-05, "loss": 0.5294, "num_tokens": 3883310297.0, "step": 5079 }, { "epoch": 1.8610818485778409, "grad_norm": 0.16681261753931392, "learning_rate": 3.0038355133804938e-05, "loss": 0.5122, "num_tokens": 3884052387.0, "step": 5080 }, { "epoch": 1.8614482663857463, "grad_norm": 0.1679943228537294, "learning_rate": 3.003453340470055e-05, "loss": 0.4847, "num_tokens": 3884687167.0, "step": 5081 }, { "epoch": 1.8618146841936518, "grad_norm": 0.1459615643081098, "learning_rate": 3.0030711223235913e-05, "loss": 0.4868, "num_tokens": 3885442025.0, "step": 5082 }, { "epoch": 1.8621811020015573, "grad_norm": 0.17470768083538624, "learning_rate": 3.002688858962622e-05, "loss": 0.4683, "num_tokens": 3886141371.0, "step": 5083 }, { "epoch": 1.8625475198094628, "grad_norm": 0.14327620544253802, "learning_rate": 3.0023065504086703e-05, "loss": 0.5413, "num_tokens": 3886947223.0, "step": 5084 }, { "epoch": 1.8629139376173682, "grad_norm": 0.17647899939680367, "learning_rate": 3.0019241966832596e-05, "loss": 0.4747, "num_tokens": 3887707008.0, "step": 5085 }, { "epoch": 1.8632803554252737, "grad_norm": 0.15206046184166225, "learning_rate": 3.0015417978079184e-05, "loss": 0.5209, "num_tokens": 3888447399.0, "step": 5086 }, { "epoch": 1.8636467732331792, "grad_norm": 0.14453982069599547, "learning_rate": 3.001159353804176e-05, "loss": 0.5188, "num_tokens": 3889233656.0, "step": 5087 }, { "epoch": 1.8640131910410846, "grad_norm": 0.16369307812664224, "learning_rate": 3.0007768646935647e-05, "loss": 0.5205, "num_tokens": 3889983038.0, "step": 5088 }, { "epoch": 1.86437960884899, "grad_norm": 0.13974761148734569, "learning_rate": 3.0003943304976198e-05, "loss": 0.4987, "num_tokens": 3890664636.0, "step": 5089 }, { "epoch": 1.8647460266568956, "grad_norm": 0.15709948623026943, "learning_rate": 3.0000117512378772e-05, "loss": 0.5122, "num_tokens": 3891428299.0, "step": 5090 }, { "epoch": 1.8651124444648008, "grad_norm": 0.14537433399041821, "learning_rate": 2.9996291269358796e-05, "loss": 0.5093, "num_tokens": 3892237632.0, "step": 5091 }, { "epoch": 1.8654788622727065, "grad_norm": 0.1539617885106516, "learning_rate": 2.9992464576131678e-05, "loss": 0.5401, "num_tokens": 3892963333.0, "step": 5092 }, { "epoch": 1.8658452800806118, "grad_norm": 0.17453834964990617, "learning_rate": 2.998863743291286e-05, "loss": 0.5081, "num_tokens": 3893729386.0, "step": 5093 }, { "epoch": 1.8662116978885175, "grad_norm": 0.14718935418822846, "learning_rate": 2.9984809839917836e-05, "loss": 0.4912, "num_tokens": 3894471986.0, "step": 5094 }, { "epoch": 1.8665781156964227, "grad_norm": 0.15405067905173572, "learning_rate": 2.99809817973621e-05, "loss": 0.51, "num_tokens": 3895294691.0, "step": 5095 }, { "epoch": 1.8669445335043284, "grad_norm": 0.15197546788002037, "learning_rate": 2.997715330546118e-05, "loss": 0.5141, "num_tokens": 3896043864.0, "step": 5096 }, { "epoch": 1.8673109513122337, "grad_norm": 0.13791729142401413, "learning_rate": 2.9973324364430615e-05, "loss": 0.5102, "num_tokens": 3896819848.0, "step": 5097 }, { "epoch": 1.8676773691201394, "grad_norm": 0.15596409586797158, "learning_rate": 2.9969494974486004e-05, "loss": 0.5103, "num_tokens": 3897659889.0, "step": 5098 }, { "epoch": 1.8680437869280446, "grad_norm": 0.30426323047460574, "learning_rate": 2.996566513584293e-05, "loss": 0.5056, "num_tokens": 3898490520.0, "step": 5099 }, { "epoch": 1.8684102047359503, "grad_norm": 0.1689506984446866, "learning_rate": 2.9961834848717033e-05, "loss": 0.4989, "num_tokens": 3899220455.0, "step": 5100 }, { "epoch": 1.8687766225438556, "grad_norm": 0.13525612217029098, "learning_rate": 2.9958004113323956e-05, "loss": 0.5012, "num_tokens": 3900039516.0, "step": 5101 }, { "epoch": 1.869143040351761, "grad_norm": 0.15810402484331468, "learning_rate": 2.995417292987939e-05, "loss": 0.492, "num_tokens": 3900672963.0, "step": 5102 }, { "epoch": 1.8695094581596665, "grad_norm": 0.14585956334746475, "learning_rate": 2.9950341298599025e-05, "loss": 0.5044, "num_tokens": 3901433794.0, "step": 5103 }, { "epoch": 1.869875875967572, "grad_norm": 0.1442824007500106, "learning_rate": 2.9946509219698595e-05, "loss": 0.52, "num_tokens": 3902267385.0, "step": 5104 }, { "epoch": 1.8702422937754775, "grad_norm": 0.1434734162270654, "learning_rate": 2.9942676693393857e-05, "loss": 0.4859, "num_tokens": 3903147411.0, "step": 5105 }, { "epoch": 1.870608711583383, "grad_norm": 0.16342771410339701, "learning_rate": 2.993884371990059e-05, "loss": 0.513, "num_tokens": 3904035631.0, "step": 5106 }, { "epoch": 1.8709751293912884, "grad_norm": 0.13698243548780858, "learning_rate": 2.9935010299434578e-05, "loss": 0.506, "num_tokens": 3904893076.0, "step": 5107 }, { "epoch": 1.8713415471991939, "grad_norm": 0.15508637879969694, "learning_rate": 2.9931176432211684e-05, "loss": 0.4939, "num_tokens": 3905635229.0, "step": 5108 }, { "epoch": 1.8717079650070994, "grad_norm": 0.14501231239534057, "learning_rate": 2.992734211844774e-05, "loss": 0.5182, "num_tokens": 3906375519.0, "step": 5109 }, { "epoch": 1.8720743828150048, "grad_norm": 0.23216434034340558, "learning_rate": 2.992350735835863e-05, "loss": 0.5177, "num_tokens": 3907264236.0, "step": 5110 }, { "epoch": 1.8724408006229103, "grad_norm": 0.15331945506590774, "learning_rate": 2.991967215216026e-05, "loss": 0.5111, "num_tokens": 3908024206.0, "step": 5111 }, { "epoch": 1.8728072184308158, "grad_norm": 0.1349426914631928, "learning_rate": 2.9915836500068554e-05, "loss": 0.4809, "num_tokens": 3908842379.0, "step": 5112 }, { "epoch": 1.8731736362387212, "grad_norm": 0.14134626901112482, "learning_rate": 2.991200040229948e-05, "loss": 0.4922, "num_tokens": 3909637125.0, "step": 5113 }, { "epoch": 1.8735400540466267, "grad_norm": 0.12792722186651703, "learning_rate": 2.9908163859069006e-05, "loss": 0.4713, "num_tokens": 3910509121.0, "step": 5114 }, { "epoch": 1.8739064718545322, "grad_norm": 0.1413644621805016, "learning_rate": 2.990432687059314e-05, "loss": 0.4878, "num_tokens": 3911291844.0, "step": 5115 }, { "epoch": 1.8742728896624374, "grad_norm": 0.14812037707472955, "learning_rate": 2.990048943708792e-05, "loss": 0.5417, "num_tokens": 3912017575.0, "step": 5116 }, { "epoch": 1.8746393074703431, "grad_norm": 0.14049332009161436, "learning_rate": 2.9896651558769387e-05, "loss": 0.4857, "num_tokens": 3912861163.0, "step": 5117 }, { "epoch": 1.8750057252782484, "grad_norm": 0.16150526629895326, "learning_rate": 2.989281323585363e-05, "loss": 0.4835, "num_tokens": 3913483987.0, "step": 5118 }, { "epoch": 1.875372143086154, "grad_norm": 0.1412693331827959, "learning_rate": 2.9888974468556756e-05, "loss": 0.4718, "num_tokens": 3914368007.0, "step": 5119 }, { "epoch": 1.8757385608940593, "grad_norm": 0.13393472818720337, "learning_rate": 2.9885135257094884e-05, "loss": 0.5022, "num_tokens": 3915104427.0, "step": 5120 }, { "epoch": 1.876104978701965, "grad_norm": 0.1507027665027032, "learning_rate": 2.988129560168418e-05, "loss": 0.5108, "num_tokens": 3915836480.0, "step": 5121 }, { "epoch": 1.8764713965098703, "grad_norm": 0.13020490667322485, "learning_rate": 2.987745550254083e-05, "loss": 0.5061, "num_tokens": 3916583329.0, "step": 5122 }, { "epoch": 1.876837814317776, "grad_norm": 0.1301947983054632, "learning_rate": 2.987361495988102e-05, "loss": 0.524, "num_tokens": 3917393344.0, "step": 5123 }, { "epoch": 1.8772042321256812, "grad_norm": 0.13281241130540944, "learning_rate": 2.9869773973920996e-05, "loss": 0.5173, "num_tokens": 3918171035.0, "step": 5124 }, { "epoch": 1.877570649933587, "grad_norm": 0.15989560495904223, "learning_rate": 2.986593254487701e-05, "loss": 0.5135, "num_tokens": 3918836073.0, "step": 5125 }, { "epoch": 1.8779370677414922, "grad_norm": 0.1456965492563922, "learning_rate": 2.986209067296533e-05, "loss": 0.4998, "num_tokens": 3919672394.0, "step": 5126 }, { "epoch": 1.8783034855493979, "grad_norm": 0.13795051646750578, "learning_rate": 2.9858248358402286e-05, "loss": 0.4703, "num_tokens": 3920428657.0, "step": 5127 }, { "epoch": 1.878669903357303, "grad_norm": 0.16568632199207667, "learning_rate": 2.9854405601404184e-05, "loss": 0.4937, "num_tokens": 3921297851.0, "step": 5128 }, { "epoch": 1.8790363211652086, "grad_norm": 0.14883664608224598, "learning_rate": 2.9850562402187386e-05, "loss": 0.5016, "num_tokens": 3922079638.0, "step": 5129 }, { "epoch": 1.879402738973114, "grad_norm": 0.15418373445872527, "learning_rate": 2.984671876096828e-05, "loss": 0.4865, "num_tokens": 3922911752.0, "step": 5130 }, { "epoch": 1.8797691567810195, "grad_norm": 0.1388612915799317, "learning_rate": 2.984287467796326e-05, "loss": 0.4797, "num_tokens": 3923670921.0, "step": 5131 }, { "epoch": 1.880135574588925, "grad_norm": 0.13309616541937355, "learning_rate": 2.9839030153388758e-05, "loss": 0.5185, "num_tokens": 3924453752.0, "step": 5132 }, { "epoch": 1.8805019923968305, "grad_norm": 0.1322522845703682, "learning_rate": 2.9835185187461237e-05, "loss": 0.4868, "num_tokens": 3925214979.0, "step": 5133 }, { "epoch": 1.880868410204736, "grad_norm": 0.1398306671869653, "learning_rate": 2.9831339780397167e-05, "loss": 0.5037, "num_tokens": 3926012023.0, "step": 5134 }, { "epoch": 1.8812348280126414, "grad_norm": 0.15330127532133161, "learning_rate": 2.982749393241305e-05, "loss": 0.5125, "num_tokens": 3926849809.0, "step": 5135 }, { "epoch": 1.8816012458205469, "grad_norm": 0.15694739934332064, "learning_rate": 2.9823647643725428e-05, "loss": 0.5301, "num_tokens": 3927578082.0, "step": 5136 }, { "epoch": 1.8819676636284524, "grad_norm": 0.1329566777931082, "learning_rate": 2.9819800914550843e-05, "loss": 0.4906, "num_tokens": 3928267979.0, "step": 5137 }, { "epoch": 1.8823340814363578, "grad_norm": 0.15296055429403932, "learning_rate": 2.9815953745105876e-05, "loss": 0.51, "num_tokens": 3928983797.0, "step": 5138 }, { "epoch": 1.8827004992442633, "grad_norm": 0.1598313661022424, "learning_rate": 2.981210613560713e-05, "loss": 0.5502, "num_tokens": 3929645920.0, "step": 5139 }, { "epoch": 1.8830669170521688, "grad_norm": 0.13484153677410343, "learning_rate": 2.9808258086271236e-05, "loss": 0.4899, "num_tokens": 3930479309.0, "step": 5140 }, { "epoch": 1.8834333348600742, "grad_norm": 0.14753568645967569, "learning_rate": 2.980440959731484e-05, "loss": 0.5324, "num_tokens": 3931120490.0, "step": 5141 }, { "epoch": 1.8837997526679797, "grad_norm": 0.14563985264663035, "learning_rate": 2.980056066895464e-05, "loss": 0.4843, "num_tokens": 3931905856.0, "step": 5142 }, { "epoch": 1.884166170475885, "grad_norm": 0.13993778068299806, "learning_rate": 2.9796711301407305e-05, "loss": 0.4806, "num_tokens": 3932733808.0, "step": 5143 }, { "epoch": 1.8845325882837907, "grad_norm": 0.13353680387661068, "learning_rate": 2.9792861494889598e-05, "loss": 0.4982, "num_tokens": 3933597198.0, "step": 5144 }, { "epoch": 1.884899006091696, "grad_norm": 0.14098958869556336, "learning_rate": 2.9789011249618233e-05, "loss": 0.5, "num_tokens": 3934370894.0, "step": 5145 }, { "epoch": 1.8852654238996016, "grad_norm": 0.15662922575713448, "learning_rate": 2.9785160565810017e-05, "loss": 0.5078, "num_tokens": 3935061272.0, "step": 5146 }, { "epoch": 1.8856318417075069, "grad_norm": 0.15907266528441322, "learning_rate": 2.9781309443681743e-05, "loss": 0.5097, "num_tokens": 3935836367.0, "step": 5147 }, { "epoch": 1.8859982595154126, "grad_norm": 0.14166212395428474, "learning_rate": 2.977745788345023e-05, "loss": 0.4972, "num_tokens": 3936663100.0, "step": 5148 }, { "epoch": 1.8863646773233178, "grad_norm": 0.16948134177294558, "learning_rate": 2.9773605885332334e-05, "loss": 0.5164, "num_tokens": 3937526061.0, "step": 5149 }, { "epoch": 1.8867310951312235, "grad_norm": 0.1358483743368159, "learning_rate": 2.9769753449544934e-05, "loss": 0.4918, "num_tokens": 3938338848.0, "step": 5150 }, { "epoch": 1.8870975129391288, "grad_norm": 0.1715294421189931, "learning_rate": 2.9765900576304916e-05, "loss": 0.5437, "num_tokens": 3938997506.0, "step": 5151 }, { "epoch": 1.8874639307470344, "grad_norm": 0.1620683962380283, "learning_rate": 2.9762047265829218e-05, "loss": 0.5024, "num_tokens": 3939849886.0, "step": 5152 }, { "epoch": 1.8878303485549397, "grad_norm": 0.16219078840490322, "learning_rate": 2.9758193518334785e-05, "loss": 0.5052, "num_tokens": 3940515498.0, "step": 5153 }, { "epoch": 1.8881967663628454, "grad_norm": 0.15105844006762092, "learning_rate": 2.975433933403859e-05, "loss": 0.5056, "num_tokens": 3941192734.0, "step": 5154 }, { "epoch": 1.8885631841707506, "grad_norm": 0.13386635878503692, "learning_rate": 2.9750484713157627e-05, "loss": 0.5345, "num_tokens": 3942117706.0, "step": 5155 }, { "epoch": 1.8889296019786561, "grad_norm": 0.1309930199809368, "learning_rate": 2.9746629655908928e-05, "loss": 0.5031, "num_tokens": 3942933455.0, "step": 5156 }, { "epoch": 1.8892960197865616, "grad_norm": 0.13942179256882367, "learning_rate": 2.9742774162509537e-05, "loss": 0.5082, "num_tokens": 3943729932.0, "step": 5157 }, { "epoch": 1.889662437594467, "grad_norm": 0.13352999960534598, "learning_rate": 2.9738918233176515e-05, "loss": 0.4768, "num_tokens": 3944441727.0, "step": 5158 }, { "epoch": 1.8900288554023725, "grad_norm": 0.1303645720981244, "learning_rate": 2.973506186812698e-05, "loss": 0.4948, "num_tokens": 3945212639.0, "step": 5159 }, { "epoch": 1.890395273210278, "grad_norm": 0.14392808735395907, "learning_rate": 2.9731205067578038e-05, "loss": 0.5125, "num_tokens": 3945954159.0, "step": 5160 }, { "epoch": 1.8907616910181835, "grad_norm": 0.14682299976953198, "learning_rate": 2.9727347831746836e-05, "loss": 0.5212, "num_tokens": 3946788668.0, "step": 5161 }, { "epoch": 1.891128108826089, "grad_norm": 0.14157015084610666, "learning_rate": 2.9723490160850544e-05, "loss": 0.5126, "num_tokens": 3947541860.0, "step": 5162 }, { "epoch": 1.8914945266339944, "grad_norm": 0.14018688104593532, "learning_rate": 2.971963205510636e-05, "loss": 0.4969, "num_tokens": 3948315985.0, "step": 5163 }, { "epoch": 1.8918609444419, "grad_norm": 0.1385372364792693, "learning_rate": 2.9715773514731504e-05, "loss": 0.4801, "num_tokens": 3949081757.0, "step": 5164 }, { "epoch": 1.8922273622498054, "grad_norm": 0.14443311701848785, "learning_rate": 2.9711914539943216e-05, "loss": 0.4789, "num_tokens": 3949931905.0, "step": 5165 }, { "epoch": 1.8925937800577108, "grad_norm": 0.13132329908725024, "learning_rate": 2.9708055130958762e-05, "loss": 0.4927, "num_tokens": 3950744976.0, "step": 5166 }, { "epoch": 1.8929601978656163, "grad_norm": 0.1511771992183778, "learning_rate": 2.970419528799544e-05, "loss": 0.4852, "num_tokens": 3951395471.0, "step": 5167 }, { "epoch": 1.8933266156735218, "grad_norm": 0.14915868573650232, "learning_rate": 2.970033501127056e-05, "loss": 0.4875, "num_tokens": 3952071684.0, "step": 5168 }, { "epoch": 1.8936930334814273, "grad_norm": 0.1449828309652795, "learning_rate": 2.969647430100147e-05, "loss": 0.5009, "num_tokens": 3952890672.0, "step": 5169 }, { "epoch": 1.8940594512893325, "grad_norm": 0.1436816769542011, "learning_rate": 2.969261315740553e-05, "loss": 0.5093, "num_tokens": 3953669037.0, "step": 5170 }, { "epoch": 1.8944258690972382, "grad_norm": 0.14276025955800067, "learning_rate": 2.968875158070014e-05, "loss": 0.5124, "num_tokens": 3954391456.0, "step": 5171 }, { "epoch": 1.8947922869051435, "grad_norm": 0.1312611253197528, "learning_rate": 2.9684889571102702e-05, "loss": 0.4908, "num_tokens": 3955183325.0, "step": 5172 }, { "epoch": 1.8951587047130491, "grad_norm": 0.15493517954803224, "learning_rate": 2.9681027128830662e-05, "loss": 0.4909, "num_tokens": 3955954410.0, "step": 5173 }, { "epoch": 1.8955251225209544, "grad_norm": 0.15556851312611045, "learning_rate": 2.9677164254101484e-05, "loss": 0.532, "num_tokens": 3956661163.0, "step": 5174 }, { "epoch": 1.89589154032886, "grad_norm": 0.15456299874488358, "learning_rate": 2.9673300947132647e-05, "loss": 0.5112, "num_tokens": 3957401994.0, "step": 5175 }, { "epoch": 1.8962579581367653, "grad_norm": 0.17124915951434605, "learning_rate": 2.9669437208141673e-05, "loss": 0.5164, "num_tokens": 3958150349.0, "step": 5176 }, { "epoch": 1.896624375944671, "grad_norm": 0.14828855976576777, "learning_rate": 2.96655730373461e-05, "loss": 0.4552, "num_tokens": 3958948972.0, "step": 5177 }, { "epoch": 1.8969907937525763, "grad_norm": 0.16182080028001714, "learning_rate": 2.966170843496348e-05, "loss": 0.4788, "num_tokens": 3959755605.0, "step": 5178 }, { "epoch": 1.897357211560482, "grad_norm": 0.1396532112613494, "learning_rate": 2.9657843401211393e-05, "loss": 0.4917, "num_tokens": 3960628241.0, "step": 5179 }, { "epoch": 1.8977236293683872, "grad_norm": 0.16781385433567983, "learning_rate": 2.9653977936307467e-05, "loss": 0.5118, "num_tokens": 3961325163.0, "step": 5180 }, { "epoch": 1.898090047176293, "grad_norm": 0.15916286941773286, "learning_rate": 2.9650112040469312e-05, "loss": 0.4727, "num_tokens": 3962130872.0, "step": 5181 }, { "epoch": 1.8984564649841982, "grad_norm": 0.16815731342803517, "learning_rate": 2.964624571391461e-05, "loss": 0.5118, "num_tokens": 3962846404.0, "step": 5182 }, { "epoch": 1.8988228827921037, "grad_norm": 0.15505083274080922, "learning_rate": 2.964237895686102e-05, "loss": 0.5245, "num_tokens": 3963585788.0, "step": 5183 }, { "epoch": 1.8991893006000091, "grad_norm": 0.1758250670389978, "learning_rate": 2.9638511769526262e-05, "loss": 0.5217, "num_tokens": 3964258645.0, "step": 5184 }, { "epoch": 1.8995557184079146, "grad_norm": 0.16686457542666372, "learning_rate": 2.9634644152128066e-05, "loss": 0.4878, "num_tokens": 3964958560.0, "step": 5185 }, { "epoch": 1.89992213621582, "grad_norm": 0.19157156849663803, "learning_rate": 2.9630776104884185e-05, "loss": 0.469, "num_tokens": 3965691046.0, "step": 5186 }, { "epoch": 1.9002885540237255, "grad_norm": 0.14731730936189671, "learning_rate": 2.962690762801239e-05, "loss": 0.4718, "num_tokens": 3966477804.0, "step": 5187 }, { "epoch": 1.900654971831631, "grad_norm": 0.17457543442802811, "learning_rate": 2.96230387217305e-05, "loss": 0.5024, "num_tokens": 3967353380.0, "step": 5188 }, { "epoch": 1.9010213896395365, "grad_norm": 0.19548808820209462, "learning_rate": 2.9619169386256326e-05, "loss": 0.5119, "num_tokens": 3968095014.0, "step": 5189 }, { "epoch": 1.901387807447442, "grad_norm": 0.1932732198946662, "learning_rate": 2.961529962180772e-05, "loss": 0.5134, "num_tokens": 3968772170.0, "step": 5190 }, { "epoch": 1.9017542252553474, "grad_norm": 0.18661606975335307, "learning_rate": 2.961142942860258e-05, "loss": 0.5087, "num_tokens": 3969591238.0, "step": 5191 }, { "epoch": 1.902120643063253, "grad_norm": 0.15003666505538277, "learning_rate": 2.9607558806858777e-05, "loss": 0.4843, "num_tokens": 3970307060.0, "step": 5192 }, { "epoch": 1.9024870608711584, "grad_norm": 0.17498616829787583, "learning_rate": 2.960368775679425e-05, "loss": 0.4916, "num_tokens": 3971136187.0, "step": 5193 }, { "epoch": 1.9028534786790638, "grad_norm": 0.19020868003340413, "learning_rate": 2.959981627862694e-05, "loss": 0.4902, "num_tokens": 3971901508.0, "step": 5194 }, { "epoch": 1.9032198964869693, "grad_norm": 0.16583759254205274, "learning_rate": 2.959594437257482e-05, "loss": 0.4953, "num_tokens": 3972841745.0, "step": 5195 }, { "epoch": 1.9035863142948748, "grad_norm": 0.17873679440981743, "learning_rate": 2.9592072038855896e-05, "loss": 0.5301, "num_tokens": 3973629499.0, "step": 5196 }, { "epoch": 1.90395273210278, "grad_norm": 0.16281181057190097, "learning_rate": 2.958819927768818e-05, "loss": 0.522, "num_tokens": 3974470727.0, "step": 5197 }, { "epoch": 1.9043191499106857, "grad_norm": 0.15675970741479356, "learning_rate": 2.9584326089289712e-05, "loss": 0.5218, "num_tokens": 3975212182.0, "step": 5198 }, { "epoch": 1.904685567718591, "grad_norm": 0.14454636219771025, "learning_rate": 2.9580452473878574e-05, "loss": 0.5232, "num_tokens": 3976041732.0, "step": 5199 }, { "epoch": 1.9050519855264967, "grad_norm": 0.14612840739519095, "learning_rate": 2.957657843167284e-05, "loss": 0.4968, "num_tokens": 3976816969.0, "step": 5200 }, { "epoch": 1.905418403334402, "grad_norm": 0.14834723246590012, "learning_rate": 2.9572703962890633e-05, "loss": 0.5327, "num_tokens": 3977635009.0, "step": 5201 }, { "epoch": 1.9057848211423076, "grad_norm": 0.13638208327522944, "learning_rate": 2.9568829067750106e-05, "loss": 0.4746, "num_tokens": 3978366764.0, "step": 5202 }, { "epoch": 1.9061512389502129, "grad_norm": 0.13673555865763407, "learning_rate": 2.9564953746469408e-05, "loss": 0.4968, "num_tokens": 3979189127.0, "step": 5203 }, { "epoch": 1.9065176567581186, "grad_norm": 0.17671919676827336, "learning_rate": 2.9561077999266734e-05, "loss": 0.5379, "num_tokens": 3979851517.0, "step": 5204 }, { "epoch": 1.9068840745660238, "grad_norm": 0.1437716976464297, "learning_rate": 2.9557201826360297e-05, "loss": 0.4969, "num_tokens": 3980625806.0, "step": 5205 }, { "epoch": 1.9072504923739295, "grad_norm": 0.14461149717174465, "learning_rate": 2.9553325227968325e-05, "loss": 0.4923, "num_tokens": 3981380697.0, "step": 5206 }, { "epoch": 1.9076169101818348, "grad_norm": 0.14841059707098042, "learning_rate": 2.9549448204309087e-05, "loss": 0.4648, "num_tokens": 3982077464.0, "step": 5207 }, { "epoch": 1.9079833279897405, "grad_norm": 0.1530063168715554, "learning_rate": 2.954557075560087e-05, "loss": 0.4668, "num_tokens": 3982865694.0, "step": 5208 }, { "epoch": 1.9083497457976457, "grad_norm": 0.14117112247693914, "learning_rate": 2.9541692882061968e-05, "loss": 0.4955, "num_tokens": 3983820315.0, "step": 5209 }, { "epoch": 1.9087161636055512, "grad_norm": 0.15482720046583553, "learning_rate": 2.9537814583910723e-05, "loss": 0.5179, "num_tokens": 3984693315.0, "step": 5210 }, { "epoch": 1.9090825814134567, "grad_norm": 0.139971639618029, "learning_rate": 2.9533935861365495e-05, "loss": 0.4808, "num_tokens": 3985433646.0, "step": 5211 }, { "epoch": 1.9094489992213621, "grad_norm": 0.148571435185239, "learning_rate": 2.9530056714644652e-05, "loss": 0.4997, "num_tokens": 3986192504.0, "step": 5212 }, { "epoch": 1.9098154170292676, "grad_norm": 0.13318508486003244, "learning_rate": 2.9526177143966604e-05, "loss": 0.4885, "num_tokens": 3986990323.0, "step": 5213 }, { "epoch": 1.910181834837173, "grad_norm": 0.1664653550801959, "learning_rate": 2.9522297149549784e-05, "loss": 0.4994, "num_tokens": 3987773157.0, "step": 5214 }, { "epoch": 1.9105482526450785, "grad_norm": 0.165425153921881, "learning_rate": 2.9518416731612635e-05, "loss": 0.5408, "num_tokens": 3988507336.0, "step": 5215 }, { "epoch": 1.910914670452984, "grad_norm": 0.1507229345265757, "learning_rate": 2.951453589037364e-05, "loss": 0.52, "num_tokens": 3989206778.0, "step": 5216 }, { "epoch": 1.9112810882608895, "grad_norm": 0.19065744580641478, "learning_rate": 2.951065462605128e-05, "loss": 0.4943, "num_tokens": 3989962216.0, "step": 5217 }, { "epoch": 1.911647506068795, "grad_norm": 0.14808708356328187, "learning_rate": 2.9506772938864102e-05, "loss": 0.5045, "num_tokens": 3990765959.0, "step": 5218 }, { "epoch": 1.9120139238767004, "grad_norm": 0.17233397483506183, "learning_rate": 2.9502890829030645e-05, "loss": 0.4745, "num_tokens": 3991599371.0, "step": 5219 }, { "epoch": 1.912380341684606, "grad_norm": 0.192639450416549, "learning_rate": 2.949900829676947e-05, "loss": 0.4919, "num_tokens": 3992254354.0, "step": 5220 }, { "epoch": 1.9127467594925114, "grad_norm": 0.14164875157250117, "learning_rate": 2.9495125342299177e-05, "loss": 0.5547, "num_tokens": 3992955864.0, "step": 5221 }, { "epoch": 1.9131131773004166, "grad_norm": 0.18405635079359645, "learning_rate": 2.949124196583839e-05, "loss": 0.5418, "num_tokens": 3993748445.0, "step": 5222 }, { "epoch": 1.9134795951083223, "grad_norm": 0.15495773506619034, "learning_rate": 2.9487358167605747e-05, "loss": 0.5149, "num_tokens": 3994496515.0, "step": 5223 }, { "epoch": 1.9138460129162276, "grad_norm": 0.14493437116199365, "learning_rate": 2.94834739478199e-05, "loss": 0.5141, "num_tokens": 3995276085.0, "step": 5224 }, { "epoch": 1.9142124307241333, "grad_norm": 0.15482785016988523, "learning_rate": 2.9479589306699563e-05, "loss": 0.4583, "num_tokens": 3996123641.0, "step": 5225 }, { "epoch": 1.9145788485320385, "grad_norm": 0.15423639180102086, "learning_rate": 2.947570424446343e-05, "loss": 0.4764, "num_tokens": 3996972374.0, "step": 5226 }, { "epoch": 1.9149452663399442, "grad_norm": 0.19011069263846736, "learning_rate": 2.9471818761330246e-05, "loss": 0.5123, "num_tokens": 3997758919.0, "step": 5227 }, { "epoch": 1.9153116841478495, "grad_norm": 0.13956893803375048, "learning_rate": 2.9467932857518772e-05, "loss": 0.5116, "num_tokens": 3998487959.0, "step": 5228 }, { "epoch": 1.9156781019557552, "grad_norm": 0.15415310831073747, "learning_rate": 2.9464046533247793e-05, "loss": 0.5024, "num_tokens": 3999289674.0, "step": 5229 }, { "epoch": 1.9160445197636604, "grad_norm": 0.14918125764944803, "learning_rate": 2.9460159788736105e-05, "loss": 0.5332, "num_tokens": 4000072734.0, "step": 5230 }, { "epoch": 1.916410937571566, "grad_norm": 0.14029214796370987, "learning_rate": 2.945627262420255e-05, "loss": 0.5039, "num_tokens": 4000778257.0, "step": 5231 }, { "epoch": 1.9167773553794714, "grad_norm": 0.17340246012647983, "learning_rate": 2.9452385039865987e-05, "loss": 0.4959, "num_tokens": 4001477363.0, "step": 5232 }, { "epoch": 1.917143773187377, "grad_norm": 0.18496413784695903, "learning_rate": 2.9448497035945286e-05, "loss": 0.4878, "num_tokens": 4002257157.0, "step": 5233 }, { "epoch": 1.9175101909952823, "grad_norm": 0.1579657894151891, "learning_rate": 2.9444608612659352e-05, "loss": 0.4936, "num_tokens": 4003106183.0, "step": 5234 }, { "epoch": 1.917876608803188, "grad_norm": 0.15608952112361055, "learning_rate": 2.9440719770227116e-05, "loss": 0.5084, "num_tokens": 4003894582.0, "step": 5235 }, { "epoch": 1.9182430266110932, "grad_norm": 0.14638200466741605, "learning_rate": 2.943683050886752e-05, "loss": 0.4904, "num_tokens": 4004654495.0, "step": 5236 }, { "epoch": 1.9186094444189987, "grad_norm": 0.186659738906527, "learning_rate": 2.9432940828799537e-05, "loss": 0.5245, "num_tokens": 4005520393.0, "step": 5237 }, { "epoch": 1.9189758622269042, "grad_norm": 0.13732203081930108, "learning_rate": 2.9429050730242167e-05, "loss": 0.5224, "num_tokens": 4006340801.0, "step": 5238 }, { "epoch": 1.9193422800348097, "grad_norm": 0.13704147218349785, "learning_rate": 2.9425160213414435e-05, "loss": 0.4885, "num_tokens": 4007278172.0, "step": 5239 }, { "epoch": 1.9197086978427151, "grad_norm": 0.14236255156611627, "learning_rate": 2.9421269278535378e-05, "loss": 0.4862, "num_tokens": 4008053579.0, "step": 5240 }, { "epoch": 1.9200751156506206, "grad_norm": 0.13713075905104302, "learning_rate": 2.9417377925824068e-05, "loss": 0.4823, "num_tokens": 4008775327.0, "step": 5241 }, { "epoch": 1.920441533458526, "grad_norm": 0.13669009630279297, "learning_rate": 2.9413486155499583e-05, "loss": 0.5144, "num_tokens": 4009447705.0, "step": 5242 }, { "epoch": 1.9208079512664316, "grad_norm": 0.18073826113824518, "learning_rate": 2.940959396778106e-05, "loss": 0.4966, "num_tokens": 4010203527.0, "step": 5243 }, { "epoch": 1.921174369074337, "grad_norm": 0.13086409458371046, "learning_rate": 2.940570136288762e-05, "loss": 0.5181, "num_tokens": 4011063918.0, "step": 5244 }, { "epoch": 1.9215407868822425, "grad_norm": 0.16378648881412042, "learning_rate": 2.9401808341038435e-05, "loss": 0.5482, "num_tokens": 4011714845.0, "step": 5245 }, { "epoch": 1.921907204690148, "grad_norm": 0.14097318935547698, "learning_rate": 2.9397914902452673e-05, "loss": 0.4997, "num_tokens": 4012483748.0, "step": 5246 }, { "epoch": 1.9222736224980534, "grad_norm": 0.1507251106561563, "learning_rate": 2.939402104734956e-05, "loss": 0.5084, "num_tokens": 4013303011.0, "step": 5247 }, { "epoch": 1.922640040305959, "grad_norm": 0.15496886325028694, "learning_rate": 2.939012677594832e-05, "loss": 0.5251, "num_tokens": 4014042582.0, "step": 5248 }, { "epoch": 1.9230064581138642, "grad_norm": 0.13842834437238, "learning_rate": 2.938623208846821e-05, "loss": 0.4945, "num_tokens": 4014764700.0, "step": 5249 }, { "epoch": 1.9233728759217699, "grad_norm": 0.1463134344415033, "learning_rate": 2.9382336985128513e-05, "loss": 0.5204, "num_tokens": 4015505537.0, "step": 5250 }, { "epoch": 1.9237392937296751, "grad_norm": 0.1376305221517951, "learning_rate": 2.9378441466148518e-05, "loss": 0.4835, "num_tokens": 4016217121.0, "step": 5251 }, { "epoch": 1.9241057115375808, "grad_norm": 0.14416958961770687, "learning_rate": 2.9374545531747563e-05, "loss": 0.4889, "num_tokens": 4017000503.0, "step": 5252 }, { "epoch": 1.924472129345486, "grad_norm": 0.1367892533662773, "learning_rate": 2.937064918214499e-05, "loss": 0.4879, "num_tokens": 4017688986.0, "step": 5253 }, { "epoch": 1.9248385471533918, "grad_norm": 0.13251686872551222, "learning_rate": 2.936675241756018e-05, "loss": 0.497, "num_tokens": 4018502825.0, "step": 5254 }, { "epoch": 1.925204964961297, "grad_norm": 0.1332673788093351, "learning_rate": 2.9362855238212512e-05, "loss": 0.5144, "num_tokens": 4019256874.0, "step": 5255 }, { "epoch": 1.9255713827692027, "grad_norm": 0.13536876378838572, "learning_rate": 2.9358957644321422e-05, "loss": 0.4962, "num_tokens": 4020010378.0, "step": 5256 }, { "epoch": 1.925937800577108, "grad_norm": 0.15532468826831727, "learning_rate": 2.9355059636106352e-05, "loss": 0.5, "num_tokens": 4020681676.0, "step": 5257 }, { "epoch": 1.9263042183850136, "grad_norm": 0.1355921980546014, "learning_rate": 2.9351161213786753e-05, "loss": 0.5296, "num_tokens": 4021379354.0, "step": 5258 }, { "epoch": 1.926670636192919, "grad_norm": 0.1438248209771376, "learning_rate": 2.934726237758212e-05, "loss": 0.5176, "num_tokens": 4022089717.0, "step": 5259 }, { "epoch": 1.9270370540008246, "grad_norm": 0.13915614212525265, "learning_rate": 2.9343363127711972e-05, "loss": 0.5127, "num_tokens": 4022744805.0, "step": 5260 }, { "epoch": 1.9274034718087298, "grad_norm": 0.1407084739678735, "learning_rate": 2.9339463464395837e-05, "loss": 0.5212, "num_tokens": 4023487366.0, "step": 5261 }, { "epoch": 1.9277698896166355, "grad_norm": 0.1512290516150723, "learning_rate": 2.933556338785328e-05, "loss": 0.4896, "num_tokens": 4024281913.0, "step": 5262 }, { "epoch": 1.9281363074245408, "grad_norm": 0.1370517354304108, "learning_rate": 2.9331662898303884e-05, "loss": 0.5236, "num_tokens": 4025061062.0, "step": 5263 }, { "epoch": 1.9285027252324463, "grad_norm": 0.13333509176176456, "learning_rate": 2.9327761995967242e-05, "loss": 0.4554, "num_tokens": 4025853878.0, "step": 5264 }, { "epoch": 1.9288691430403517, "grad_norm": 0.13243736762944308, "learning_rate": 2.9323860681062997e-05, "loss": 0.4911, "num_tokens": 4026609395.0, "step": 5265 }, { "epoch": 1.9292355608482572, "grad_norm": 0.13924861160528196, "learning_rate": 2.931995895381079e-05, "loss": 0.4814, "num_tokens": 4027334735.0, "step": 5266 }, { "epoch": 1.9296019786561627, "grad_norm": 0.1370001073380956, "learning_rate": 2.9316056814430305e-05, "loss": 0.4656, "num_tokens": 4028121459.0, "step": 5267 }, { "epoch": 1.9299683964640681, "grad_norm": 0.12997003492252948, "learning_rate": 2.9312154263141232e-05, "loss": 0.4557, "num_tokens": 4028908717.0, "step": 5268 }, { "epoch": 1.9303348142719736, "grad_norm": 0.13382393656487943, "learning_rate": 2.9308251300163304e-05, "loss": 0.5078, "num_tokens": 4029735187.0, "step": 5269 }, { "epoch": 1.930701232079879, "grad_norm": 0.1356405437573403, "learning_rate": 2.930434792571625e-05, "loss": 0.5043, "num_tokens": 4030405944.0, "step": 5270 }, { "epoch": 1.9310676498877846, "grad_norm": 0.14447504153145, "learning_rate": 2.930044414001985e-05, "loss": 0.5057, "num_tokens": 4031136765.0, "step": 5271 }, { "epoch": 1.93143406769569, "grad_norm": 0.14169683401346145, "learning_rate": 2.9296539943293888e-05, "loss": 0.4965, "num_tokens": 4032061365.0, "step": 5272 }, { "epoch": 1.9318004855035955, "grad_norm": 0.12886110279868326, "learning_rate": 2.9292635335758182e-05, "loss": 0.5346, "num_tokens": 4032831250.0, "step": 5273 }, { "epoch": 1.932166903311501, "grad_norm": 0.14435282081130021, "learning_rate": 2.928873031763257e-05, "loss": 0.5145, "num_tokens": 4033605351.0, "step": 5274 }, { "epoch": 1.9325333211194065, "grad_norm": 0.13464873885230577, "learning_rate": 2.928482488913691e-05, "loss": 0.4926, "num_tokens": 4034376907.0, "step": 5275 }, { "epoch": 1.9328997389273117, "grad_norm": 0.13798450072040014, "learning_rate": 2.928091905049108e-05, "loss": 0.506, "num_tokens": 4035104066.0, "step": 5276 }, { "epoch": 1.9332661567352174, "grad_norm": 0.1267947223246903, "learning_rate": 2.9277012801915e-05, "loss": 0.4932, "num_tokens": 4035968689.0, "step": 5277 }, { "epoch": 1.9336325745431227, "grad_norm": 0.14046846199277524, "learning_rate": 2.927310614362858e-05, "loss": 0.5077, "num_tokens": 4036728508.0, "step": 5278 }, { "epoch": 1.9339989923510283, "grad_norm": 0.14280591110515195, "learning_rate": 2.9269199075851792e-05, "loss": 0.5032, "num_tokens": 4037391581.0, "step": 5279 }, { "epoch": 1.9343654101589336, "grad_norm": 0.12607717359681803, "learning_rate": 2.926529159880461e-05, "loss": 0.5105, "num_tokens": 4038233216.0, "step": 5280 }, { "epoch": 1.9347318279668393, "grad_norm": 0.1261011354994534, "learning_rate": 2.9261383712707018e-05, "loss": 0.4629, "num_tokens": 4038969263.0, "step": 5281 }, { "epoch": 1.9350982457747445, "grad_norm": 0.1324390125832841, "learning_rate": 2.925747541777905e-05, "loss": 0.5062, "num_tokens": 4039731714.0, "step": 5282 }, { "epoch": 1.9354646635826502, "grad_norm": 0.14102411961491051, "learning_rate": 2.9253566714240747e-05, "loss": 0.5265, "num_tokens": 4040391977.0, "step": 5283 }, { "epoch": 1.9358310813905555, "grad_norm": 0.13474795935343414, "learning_rate": 2.9249657602312177e-05, "loss": 0.4901, "num_tokens": 4041156196.0, "step": 5284 }, { "epoch": 1.9361974991984612, "grad_norm": 0.13471591429487947, "learning_rate": 2.9245748082213433e-05, "loss": 0.4899, "num_tokens": 4042021779.0, "step": 5285 }, { "epoch": 1.9365639170063664, "grad_norm": 0.13126798502609152, "learning_rate": 2.9241838154164627e-05, "loss": 0.4969, "num_tokens": 4042717320.0, "step": 5286 }, { "epoch": 1.9369303348142721, "grad_norm": 0.14499424486119034, "learning_rate": 2.9237927818385894e-05, "loss": 0.4995, "num_tokens": 4043471297.0, "step": 5287 }, { "epoch": 1.9372967526221774, "grad_norm": 0.13406100838189425, "learning_rate": 2.9234017075097397e-05, "loss": 0.4926, "num_tokens": 4044202261.0, "step": 5288 }, { "epoch": 1.9376631704300828, "grad_norm": 0.12820120575187427, "learning_rate": 2.9230105924519305e-05, "loss": 0.5255, "num_tokens": 4045002862.0, "step": 5289 }, { "epoch": 1.9380295882379883, "grad_norm": 0.14862912310518256, "learning_rate": 2.922619436687185e-05, "loss": 0.5282, "num_tokens": 4045650201.0, "step": 5290 }, { "epoch": 1.9383960060458938, "grad_norm": 0.1348095381397853, "learning_rate": 2.9222282402375243e-05, "loss": 0.4799, "num_tokens": 4046394135.0, "step": 5291 }, { "epoch": 1.9387624238537993, "grad_norm": 0.1362482455587766, "learning_rate": 2.9218370031249735e-05, "loss": 0.5043, "num_tokens": 4047192483.0, "step": 5292 }, { "epoch": 1.9391288416617047, "grad_norm": 0.12874844686420658, "learning_rate": 2.9214457253715602e-05, "loss": 0.517, "num_tokens": 4048060804.0, "step": 5293 }, { "epoch": 1.9394952594696102, "grad_norm": 0.13360621488107885, "learning_rate": 2.9210544069993146e-05, "loss": 0.5006, "num_tokens": 4048920479.0, "step": 5294 }, { "epoch": 1.9398616772775157, "grad_norm": 0.12812389782248898, "learning_rate": 2.9206630480302685e-05, "loss": 0.4959, "num_tokens": 4049670206.0, "step": 5295 }, { "epoch": 1.9402280950854212, "grad_norm": 0.1540305731444422, "learning_rate": 2.920271648486456e-05, "loss": 0.5352, "num_tokens": 4050339622.0, "step": 5296 }, { "epoch": 1.9405945128933266, "grad_norm": 0.1304072065515065, "learning_rate": 2.9198802083899136e-05, "loss": 0.4941, "num_tokens": 4051163781.0, "step": 5297 }, { "epoch": 1.940960930701232, "grad_norm": 0.12827647384051527, "learning_rate": 2.9194887277626804e-05, "loss": 0.4903, "num_tokens": 4052007939.0, "step": 5298 }, { "epoch": 1.9413273485091376, "grad_norm": 0.1235654269058759, "learning_rate": 2.9190972066267974e-05, "loss": 0.4708, "num_tokens": 4052805404.0, "step": 5299 }, { "epoch": 1.941693766317043, "grad_norm": 0.1385641375268419, "learning_rate": 2.9187056450043086e-05, "loss": 0.4927, "num_tokens": 4053560364.0, "step": 5300 }, { "epoch": 1.9420601841249485, "grad_norm": 0.14447568836178937, "learning_rate": 2.9183140429172585e-05, "loss": 0.5062, "num_tokens": 4054443819.0, "step": 5301 }, { "epoch": 1.942426601932854, "grad_norm": 0.13508621811862043, "learning_rate": 2.917922400387696e-05, "loss": 0.4723, "num_tokens": 4055204899.0, "step": 5302 }, { "epoch": 1.9427930197407592, "grad_norm": 0.12497772419482911, "learning_rate": 2.9175307174376705e-05, "loss": 0.5067, "num_tokens": 4056050206.0, "step": 5303 }, { "epoch": 1.943159437548665, "grad_norm": 0.14272945543543716, "learning_rate": 2.917138994089236e-05, "loss": 0.4981, "num_tokens": 4056905730.0, "step": 5304 }, { "epoch": 1.9435258553565702, "grad_norm": 0.13952685079418525, "learning_rate": 2.9167472303644467e-05, "loss": 0.4776, "num_tokens": 4057657568.0, "step": 5305 }, { "epoch": 1.9438922731644759, "grad_norm": 0.14059887856832806, "learning_rate": 2.9163554262853582e-05, "loss": 0.4839, "num_tokens": 4058449963.0, "step": 5306 }, { "epoch": 1.9442586909723811, "grad_norm": 0.13968674906223297, "learning_rate": 2.915963581874032e-05, "loss": 0.4911, "num_tokens": 4059276596.0, "step": 5307 }, { "epoch": 1.9446251087802868, "grad_norm": 0.14545629503126528, "learning_rate": 2.9155716971525286e-05, "loss": 0.5022, "num_tokens": 4060046547.0, "step": 5308 }, { "epoch": 1.944991526588192, "grad_norm": 0.15642045366358004, "learning_rate": 2.915179772142912e-05, "loss": 0.528, "num_tokens": 4060693511.0, "step": 5309 }, { "epoch": 1.9453579443960978, "grad_norm": 0.16300500982597885, "learning_rate": 2.914787806867248e-05, "loss": 0.5208, "num_tokens": 4061365491.0, "step": 5310 }, { "epoch": 1.945724362204003, "grad_norm": 0.13561935879327938, "learning_rate": 2.9143958013476063e-05, "loss": 0.4707, "num_tokens": 4062170854.0, "step": 5311 }, { "epoch": 1.9460907800119087, "grad_norm": 0.1305139390656824, "learning_rate": 2.914003755606056e-05, "loss": 0.4785, "num_tokens": 4063046513.0, "step": 5312 }, { "epoch": 1.946457197819814, "grad_norm": 0.14606483467522616, "learning_rate": 2.913611669664671e-05, "loss": 0.5144, "num_tokens": 4063744458.0, "step": 5313 }, { "epoch": 1.9468236156277197, "grad_norm": 0.1407315125617944, "learning_rate": 2.9132195435455262e-05, "loss": 0.493, "num_tokens": 4064606638.0, "step": 5314 }, { "epoch": 1.947190033435625, "grad_norm": 0.15040200176286875, "learning_rate": 2.9128273772707002e-05, "loss": 0.5178, "num_tokens": 4065345172.0, "step": 5315 }, { "epoch": 1.9475564512435304, "grad_norm": 0.13402182522434097, "learning_rate": 2.9124351708622706e-05, "loss": 0.4766, "num_tokens": 4066217041.0, "step": 5316 }, { "epoch": 1.9479228690514359, "grad_norm": 0.15243760755296135, "learning_rate": 2.912042924342321e-05, "loss": 0.5378, "num_tokens": 4066984673.0, "step": 5317 }, { "epoch": 1.9482892868593413, "grad_norm": 0.1483947107274157, "learning_rate": 2.9116506377329357e-05, "loss": 0.4828, "num_tokens": 4067873504.0, "step": 5318 }, { "epoch": 1.9486557046672468, "grad_norm": 0.14486719719022187, "learning_rate": 2.9112583110562004e-05, "loss": 0.4993, "num_tokens": 4068652162.0, "step": 5319 }, { "epoch": 1.9490221224751523, "grad_norm": 0.1615977353361658, "learning_rate": 2.9108659443342038e-05, "loss": 0.5106, "num_tokens": 4069462047.0, "step": 5320 }, { "epoch": 1.9493885402830577, "grad_norm": 0.15067144971927718, "learning_rate": 2.9104735375890378e-05, "loss": 0.5446, "num_tokens": 4070286878.0, "step": 5321 }, { "epoch": 1.9497549580909632, "grad_norm": 0.13907697345014955, "learning_rate": 2.9100810908427954e-05, "loss": 0.5146, "num_tokens": 4071103444.0, "step": 5322 }, { "epoch": 1.9501213758988687, "grad_norm": 0.15568044975686804, "learning_rate": 2.9096886041175712e-05, "loss": 0.4912, "num_tokens": 4071849028.0, "step": 5323 }, { "epoch": 1.9504877937067742, "grad_norm": 0.14754261483614323, "learning_rate": 2.9092960774354645e-05, "loss": 0.4981, "num_tokens": 4072577299.0, "step": 5324 }, { "epoch": 1.9508542115146796, "grad_norm": 0.13780305936451254, "learning_rate": 2.908903510818574e-05, "loss": 0.4927, "num_tokens": 4073313783.0, "step": 5325 }, { "epoch": 1.951220629322585, "grad_norm": 0.14947681793332174, "learning_rate": 2.9085109042890033e-05, "loss": 0.4763, "num_tokens": 4073984241.0, "step": 5326 }, { "epoch": 1.9515870471304906, "grad_norm": 0.1595481352176823, "learning_rate": 2.9081182578688555e-05, "loss": 0.541, "num_tokens": 4074693265.0, "step": 5327 }, { "epoch": 1.951953464938396, "grad_norm": 0.14036509020349783, "learning_rate": 2.9077255715802377e-05, "loss": 0.5343, "num_tokens": 4075412007.0, "step": 5328 }, { "epoch": 1.9523198827463015, "grad_norm": 0.1389490307656025, "learning_rate": 2.9073328454452605e-05, "loss": 0.5075, "num_tokens": 4076078672.0, "step": 5329 }, { "epoch": 1.9526863005542068, "grad_norm": 0.14493456923591927, "learning_rate": 2.9069400794860332e-05, "loss": 0.5487, "num_tokens": 4076800421.0, "step": 5330 }, { "epoch": 1.9530527183621125, "grad_norm": 0.1363708156374939, "learning_rate": 2.90654727372467e-05, "loss": 0.4944, "num_tokens": 4077617314.0, "step": 5331 }, { "epoch": 1.9534191361700177, "grad_norm": 0.1327392819623946, "learning_rate": 2.9061544281832873e-05, "loss": 0.4725, "num_tokens": 4078506444.0, "step": 5332 }, { "epoch": 1.9537855539779234, "grad_norm": 0.13727160012225378, "learning_rate": 2.905761542884002e-05, "loss": 0.5297, "num_tokens": 4079286394.0, "step": 5333 }, { "epoch": 1.9541519717858287, "grad_norm": 0.1467069981538887, "learning_rate": 2.9053686178489352e-05, "loss": 0.5111, "num_tokens": 4080138171.0, "step": 5334 }, { "epoch": 1.9545183895937344, "grad_norm": 0.1485503597026503, "learning_rate": 2.9049756531002085e-05, "loss": 0.5794, "num_tokens": 4080794555.0, "step": 5335 }, { "epoch": 1.9548848074016396, "grad_norm": 0.14322227979884944, "learning_rate": 2.9045826486599473e-05, "loss": 0.5498, "num_tokens": 4081533600.0, "step": 5336 }, { "epoch": 1.9552512252095453, "grad_norm": 0.15642627442903007, "learning_rate": 2.9041896045502782e-05, "loss": 0.528, "num_tokens": 4082306436.0, "step": 5337 }, { "epoch": 1.9556176430174506, "grad_norm": 0.14460399237025637, "learning_rate": 2.903796520793331e-05, "loss": 0.4902, "num_tokens": 4083049097.0, "step": 5338 }, { "epoch": 1.9559840608253563, "grad_norm": 0.13179980582174355, "learning_rate": 2.9034033974112362e-05, "loss": 0.4965, "num_tokens": 4083802771.0, "step": 5339 }, { "epoch": 1.9563504786332615, "grad_norm": 0.15026430873092084, "learning_rate": 2.903010234426128e-05, "loss": 0.4799, "num_tokens": 4084494765.0, "step": 5340 }, { "epoch": 1.9567168964411672, "grad_norm": 0.14012276249352904, "learning_rate": 2.9026170318601416e-05, "loss": 0.4776, "num_tokens": 4085346341.0, "step": 5341 }, { "epoch": 1.9570833142490724, "grad_norm": 0.12699153974766053, "learning_rate": 2.9022237897354157e-05, "loss": 0.4973, "num_tokens": 4086172584.0, "step": 5342 }, { "epoch": 1.957449732056978, "grad_norm": 0.15458889554864827, "learning_rate": 2.901830508074091e-05, "loss": 0.5234, "num_tokens": 4086915767.0, "step": 5343 }, { "epoch": 1.9578161498648834, "grad_norm": 0.15279404559466903, "learning_rate": 2.901437186898309e-05, "loss": 0.4945, "num_tokens": 4087641905.0, "step": 5344 }, { "epoch": 1.9581825676727889, "grad_norm": 0.1356729776885902, "learning_rate": 2.9010438262302146e-05, "loss": 0.5271, "num_tokens": 4088395730.0, "step": 5345 }, { "epoch": 1.9585489854806943, "grad_norm": 0.13965538047499748, "learning_rate": 2.9006504260919563e-05, "loss": 0.5253, "num_tokens": 4089043778.0, "step": 5346 }, { "epoch": 1.9589154032885998, "grad_norm": 0.13824999404230667, "learning_rate": 2.9002569865056817e-05, "loss": 0.5027, "num_tokens": 4089855767.0, "step": 5347 }, { "epoch": 1.9592818210965053, "grad_norm": 0.1496173750587321, "learning_rate": 2.8998635074935426e-05, "loss": 0.4833, "num_tokens": 4090609673.0, "step": 5348 }, { "epoch": 1.9596482389044108, "grad_norm": 0.13887979188727836, "learning_rate": 2.8994699890776926e-05, "loss": 0.5089, "num_tokens": 4091341634.0, "step": 5349 }, { "epoch": 1.9600146567123162, "grad_norm": 0.12131280491257485, "learning_rate": 2.8990764312802883e-05, "loss": 0.5194, "num_tokens": 4092186125.0, "step": 5350 }, { "epoch": 1.9603810745202217, "grad_norm": 0.14125395345130948, "learning_rate": 2.8986828341234865e-05, "loss": 0.4834, "num_tokens": 4092906347.0, "step": 5351 }, { "epoch": 1.9607474923281272, "grad_norm": 0.1399243592854868, "learning_rate": 2.8982891976294487e-05, "loss": 0.4912, "num_tokens": 4093591157.0, "step": 5352 }, { "epoch": 1.9611139101360326, "grad_norm": 0.1293608883954841, "learning_rate": 2.897895521820337e-05, "loss": 0.5534, "num_tokens": 4094458112.0, "step": 5353 }, { "epoch": 1.9614803279439381, "grad_norm": 0.13394728084986676, "learning_rate": 2.897501806718316e-05, "loss": 0.5065, "num_tokens": 4095198118.0, "step": 5354 }, { "epoch": 1.9618467457518436, "grad_norm": 0.12808254454389664, "learning_rate": 2.897108052345553e-05, "loss": 0.5133, "num_tokens": 4096060882.0, "step": 5355 }, { "epoch": 1.962213163559749, "grad_norm": 0.1527130606469819, "learning_rate": 2.8967142587242163e-05, "loss": 0.4907, "num_tokens": 4096808053.0, "step": 5356 }, { "epoch": 1.9625795813676543, "grad_norm": 0.13823797954403197, "learning_rate": 2.8963204258764786e-05, "loss": 0.5078, "num_tokens": 4097550904.0, "step": 5357 }, { "epoch": 1.96294599917556, "grad_norm": 0.14944360622901373, "learning_rate": 2.895926553824512e-05, "loss": 0.5008, "num_tokens": 4098314851.0, "step": 5358 }, { "epoch": 1.9633124169834653, "grad_norm": 0.15205662119404387, "learning_rate": 2.8955326425904932e-05, "loss": 0.4816, "num_tokens": 4099063662.0, "step": 5359 }, { "epoch": 1.963678834791371, "grad_norm": 0.1406657896089243, "learning_rate": 2.8951386921966005e-05, "loss": 0.5353, "num_tokens": 4099789452.0, "step": 5360 }, { "epoch": 1.9640452525992762, "grad_norm": 0.1386170504861641, "learning_rate": 2.894744702665013e-05, "loss": 0.5264, "num_tokens": 4100544592.0, "step": 5361 }, { "epoch": 1.964411670407182, "grad_norm": 0.17898757034417964, "learning_rate": 2.894350674017914e-05, "loss": 0.4929, "num_tokens": 4101294914.0, "step": 5362 }, { "epoch": 1.9647780882150871, "grad_norm": 0.14861857767988865, "learning_rate": 2.8939566062774872e-05, "loss": 0.5344, "num_tokens": 4101978272.0, "step": 5363 }, { "epoch": 1.9651445060229928, "grad_norm": 0.15547072557551908, "learning_rate": 2.893562499465921e-05, "loss": 0.5032, "num_tokens": 4102709029.0, "step": 5364 }, { "epoch": 1.965510923830898, "grad_norm": 0.15680177759287134, "learning_rate": 2.8931683536054026e-05, "loss": 0.499, "num_tokens": 4103450285.0, "step": 5365 }, { "epoch": 1.9658773416388038, "grad_norm": 0.1383196204134627, "learning_rate": 2.8927741687181245e-05, "loss": 0.4847, "num_tokens": 4104225173.0, "step": 5366 }, { "epoch": 1.966243759446709, "grad_norm": 0.1271002653218515, "learning_rate": 2.8923799448262788e-05, "loss": 0.469, "num_tokens": 4104911430.0, "step": 5367 }, { "epoch": 1.9666101772546147, "grad_norm": 0.14402691715730853, "learning_rate": 2.8919856819520623e-05, "loss": 0.4692, "num_tokens": 4105543635.0, "step": 5368 }, { "epoch": 1.96697659506252, "grad_norm": 0.1499377797506909, "learning_rate": 2.8915913801176733e-05, "loss": 0.5093, "num_tokens": 4106249624.0, "step": 5369 }, { "epoch": 1.9673430128704255, "grad_norm": 0.14540672553554887, "learning_rate": 2.8911970393453094e-05, "loss": 0.489, "num_tokens": 4107062630.0, "step": 5370 }, { "epoch": 1.967709430678331, "grad_norm": 0.12894126467122713, "learning_rate": 2.890802659657175e-05, "loss": 0.5138, "num_tokens": 4107827808.0, "step": 5371 }, { "epoch": 1.9680758484862364, "grad_norm": 0.14536500312171652, "learning_rate": 2.890408241075473e-05, "loss": 0.4956, "num_tokens": 4108634977.0, "step": 5372 }, { "epoch": 1.9684422662941419, "grad_norm": 0.14078905815586068, "learning_rate": 2.8900137836224118e-05, "loss": 0.5136, "num_tokens": 4109262668.0, "step": 5373 }, { "epoch": 1.9688086841020473, "grad_norm": 0.16053600727221332, "learning_rate": 2.8896192873201984e-05, "loss": 0.5232, "num_tokens": 4109916079.0, "step": 5374 }, { "epoch": 1.9691751019099528, "grad_norm": 0.15722688429587844, "learning_rate": 2.8892247521910436e-05, "loss": 0.5197, "num_tokens": 4110630127.0, "step": 5375 }, { "epoch": 1.9695415197178583, "grad_norm": 0.12101275747181957, "learning_rate": 2.8888301782571618e-05, "loss": 0.4771, "num_tokens": 4111565310.0, "step": 5376 }, { "epoch": 1.9699079375257638, "grad_norm": 0.1615577501456829, "learning_rate": 2.8884355655407682e-05, "loss": 0.5381, "num_tokens": 4112279989.0, "step": 5377 }, { "epoch": 1.9702743553336692, "grad_norm": 0.130354493767532, "learning_rate": 2.8880409140640785e-05, "loss": 0.4648, "num_tokens": 4113074874.0, "step": 5378 }, { "epoch": 1.9706407731415747, "grad_norm": 0.13522112664212246, "learning_rate": 2.8876462238493144e-05, "loss": 0.4899, "num_tokens": 4113937870.0, "step": 5379 }, { "epoch": 1.9710071909494802, "grad_norm": 0.1336237589430383, "learning_rate": 2.8872514949186974e-05, "loss": 0.5184, "num_tokens": 4114713694.0, "step": 5380 }, { "epoch": 1.9713736087573857, "grad_norm": 0.13842568518712325, "learning_rate": 2.8868567272944505e-05, "loss": 0.5204, "num_tokens": 4115499371.0, "step": 5381 }, { "epoch": 1.9717400265652911, "grad_norm": 0.15478136055873012, "learning_rate": 2.8864619209988013e-05, "loss": 0.5436, "num_tokens": 4116186064.0, "step": 5382 }, { "epoch": 1.9721064443731966, "grad_norm": 0.14767904682634406, "learning_rate": 2.8860670760539764e-05, "loss": 0.4797, "num_tokens": 4116951531.0, "step": 5383 }, { "epoch": 1.9724728621811018, "grad_norm": 0.13790748125188088, "learning_rate": 2.8856721924822083e-05, "loss": 0.4987, "num_tokens": 4117728976.0, "step": 5384 }, { "epoch": 1.9728392799890075, "grad_norm": 0.15007563436554477, "learning_rate": 2.885277270305728e-05, "loss": 0.4776, "num_tokens": 4118525906.0, "step": 5385 }, { "epoch": 1.9732056977969128, "grad_norm": 0.13226530120876018, "learning_rate": 2.884882309546772e-05, "loss": 0.4958, "num_tokens": 4119289678.0, "step": 5386 }, { "epoch": 1.9735721156048185, "grad_norm": 0.13473652399673394, "learning_rate": 2.8844873102275757e-05, "loss": 0.4872, "num_tokens": 4120105620.0, "step": 5387 }, { "epoch": 1.9739385334127237, "grad_norm": 0.14451352261705364, "learning_rate": 2.88409227237038e-05, "loss": 0.502, "num_tokens": 4120970956.0, "step": 5388 }, { "epoch": 1.9743049512206294, "grad_norm": 0.13321584208644383, "learning_rate": 2.883697195997425e-05, "loss": 0.4945, "num_tokens": 4121666800.0, "step": 5389 }, { "epoch": 1.9746713690285347, "grad_norm": 0.1312398412468744, "learning_rate": 2.883302081130955e-05, "loss": 0.4946, "num_tokens": 4122481844.0, "step": 5390 }, { "epoch": 1.9750377868364404, "grad_norm": 0.14369872594731395, "learning_rate": 2.882906927793216e-05, "loss": 0.497, "num_tokens": 4123276121.0, "step": 5391 }, { "epoch": 1.9754042046443456, "grad_norm": 0.13049743513961684, "learning_rate": 2.8825117360064547e-05, "loss": 0.5216, "num_tokens": 4124020504.0, "step": 5392 }, { "epoch": 1.9757706224522513, "grad_norm": 0.14624317294860825, "learning_rate": 2.882116505792923e-05, "loss": 0.5423, "num_tokens": 4124778822.0, "step": 5393 }, { "epoch": 1.9761370402601566, "grad_norm": 0.13033077788582095, "learning_rate": 2.8817212371748712e-05, "loss": 0.4987, "num_tokens": 4125459431.0, "step": 5394 }, { "epoch": 1.9765034580680623, "grad_norm": 0.14582291184277849, "learning_rate": 2.881325930174555e-05, "loss": 0.5193, "num_tokens": 4126202290.0, "step": 5395 }, { "epoch": 1.9768698758759675, "grad_norm": 0.14548715777881757, "learning_rate": 2.880930584814231e-05, "loss": 0.5151, "num_tokens": 4126912487.0, "step": 5396 }, { "epoch": 1.977236293683873, "grad_norm": 0.12461555806359645, "learning_rate": 2.8805352011161572e-05, "loss": 0.4956, "num_tokens": 4127763570.0, "step": 5397 }, { "epoch": 1.9776027114917785, "grad_norm": 0.13367330563167595, "learning_rate": 2.8801397791025953e-05, "loss": 0.4911, "num_tokens": 4128509571.0, "step": 5398 }, { "epoch": 1.977969129299684, "grad_norm": 0.13515620379552842, "learning_rate": 2.8797443187958075e-05, "loss": 0.452, "num_tokens": 4129408227.0, "step": 5399 }, { "epoch": 1.9783355471075894, "grad_norm": 0.11914418199759917, "learning_rate": 2.879348820218059e-05, "loss": 0.4697, "num_tokens": 4130217582.0, "step": 5400 }, { "epoch": 1.9787019649154949, "grad_norm": 0.1434359821321587, "learning_rate": 2.8789532833916185e-05, "loss": 0.4977, "num_tokens": 4130947858.0, "step": 5401 }, { "epoch": 1.9790683827234004, "grad_norm": 0.13998380899778848, "learning_rate": 2.878557708338754e-05, "loss": 0.5225, "num_tokens": 4131702777.0, "step": 5402 }, { "epoch": 1.9794348005313058, "grad_norm": 0.13731615584912943, "learning_rate": 2.878162095081738e-05, "loss": 0.4907, "num_tokens": 4132416711.0, "step": 5403 }, { "epoch": 1.9798012183392113, "grad_norm": 0.15757891414245195, "learning_rate": 2.877766443642844e-05, "loss": 0.5126, "num_tokens": 4133178389.0, "step": 5404 }, { "epoch": 1.9801676361471168, "grad_norm": 0.14451429159392243, "learning_rate": 2.8773707540443478e-05, "loss": 0.5073, "num_tokens": 4133974063.0, "step": 5405 }, { "epoch": 1.9805340539550222, "grad_norm": 0.12671827031311944, "learning_rate": 2.876975026308528e-05, "loss": 0.4977, "num_tokens": 4134807854.0, "step": 5406 }, { "epoch": 1.9809004717629277, "grad_norm": 0.1736326196335456, "learning_rate": 2.8765792604576647e-05, "loss": 0.5429, "num_tokens": 4135592078.0, "step": 5407 }, { "epoch": 1.9812668895708332, "grad_norm": 0.13634421119110426, "learning_rate": 2.8761834565140395e-05, "loss": 0.5108, "num_tokens": 4136273697.0, "step": 5408 }, { "epoch": 1.9816333073787387, "grad_norm": 0.14117361147138885, "learning_rate": 2.8757876144999378e-05, "loss": 0.5291, "num_tokens": 4136994134.0, "step": 5409 }, { "epoch": 1.9819997251866441, "grad_norm": 0.16412833002603053, "learning_rate": 2.8753917344376467e-05, "loss": 0.5285, "num_tokens": 4137754242.0, "step": 5410 }, { "epoch": 1.9823661429945494, "grad_norm": 0.14172705110463907, "learning_rate": 2.8749958163494543e-05, "loss": 0.544, "num_tokens": 4138445733.0, "step": 5411 }, { "epoch": 1.982732560802455, "grad_norm": 0.1429827368729778, "learning_rate": 2.8745998602576518e-05, "loss": 0.4988, "num_tokens": 4139209724.0, "step": 5412 }, { "epoch": 1.9830989786103603, "grad_norm": 0.13958899662592036, "learning_rate": 2.8742038661845325e-05, "loss": 0.4869, "num_tokens": 4139959524.0, "step": 5413 }, { "epoch": 1.983465396418266, "grad_norm": 0.1358641237450655, "learning_rate": 2.8738078341523906e-05, "loss": 0.4916, "num_tokens": 4140751644.0, "step": 5414 }, { "epoch": 1.9838318142261713, "grad_norm": 0.12978547622694542, "learning_rate": 2.873411764183525e-05, "loss": 0.4838, "num_tokens": 4141472367.0, "step": 5415 }, { "epoch": 1.984198232034077, "grad_norm": 0.14706789569594383, "learning_rate": 2.8730156563002344e-05, "loss": 0.4954, "num_tokens": 4142143215.0, "step": 5416 }, { "epoch": 1.9845646498419822, "grad_norm": 0.1386512468631851, "learning_rate": 2.8726195105248207e-05, "loss": 0.5096, "num_tokens": 4142904059.0, "step": 5417 }, { "epoch": 1.984931067649888, "grad_norm": 0.13433042450092708, "learning_rate": 2.8722233268795875e-05, "loss": 0.5022, "num_tokens": 4143641092.0, "step": 5418 }, { "epoch": 1.9852974854577932, "grad_norm": 0.14137648456731863, "learning_rate": 2.871827105386841e-05, "loss": 0.4982, "num_tokens": 4144367734.0, "step": 5419 }, { "epoch": 1.9856639032656989, "grad_norm": 0.14942639959876683, "learning_rate": 2.871430846068889e-05, "loss": 0.5004, "num_tokens": 4145132040.0, "step": 5420 }, { "epoch": 1.986030321073604, "grad_norm": 0.1540924673134348, "learning_rate": 2.8710345489480423e-05, "loss": 0.4926, "num_tokens": 4145932302.0, "step": 5421 }, { "epoch": 1.9863967388815098, "grad_norm": 0.16138812988409865, "learning_rate": 2.8706382140466117e-05, "loss": 0.5043, "num_tokens": 4146714829.0, "step": 5422 }, { "epoch": 1.986763156689415, "grad_norm": 0.12472666411603132, "learning_rate": 2.870241841386913e-05, "loss": 0.4726, "num_tokens": 4147471505.0, "step": 5423 }, { "epoch": 1.9871295744973205, "grad_norm": 0.1382527065809633, "learning_rate": 2.8698454309912632e-05, "loss": 0.491, "num_tokens": 4148205231.0, "step": 5424 }, { "epoch": 1.987495992305226, "grad_norm": 0.14909240094393067, "learning_rate": 2.8694489828819795e-05, "loss": 0.4532, "num_tokens": 4148940719.0, "step": 5425 }, { "epoch": 1.9878624101131315, "grad_norm": 0.13027944159707852, "learning_rate": 2.8690524970813834e-05, "loss": 0.4947, "num_tokens": 4149710510.0, "step": 5426 }, { "epoch": 1.988228827921037, "grad_norm": 0.14362156734765424, "learning_rate": 2.868655973611798e-05, "loss": 0.4999, "num_tokens": 4150474512.0, "step": 5427 }, { "epoch": 1.9885952457289424, "grad_norm": 0.14791087205611178, "learning_rate": 2.868259412495549e-05, "loss": 0.5001, "num_tokens": 4151240760.0, "step": 5428 }, { "epoch": 1.988961663536848, "grad_norm": 0.14422148657398504, "learning_rate": 2.8678628137549616e-05, "loss": 0.514, "num_tokens": 4152035650.0, "step": 5429 }, { "epoch": 1.9893280813447534, "grad_norm": 0.12534241482977346, "learning_rate": 2.8674661774123663e-05, "loss": 0.4936, "num_tokens": 4152894524.0, "step": 5430 }, { "epoch": 1.9896944991526588, "grad_norm": 0.14723512465206837, "learning_rate": 2.8670695034900955e-05, "loss": 0.5211, "num_tokens": 4153698014.0, "step": 5431 }, { "epoch": 1.9900609169605643, "grad_norm": 0.12381366747508846, "learning_rate": 2.8666727920104818e-05, "loss": 0.4969, "num_tokens": 4154514959.0, "step": 5432 }, { "epoch": 1.9904273347684698, "grad_norm": 0.14958898097899917, "learning_rate": 2.86627604299586e-05, "loss": 0.5144, "num_tokens": 4155262686.0, "step": 5433 }, { "epoch": 1.9907937525763753, "grad_norm": 0.1567564896378536, "learning_rate": 2.8658792564685694e-05, "loss": 0.5177, "num_tokens": 4155861376.0, "step": 5434 }, { "epoch": 1.9911601703842807, "grad_norm": 0.1349420664802927, "learning_rate": 2.8654824324509487e-05, "loss": 0.5047, "num_tokens": 4156569298.0, "step": 5435 }, { "epoch": 1.991526588192186, "grad_norm": 0.13833590709653146, "learning_rate": 2.8650855709653406e-05, "loss": 0.5021, "num_tokens": 4157335167.0, "step": 5436 }, { "epoch": 1.9918930060000917, "grad_norm": 0.1439060920462037, "learning_rate": 2.8646886720340888e-05, "loss": 0.5052, "num_tokens": 4158061077.0, "step": 5437 }, { "epoch": 1.992259423807997, "grad_norm": 0.1338752032436143, "learning_rate": 2.86429173567954e-05, "loss": 0.4918, "num_tokens": 4158783732.0, "step": 5438 }, { "epoch": 1.9926258416159026, "grad_norm": 0.14614868871061176, "learning_rate": 2.863894761924041e-05, "loss": 0.5165, "num_tokens": 4159496962.0, "step": 5439 }, { "epoch": 1.9929922594238079, "grad_norm": 0.14334854942986966, "learning_rate": 2.8634977507899444e-05, "loss": 0.4807, "num_tokens": 4160359568.0, "step": 5440 }, { "epoch": 1.9933586772317136, "grad_norm": 0.12996498113477162, "learning_rate": 2.863100702299602e-05, "loss": 0.5095, "num_tokens": 4161121846.0, "step": 5441 }, { "epoch": 1.9937250950396188, "grad_norm": 0.13612882580679517, "learning_rate": 2.8627036164753672e-05, "loss": 0.5289, "num_tokens": 4161965966.0, "step": 5442 }, { "epoch": 1.9940915128475245, "grad_norm": 0.1402324718345269, "learning_rate": 2.8623064933395978e-05, "loss": 0.4948, "num_tokens": 4162809262.0, "step": 5443 }, { "epoch": 1.9944579306554298, "grad_norm": 0.1397250662064698, "learning_rate": 2.8619093329146527e-05, "loss": 0.4559, "num_tokens": 4163768786.0, "step": 5444 }, { "epoch": 1.9948243484633355, "grad_norm": 0.1285823973897267, "learning_rate": 2.8615121352228924e-05, "loss": 0.4782, "num_tokens": 4164513825.0, "step": 5445 }, { "epoch": 1.9951907662712407, "grad_norm": 0.12840944302839608, "learning_rate": 2.861114900286681e-05, "loss": 0.4744, "num_tokens": 4165228483.0, "step": 5446 }, { "epoch": 1.9955571840791464, "grad_norm": 0.14655263913293917, "learning_rate": 2.8607176281283813e-05, "loss": 0.5177, "num_tokens": 4165984443.0, "step": 5447 }, { "epoch": 1.9959236018870516, "grad_norm": 0.15114114387363503, "learning_rate": 2.8603203187703625e-05, "loss": 0.5146, "num_tokens": 4166850379.0, "step": 5448 }, { "epoch": 1.9962900196949573, "grad_norm": 0.14939443618787565, "learning_rate": 2.8599229722349933e-05, "loss": 0.5245, "num_tokens": 4167498809.0, "step": 5449 }, { "epoch": 1.9966564375028626, "grad_norm": 0.13594915614571346, "learning_rate": 2.8595255885446448e-05, "loss": 0.5192, "num_tokens": 4168370401.0, "step": 5450 }, { "epoch": 1.997022855310768, "grad_norm": 0.13912927877660514, "learning_rate": 2.8591281677216915e-05, "loss": 0.5265, "num_tokens": 4169086628.0, "step": 5451 }, { "epoch": 1.9973892731186735, "grad_norm": 0.14908690275483769, "learning_rate": 2.858730709788508e-05, "loss": 0.522, "num_tokens": 4169812172.0, "step": 5452 }, { "epoch": 1.997755690926579, "grad_norm": 0.12566357280427815, "learning_rate": 2.8583332147674725e-05, "loss": 0.5048, "num_tokens": 4170592579.0, "step": 5453 }, { "epoch": 1.9981221087344845, "grad_norm": 0.1431641734435086, "learning_rate": 2.857935682680964e-05, "loss": 0.4975, "num_tokens": 4171489051.0, "step": 5454 }, { "epoch": 1.99848852654239, "grad_norm": 0.13026094119115644, "learning_rate": 2.8575381135513653e-05, "loss": 0.5006, "num_tokens": 4172387190.0, "step": 5455 }, { "epoch": 1.9988549443502954, "grad_norm": 0.14599503969953867, "learning_rate": 2.8571405074010604e-05, "loss": 0.4902, "num_tokens": 4173142497.0, "step": 5456 }, { "epoch": 1.999221362158201, "grad_norm": 0.12199291254899702, "learning_rate": 2.856742864252434e-05, "loss": 0.4818, "num_tokens": 4173905013.0, "step": 5457 }, { "epoch": 1.9995877799661064, "grad_norm": 0.15018159304292053, "learning_rate": 2.8563451841278752e-05, "loss": 0.4723, "num_tokens": 4174744046.0, "step": 5458 }, { "epoch": 1.9999541977740118, "grad_norm": 0.1524339786562733, "learning_rate": 2.8559474670497748e-05, "loss": 0.5175, "num_tokens": 4175589383.0, "step": 5459 }, { "epoch": 2.0, "grad_norm": 0.1524339786562733, "learning_rate": 2.855549713040523e-05, "loss": 0.4527, "num_tokens": 4175657705.0, "step": 5460 }, { "epoch": 2.0003664178079053, "grad_norm": 0.18538068936462782, "learning_rate": 2.8551519221225166e-05, "loss": 0.4493, "num_tokens": 614522.0, "step": 5461 }, { "epoch": 2.000732835615811, "grad_norm": 0.17563961267346992, "learning_rate": 2.8547540943181504e-05, "loss": 0.4252, "num_tokens": 1332617.0, "step": 5462 }, { "epoch": 2.001099253423716, "grad_norm": 0.14668060898915253, "learning_rate": 2.8543562296498233e-05, "loss": 0.4499, "num_tokens": 2160116.0, "step": 5463 }, { "epoch": 2.001465671231622, "grad_norm": 0.18597383785202473, "learning_rate": 2.853958328139936e-05, "loss": 0.4664, "num_tokens": 2929085.0, "step": 5464 }, { "epoch": 2.001832089039527, "grad_norm": 0.15714108341273758, "learning_rate": 2.8535603898108912e-05, "loss": 0.4244, "num_tokens": 3596406.0, "step": 5465 }, { "epoch": 2.002198506847433, "grad_norm": 0.16888203866987336, "learning_rate": 2.8531624146850934e-05, "loss": 0.4501, "num_tokens": 4270502.0, "step": 5466 }, { "epoch": 2.002564924655338, "grad_norm": 0.159531758781016, "learning_rate": 2.8527644027849496e-05, "loss": 0.4476, "num_tokens": 5086811.0, "step": 5467 }, { "epoch": 2.002931342463244, "grad_norm": 0.15238493645848694, "learning_rate": 2.8523663541328685e-05, "loss": 0.4436, "num_tokens": 5840853.0, "step": 5468 }, { "epoch": 2.003297760271149, "grad_norm": 0.15799100001055766, "learning_rate": 2.8519682687512613e-05, "loss": 0.458, "num_tokens": 6601702.0, "step": 5469 }, { "epoch": 2.0036641780790547, "grad_norm": 0.1588274121856915, "learning_rate": 2.8515701466625407e-05, "loss": 0.4409, "num_tokens": 7398050.0, "step": 5470 }, { "epoch": 2.00403059588696, "grad_norm": 0.134829562632704, "learning_rate": 2.8511719878891217e-05, "loss": 0.4512, "num_tokens": 8352003.0, "step": 5471 }, { "epoch": 2.0043970136948657, "grad_norm": 0.14875131548325943, "learning_rate": 2.8507737924534215e-05, "loss": 0.4292, "num_tokens": 9088665.0, "step": 5472 }, { "epoch": 2.004763431502771, "grad_norm": 0.16072962650053021, "learning_rate": 2.85037556037786e-05, "loss": 0.4758, "num_tokens": 9854516.0, "step": 5473 }, { "epoch": 2.0051298493106766, "grad_norm": 0.17067589518957815, "learning_rate": 2.8499772916848575e-05, "loss": 0.4491, "num_tokens": 10480477.0, "step": 5474 }, { "epoch": 2.005496267118582, "grad_norm": 0.14869458329124247, "learning_rate": 2.849578986396837e-05, "loss": 0.4542, "num_tokens": 11326765.0, "step": 5475 }, { "epoch": 2.0058626849264876, "grad_norm": 0.15078363346173948, "learning_rate": 2.8491806445362257e-05, "loss": 0.4504, "num_tokens": 12059991.0, "step": 5476 }, { "epoch": 2.006229102734393, "grad_norm": 0.16480147463583159, "learning_rate": 2.8487822661254494e-05, "loss": 0.4632, "num_tokens": 12869879.0, "step": 5477 }, { "epoch": 2.0065955205422985, "grad_norm": 0.15347387700210915, "learning_rate": 2.8483838511869376e-05, "loss": 0.4449, "num_tokens": 13617157.0, "step": 5478 }, { "epoch": 2.0069619383502038, "grad_norm": 0.15566792040018587, "learning_rate": 2.8479853997431233e-05, "loss": 0.4301, "num_tokens": 14431351.0, "step": 5479 }, { "epoch": 2.0073283561581095, "grad_norm": 0.15339474689353735, "learning_rate": 2.8475869118164384e-05, "loss": 0.4678, "num_tokens": 15171663.0, "step": 5480 }, { "epoch": 2.0076947739660147, "grad_norm": 0.16106246006361044, "learning_rate": 2.8471883874293196e-05, "loss": 0.458, "num_tokens": 15999830.0, "step": 5481 }, { "epoch": 2.0080611917739204, "grad_norm": 0.15404107806805617, "learning_rate": 2.8467898266042048e-05, "loss": 0.4405, "num_tokens": 16736086.0, "step": 5482 }, { "epoch": 2.0084276095818256, "grad_norm": 0.14803243873313152, "learning_rate": 2.8463912293635325e-05, "loss": 0.4463, "num_tokens": 17445002.0, "step": 5483 }, { "epoch": 2.0087940273897313, "grad_norm": 0.1345385698931718, "learning_rate": 2.845992595729746e-05, "loss": 0.4497, "num_tokens": 18212776.0, "step": 5484 }, { "epoch": 2.0091604451976366, "grad_norm": 0.14927812445330152, "learning_rate": 2.845593925725288e-05, "loss": 0.4495, "num_tokens": 18968123.0, "step": 5485 }, { "epoch": 2.0095268630055423, "grad_norm": 0.15183440248276092, "learning_rate": 2.845195219372605e-05, "loss": 0.4196, "num_tokens": 19758551.0, "step": 5486 }, { "epoch": 2.0098932808134475, "grad_norm": 0.14064859950890446, "learning_rate": 2.8447964766941453e-05, "loss": 0.4761, "num_tokens": 20536930.0, "step": 5487 }, { "epoch": 2.010259698621353, "grad_norm": 0.14741433558371128, "learning_rate": 2.8443976977123583e-05, "loss": 0.4151, "num_tokens": 21263824.0, "step": 5488 }, { "epoch": 2.0106261164292585, "grad_norm": 0.1827078183319223, "learning_rate": 2.8439988824496957e-05, "loss": 0.455, "num_tokens": 21943538.0, "step": 5489 }, { "epoch": 2.0109925342371637, "grad_norm": 0.1334108803031687, "learning_rate": 2.8436000309286132e-05, "loss": 0.4358, "num_tokens": 22676803.0, "step": 5490 }, { "epoch": 2.0113589520450694, "grad_norm": 0.16268707490236925, "learning_rate": 2.8432011431715653e-05, "loss": 0.4628, "num_tokens": 23315273.0, "step": 5491 }, { "epoch": 2.0117253698529747, "grad_norm": 0.1509964002745733, "learning_rate": 2.8428022192010104e-05, "loss": 0.4429, "num_tokens": 24125602.0, "step": 5492 }, { "epoch": 2.0120917876608804, "grad_norm": 0.14581281978888977, "learning_rate": 2.8424032590394096e-05, "loss": 0.4624, "num_tokens": 24884258.0, "step": 5493 }, { "epoch": 2.0124582054687856, "grad_norm": 0.13472681192545097, "learning_rate": 2.842004262709224e-05, "loss": 0.4087, "num_tokens": 25596477.0, "step": 5494 }, { "epoch": 2.0128246232766913, "grad_norm": 0.1523940105576381, "learning_rate": 2.8416052302329192e-05, "loss": 0.47, "num_tokens": 26322588.0, "step": 5495 }, { "epoch": 2.0131910410845966, "grad_norm": 0.1350787211378423, "learning_rate": 2.8412061616329608e-05, "loss": 0.4464, "num_tokens": 27177234.0, "step": 5496 }, { "epoch": 2.0135574588925023, "grad_norm": 0.13034274298271417, "learning_rate": 2.8408070569318167e-05, "loss": 0.4189, "num_tokens": 28121054.0, "step": 5497 }, { "epoch": 2.0139238767004075, "grad_norm": 0.1423187812253292, "learning_rate": 2.8404079161519575e-05, "loss": 0.4464, "num_tokens": 28852105.0, "step": 5498 }, { "epoch": 2.014290294508313, "grad_norm": 0.14254017721002138, "learning_rate": 2.840008739315857e-05, "loss": 0.4562, "num_tokens": 29540837.0, "step": 5499 }, { "epoch": 2.0146567123162185, "grad_norm": 0.15335252871487218, "learning_rate": 2.839609526445988e-05, "loss": 0.467, "num_tokens": 30243550.0, "step": 5500 }, { "epoch": 2.015023130124124, "grad_norm": 0.13100709110887757, "learning_rate": 2.8392102775648274e-05, "loss": 0.4254, "num_tokens": 31103839.0, "step": 5501 }, { "epoch": 2.0153895479320294, "grad_norm": 0.13086150371474412, "learning_rate": 2.838810992694854e-05, "loss": 0.4531, "num_tokens": 31979168.0, "step": 5502 }, { "epoch": 2.015755965739935, "grad_norm": 0.13137389205022534, "learning_rate": 2.8384116718585478e-05, "loss": 0.4284, "num_tokens": 32888259.0, "step": 5503 }, { "epoch": 2.0161223835478403, "grad_norm": 0.14310308888693898, "learning_rate": 2.8380123150783925e-05, "loss": 0.4797, "num_tokens": 33640836.0, "step": 5504 }, { "epoch": 2.016488801355746, "grad_norm": 0.12578042685948418, "learning_rate": 2.837612922376871e-05, "loss": 0.4482, "num_tokens": 34465322.0, "step": 5505 }, { "epoch": 2.0168552191636513, "grad_norm": 0.14849964408592553, "learning_rate": 2.8372134937764713e-05, "loss": 0.4324, "num_tokens": 35235621.0, "step": 5506 }, { "epoch": 2.017221636971557, "grad_norm": 0.14245740701818493, "learning_rate": 2.8368140292996814e-05, "loss": 0.4701, "num_tokens": 36007465.0, "step": 5507 }, { "epoch": 2.0175880547794622, "grad_norm": 0.15780911797311337, "learning_rate": 2.8364145289689928e-05, "loss": 0.4534, "num_tokens": 36710706.0, "step": 5508 }, { "epoch": 2.017954472587368, "grad_norm": 0.14324946117004275, "learning_rate": 2.8360149928068966e-05, "loss": 0.4546, "num_tokens": 37541676.0, "step": 5509 }, { "epoch": 2.018320890395273, "grad_norm": 0.13244853972973233, "learning_rate": 2.8356154208358882e-05, "loss": 0.4559, "num_tokens": 38358279.0, "step": 5510 }, { "epoch": 2.018687308203179, "grad_norm": 0.1281333363810615, "learning_rate": 2.835215813078465e-05, "loss": 0.4259, "num_tokens": 39182979.0, "step": 5511 }, { "epoch": 2.019053726011084, "grad_norm": 0.1408523557779074, "learning_rate": 2.834816169557125e-05, "loss": 0.4192, "num_tokens": 39950625.0, "step": 5512 }, { "epoch": 2.01942014381899, "grad_norm": 0.15132024587898285, "learning_rate": 2.8344164902943688e-05, "loss": 0.4511, "num_tokens": 40659709.0, "step": 5513 }, { "epoch": 2.019786561626895, "grad_norm": 0.1501716496301736, "learning_rate": 2.8340167753127002e-05, "loss": 0.4482, "num_tokens": 41375502.0, "step": 5514 }, { "epoch": 2.0201529794348003, "grad_norm": 0.13754722336936995, "learning_rate": 2.8336170246346224e-05, "loss": 0.4443, "num_tokens": 42095744.0, "step": 5515 }, { "epoch": 2.020519397242706, "grad_norm": 0.13001317449714062, "learning_rate": 2.833217238282643e-05, "loss": 0.4368, "num_tokens": 42966929.0, "step": 5516 }, { "epoch": 2.0208858150506113, "grad_norm": 0.14363223119226382, "learning_rate": 2.8328174162792712e-05, "loss": 0.4652, "num_tokens": 43704991.0, "step": 5517 }, { "epoch": 2.021252232858517, "grad_norm": 0.13044039809772132, "learning_rate": 2.8324175586470174e-05, "loss": 0.4437, "num_tokens": 44519072.0, "step": 5518 }, { "epoch": 2.021618650666422, "grad_norm": 0.14415275707842495, "learning_rate": 2.8320176654083925e-05, "loss": 0.4799, "num_tokens": 45256800.0, "step": 5519 }, { "epoch": 2.021985068474328, "grad_norm": 0.13965206232970317, "learning_rate": 2.8316177365859146e-05, "loss": 0.4408, "num_tokens": 46019050.0, "step": 5520 }, { "epoch": 2.022351486282233, "grad_norm": 0.1312061885285007, "learning_rate": 2.8312177722020995e-05, "loss": 0.4391, "num_tokens": 46777029.0, "step": 5521 }, { "epoch": 2.022717904090139, "grad_norm": 0.16319090897014205, "learning_rate": 2.8308177722794642e-05, "loss": 0.4628, "num_tokens": 47580155.0, "step": 5522 }, { "epoch": 2.023084321898044, "grad_norm": 0.1365761901606736, "learning_rate": 2.830417736840531e-05, "loss": 0.4405, "num_tokens": 48332781.0, "step": 5523 }, { "epoch": 2.02345073970595, "grad_norm": 0.13312217475877225, "learning_rate": 2.8300176659078232e-05, "loss": 0.4373, "num_tokens": 49073763.0, "step": 5524 }, { "epoch": 2.023817157513855, "grad_norm": 0.1518233732839463, "learning_rate": 2.8296175595038636e-05, "loss": 0.4783, "num_tokens": 49813124.0, "step": 5525 }, { "epoch": 2.0241835753217607, "grad_norm": 0.135124905259707, "learning_rate": 2.8292174176511806e-05, "loss": 0.4434, "num_tokens": 50600763.0, "step": 5526 }, { "epoch": 2.024549993129666, "grad_norm": 0.1361055147926536, "learning_rate": 2.8288172403723026e-05, "loss": 0.4457, "num_tokens": 51488144.0, "step": 5527 }, { "epoch": 2.0249164109375717, "grad_norm": 0.14992973739710205, "learning_rate": 2.828417027689761e-05, "loss": 0.4187, "num_tokens": 52207190.0, "step": 5528 }, { "epoch": 2.025282828745477, "grad_norm": 0.1277939153511566, "learning_rate": 2.8280167796260868e-05, "loss": 0.4637, "num_tokens": 52993268.0, "step": 5529 }, { "epoch": 2.0256492465533826, "grad_norm": 0.14177819391450758, "learning_rate": 2.8276164962038165e-05, "loss": 0.4473, "num_tokens": 53676875.0, "step": 5530 }, { "epoch": 2.026015664361288, "grad_norm": 0.14536289943360067, "learning_rate": 2.8272161774454865e-05, "loss": 0.4881, "num_tokens": 54363793.0, "step": 5531 }, { "epoch": 2.0263820821691936, "grad_norm": 0.15103153519444976, "learning_rate": 2.8268158233736346e-05, "loss": 0.4709, "num_tokens": 55116887.0, "step": 5532 }, { "epoch": 2.026748499977099, "grad_norm": 0.13969942085604375, "learning_rate": 2.8264154340108025e-05, "loss": 0.4555, "num_tokens": 55848707.0, "step": 5533 }, { "epoch": 2.0271149177850045, "grad_norm": 0.1403780030014593, "learning_rate": 2.826015009379533e-05, "loss": 0.4628, "num_tokens": 56602343.0, "step": 5534 }, { "epoch": 2.0274813355929098, "grad_norm": 0.1463323382760307, "learning_rate": 2.8256145495023705e-05, "loss": 0.4256, "num_tokens": 57275596.0, "step": 5535 }, { "epoch": 2.0278477534008155, "grad_norm": 0.1618465000708808, "learning_rate": 2.825214054401861e-05, "loss": 0.4796, "num_tokens": 58060348.0, "step": 5536 }, { "epoch": 2.0282141712087207, "grad_norm": 0.14406811564608094, "learning_rate": 2.824813524100554e-05, "loss": 0.4763, "num_tokens": 58872913.0, "step": 5537 }, { "epoch": 2.0285805890166264, "grad_norm": 0.15988138522884865, "learning_rate": 2.8244129586210004e-05, "loss": 0.4517, "num_tokens": 59707473.0, "step": 5538 }, { "epoch": 2.0289470068245317, "grad_norm": 0.1519706498579329, "learning_rate": 2.8240123579857523e-05, "loss": 0.4534, "num_tokens": 60435867.0, "step": 5539 }, { "epoch": 2.029313424632437, "grad_norm": 0.14684351464276327, "learning_rate": 2.823611722217364e-05, "loss": 0.4639, "num_tokens": 61288555.0, "step": 5540 }, { "epoch": 2.0296798424403426, "grad_norm": 0.1489373808566478, "learning_rate": 2.8232110513383923e-05, "loss": 0.4603, "num_tokens": 61988230.0, "step": 5541 }, { "epoch": 2.030046260248248, "grad_norm": 0.13860322352151933, "learning_rate": 2.8228103453713967e-05, "loss": 0.4403, "num_tokens": 62779439.0, "step": 5542 }, { "epoch": 2.0304126780561536, "grad_norm": 0.15361448983609866, "learning_rate": 2.822409604338937e-05, "loss": 0.4587, "num_tokens": 63473368.0, "step": 5543 }, { "epoch": 2.030779095864059, "grad_norm": 0.14334980132075573, "learning_rate": 2.8220088282635746e-05, "loss": 0.4465, "num_tokens": 64199755.0, "step": 5544 }, { "epoch": 2.0311455136719645, "grad_norm": 0.15982641339777778, "learning_rate": 2.8216080171678767e-05, "loss": 0.4692, "num_tokens": 64889490.0, "step": 5545 }, { "epoch": 2.0315119314798697, "grad_norm": 0.15600607763757465, "learning_rate": 2.821207171074407e-05, "loss": 0.4435, "num_tokens": 65765566.0, "step": 5546 }, { "epoch": 2.0318783492877754, "grad_norm": 0.14283122185527844, "learning_rate": 2.8208062900057353e-05, "loss": 0.4456, "num_tokens": 66456213.0, "step": 5547 }, { "epoch": 2.0322447670956807, "grad_norm": 0.1511962898957986, "learning_rate": 2.8204053739844327e-05, "loss": 0.4597, "num_tokens": 67286001.0, "step": 5548 }, { "epoch": 2.0326111849035864, "grad_norm": 0.15265899738675603, "learning_rate": 2.8200044230330696e-05, "loss": 0.4577, "num_tokens": 68094586.0, "step": 5549 }, { "epoch": 2.0329776027114916, "grad_norm": 0.13766775704645495, "learning_rate": 2.8196034371742217e-05, "loss": 0.4501, "num_tokens": 68894809.0, "step": 5550 }, { "epoch": 2.0333440205193973, "grad_norm": 0.1637219595526077, "learning_rate": 2.8192024164304655e-05, "loss": 0.4515, "num_tokens": 69769261.0, "step": 5551 }, { "epoch": 2.0337104383273026, "grad_norm": 0.14207852487597186, "learning_rate": 2.8188013608243786e-05, "loss": 0.468, "num_tokens": 70540114.0, "step": 5552 }, { "epoch": 2.0340768561352083, "grad_norm": 0.1468636749141017, "learning_rate": 2.8184002703785417e-05, "loss": 0.4687, "num_tokens": 71332501.0, "step": 5553 }, { "epoch": 2.0344432739431135, "grad_norm": 0.15286424892689607, "learning_rate": 2.8179991451155362e-05, "loss": 0.4366, "num_tokens": 72212543.0, "step": 5554 }, { "epoch": 2.0348096917510192, "grad_norm": 0.133649905293226, "learning_rate": 2.8175979850579466e-05, "loss": 0.4512, "num_tokens": 73134909.0, "step": 5555 }, { "epoch": 2.0351761095589245, "grad_norm": 0.14366172539457855, "learning_rate": 2.81719679022836e-05, "loss": 0.4576, "num_tokens": 73851414.0, "step": 5556 }, { "epoch": 2.03554252736683, "grad_norm": 0.15217611160351674, "learning_rate": 2.8167955606493634e-05, "loss": 0.4517, "num_tokens": 74665621.0, "step": 5557 }, { "epoch": 2.0359089451747354, "grad_norm": 0.14672137828047824, "learning_rate": 2.8163942963435465e-05, "loss": 0.4672, "num_tokens": 75296670.0, "step": 5558 }, { "epoch": 2.036275362982641, "grad_norm": 0.15441132554509965, "learning_rate": 2.8159929973335027e-05, "loss": 0.4582, "num_tokens": 75997020.0, "step": 5559 }, { "epoch": 2.0366417807905464, "grad_norm": 0.16060220923500232, "learning_rate": 2.815591663641825e-05, "loss": 0.4767, "num_tokens": 76793093.0, "step": 5560 }, { "epoch": 2.037008198598452, "grad_norm": 0.1335706911641283, "learning_rate": 2.8151902952911086e-05, "loss": 0.4157, "num_tokens": 77508139.0, "step": 5561 }, { "epoch": 2.0373746164063573, "grad_norm": 0.1472524383568003, "learning_rate": 2.8147888923039527e-05, "loss": 0.4366, "num_tokens": 78251644.0, "step": 5562 }, { "epoch": 2.037741034214263, "grad_norm": 0.16645710505048394, "learning_rate": 2.8143874547029563e-05, "loss": 0.4709, "num_tokens": 78961021.0, "step": 5563 }, { "epoch": 2.0381074520221683, "grad_norm": 0.155947043656859, "learning_rate": 2.813985982510721e-05, "loss": 0.4417, "num_tokens": 79680094.0, "step": 5564 }, { "epoch": 2.038473869830074, "grad_norm": 0.13241351937531298, "learning_rate": 2.8135844757498522e-05, "loss": 0.425, "num_tokens": 80561344.0, "step": 5565 }, { "epoch": 2.038840287637979, "grad_norm": 0.14304128146864267, "learning_rate": 2.8131829344429532e-05, "loss": 0.4752, "num_tokens": 81333351.0, "step": 5566 }, { "epoch": 2.039206705445885, "grad_norm": 0.14544853916388809, "learning_rate": 2.8127813586126326e-05, "loss": 0.4555, "num_tokens": 82079418.0, "step": 5567 }, { "epoch": 2.03957312325379, "grad_norm": 0.15345140325244855, "learning_rate": 2.8123797482815002e-05, "loss": 0.439, "num_tokens": 82775791.0, "step": 5568 }, { "epoch": 2.0399395410616954, "grad_norm": 0.1751646435595468, "learning_rate": 2.8119781034721665e-05, "loss": 0.5022, "num_tokens": 83565219.0, "step": 5569 }, { "epoch": 2.040305958869601, "grad_norm": 0.13645063083667952, "learning_rate": 2.8115764242072456e-05, "loss": 0.4371, "num_tokens": 84293682.0, "step": 5570 }, { "epoch": 2.0406723766775063, "grad_norm": 0.13688961099479885, "learning_rate": 2.8111747105093535e-05, "loss": 0.4568, "num_tokens": 85020446.0, "step": 5571 }, { "epoch": 2.041038794485412, "grad_norm": 0.17261441465265157, "learning_rate": 2.8107729624011057e-05, "loss": 0.4731, "num_tokens": 85617217.0, "step": 5572 }, { "epoch": 2.0414052122933173, "grad_norm": 0.14820433711962938, "learning_rate": 2.8103711799051238e-05, "loss": 0.4394, "num_tokens": 86400338.0, "step": 5573 }, { "epoch": 2.041771630101223, "grad_norm": 0.13052465659936194, "learning_rate": 2.8099693630440262e-05, "loss": 0.4382, "num_tokens": 87181325.0, "step": 5574 }, { "epoch": 2.0421380479091282, "grad_norm": 0.14512484886014543, "learning_rate": 2.8095675118404385e-05, "loss": 0.435, "num_tokens": 87895584.0, "step": 5575 }, { "epoch": 2.042504465717034, "grad_norm": 0.13927168452709235, "learning_rate": 2.8091656263169844e-05, "loss": 0.461, "num_tokens": 88754274.0, "step": 5576 }, { "epoch": 2.042870883524939, "grad_norm": 0.1390392249992615, "learning_rate": 2.808763706496291e-05, "loss": 0.444, "num_tokens": 89557047.0, "step": 5577 }, { "epoch": 2.043237301332845, "grad_norm": 0.13462776127330703, "learning_rate": 2.8083617524009872e-05, "loss": 0.413, "num_tokens": 90291272.0, "step": 5578 }, { "epoch": 2.04360371914075, "grad_norm": 0.1360022505045892, "learning_rate": 2.807959764053704e-05, "loss": 0.4332, "num_tokens": 91066463.0, "step": 5579 }, { "epoch": 2.043970136948656, "grad_norm": 0.15540366139569847, "learning_rate": 2.8075577414770743e-05, "loss": 0.4693, "num_tokens": 91683531.0, "step": 5580 }, { "epoch": 2.044336554756561, "grad_norm": 0.13203093341585662, "learning_rate": 2.807155684693732e-05, "loss": 0.4222, "num_tokens": 92426216.0, "step": 5581 }, { "epoch": 2.0447029725644668, "grad_norm": 0.13184944301752152, "learning_rate": 2.8067535937263146e-05, "loss": 0.4538, "num_tokens": 93316054.0, "step": 5582 }, { "epoch": 2.045069390372372, "grad_norm": 0.14600075643753269, "learning_rate": 2.8063514685974594e-05, "loss": 0.4523, "num_tokens": 94051124.0, "step": 5583 }, { "epoch": 2.0454358081802777, "grad_norm": 0.13447339662918587, "learning_rate": 2.8059493093298086e-05, "loss": 0.4298, "num_tokens": 94751777.0, "step": 5584 }, { "epoch": 2.045802225988183, "grad_norm": 0.13220916324879525, "learning_rate": 2.8055471159460034e-05, "loss": 0.4671, "num_tokens": 95532184.0, "step": 5585 }, { "epoch": 2.0461686437960886, "grad_norm": 0.1497226602473125, "learning_rate": 2.805144888468689e-05, "loss": 0.4758, "num_tokens": 96349101.0, "step": 5586 }, { "epoch": 2.046535061603994, "grad_norm": 0.13561666548724075, "learning_rate": 2.8047426269205102e-05, "loss": 0.4354, "num_tokens": 97114085.0, "step": 5587 }, { "epoch": 2.0469014794118996, "grad_norm": 0.14203288386412766, "learning_rate": 2.8043403313241154e-05, "loss": 0.4538, "num_tokens": 97850034.0, "step": 5588 }, { "epoch": 2.047267897219805, "grad_norm": 0.13692882306663962, "learning_rate": 2.8039380017021564e-05, "loss": 0.4629, "num_tokens": 98697809.0, "step": 5589 }, { "epoch": 2.0476343150277105, "grad_norm": 0.14690117044366674, "learning_rate": 2.803535638077283e-05, "loss": 0.4591, "num_tokens": 99437053.0, "step": 5590 }, { "epoch": 2.048000732835616, "grad_norm": 0.13172133504244818, "learning_rate": 2.8031332404721498e-05, "loss": 0.4391, "num_tokens": 100285281.0, "step": 5591 }, { "epoch": 2.0483671506435215, "grad_norm": 0.138423698911418, "learning_rate": 2.8027308089094138e-05, "loss": 0.4379, "num_tokens": 101065885.0, "step": 5592 }, { "epoch": 2.0487335684514267, "grad_norm": 0.1373492420998931, "learning_rate": 2.802328343411731e-05, "loss": 0.4401, "num_tokens": 101820015.0, "step": 5593 }, { "epoch": 2.049099986259332, "grad_norm": 0.1407581518293035, "learning_rate": 2.8019258440017614e-05, "loss": 0.4515, "num_tokens": 102597101.0, "step": 5594 }, { "epoch": 2.0494664040672377, "grad_norm": 0.14594312012329594, "learning_rate": 2.8015233107021674e-05, "loss": 0.4674, "num_tokens": 103258027.0, "step": 5595 }, { "epoch": 2.049832821875143, "grad_norm": 0.13872244020280694, "learning_rate": 2.801120743535611e-05, "loss": 0.4312, "num_tokens": 103989162.0, "step": 5596 }, { "epoch": 2.0501992396830486, "grad_norm": 0.14809731919704788, "learning_rate": 2.8007181425247592e-05, "loss": 0.4876, "num_tokens": 104757128.0, "step": 5597 }, { "epoch": 2.050565657490954, "grad_norm": 0.16466678799162732, "learning_rate": 2.8003155076922775e-05, "loss": 0.4922, "num_tokens": 105368408.0, "step": 5598 }, { "epoch": 2.0509320752988596, "grad_norm": 0.14316761288821173, "learning_rate": 2.7999128390608367e-05, "loss": 0.4416, "num_tokens": 106054795.0, "step": 5599 }, { "epoch": 2.051298493106765, "grad_norm": 0.1327833793917164, "learning_rate": 2.799510136653107e-05, "loss": 0.4237, "num_tokens": 106926463.0, "step": 5600 }, { "epoch": 2.0516649109146705, "grad_norm": 0.14936567295718872, "learning_rate": 2.7991074004917608e-05, "loss": 0.4553, "num_tokens": 107822979.0, "step": 5601 }, { "epoch": 2.0520313287225758, "grad_norm": 0.14284515377025075, "learning_rate": 2.7987046305994736e-05, "loss": 0.465, "num_tokens": 108587936.0, "step": 5602 }, { "epoch": 2.0523977465304815, "grad_norm": 0.13257223912518876, "learning_rate": 2.7983018269989233e-05, "loss": 0.4527, "num_tokens": 109386063.0, "step": 5603 }, { "epoch": 2.0527641643383867, "grad_norm": 0.1427027607369049, "learning_rate": 2.7978989897127866e-05, "loss": 0.4584, "num_tokens": 110171515.0, "step": 5604 }, { "epoch": 2.0531305821462924, "grad_norm": 0.1485102239640865, "learning_rate": 2.797496118763744e-05, "loss": 0.4495, "num_tokens": 110930284.0, "step": 5605 }, { "epoch": 2.0534969999541977, "grad_norm": 0.13748005842357688, "learning_rate": 2.79709321417448e-05, "loss": 0.4353, "num_tokens": 111642355.0, "step": 5606 }, { "epoch": 2.0538634177621033, "grad_norm": 0.18596217939722406, "learning_rate": 2.7966902759676773e-05, "loss": 0.4751, "num_tokens": 112350619.0, "step": 5607 }, { "epoch": 2.0542298355700086, "grad_norm": 0.1503825193689242, "learning_rate": 2.796287304166022e-05, "loss": 0.4489, "num_tokens": 113150454.0, "step": 5608 }, { "epoch": 2.0545962533779143, "grad_norm": 0.13889711154701462, "learning_rate": 2.7958842987922027e-05, "loss": 0.4725, "num_tokens": 113864549.0, "step": 5609 }, { "epoch": 2.0549626711858195, "grad_norm": 0.13269977420533488, "learning_rate": 2.79548125986891e-05, "loss": 0.4571, "num_tokens": 114630024.0, "step": 5610 }, { "epoch": 2.0553290889937252, "grad_norm": 0.15237185875776718, "learning_rate": 2.795078187418835e-05, "loss": 0.433, "num_tokens": 115303684.0, "step": 5611 }, { "epoch": 2.0556955068016305, "grad_norm": 0.15436197796636605, "learning_rate": 2.794675081464672e-05, "loss": 0.4223, "num_tokens": 115910816.0, "step": 5612 }, { "epoch": 2.056061924609536, "grad_norm": 0.1331967882638338, "learning_rate": 2.7942719420291156e-05, "loss": 0.4553, "num_tokens": 116731797.0, "step": 5613 }, { "epoch": 2.0564283424174414, "grad_norm": 0.13642298398661132, "learning_rate": 2.7938687691348646e-05, "loss": 0.4728, "num_tokens": 117604021.0, "step": 5614 }, { "epoch": 2.056794760225347, "grad_norm": 0.13524720630452283, "learning_rate": 2.793465562804618e-05, "loss": 0.4475, "num_tokens": 118453591.0, "step": 5615 }, { "epoch": 2.0571611780332524, "grad_norm": 0.144014791833894, "learning_rate": 2.7930623230610767e-05, "loss": 0.4955, "num_tokens": 119250105.0, "step": 5616 }, { "epoch": 2.057527595841158, "grad_norm": 0.14120227668250343, "learning_rate": 2.792659049926945e-05, "loss": 0.4286, "num_tokens": 120065743.0, "step": 5617 }, { "epoch": 2.0578940136490633, "grad_norm": 0.1398213147817811, "learning_rate": 2.792255743424927e-05, "loss": 0.442, "num_tokens": 120801291.0, "step": 5618 }, { "epoch": 2.058260431456969, "grad_norm": 0.1304050786174317, "learning_rate": 2.7918524035777296e-05, "loss": 0.4111, "num_tokens": 121623747.0, "step": 5619 }, { "epoch": 2.0586268492648743, "grad_norm": 0.1349787114377164, "learning_rate": 2.791449030408062e-05, "loss": 0.458, "num_tokens": 122411831.0, "step": 5620 }, { "epoch": 2.0589932670727795, "grad_norm": 0.155307800508554, "learning_rate": 2.7910456239386347e-05, "loss": 0.4805, "num_tokens": 123100544.0, "step": 5621 }, { "epoch": 2.059359684880685, "grad_norm": 0.13307602013989617, "learning_rate": 2.790642184192161e-05, "loss": 0.4195, "num_tokens": 123978637.0, "step": 5622 }, { "epoch": 2.0597261026885905, "grad_norm": 0.14958003445986776, "learning_rate": 2.7902387111913544e-05, "loss": 0.4897, "num_tokens": 124754022.0, "step": 5623 }, { "epoch": 2.060092520496496, "grad_norm": 0.1334480976882595, "learning_rate": 2.789835204958932e-05, "loss": 0.4381, "num_tokens": 125568546.0, "step": 5624 }, { "epoch": 2.0604589383044014, "grad_norm": 0.13730107507966238, "learning_rate": 2.7894316655176114e-05, "loss": 0.4342, "num_tokens": 126361028.0, "step": 5625 }, { "epoch": 2.060825356112307, "grad_norm": 0.13038332578735864, "learning_rate": 2.789028092890113e-05, "loss": 0.4707, "num_tokens": 127196126.0, "step": 5626 }, { "epoch": 2.0611917739202124, "grad_norm": 0.1404961837981512, "learning_rate": 2.788624487099159e-05, "loss": 0.4666, "num_tokens": 127929676.0, "step": 5627 }, { "epoch": 2.061558191728118, "grad_norm": 0.14451195771144523, "learning_rate": 2.7882208481674725e-05, "loss": 0.4893, "num_tokens": 128601183.0, "step": 5628 }, { "epoch": 2.0619246095360233, "grad_norm": 0.13963464828561467, "learning_rate": 2.7878171761177797e-05, "loss": 0.4347, "num_tokens": 129422875.0, "step": 5629 }, { "epoch": 2.062291027343929, "grad_norm": 0.1350769543226984, "learning_rate": 2.787413470972808e-05, "loss": 0.4319, "num_tokens": 130227964.0, "step": 5630 }, { "epoch": 2.0626574451518342, "grad_norm": 0.1406198677318193, "learning_rate": 2.7870097327552866e-05, "loss": 0.4955, "num_tokens": 130946672.0, "step": 5631 }, { "epoch": 2.06302386295974, "grad_norm": 0.14053222316682273, "learning_rate": 2.786605961487947e-05, "loss": 0.4547, "num_tokens": 131716090.0, "step": 5632 }, { "epoch": 2.063390280767645, "grad_norm": 0.14779774400030893, "learning_rate": 2.7862021571935226e-05, "loss": 0.4769, "num_tokens": 132387039.0, "step": 5633 }, { "epoch": 2.063756698575551, "grad_norm": 0.1284288129948424, "learning_rate": 2.785798319894748e-05, "loss": 0.432, "num_tokens": 133189934.0, "step": 5634 }, { "epoch": 2.064123116383456, "grad_norm": 0.12856526991022638, "learning_rate": 2.7853944496143597e-05, "loss": 0.4211, "num_tokens": 134048970.0, "step": 5635 }, { "epoch": 2.064489534191362, "grad_norm": 0.1486358714871635, "learning_rate": 2.7849905463750972e-05, "loss": 0.4709, "num_tokens": 134758605.0, "step": 5636 }, { "epoch": 2.064855951999267, "grad_norm": 0.13444316103736667, "learning_rate": 2.7845866101997008e-05, "loss": 0.4331, "num_tokens": 135528868.0, "step": 5637 }, { "epoch": 2.0652223698071728, "grad_norm": 0.1547322530720202, "learning_rate": 2.7841826411109124e-05, "loss": 0.4372, "num_tokens": 136153966.0, "step": 5638 }, { "epoch": 2.065588787615078, "grad_norm": 0.12588097282518132, "learning_rate": 2.7837786391314773e-05, "loss": 0.4291, "num_tokens": 137002294.0, "step": 5639 }, { "epoch": 2.0659552054229837, "grad_norm": 0.15936295275583656, "learning_rate": 2.783374604284141e-05, "loss": 0.4397, "num_tokens": 137686101.0, "step": 5640 }, { "epoch": 2.066321623230889, "grad_norm": 0.1457672165805537, "learning_rate": 2.782970536591651e-05, "loss": 0.4656, "num_tokens": 138518659.0, "step": 5641 }, { "epoch": 2.0666880410387947, "grad_norm": 0.1460328163551212, "learning_rate": 2.7825664360767577e-05, "loss": 0.4475, "num_tokens": 139231831.0, "step": 5642 }, { "epoch": 2.0670544588467, "grad_norm": 0.1543941725170023, "learning_rate": 2.7821623027622128e-05, "loss": 0.4628, "num_tokens": 140017946.0, "step": 5643 }, { "epoch": 2.0674208766546056, "grad_norm": 0.14152521114393685, "learning_rate": 2.7817581366707692e-05, "loss": 0.4509, "num_tokens": 140790159.0, "step": 5644 }, { "epoch": 2.067787294462511, "grad_norm": 0.1279666164716837, "learning_rate": 2.7813539378251833e-05, "loss": 0.4694, "num_tokens": 141599005.0, "step": 5645 }, { "epoch": 2.0681537122704166, "grad_norm": 0.14268060517499104, "learning_rate": 2.780949706248211e-05, "loss": 0.4525, "num_tokens": 142320415.0, "step": 5646 }, { "epoch": 2.068520130078322, "grad_norm": 0.12875116694989627, "learning_rate": 2.7805454419626127e-05, "loss": 0.4332, "num_tokens": 143117250.0, "step": 5647 }, { "epoch": 2.068886547886227, "grad_norm": 0.15093861391692903, "learning_rate": 2.7801411449911485e-05, "loss": 0.4796, "num_tokens": 143836692.0, "step": 5648 }, { "epoch": 2.0692529656941328, "grad_norm": 0.12681915086707718, "learning_rate": 2.7797368153565808e-05, "loss": 0.458, "num_tokens": 144664765.0, "step": 5649 }, { "epoch": 2.069619383502038, "grad_norm": 0.15129599877486083, "learning_rate": 2.7793324530816753e-05, "loss": 0.4666, "num_tokens": 145341423.0, "step": 5650 }, { "epoch": 2.0699858013099437, "grad_norm": 0.13498613449313368, "learning_rate": 2.7789280581891977e-05, "loss": 0.4149, "num_tokens": 146081104.0, "step": 5651 }, { "epoch": 2.070352219117849, "grad_norm": 0.15759101732244662, "learning_rate": 2.778523630701916e-05, "loss": 0.489, "num_tokens": 146916342.0, "step": 5652 }, { "epoch": 2.0707186369257546, "grad_norm": 0.1330151695671898, "learning_rate": 2.7781191706426e-05, "loss": 0.4449, "num_tokens": 147653314.0, "step": 5653 }, { "epoch": 2.07108505473366, "grad_norm": 0.15775897177464873, "learning_rate": 2.7777146780340234e-05, "loss": 0.4614, "num_tokens": 148376025.0, "step": 5654 }, { "epoch": 2.0714514725415656, "grad_norm": 0.1472211406675846, "learning_rate": 2.7773101528989578e-05, "loss": 0.4556, "num_tokens": 149205135.0, "step": 5655 }, { "epoch": 2.071817890349471, "grad_norm": 0.15007278156047893, "learning_rate": 2.7769055952601792e-05, "loss": 0.4991, "num_tokens": 149929680.0, "step": 5656 }, { "epoch": 2.0721843081573765, "grad_norm": 0.15928355121205512, "learning_rate": 2.776501005140466e-05, "loss": 0.4908, "num_tokens": 150577660.0, "step": 5657 }, { "epoch": 2.072550725965282, "grad_norm": 0.152466791923768, "learning_rate": 2.776096382562597e-05, "loss": 0.4596, "num_tokens": 151372174.0, "step": 5658 }, { "epoch": 2.0729171437731875, "grad_norm": 0.1297183884345867, "learning_rate": 2.775691727549354e-05, "loss": 0.4519, "num_tokens": 152243464.0, "step": 5659 }, { "epoch": 2.0732835615810927, "grad_norm": 0.1371117930782035, "learning_rate": 2.7752870401235178e-05, "loss": 0.4274, "num_tokens": 153011908.0, "step": 5660 }, { "epoch": 2.0736499793889984, "grad_norm": 0.13522765729393538, "learning_rate": 2.774882320307875e-05, "loss": 0.4514, "num_tokens": 153777275.0, "step": 5661 }, { "epoch": 2.0740163971969037, "grad_norm": 0.13575723147918337, "learning_rate": 2.7744775681252114e-05, "loss": 0.4515, "num_tokens": 154596545.0, "step": 5662 }, { "epoch": 2.0743828150048094, "grad_norm": 0.13137276261579106, "learning_rate": 2.7740727835983142e-05, "loss": 0.4346, "num_tokens": 155422616.0, "step": 5663 }, { "epoch": 2.0747492328127146, "grad_norm": 0.15101761047544424, "learning_rate": 2.7736679667499765e-05, "loss": 0.456, "num_tokens": 156114104.0, "step": 5664 }, { "epoch": 2.0751156506206203, "grad_norm": 0.14110571373412367, "learning_rate": 2.773263117602988e-05, "loss": 0.4556, "num_tokens": 156932687.0, "step": 5665 }, { "epoch": 2.0754820684285256, "grad_norm": 0.14107789750546246, "learning_rate": 2.772858236180143e-05, "loss": 0.4554, "num_tokens": 157658674.0, "step": 5666 }, { "epoch": 2.0758484862364313, "grad_norm": 0.13545425925093932, "learning_rate": 2.7724533225042375e-05, "loss": 0.459, "num_tokens": 158521839.0, "step": 5667 }, { "epoch": 2.0762149040443365, "grad_norm": 0.140041874725975, "learning_rate": 2.7720483765980683e-05, "loss": 0.4549, "num_tokens": 159285121.0, "step": 5668 }, { "epoch": 2.076581321852242, "grad_norm": 0.1362866089031188, "learning_rate": 2.7716433984844352e-05, "loss": 0.4508, "num_tokens": 159986404.0, "step": 5669 }, { "epoch": 2.0769477396601475, "grad_norm": 0.14475451291483496, "learning_rate": 2.7712383881861393e-05, "loss": 0.4644, "num_tokens": 160799716.0, "step": 5670 }, { "epoch": 2.077314157468053, "grad_norm": 0.13922109212867648, "learning_rate": 2.7708333457259834e-05, "loss": 0.4246, "num_tokens": 161522908.0, "step": 5671 }, { "epoch": 2.0776805752759584, "grad_norm": 0.14190814266208132, "learning_rate": 2.7704282711267717e-05, "loss": 0.4836, "num_tokens": 162245349.0, "step": 5672 }, { "epoch": 2.0780469930838636, "grad_norm": 0.13953116440581653, "learning_rate": 2.770023164411311e-05, "loss": 0.459, "num_tokens": 163012670.0, "step": 5673 }, { "epoch": 2.0784134108917693, "grad_norm": 0.14779491703900965, "learning_rate": 2.76961802560241e-05, "loss": 0.47, "num_tokens": 163729298.0, "step": 5674 }, { "epoch": 2.0787798286996746, "grad_norm": 0.13722571231132918, "learning_rate": 2.769212854722879e-05, "loss": 0.4514, "num_tokens": 164564134.0, "step": 5675 }, { "epoch": 2.0791462465075803, "grad_norm": 0.1445437497709158, "learning_rate": 2.7688076517955295e-05, "loss": 0.48, "num_tokens": 165337032.0, "step": 5676 }, { "epoch": 2.0795126643154855, "grad_norm": 0.14199228784894163, "learning_rate": 2.7684024168431746e-05, "loss": 0.4317, "num_tokens": 166062904.0, "step": 5677 }, { "epoch": 2.0798790821233912, "grad_norm": 0.1504215441485269, "learning_rate": 2.767997149888631e-05, "loss": 0.4855, "num_tokens": 166761729.0, "step": 5678 }, { "epoch": 2.0802454999312965, "grad_norm": 0.13393577885850852, "learning_rate": 2.7675918509547153e-05, "loss": 0.4595, "num_tokens": 167562553.0, "step": 5679 }, { "epoch": 2.080611917739202, "grad_norm": 0.1474099667323394, "learning_rate": 2.7671865200642464e-05, "loss": 0.4515, "num_tokens": 168236905.0, "step": 5680 }, { "epoch": 2.0809783355471074, "grad_norm": 0.1504674158092719, "learning_rate": 2.766781157240046e-05, "loss": 0.4408, "num_tokens": 168940218.0, "step": 5681 }, { "epoch": 2.081344753355013, "grad_norm": 0.14705963982501935, "learning_rate": 2.7663757625049363e-05, "loss": 0.4528, "num_tokens": 169607156.0, "step": 5682 }, { "epoch": 2.0817111711629184, "grad_norm": 0.13629822826612192, "learning_rate": 2.7659703358817428e-05, "loss": 0.4329, "num_tokens": 170384889.0, "step": 5683 }, { "epoch": 2.082077588970824, "grad_norm": 0.14073306775186956, "learning_rate": 2.76556487739329e-05, "loss": 0.487, "num_tokens": 171242153.0, "step": 5684 }, { "epoch": 2.0824440067787293, "grad_norm": 0.1342895811406953, "learning_rate": 2.7651593870624077e-05, "loss": 0.4569, "num_tokens": 172114755.0, "step": 5685 }, { "epoch": 2.082810424586635, "grad_norm": 0.13255990241193993, "learning_rate": 2.7647538649119257e-05, "loss": 0.4811, "num_tokens": 172938286.0, "step": 5686 }, { "epoch": 2.0831768423945403, "grad_norm": 0.13277006186621304, "learning_rate": 2.7643483109646742e-05, "loss": 0.4569, "num_tokens": 173767371.0, "step": 5687 }, { "epoch": 2.083543260202446, "grad_norm": 0.14846474986407923, "learning_rate": 2.7639427252434882e-05, "loss": 0.4502, "num_tokens": 174487661.0, "step": 5688 }, { "epoch": 2.083909678010351, "grad_norm": 0.1348056034033207, "learning_rate": 2.7635371077712027e-05, "loss": 0.4459, "num_tokens": 175294233.0, "step": 5689 }, { "epoch": 2.084276095818257, "grad_norm": 0.134184330722599, "learning_rate": 2.763131458570654e-05, "loss": 0.4349, "num_tokens": 176072509.0, "step": 5690 }, { "epoch": 2.084642513626162, "grad_norm": 0.13792830038143558, "learning_rate": 2.7627257776646818e-05, "loss": 0.4475, "num_tokens": 176927410.0, "step": 5691 }, { "epoch": 2.085008931434068, "grad_norm": 0.1331932620186017, "learning_rate": 2.7623200650761268e-05, "loss": 0.4332, "num_tokens": 177683183.0, "step": 5692 }, { "epoch": 2.085375349241973, "grad_norm": 0.1370817761606792, "learning_rate": 2.7619143208278306e-05, "loss": 0.4585, "num_tokens": 178489416.0, "step": 5693 }, { "epoch": 2.085741767049879, "grad_norm": 0.14255834268155693, "learning_rate": 2.761508544942638e-05, "loss": 0.4488, "num_tokens": 179190402.0, "step": 5694 }, { "epoch": 2.086108184857784, "grad_norm": 0.14235719440174976, "learning_rate": 2.761102737443395e-05, "loss": 0.4529, "num_tokens": 180022869.0, "step": 5695 }, { "epoch": 2.0864746026656897, "grad_norm": 0.13833799859008147, "learning_rate": 2.7606968983529495e-05, "loss": 0.4379, "num_tokens": 180777206.0, "step": 5696 }, { "epoch": 2.086841020473595, "grad_norm": 0.15383975646932616, "learning_rate": 2.760291027694151e-05, "loss": 0.4655, "num_tokens": 181457281.0, "step": 5697 }, { "epoch": 2.0872074382815007, "grad_norm": 0.15811256030004242, "learning_rate": 2.7598851254898496e-05, "loss": 0.4659, "num_tokens": 182108751.0, "step": 5698 }, { "epoch": 2.087573856089406, "grad_norm": 0.12394035186067712, "learning_rate": 2.7594791917628997e-05, "loss": 0.4156, "num_tokens": 182909671.0, "step": 5699 }, { "epoch": 2.0879402738973116, "grad_norm": 0.1514586577242076, "learning_rate": 2.7590732265361566e-05, "loss": 0.4582, "num_tokens": 183684790.0, "step": 5700 }, { "epoch": 2.088306691705217, "grad_norm": 0.15382512718135868, "learning_rate": 2.7586672298324758e-05, "loss": 0.4831, "num_tokens": 184400027.0, "step": 5701 }, { "epoch": 2.088673109513122, "grad_norm": 0.1415409835251943, "learning_rate": 2.7582612016747165e-05, "loss": 0.4549, "num_tokens": 185252104.0, "step": 5702 }, { "epoch": 2.089039527321028, "grad_norm": 0.14785964351482053, "learning_rate": 2.7578551420857386e-05, "loss": 0.4629, "num_tokens": 186032600.0, "step": 5703 }, { "epoch": 2.089405945128933, "grad_norm": 0.12770344873149603, "learning_rate": 2.7574490510884034e-05, "loss": 0.4256, "num_tokens": 186841400.0, "step": 5704 }, { "epoch": 2.0897723629368388, "grad_norm": 0.14718654191337033, "learning_rate": 2.7570429287055755e-05, "loss": 0.4548, "num_tokens": 187657398.0, "step": 5705 }, { "epoch": 2.090138780744744, "grad_norm": 0.1334101503148864, "learning_rate": 2.756636774960121e-05, "loss": 0.4477, "num_tokens": 188543231.0, "step": 5706 }, { "epoch": 2.0905051985526497, "grad_norm": 0.14221692102344977, "learning_rate": 2.7562305898749054e-05, "loss": 0.4565, "num_tokens": 189302209.0, "step": 5707 }, { "epoch": 2.090871616360555, "grad_norm": 0.15021015050956765, "learning_rate": 2.755824373472799e-05, "loss": 0.4598, "num_tokens": 190007176.0, "step": 5708 }, { "epoch": 2.0912380341684607, "grad_norm": 0.14191342030888227, "learning_rate": 2.755418125776673e-05, "loss": 0.4399, "num_tokens": 190737749.0, "step": 5709 }, { "epoch": 2.091604451976366, "grad_norm": 0.1315396896085465, "learning_rate": 2.755011846809398e-05, "loss": 0.4323, "num_tokens": 191582574.0, "step": 5710 }, { "epoch": 2.0919708697842716, "grad_norm": 0.1361007674204664, "learning_rate": 2.75460553659385e-05, "loss": 0.4662, "num_tokens": 192380975.0, "step": 5711 }, { "epoch": 2.092337287592177, "grad_norm": 0.1462364849850232, "learning_rate": 2.754199195152905e-05, "loss": 0.458, "num_tokens": 193187408.0, "step": 5712 }, { "epoch": 2.0927037054000825, "grad_norm": 0.13603407055242767, "learning_rate": 2.7537928225094407e-05, "loss": 0.4606, "num_tokens": 194048409.0, "step": 5713 }, { "epoch": 2.093070123207988, "grad_norm": 0.14623698338205457, "learning_rate": 2.7533864186863355e-05, "loss": 0.4522, "num_tokens": 194774226.0, "step": 5714 }, { "epoch": 2.0934365410158935, "grad_norm": 0.14508537705891084, "learning_rate": 2.7529799837064724e-05, "loss": 0.4832, "num_tokens": 195524993.0, "step": 5715 }, { "epoch": 2.0938029588237987, "grad_norm": 0.1330416952083479, "learning_rate": 2.7525735175927332e-05, "loss": 0.4487, "num_tokens": 196276187.0, "step": 5716 }, { "epoch": 2.0941693766317044, "grad_norm": 0.14656268716050924, "learning_rate": 2.752167020368004e-05, "loss": 0.4529, "num_tokens": 197010813.0, "step": 5717 }, { "epoch": 2.0945357944396097, "grad_norm": 0.15210267446778827, "learning_rate": 2.75176049205517e-05, "loss": 0.4764, "num_tokens": 197630543.0, "step": 5718 }, { "epoch": 2.0949022122475154, "grad_norm": 0.14028979997938124, "learning_rate": 2.751353932677121e-05, "loss": 0.4525, "num_tokens": 198353789.0, "step": 5719 }, { "epoch": 2.0952686300554206, "grad_norm": 0.12936700566325954, "learning_rate": 2.7509473422567462e-05, "loss": 0.4312, "num_tokens": 199124794.0, "step": 5720 }, { "epoch": 2.0956350478633263, "grad_norm": 0.13612872024474726, "learning_rate": 2.7505407208169374e-05, "loss": 0.4532, "num_tokens": 199856448.0, "step": 5721 }, { "epoch": 2.0960014656712316, "grad_norm": 0.1515822328206801, "learning_rate": 2.7501340683805887e-05, "loss": 0.4637, "num_tokens": 200551756.0, "step": 5722 }, { "epoch": 2.0963678834791373, "grad_norm": 0.13472715815420905, "learning_rate": 2.7497273849705958e-05, "loss": 0.4594, "num_tokens": 201433864.0, "step": 5723 }, { "epoch": 2.0967343012870425, "grad_norm": 0.12976195180808067, "learning_rate": 2.749320670609854e-05, "loss": 0.4408, "num_tokens": 202178824.0, "step": 5724 }, { "epoch": 2.097100719094948, "grad_norm": 0.1362524910630468, "learning_rate": 2.7489139253212637e-05, "loss": 0.4521, "num_tokens": 202941843.0, "step": 5725 }, { "epoch": 2.0974671369028535, "grad_norm": 0.14772154677471086, "learning_rate": 2.748507149127726e-05, "loss": 0.4647, "num_tokens": 203639899.0, "step": 5726 }, { "epoch": 2.0978335547107587, "grad_norm": 0.1262392707595595, "learning_rate": 2.748100342052141e-05, "loss": 0.4483, "num_tokens": 204480089.0, "step": 5727 }, { "epoch": 2.0981999725186644, "grad_norm": 0.13112532553710549, "learning_rate": 2.7476935041174146e-05, "loss": 0.4508, "num_tokens": 205249067.0, "step": 5728 }, { "epoch": 2.0985663903265697, "grad_norm": 0.12794999060688203, "learning_rate": 2.747286635346452e-05, "loss": 0.4485, "num_tokens": 206223076.0, "step": 5729 }, { "epoch": 2.0989328081344754, "grad_norm": 0.134213567138266, "learning_rate": 2.7468797357621612e-05, "loss": 0.4437, "num_tokens": 206958231.0, "step": 5730 }, { "epoch": 2.0992992259423806, "grad_norm": 0.1315972407664048, "learning_rate": 2.7464728053874502e-05, "loss": 0.4564, "num_tokens": 207767705.0, "step": 5731 }, { "epoch": 2.0996656437502863, "grad_norm": 0.14469157446682593, "learning_rate": 2.746065844245231e-05, "loss": 0.4671, "num_tokens": 208430331.0, "step": 5732 }, { "epoch": 2.1000320615581916, "grad_norm": 0.1510731332631829, "learning_rate": 2.745658852358416e-05, "loss": 0.4604, "num_tokens": 209124027.0, "step": 5733 }, { "epoch": 2.1003984793660972, "grad_norm": 0.15467218053581494, "learning_rate": 2.7452518297499212e-05, "loss": 0.4707, "num_tokens": 209807047.0, "step": 5734 }, { "epoch": 2.1007648971740025, "grad_norm": 0.14096134031229862, "learning_rate": 2.74484477644266e-05, "loss": 0.4558, "num_tokens": 210596427.0, "step": 5735 }, { "epoch": 2.101131314981908, "grad_norm": 0.1515734351907209, "learning_rate": 2.7444376924595512e-05, "loss": 0.4699, "num_tokens": 211267540.0, "step": 5736 }, { "epoch": 2.1014977327898134, "grad_norm": 0.14698578924247882, "learning_rate": 2.7440305778235162e-05, "loss": 0.4932, "num_tokens": 212074250.0, "step": 5737 }, { "epoch": 2.101864150597719, "grad_norm": 0.1415001005549418, "learning_rate": 2.7436234325574748e-05, "loss": 0.4476, "num_tokens": 212926840.0, "step": 5738 }, { "epoch": 2.1022305684056244, "grad_norm": 0.14634499818519653, "learning_rate": 2.74321625668435e-05, "loss": 0.4806, "num_tokens": 213627652.0, "step": 5739 }, { "epoch": 2.10259698621353, "grad_norm": 0.13928693288473318, "learning_rate": 2.742809050227067e-05, "loss": 0.4511, "num_tokens": 214369179.0, "step": 5740 }, { "epoch": 2.1029634040214353, "grad_norm": 0.14013108904692445, "learning_rate": 2.7424018132085525e-05, "loss": 0.4262, "num_tokens": 215245977.0, "step": 5741 }, { "epoch": 2.103329821829341, "grad_norm": 0.14561159729592368, "learning_rate": 2.7419945456517343e-05, "loss": 0.4792, "num_tokens": 215966505.0, "step": 5742 }, { "epoch": 2.1036962396372463, "grad_norm": 0.13301336734955804, "learning_rate": 2.7415872475795428e-05, "loss": 0.4227, "num_tokens": 216757663.0, "step": 5743 }, { "epoch": 2.104062657445152, "grad_norm": 0.16696867747232744, "learning_rate": 2.74117991901491e-05, "loss": 0.4844, "num_tokens": 217375999.0, "step": 5744 }, { "epoch": 2.1044290752530572, "grad_norm": 0.14078315025964527, "learning_rate": 2.7407725599807683e-05, "loss": 0.4475, "num_tokens": 218149496.0, "step": 5745 }, { "epoch": 2.104795493060963, "grad_norm": 0.14311724612889565, "learning_rate": 2.740365170500053e-05, "loss": 0.4592, "num_tokens": 218961623.0, "step": 5746 }, { "epoch": 2.105161910868868, "grad_norm": 0.1314419454797213, "learning_rate": 2.7399577505957023e-05, "loss": 0.4474, "num_tokens": 219762894.0, "step": 5747 }, { "epoch": 2.105528328676774, "grad_norm": 0.16291724372208147, "learning_rate": 2.739550300290654e-05, "loss": 0.4817, "num_tokens": 220485701.0, "step": 5748 }, { "epoch": 2.105894746484679, "grad_norm": 0.1553200624850875, "learning_rate": 2.7391428196078464e-05, "loss": 0.4609, "num_tokens": 221321499.0, "step": 5749 }, { "epoch": 2.106261164292585, "grad_norm": 0.13915367617594976, "learning_rate": 2.7387353085702244e-05, "loss": 0.4305, "num_tokens": 222057557.0, "step": 5750 }, { "epoch": 2.10662758210049, "grad_norm": 0.14747812969243543, "learning_rate": 2.738327767200731e-05, "loss": 0.4459, "num_tokens": 222907940.0, "step": 5751 }, { "epoch": 2.1069939999083958, "grad_norm": 0.15851504709279596, "learning_rate": 2.73792019552231e-05, "loss": 0.4709, "num_tokens": 223642890.0, "step": 5752 }, { "epoch": 2.107360417716301, "grad_norm": 0.1393870042954317, "learning_rate": 2.73751259355791e-05, "loss": 0.4676, "num_tokens": 224398395.0, "step": 5753 }, { "epoch": 2.1077268355242067, "grad_norm": 0.14052431886512068, "learning_rate": 2.73710496133048e-05, "loss": 0.4682, "num_tokens": 225130898.0, "step": 5754 }, { "epoch": 2.108093253332112, "grad_norm": 0.1627575023745115, "learning_rate": 2.736697298862969e-05, "loss": 0.4788, "num_tokens": 225789267.0, "step": 5755 }, { "epoch": 2.108459671140017, "grad_norm": 0.1493063200823995, "learning_rate": 2.7362896061783306e-05, "loss": 0.5046, "num_tokens": 226527701.0, "step": 5756 }, { "epoch": 2.108826088947923, "grad_norm": 0.15239037484630433, "learning_rate": 2.735881883299518e-05, "loss": 0.4901, "num_tokens": 227222844.0, "step": 5757 }, { "epoch": 2.109192506755828, "grad_norm": 0.1464009268893371, "learning_rate": 2.735474130249488e-05, "loss": 0.4418, "num_tokens": 227998580.0, "step": 5758 }, { "epoch": 2.109558924563734, "grad_norm": 0.14524718217405302, "learning_rate": 2.7350663470511953e-05, "loss": 0.4425, "num_tokens": 228845668.0, "step": 5759 }, { "epoch": 2.109925342371639, "grad_norm": 0.14973647730719486, "learning_rate": 2.7346585337276016e-05, "loss": 0.4755, "num_tokens": 229521962.0, "step": 5760 }, { "epoch": 2.110291760179545, "grad_norm": 0.13907098038441676, "learning_rate": 2.7342506903016666e-05, "loss": 0.4659, "num_tokens": 230344725.0, "step": 5761 }, { "epoch": 2.11065817798745, "grad_norm": 0.1480744025105882, "learning_rate": 2.7338428167963522e-05, "loss": 0.427, "num_tokens": 231023518.0, "step": 5762 }, { "epoch": 2.1110245957953557, "grad_norm": 0.1483201928056043, "learning_rate": 2.7334349132346226e-05, "loss": 0.4577, "num_tokens": 231750236.0, "step": 5763 }, { "epoch": 2.111391013603261, "grad_norm": 0.1428861880824579, "learning_rate": 2.733026979639445e-05, "loss": 0.4564, "num_tokens": 232607830.0, "step": 5764 }, { "epoch": 2.1117574314111667, "grad_norm": 0.14388129931437368, "learning_rate": 2.732619016033785e-05, "loss": 0.4371, "num_tokens": 233382905.0, "step": 5765 }, { "epoch": 2.112123849219072, "grad_norm": 0.1526948699454475, "learning_rate": 2.7322110224406127e-05, "loss": 0.4348, "num_tokens": 234142396.0, "step": 5766 }, { "epoch": 2.1124902670269776, "grad_norm": 0.13012808003883033, "learning_rate": 2.7318029988828988e-05, "loss": 0.4631, "num_tokens": 235010712.0, "step": 5767 }, { "epoch": 2.112856684834883, "grad_norm": 0.13977520852061148, "learning_rate": 2.731394945383616e-05, "loss": 0.4572, "num_tokens": 235957377.0, "step": 5768 }, { "epoch": 2.1132231026427886, "grad_norm": 0.1471371365787825, "learning_rate": 2.7309868619657388e-05, "loss": 0.4591, "num_tokens": 236753723.0, "step": 5769 }, { "epoch": 2.113589520450694, "grad_norm": 0.15470864303854198, "learning_rate": 2.7305787486522416e-05, "loss": 0.4766, "num_tokens": 237444957.0, "step": 5770 }, { "epoch": 2.1139559382585995, "grad_norm": 0.16379625249331448, "learning_rate": 2.7301706054661038e-05, "loss": 0.4886, "num_tokens": 238115107.0, "step": 5771 }, { "epoch": 2.1143223560665048, "grad_norm": 0.13601396861252193, "learning_rate": 2.7297624324303044e-05, "loss": 0.464, "num_tokens": 238917179.0, "step": 5772 }, { "epoch": 2.1146887738744105, "grad_norm": 0.15675522220839477, "learning_rate": 2.729354229567823e-05, "loss": 0.4713, "num_tokens": 239699265.0, "step": 5773 }, { "epoch": 2.1150551916823157, "grad_norm": 0.16194745077109882, "learning_rate": 2.7289459969016437e-05, "loss": 0.4488, "num_tokens": 240448144.0, "step": 5774 }, { "epoch": 2.1154216094902214, "grad_norm": 0.14775261742469098, "learning_rate": 2.7285377344547502e-05, "loss": 0.4335, "num_tokens": 241163371.0, "step": 5775 }, { "epoch": 2.1157880272981267, "grad_norm": 0.13755998861480126, "learning_rate": 2.7281294422501284e-05, "loss": 0.4501, "num_tokens": 241975734.0, "step": 5776 }, { "epoch": 2.1161544451060323, "grad_norm": 0.14052716808304197, "learning_rate": 2.727721120310766e-05, "loss": 0.467, "num_tokens": 242723543.0, "step": 5777 }, { "epoch": 2.1165208629139376, "grad_norm": 0.14629429483048048, "learning_rate": 2.7273127686596527e-05, "loss": 0.4452, "num_tokens": 243420439.0, "step": 5778 }, { "epoch": 2.1168872807218433, "grad_norm": 0.1407098427296411, "learning_rate": 2.726904387319779e-05, "loss": 0.4597, "num_tokens": 244154521.0, "step": 5779 }, { "epoch": 2.1172536985297485, "grad_norm": 0.135649748670718, "learning_rate": 2.726495976314138e-05, "loss": 0.4653, "num_tokens": 245060671.0, "step": 5780 }, { "epoch": 2.117620116337654, "grad_norm": 0.14977230980435827, "learning_rate": 2.7260875356657243e-05, "loss": 0.4496, "num_tokens": 245780946.0, "step": 5781 }, { "epoch": 2.1179865341455595, "grad_norm": 0.15447432499099162, "learning_rate": 2.725679065397533e-05, "loss": 0.4616, "num_tokens": 246443310.0, "step": 5782 }, { "epoch": 2.1183529519534647, "grad_norm": 0.1508138457557177, "learning_rate": 2.725270565532562e-05, "loss": 0.4601, "num_tokens": 247127177.0, "step": 5783 }, { "epoch": 2.1187193697613704, "grad_norm": 0.15862478763086274, "learning_rate": 2.7248620360938117e-05, "loss": 0.4608, "num_tokens": 247849365.0, "step": 5784 }, { "epoch": 2.1190857875692757, "grad_norm": 0.14135488251160452, "learning_rate": 2.7244534771042826e-05, "loss": 0.4445, "num_tokens": 248554812.0, "step": 5785 }, { "epoch": 2.1194522053771814, "grad_norm": 0.15139069868876368, "learning_rate": 2.7240448885869763e-05, "loss": 0.4302, "num_tokens": 249283395.0, "step": 5786 }, { "epoch": 2.1198186231850866, "grad_norm": 0.13325118039965128, "learning_rate": 2.723636270564898e-05, "loss": 0.4535, "num_tokens": 250107147.0, "step": 5787 }, { "epoch": 2.1201850409929923, "grad_norm": 0.13406810647148448, "learning_rate": 2.7232276230610545e-05, "loss": 0.4621, "num_tokens": 250880689.0, "step": 5788 }, { "epoch": 2.1205514588008976, "grad_norm": 0.1375971664959159, "learning_rate": 2.722818946098453e-05, "loss": 0.4637, "num_tokens": 251609566.0, "step": 5789 }, { "epoch": 2.1209178766088033, "grad_norm": 0.1254531417980457, "learning_rate": 2.7224102397001017e-05, "loss": 0.4388, "num_tokens": 252468016.0, "step": 5790 }, { "epoch": 2.1212842944167085, "grad_norm": 0.13473720104349457, "learning_rate": 2.7220015038890127e-05, "loss": 0.4534, "num_tokens": 253238371.0, "step": 5791 }, { "epoch": 2.121650712224614, "grad_norm": 0.13086766938836852, "learning_rate": 2.7215927386881986e-05, "loss": 0.4307, "num_tokens": 253991426.0, "step": 5792 }, { "epoch": 2.1220171300325195, "grad_norm": 0.13545392527759129, "learning_rate": 2.721183944120673e-05, "loss": 0.433, "num_tokens": 254716608.0, "step": 5793 }, { "epoch": 2.122383547840425, "grad_norm": 0.14539973024093608, "learning_rate": 2.7207751202094526e-05, "loss": 0.4585, "num_tokens": 255440815.0, "step": 5794 }, { "epoch": 2.1227499656483304, "grad_norm": 0.14178936101473888, "learning_rate": 2.720366266977555e-05, "loss": 0.4635, "num_tokens": 256169856.0, "step": 5795 }, { "epoch": 2.123116383456236, "grad_norm": 0.1479663134849236, "learning_rate": 2.7199573844479986e-05, "loss": 0.4889, "num_tokens": 256861345.0, "step": 5796 }, { "epoch": 2.1234828012641414, "grad_norm": 0.1462588056470727, "learning_rate": 2.7195484726438048e-05, "loss": 0.4449, "num_tokens": 257697699.0, "step": 5797 }, { "epoch": 2.123849219072047, "grad_norm": 0.14289797290768835, "learning_rate": 2.7191395315879967e-05, "loss": 0.4392, "num_tokens": 258395480.0, "step": 5798 }, { "epoch": 2.1242156368799523, "grad_norm": 0.1401676567732891, "learning_rate": 2.7187305613035976e-05, "loss": 0.4444, "num_tokens": 259156292.0, "step": 5799 }, { "epoch": 2.124582054687858, "grad_norm": 0.12804898276205212, "learning_rate": 2.7183215618136336e-05, "loss": 0.4556, "num_tokens": 260066226.0, "step": 5800 }, { "epoch": 2.1249484724957632, "grad_norm": 0.1430364272400306, "learning_rate": 2.7179125331411328e-05, "loss": 0.4787, "num_tokens": 260853308.0, "step": 5801 }, { "epoch": 2.125314890303669, "grad_norm": 0.15431900754029934, "learning_rate": 2.717503475309124e-05, "loss": 0.4747, "num_tokens": 261449427.0, "step": 5802 }, { "epoch": 2.125681308111574, "grad_norm": 0.14897355104371027, "learning_rate": 2.7170943883406377e-05, "loss": 0.4359, "num_tokens": 262161398.0, "step": 5803 }, { "epoch": 2.12604772591948, "grad_norm": 0.13457951503724166, "learning_rate": 2.716685272258706e-05, "loss": 0.4281, "num_tokens": 263049477.0, "step": 5804 }, { "epoch": 2.126414143727385, "grad_norm": 0.1342353477339332, "learning_rate": 2.716276127086364e-05, "loss": 0.4341, "num_tokens": 263847157.0, "step": 5805 }, { "epoch": 2.1267805615352904, "grad_norm": 0.1468437173123759, "learning_rate": 2.7158669528466468e-05, "loss": 0.4601, "num_tokens": 264571460.0, "step": 5806 }, { "epoch": 2.127146979343196, "grad_norm": 0.14703532898037067, "learning_rate": 2.7154577495625916e-05, "loss": 0.4598, "num_tokens": 265519170.0, "step": 5807 }, { "epoch": 2.1275133971511018, "grad_norm": 0.14363250085089452, "learning_rate": 2.7150485172572368e-05, "loss": 0.4551, "num_tokens": 266281488.0, "step": 5808 }, { "epoch": 2.127879814959007, "grad_norm": 0.14673902703790218, "learning_rate": 2.7146392559536246e-05, "loss": 0.4492, "num_tokens": 267029146.0, "step": 5809 }, { "epoch": 2.1282462327669123, "grad_norm": 0.16323345632371827, "learning_rate": 2.7142299656747958e-05, "loss": 0.4834, "num_tokens": 267674264.0, "step": 5810 }, { "epoch": 2.128612650574818, "grad_norm": 0.15165979750811184, "learning_rate": 2.7138206464437947e-05, "loss": 0.4628, "num_tokens": 268490871.0, "step": 5811 }, { "epoch": 2.128979068382723, "grad_norm": 0.1499915205920351, "learning_rate": 2.713411298283667e-05, "loss": 0.5281, "num_tokens": 269199715.0, "step": 5812 }, { "epoch": 2.129345486190629, "grad_norm": 0.1514312578367984, "learning_rate": 2.7130019212174602e-05, "loss": 0.454, "num_tokens": 269912200.0, "step": 5813 }, { "epoch": 2.129711903998534, "grad_norm": 0.14390836475787158, "learning_rate": 2.7125925152682214e-05, "loss": 0.439, "num_tokens": 270795580.0, "step": 5814 }, { "epoch": 2.13007832180644, "grad_norm": 0.16073481795706424, "learning_rate": 2.7121830804590028e-05, "loss": 0.4921, "num_tokens": 271568028.0, "step": 5815 }, { "epoch": 2.130444739614345, "grad_norm": 0.13963953218449932, "learning_rate": 2.7117736168128557e-05, "loss": 0.4466, "num_tokens": 272379881.0, "step": 5816 }, { "epoch": 2.130811157422251, "grad_norm": 0.14344864696298767, "learning_rate": 2.711364124352833e-05, "loss": 0.4225, "num_tokens": 273212925.0, "step": 5817 }, { "epoch": 2.131177575230156, "grad_norm": 0.14002622156375857, "learning_rate": 2.7109546031019913e-05, "loss": 0.4505, "num_tokens": 274055731.0, "step": 5818 }, { "epoch": 2.1315439930380617, "grad_norm": 0.1331566498928417, "learning_rate": 2.7105450530833874e-05, "loss": 0.4643, "num_tokens": 274896950.0, "step": 5819 }, { "epoch": 2.131910410845967, "grad_norm": 0.1534698085619037, "learning_rate": 2.7101354743200784e-05, "loss": 0.4704, "num_tokens": 275622598.0, "step": 5820 }, { "epoch": 2.1322768286538727, "grad_norm": 0.15064870979425618, "learning_rate": 2.7097258668351246e-05, "loss": 0.459, "num_tokens": 276313589.0, "step": 5821 }, { "epoch": 2.132643246461778, "grad_norm": 0.1307855781264971, "learning_rate": 2.7093162306515893e-05, "loss": 0.4477, "num_tokens": 277112288.0, "step": 5822 }, { "epoch": 2.1330096642696836, "grad_norm": 0.14774020742472696, "learning_rate": 2.7089065657925346e-05, "loss": 0.4625, "num_tokens": 277896921.0, "step": 5823 }, { "epoch": 2.133376082077589, "grad_norm": 0.14070151901611969, "learning_rate": 2.708496872281026e-05, "loss": 0.4634, "num_tokens": 278707525.0, "step": 5824 }, { "epoch": 2.1337424998854946, "grad_norm": 0.13826006322875017, "learning_rate": 2.708087150140129e-05, "loss": 0.4812, "num_tokens": 279460410.0, "step": 5825 }, { "epoch": 2.1341089176934, "grad_norm": 0.13646131464335545, "learning_rate": 2.7076773993929125e-05, "loss": 0.4449, "num_tokens": 280231654.0, "step": 5826 }, { "epoch": 2.1344753355013055, "grad_norm": 0.1441141701891251, "learning_rate": 2.7072676200624473e-05, "loss": 0.4493, "num_tokens": 280991152.0, "step": 5827 }, { "epoch": 2.1348417533092108, "grad_norm": 0.13751303261017941, "learning_rate": 2.7068578121718028e-05, "loss": 0.4664, "num_tokens": 281798835.0, "step": 5828 }, { "epoch": 2.1352081711171165, "grad_norm": 0.15005884616598217, "learning_rate": 2.7064479757440535e-05, "loss": 0.4631, "num_tokens": 282429104.0, "step": 5829 }, { "epoch": 2.1355745889250217, "grad_norm": 0.13787426241647172, "learning_rate": 2.7060381108022732e-05, "loss": 0.4594, "num_tokens": 283186550.0, "step": 5830 }, { "epoch": 2.1359410067329274, "grad_norm": 0.13269700984590052, "learning_rate": 2.7056282173695386e-05, "loss": 0.4345, "num_tokens": 284057958.0, "step": 5831 }, { "epoch": 2.1363074245408327, "grad_norm": 0.13925585473475002, "learning_rate": 2.7052182954689272e-05, "loss": 0.478, "num_tokens": 284829719.0, "step": 5832 }, { "epoch": 2.1366738423487384, "grad_norm": 0.1279713830812031, "learning_rate": 2.7048083451235186e-05, "loss": 0.447, "num_tokens": 285640726.0, "step": 5833 }, { "epoch": 2.1370402601566436, "grad_norm": 0.1477369064646776, "learning_rate": 2.7043983663563945e-05, "loss": 0.482, "num_tokens": 286360997.0, "step": 5834 }, { "epoch": 2.137406677964549, "grad_norm": 0.13348770977929522, "learning_rate": 2.703988359190636e-05, "loss": 0.445, "num_tokens": 287139585.0, "step": 5835 }, { "epoch": 2.1377730957724546, "grad_norm": 0.14488865801765677, "learning_rate": 2.7035783236493283e-05, "loss": 0.4836, "num_tokens": 287822791.0, "step": 5836 }, { "epoch": 2.13813951358036, "grad_norm": 0.13337315565444474, "learning_rate": 2.703168259755557e-05, "loss": 0.4567, "num_tokens": 288593112.0, "step": 5837 }, { "epoch": 2.1385059313882655, "grad_norm": 0.1456845363134365, "learning_rate": 2.702758167532409e-05, "loss": 0.4567, "num_tokens": 289268306.0, "step": 5838 }, { "epoch": 2.1388723491961708, "grad_norm": 0.1342764862001148, "learning_rate": 2.7023480470029748e-05, "loss": 0.4629, "num_tokens": 290099489.0, "step": 5839 }, { "epoch": 2.1392387670040764, "grad_norm": 0.12895203223108595, "learning_rate": 2.7019378981903436e-05, "loss": 0.4188, "num_tokens": 290893145.0, "step": 5840 }, { "epoch": 2.1396051848119817, "grad_norm": 0.14578089862431617, "learning_rate": 2.7015277211176084e-05, "loss": 0.4274, "num_tokens": 291555076.0, "step": 5841 }, { "epoch": 2.1399716026198874, "grad_norm": 0.12980716437045303, "learning_rate": 2.7011175158078623e-05, "loss": 0.431, "num_tokens": 292365070.0, "step": 5842 }, { "epoch": 2.1403380204277926, "grad_norm": 0.1546284106549559, "learning_rate": 2.7007072822842013e-05, "loss": 0.4455, "num_tokens": 293010395.0, "step": 5843 }, { "epoch": 2.1407044382356983, "grad_norm": 0.14870036568668524, "learning_rate": 2.700297020569722e-05, "loss": 0.4644, "num_tokens": 293647555.0, "step": 5844 }, { "epoch": 2.1410708560436036, "grad_norm": 0.13304504030478467, "learning_rate": 2.699886730687523e-05, "loss": 0.4473, "num_tokens": 294511506.0, "step": 5845 }, { "epoch": 2.1414372738515093, "grad_norm": 0.13146930739157794, "learning_rate": 2.6994764126607044e-05, "loss": 0.4373, "num_tokens": 295293844.0, "step": 5846 }, { "epoch": 2.1418036916594145, "grad_norm": 0.13778892719130328, "learning_rate": 2.6990660665123683e-05, "loss": 0.4293, "num_tokens": 296060300.0, "step": 5847 }, { "epoch": 2.1421701094673202, "grad_norm": 0.14129613339702346, "learning_rate": 2.698655692265617e-05, "loss": 0.4444, "num_tokens": 296791955.0, "step": 5848 }, { "epoch": 2.1425365272752255, "grad_norm": 0.14616995658151655, "learning_rate": 2.698245289943557e-05, "loss": 0.4775, "num_tokens": 297495446.0, "step": 5849 }, { "epoch": 2.142902945083131, "grad_norm": 0.14077219372533378, "learning_rate": 2.6978348595692936e-05, "loss": 0.4243, "num_tokens": 298275425.0, "step": 5850 }, { "epoch": 2.1432693628910364, "grad_norm": 0.14678882086855452, "learning_rate": 2.6974244011659348e-05, "loss": 0.4846, "num_tokens": 298986196.0, "step": 5851 }, { "epoch": 2.143635780698942, "grad_norm": 0.13462748789144038, "learning_rate": 2.6970139147565904e-05, "loss": 0.4568, "num_tokens": 299739913.0, "step": 5852 }, { "epoch": 2.1440021985068474, "grad_norm": 0.13229202739367082, "learning_rate": 2.6966034003643728e-05, "loss": 0.4405, "num_tokens": 300555490.0, "step": 5853 }, { "epoch": 2.144368616314753, "grad_norm": 0.13664840882270232, "learning_rate": 2.6961928580123924e-05, "loss": 0.4869, "num_tokens": 301321992.0, "step": 5854 }, { "epoch": 2.1447350341226583, "grad_norm": 0.1345545165668053, "learning_rate": 2.6957822877237652e-05, "loss": 0.4576, "num_tokens": 302064932.0, "step": 5855 }, { "epoch": 2.145101451930564, "grad_norm": 0.1365019017239575, "learning_rate": 2.6953716895216074e-05, "loss": 0.4639, "num_tokens": 302822926.0, "step": 5856 }, { "epoch": 2.1454678697384693, "grad_norm": 0.14660144076406284, "learning_rate": 2.6949610634290357e-05, "loss": 0.5018, "num_tokens": 303622085.0, "step": 5857 }, { "epoch": 2.145834287546375, "grad_norm": 0.12613028633082812, "learning_rate": 2.6945504094691696e-05, "loss": 0.4392, "num_tokens": 304454158.0, "step": 5858 }, { "epoch": 2.14620070535428, "grad_norm": 0.13366651254378367, "learning_rate": 2.6941397276651285e-05, "loss": 0.4635, "num_tokens": 305236491.0, "step": 5859 }, { "epoch": 2.1465671231621855, "grad_norm": 0.1306969519166204, "learning_rate": 2.6937290180400365e-05, "loss": 0.4557, "num_tokens": 306016145.0, "step": 5860 }, { "epoch": 2.146933540970091, "grad_norm": 0.14771400994690786, "learning_rate": 2.6933182806170166e-05, "loss": 0.4555, "num_tokens": 306694869.0, "step": 5861 }, { "epoch": 2.147299958777997, "grad_norm": 0.13848940141194307, "learning_rate": 2.6929075154191937e-05, "loss": 0.4815, "num_tokens": 307405494.0, "step": 5862 }, { "epoch": 2.147666376585902, "grad_norm": 0.1482950454567941, "learning_rate": 2.6924967224696947e-05, "loss": 0.4429, "num_tokens": 308066442.0, "step": 5863 }, { "epoch": 2.1480327943938073, "grad_norm": 0.12495617621045131, "learning_rate": 2.692085901791649e-05, "loss": 0.4237, "num_tokens": 308843353.0, "step": 5864 }, { "epoch": 2.148399212201713, "grad_norm": 0.15522353942067343, "learning_rate": 2.6916750534081855e-05, "loss": 0.5067, "num_tokens": 309467693.0, "step": 5865 }, { "epoch": 2.1487656300096183, "grad_norm": 0.14387331938104714, "learning_rate": 2.6912641773424364e-05, "loss": 0.4606, "num_tokens": 310231229.0, "step": 5866 }, { "epoch": 2.149132047817524, "grad_norm": 0.12090155901453742, "learning_rate": 2.690853273617535e-05, "loss": 0.4348, "num_tokens": 311099291.0, "step": 5867 }, { "epoch": 2.1494984656254292, "grad_norm": 0.14395673906334172, "learning_rate": 2.6904423422566156e-05, "loss": 0.4597, "num_tokens": 311793437.0, "step": 5868 }, { "epoch": 2.149864883433335, "grad_norm": 0.1440062505354754, "learning_rate": 2.690031383282814e-05, "loss": 0.4576, "num_tokens": 312528866.0, "step": 5869 }, { "epoch": 2.15023130124124, "grad_norm": 0.13729398643682877, "learning_rate": 2.6896203967192692e-05, "loss": 0.4517, "num_tokens": 313281674.0, "step": 5870 }, { "epoch": 2.150597719049146, "grad_norm": 0.14703086899258302, "learning_rate": 2.6892093825891202e-05, "loss": 0.454, "num_tokens": 314048586.0, "step": 5871 }, { "epoch": 2.150964136857051, "grad_norm": 0.15186965547114406, "learning_rate": 2.6887983409155075e-05, "loss": 0.4832, "num_tokens": 314891489.0, "step": 5872 }, { "epoch": 2.151330554664957, "grad_norm": 0.14290080254921528, "learning_rate": 2.688387271721573e-05, "loss": 0.4689, "num_tokens": 315609470.0, "step": 5873 }, { "epoch": 2.151696972472862, "grad_norm": 0.14670535788468284, "learning_rate": 2.6879761750304627e-05, "loss": 0.4535, "num_tokens": 316507389.0, "step": 5874 }, { "epoch": 2.1520633902807678, "grad_norm": 0.13813699672336974, "learning_rate": 2.68756505086532e-05, "loss": 0.4334, "num_tokens": 317196591.0, "step": 5875 }, { "epoch": 2.152429808088673, "grad_norm": 0.14173808521685857, "learning_rate": 2.687153899249293e-05, "loss": 0.4873, "num_tokens": 317859154.0, "step": 5876 }, { "epoch": 2.1527962258965787, "grad_norm": 0.15491555186601016, "learning_rate": 2.6867427202055302e-05, "loss": 0.4911, "num_tokens": 318632185.0, "step": 5877 }, { "epoch": 2.153162643704484, "grad_norm": 0.14746385552600416, "learning_rate": 2.6863315137571824e-05, "loss": 0.4428, "num_tokens": 319421094.0, "step": 5878 }, { "epoch": 2.1535290615123897, "grad_norm": 0.1403459795826127, "learning_rate": 2.6859202799274005e-05, "loss": 0.4486, "num_tokens": 320150753.0, "step": 5879 }, { "epoch": 2.153895479320295, "grad_norm": 0.13555516208341392, "learning_rate": 2.685509018739338e-05, "loss": 0.4393, "num_tokens": 320863143.0, "step": 5880 }, { "epoch": 2.1542618971282006, "grad_norm": 0.14681278081195417, "learning_rate": 2.6850977302161502e-05, "loss": 0.4388, "num_tokens": 321630180.0, "step": 5881 }, { "epoch": 2.154628314936106, "grad_norm": 0.1437105537515497, "learning_rate": 2.6846864143809925e-05, "loss": 0.4463, "num_tokens": 322378251.0, "step": 5882 }, { "epoch": 2.1549947327440115, "grad_norm": 0.15208278909236084, "learning_rate": 2.684275071257024e-05, "loss": 0.4708, "num_tokens": 323102886.0, "step": 5883 }, { "epoch": 2.155361150551917, "grad_norm": 0.13725455464182806, "learning_rate": 2.6838637008674026e-05, "loss": 0.428, "num_tokens": 323771779.0, "step": 5884 }, { "epoch": 2.1557275683598225, "grad_norm": 0.14045188003426232, "learning_rate": 2.6834523032352916e-05, "loss": 0.477, "num_tokens": 324488157.0, "step": 5885 }, { "epoch": 2.1560939861677277, "grad_norm": 0.1359325663441439, "learning_rate": 2.6830408783838514e-05, "loss": 0.4594, "num_tokens": 325253321.0, "step": 5886 }, { "epoch": 2.1564604039756334, "grad_norm": 0.14377828632266296, "learning_rate": 2.6826294263362463e-05, "loss": 0.4786, "num_tokens": 326045485.0, "step": 5887 }, { "epoch": 2.1568268217835387, "grad_norm": 0.1363140892554896, "learning_rate": 2.682217947115643e-05, "loss": 0.4375, "num_tokens": 326769789.0, "step": 5888 }, { "epoch": 2.157193239591444, "grad_norm": 0.1389635095473083, "learning_rate": 2.681806440745208e-05, "loss": 0.45, "num_tokens": 327465023.0, "step": 5889 }, { "epoch": 2.1575596573993496, "grad_norm": 0.1393982226462399, "learning_rate": 2.681394907248109e-05, "loss": 0.4427, "num_tokens": 328161619.0, "step": 5890 }, { "epoch": 2.157926075207255, "grad_norm": 0.1380812318189527, "learning_rate": 2.6809833466475175e-05, "loss": 0.4373, "num_tokens": 328929215.0, "step": 5891 }, { "epoch": 2.1582924930151606, "grad_norm": 0.13833782038582945, "learning_rate": 2.6805717589666055e-05, "loss": 0.4687, "num_tokens": 329772903.0, "step": 5892 }, { "epoch": 2.158658910823066, "grad_norm": 0.12908583872945148, "learning_rate": 2.680160144228544e-05, "loss": 0.4326, "num_tokens": 330564398.0, "step": 5893 }, { "epoch": 2.1590253286309715, "grad_norm": 0.12194825748444793, "learning_rate": 2.67974850245651e-05, "loss": 0.4175, "num_tokens": 331444396.0, "step": 5894 }, { "epoch": 2.1593917464388768, "grad_norm": 0.13826257283707902, "learning_rate": 2.6793368336736785e-05, "loss": 0.4489, "num_tokens": 332127248.0, "step": 5895 }, { "epoch": 2.1597581642467825, "grad_norm": 0.149970706031758, "learning_rate": 2.6789251379032276e-05, "loss": 0.4803, "num_tokens": 332865005.0, "step": 5896 }, { "epoch": 2.1601245820546877, "grad_norm": 0.13039018280183012, "learning_rate": 2.6785134151683375e-05, "loss": 0.4624, "num_tokens": 333730515.0, "step": 5897 }, { "epoch": 2.1604909998625934, "grad_norm": 0.13781161822113513, "learning_rate": 2.678101665492187e-05, "loss": 0.4335, "num_tokens": 334498662.0, "step": 5898 }, { "epoch": 2.1608574176704987, "grad_norm": 0.14463284869838525, "learning_rate": 2.6776898888979607e-05, "loss": 0.4389, "num_tokens": 335201237.0, "step": 5899 }, { "epoch": 2.1612238354784044, "grad_norm": 0.13871371774534066, "learning_rate": 2.67727808540884e-05, "loss": 0.4369, "num_tokens": 335920613.0, "step": 5900 }, { "epoch": 2.1615902532863096, "grad_norm": 0.14116332580662816, "learning_rate": 2.6768662550480122e-05, "loss": 0.4831, "num_tokens": 336616306.0, "step": 5901 }, { "epoch": 2.1619566710942153, "grad_norm": 0.1499275508396069, "learning_rate": 2.676454397838664e-05, "loss": 0.4932, "num_tokens": 337297520.0, "step": 5902 }, { "epoch": 2.1623230889021205, "grad_norm": 0.1435975306190729, "learning_rate": 2.6760425138039828e-05, "loss": 0.4637, "num_tokens": 338109733.0, "step": 5903 }, { "epoch": 2.1626895067100262, "grad_norm": 0.13508332864769806, "learning_rate": 2.675630602967159e-05, "loss": 0.4648, "num_tokens": 338923560.0, "step": 5904 }, { "epoch": 2.1630559245179315, "grad_norm": 0.13917254433413828, "learning_rate": 2.6752186653513847e-05, "loss": 0.4638, "num_tokens": 339676907.0, "step": 5905 }, { "epoch": 2.163422342325837, "grad_norm": 0.13320565419844976, "learning_rate": 2.674806700979852e-05, "loss": 0.4311, "num_tokens": 340430114.0, "step": 5906 }, { "epoch": 2.1637887601337424, "grad_norm": 0.13323469756991768, "learning_rate": 2.674394709875755e-05, "loss": 0.4446, "num_tokens": 341377483.0, "step": 5907 }, { "epoch": 2.164155177941648, "grad_norm": 0.13158600849330568, "learning_rate": 2.67398269206229e-05, "loss": 0.4107, "num_tokens": 342098574.0, "step": 5908 }, { "epoch": 2.1645215957495534, "grad_norm": 0.14017185272483482, "learning_rate": 2.673570647562655e-05, "loss": 0.4449, "num_tokens": 342806631.0, "step": 5909 }, { "epoch": 2.164888013557459, "grad_norm": 0.13061221752702792, "learning_rate": 2.6731585764000478e-05, "loss": 0.4628, "num_tokens": 343607844.0, "step": 5910 }, { "epoch": 2.1652544313653643, "grad_norm": 0.14161067856661966, "learning_rate": 2.67274647859767e-05, "loss": 0.4683, "num_tokens": 344331421.0, "step": 5911 }, { "epoch": 2.16562084917327, "grad_norm": 0.1335235080359472, "learning_rate": 2.6723343541787226e-05, "loss": 0.4601, "num_tokens": 345136186.0, "step": 5912 }, { "epoch": 2.1659872669811753, "grad_norm": 0.14627803973553133, "learning_rate": 2.67192220316641e-05, "loss": 0.4449, "num_tokens": 345903442.0, "step": 5913 }, { "epoch": 2.1663536847890805, "grad_norm": 0.1403106974331046, "learning_rate": 2.671510025583936e-05, "loss": 0.4465, "num_tokens": 346632367.0, "step": 5914 }, { "epoch": 2.166720102596986, "grad_norm": 0.13275768635968713, "learning_rate": 2.671097821454508e-05, "loss": 0.4567, "num_tokens": 347478297.0, "step": 5915 }, { "epoch": 2.1670865204048915, "grad_norm": 0.12948058392919978, "learning_rate": 2.6706855908013326e-05, "loss": 0.4471, "num_tokens": 348343452.0, "step": 5916 }, { "epoch": 2.167452938212797, "grad_norm": 0.14387783137948096, "learning_rate": 2.6702733336476213e-05, "loss": 0.4378, "num_tokens": 349094460.0, "step": 5917 }, { "epoch": 2.1678193560207024, "grad_norm": 0.1474738808385574, "learning_rate": 2.6698610500165826e-05, "loss": 0.4678, "num_tokens": 349776092.0, "step": 5918 }, { "epoch": 2.168185773828608, "grad_norm": 0.14293525629442624, "learning_rate": 2.669448739931431e-05, "loss": 0.4576, "num_tokens": 350534628.0, "step": 5919 }, { "epoch": 2.1685521916365134, "grad_norm": 0.14836587873239432, "learning_rate": 2.6690364034153787e-05, "loss": 0.4673, "num_tokens": 351339460.0, "step": 5920 }, { "epoch": 2.168918609444419, "grad_norm": 0.17425952750284235, "learning_rate": 2.668624040491642e-05, "loss": 0.4996, "num_tokens": 352150138.0, "step": 5921 }, { "epoch": 2.1692850272523243, "grad_norm": 0.15245469338404843, "learning_rate": 2.668211651183438e-05, "loss": 0.4611, "num_tokens": 352821749.0, "step": 5922 }, { "epoch": 2.16965144506023, "grad_norm": 0.13953028128433662, "learning_rate": 2.667799235513984e-05, "loss": 0.448, "num_tokens": 353542488.0, "step": 5923 }, { "epoch": 2.1700178628681352, "grad_norm": 0.13299431868418665, "learning_rate": 2.6673867935065012e-05, "loss": 0.4475, "num_tokens": 354372008.0, "step": 5924 }, { "epoch": 2.170384280676041, "grad_norm": 0.1621769612103518, "learning_rate": 2.6669743251842093e-05, "loss": 0.4532, "num_tokens": 355044465.0, "step": 5925 }, { "epoch": 2.170750698483946, "grad_norm": 0.13901799795012285, "learning_rate": 2.6665618305703325e-05, "loss": 0.4798, "num_tokens": 355861197.0, "step": 5926 }, { "epoch": 2.171117116291852, "grad_norm": 0.14596808057354937, "learning_rate": 2.6661493096880945e-05, "loss": 0.4767, "num_tokens": 356615861.0, "step": 5927 }, { "epoch": 2.171483534099757, "grad_norm": 0.14138059741011916, "learning_rate": 2.6657367625607208e-05, "loss": 0.4483, "num_tokens": 357379435.0, "step": 5928 }, { "epoch": 2.171849951907663, "grad_norm": 0.13158727648898313, "learning_rate": 2.665324189211439e-05, "loss": 0.4622, "num_tokens": 358191315.0, "step": 5929 }, { "epoch": 2.172216369715568, "grad_norm": 0.1315322642680421, "learning_rate": 2.6649115896634773e-05, "loss": 0.4486, "num_tokens": 359106724.0, "step": 5930 }, { "epoch": 2.172582787523474, "grad_norm": 0.13887415298257702, "learning_rate": 2.6644989639400662e-05, "loss": 0.4643, "num_tokens": 359813903.0, "step": 5931 }, { "epoch": 2.172949205331379, "grad_norm": 0.13264242343018445, "learning_rate": 2.6640863120644383e-05, "loss": 0.474, "num_tokens": 360609889.0, "step": 5932 }, { "epoch": 2.1733156231392847, "grad_norm": 0.14223485808511807, "learning_rate": 2.6636736340598254e-05, "loss": 0.4741, "num_tokens": 361395084.0, "step": 5933 }, { "epoch": 2.17368204094719, "grad_norm": 0.15322023151424446, "learning_rate": 2.6632609299494622e-05, "loss": 0.4662, "num_tokens": 361996992.0, "step": 5934 }, { "epoch": 2.1740484587550957, "grad_norm": 0.1451461045826791, "learning_rate": 2.6628481997565853e-05, "loss": 0.4454, "num_tokens": 362669551.0, "step": 5935 }, { "epoch": 2.174414876563001, "grad_norm": 0.13179190389619744, "learning_rate": 2.662435443504432e-05, "loss": 0.4758, "num_tokens": 363510037.0, "step": 5936 }, { "epoch": 2.1747812943709066, "grad_norm": 0.13554059558887613, "learning_rate": 2.6620226612162414e-05, "loss": 0.4738, "num_tokens": 364399742.0, "step": 5937 }, { "epoch": 2.175147712178812, "grad_norm": 0.12801230442244552, "learning_rate": 2.6616098529152536e-05, "loss": 0.4307, "num_tokens": 365188934.0, "step": 5938 }, { "epoch": 2.1755141299867176, "grad_norm": 0.1377601572329875, "learning_rate": 2.6611970186247112e-05, "loss": 0.4612, "num_tokens": 366077923.0, "step": 5939 }, { "epoch": 2.175880547794623, "grad_norm": 0.12153509858066414, "learning_rate": 2.6607841583678568e-05, "loss": 0.435, "num_tokens": 366980007.0, "step": 5940 }, { "epoch": 2.1762469656025285, "grad_norm": 0.14756439830135737, "learning_rate": 2.660371272167936e-05, "loss": 0.4541, "num_tokens": 367696560.0, "step": 5941 }, { "epoch": 2.1766133834104338, "grad_norm": 0.14179536972239482, "learning_rate": 2.6599583600481943e-05, "loss": 0.4812, "num_tokens": 368453438.0, "step": 5942 }, { "epoch": 2.176979801218339, "grad_norm": 0.1556074314849748, "learning_rate": 2.659545422031881e-05, "loss": 0.5013, "num_tokens": 369155879.0, "step": 5943 }, { "epoch": 2.1773462190262447, "grad_norm": 0.14601140081722414, "learning_rate": 2.6591324581422436e-05, "loss": 0.435, "num_tokens": 369810088.0, "step": 5944 }, { "epoch": 2.17771263683415, "grad_norm": 0.15412065807026396, "learning_rate": 2.658719468402533e-05, "loss": 0.4958, "num_tokens": 370487778.0, "step": 5945 }, { "epoch": 2.1780790546420556, "grad_norm": 0.14091665868463488, "learning_rate": 2.6583064528360028e-05, "loss": 0.4662, "num_tokens": 371154024.0, "step": 5946 }, { "epoch": 2.178445472449961, "grad_norm": 0.13960183346334978, "learning_rate": 2.657893411465905e-05, "loss": 0.4377, "num_tokens": 371949482.0, "step": 5947 }, { "epoch": 2.1788118902578666, "grad_norm": 0.1520667892756352, "learning_rate": 2.657480344315495e-05, "loss": 0.4639, "num_tokens": 372640004.0, "step": 5948 }, { "epoch": 2.179178308065772, "grad_norm": 0.15037169708520084, "learning_rate": 2.65706725140803e-05, "loss": 0.4466, "num_tokens": 373316188.0, "step": 5949 }, { "epoch": 2.1795447258736775, "grad_norm": 0.1419392157969853, "learning_rate": 2.6566541327667678e-05, "loss": 0.4868, "num_tokens": 374029803.0, "step": 5950 }, { "epoch": 2.179911143681583, "grad_norm": 0.145785549151944, "learning_rate": 2.656240988414967e-05, "loss": 0.4687, "num_tokens": 374769738.0, "step": 5951 }, { "epoch": 2.1802775614894885, "grad_norm": 0.14896349584525082, "learning_rate": 2.655827818375889e-05, "loss": 0.4399, "num_tokens": 375465929.0, "step": 5952 }, { "epoch": 2.1806439792973937, "grad_norm": 0.1397319437986002, "learning_rate": 2.6554146226727965e-05, "loss": 0.4172, "num_tokens": 376327011.0, "step": 5953 }, { "epoch": 2.1810103971052994, "grad_norm": 0.13125148892524227, "learning_rate": 2.6550014013289523e-05, "loss": 0.4362, "num_tokens": 377082724.0, "step": 5954 }, { "epoch": 2.1813768149132047, "grad_norm": 0.14277575060806816, "learning_rate": 2.6545881543676223e-05, "loss": 0.4563, "num_tokens": 377874470.0, "step": 5955 }, { "epoch": 2.1817432327211104, "grad_norm": 0.14674907708687462, "learning_rate": 2.6541748818120733e-05, "loss": 0.4437, "num_tokens": 378601974.0, "step": 5956 }, { "epoch": 2.1821096505290156, "grad_norm": 0.13986948529060875, "learning_rate": 2.6537615836855734e-05, "loss": 0.4553, "num_tokens": 379323854.0, "step": 5957 }, { "epoch": 2.1824760683369213, "grad_norm": 0.13410184133381256, "learning_rate": 2.6533482600113905e-05, "loss": 0.4385, "num_tokens": 380149507.0, "step": 5958 }, { "epoch": 2.1828424861448266, "grad_norm": 0.1633123041942763, "learning_rate": 2.6529349108127975e-05, "loss": 0.4914, "num_tokens": 380789406.0, "step": 5959 }, { "epoch": 2.1832089039527323, "grad_norm": 0.13898597998177606, "learning_rate": 2.652521536113066e-05, "loss": 0.4191, "num_tokens": 381528346.0, "step": 5960 }, { "epoch": 2.1835753217606375, "grad_norm": 0.1482253752643942, "learning_rate": 2.6521081359354697e-05, "loss": 0.4575, "num_tokens": 382334607.0, "step": 5961 }, { "epoch": 2.183941739568543, "grad_norm": 0.16633210004378216, "learning_rate": 2.651694710303284e-05, "loss": 0.4834, "num_tokens": 383091196.0, "step": 5962 }, { "epoch": 2.1843081573764485, "grad_norm": 0.14006734999436485, "learning_rate": 2.651281259239786e-05, "loss": 0.423, "num_tokens": 383896226.0, "step": 5963 }, { "epoch": 2.184674575184354, "grad_norm": 0.1710470714203969, "learning_rate": 2.6508677827682533e-05, "loss": 0.4432, "num_tokens": 384578466.0, "step": 5964 }, { "epoch": 2.1850409929922594, "grad_norm": 0.14132222674850067, "learning_rate": 2.650454280911965e-05, "loss": 0.4499, "num_tokens": 385335975.0, "step": 5965 }, { "epoch": 2.185407410800165, "grad_norm": 0.1401301113424999, "learning_rate": 2.650040753694203e-05, "loss": 0.4354, "num_tokens": 386121466.0, "step": 5966 }, { "epoch": 2.1857738286080703, "grad_norm": 0.14495638899866994, "learning_rate": 2.6496272011382496e-05, "loss": 0.4535, "num_tokens": 386961911.0, "step": 5967 }, { "epoch": 2.1861402464159756, "grad_norm": 0.14573188565949066, "learning_rate": 2.649213623267388e-05, "loss": 0.4817, "num_tokens": 387655700.0, "step": 5968 }, { "epoch": 2.1865066642238813, "grad_norm": 0.14050278223106824, "learning_rate": 2.648800020104904e-05, "loss": 0.4336, "num_tokens": 388446589.0, "step": 5969 }, { "epoch": 2.1868730820317865, "grad_norm": 0.14920641163321474, "learning_rate": 2.6483863916740843e-05, "loss": 0.459, "num_tokens": 389141062.0, "step": 5970 }, { "epoch": 2.1872394998396922, "grad_norm": 0.14937448884755067, "learning_rate": 2.6479727379982172e-05, "loss": 0.4644, "num_tokens": 389826441.0, "step": 5971 }, { "epoch": 2.1876059176475975, "grad_norm": 0.14953124074637403, "learning_rate": 2.647559059100591e-05, "loss": 0.4364, "num_tokens": 390647567.0, "step": 5972 }, { "epoch": 2.187972335455503, "grad_norm": 0.1496225443112846, "learning_rate": 2.6471453550044975e-05, "loss": 0.4756, "num_tokens": 391372953.0, "step": 5973 }, { "epoch": 2.1883387532634084, "grad_norm": 0.15040872509385775, "learning_rate": 2.64673162573323e-05, "loss": 0.4622, "num_tokens": 392100826.0, "step": 5974 }, { "epoch": 2.188705171071314, "grad_norm": 0.15352229129151154, "learning_rate": 2.646317871310081e-05, "loss": 0.4741, "num_tokens": 392855132.0, "step": 5975 }, { "epoch": 2.1890715888792194, "grad_norm": 0.12397836848314842, "learning_rate": 2.6459040917583454e-05, "loss": 0.4686, "num_tokens": 393767799.0, "step": 5976 }, { "epoch": 2.189438006687125, "grad_norm": 0.16078955572187176, "learning_rate": 2.6454902871013214e-05, "loss": 0.5286, "num_tokens": 394444205.0, "step": 5977 }, { "epoch": 2.1898044244950303, "grad_norm": 0.1444883128413464, "learning_rate": 2.6450764573623054e-05, "loss": 0.4763, "num_tokens": 395316923.0, "step": 5978 }, { "epoch": 2.190170842302936, "grad_norm": 0.14130426361580561, "learning_rate": 2.644662602564598e-05, "loss": 0.4938, "num_tokens": 396103404.0, "step": 5979 }, { "epoch": 2.1905372601108413, "grad_norm": 0.14179755094742677, "learning_rate": 2.6442487227314994e-05, "loss": 0.4552, "num_tokens": 396782325.0, "step": 5980 }, { "epoch": 2.190903677918747, "grad_norm": 0.14694783297142647, "learning_rate": 2.643834817886312e-05, "loss": 0.458, "num_tokens": 397542755.0, "step": 5981 }, { "epoch": 2.191270095726652, "grad_norm": 0.14565607884310353, "learning_rate": 2.6434208880523397e-05, "loss": 0.4722, "num_tokens": 398261085.0, "step": 5982 }, { "epoch": 2.191636513534558, "grad_norm": 0.14379239933799504, "learning_rate": 2.6430069332528877e-05, "loss": 0.4317, "num_tokens": 399056906.0, "step": 5983 }, { "epoch": 2.192002931342463, "grad_norm": 0.14680984191629137, "learning_rate": 2.6425929535112617e-05, "loss": 0.458, "num_tokens": 399861164.0, "step": 5984 }, { "epoch": 2.192369349150369, "grad_norm": 0.14639857835057482, "learning_rate": 2.64217894885077e-05, "loss": 0.4733, "num_tokens": 400682986.0, "step": 5985 }, { "epoch": 2.192735766958274, "grad_norm": 0.15384812174362378, "learning_rate": 2.641764919294722e-05, "loss": 0.4854, "num_tokens": 401432835.0, "step": 5986 }, { "epoch": 2.19310218476618, "grad_norm": 0.13218151143305903, "learning_rate": 2.6413508648664284e-05, "loss": 0.4573, "num_tokens": 402237655.0, "step": 5987 }, { "epoch": 2.193468602574085, "grad_norm": 0.13751169006920294, "learning_rate": 2.640936785589202e-05, "loss": 0.4484, "num_tokens": 403029563.0, "step": 5988 }, { "epoch": 2.1938350203819907, "grad_norm": 0.15562109626795545, "learning_rate": 2.640522681486354e-05, "loss": 0.4807, "num_tokens": 403673529.0, "step": 5989 }, { "epoch": 2.194201438189896, "grad_norm": 0.1443630852653827, "learning_rate": 2.6401085525812015e-05, "loss": 0.4428, "num_tokens": 404403564.0, "step": 5990 }, { "epoch": 2.1945678559978017, "grad_norm": 0.1327129375767248, "learning_rate": 2.639694398897061e-05, "loss": 0.4464, "num_tokens": 405178549.0, "step": 5991 }, { "epoch": 2.194934273805707, "grad_norm": 0.1379760158844249, "learning_rate": 2.639280220457248e-05, "loss": 0.4488, "num_tokens": 405923376.0, "step": 5992 }, { "epoch": 2.195300691613612, "grad_norm": 0.1339251682380712, "learning_rate": 2.638866017285083e-05, "loss": 0.4697, "num_tokens": 406657603.0, "step": 5993 }, { "epoch": 2.195667109421518, "grad_norm": 0.13442889867393246, "learning_rate": 2.638451789403887e-05, "loss": 0.4225, "num_tokens": 407370927.0, "step": 5994 }, { "epoch": 2.1960335272294236, "grad_norm": 0.1343210958857928, "learning_rate": 2.6380375368369805e-05, "loss": 0.4767, "num_tokens": 408188566.0, "step": 5995 }, { "epoch": 2.196399945037329, "grad_norm": 0.14172006280192875, "learning_rate": 2.6376232596076876e-05, "loss": 0.4313, "num_tokens": 408894040.0, "step": 5996 }, { "epoch": 2.196766362845234, "grad_norm": 0.14110266561604495, "learning_rate": 2.6372089577393332e-05, "loss": 0.4642, "num_tokens": 409678761.0, "step": 5997 }, { "epoch": 2.1971327806531398, "grad_norm": 0.1569272865905965, "learning_rate": 2.636794631255242e-05, "loss": 0.464, "num_tokens": 410328445.0, "step": 5998 }, { "epoch": 2.197499198461045, "grad_norm": 0.1436842076184309, "learning_rate": 2.6363802801787427e-05, "loss": 0.454, "num_tokens": 411028647.0, "step": 5999 }, { "epoch": 2.1978656162689507, "grad_norm": 0.14279917862541497, "learning_rate": 2.6359659045331637e-05, "loss": 0.4463, "num_tokens": 411743454.0, "step": 6000 }, { "epoch": 2.198232034076856, "grad_norm": 0.14758725097134312, "learning_rate": 2.6355515043418353e-05, "loss": 0.4478, "num_tokens": 412445674.0, "step": 6001 }, { "epoch": 2.1985984518847617, "grad_norm": 0.14374732552078046, "learning_rate": 2.6351370796280886e-05, "loss": 0.4358, "num_tokens": 413204973.0, "step": 6002 }, { "epoch": 2.198964869692667, "grad_norm": 0.14347704008579626, "learning_rate": 2.6347226304152562e-05, "loss": 0.4372, "num_tokens": 413953749.0, "step": 6003 }, { "epoch": 2.1993312875005726, "grad_norm": 0.13304011933734378, "learning_rate": 2.634308156726674e-05, "loss": 0.4315, "num_tokens": 414730571.0, "step": 6004 }, { "epoch": 2.199697705308478, "grad_norm": 0.15159423262184687, "learning_rate": 2.6338936585856763e-05, "loss": 0.4408, "num_tokens": 415354448.0, "step": 6005 }, { "epoch": 2.2000641231163836, "grad_norm": 0.13942041045692286, "learning_rate": 2.633479136015601e-05, "loss": 0.46, "num_tokens": 416298304.0, "step": 6006 }, { "epoch": 2.200430540924289, "grad_norm": 0.12886004815217283, "learning_rate": 2.6330645890397857e-05, "loss": 0.4327, "num_tokens": 417129744.0, "step": 6007 }, { "epoch": 2.2007969587321945, "grad_norm": 0.1370335897848294, "learning_rate": 2.6326500176815715e-05, "loss": 0.4722, "num_tokens": 417918814.0, "step": 6008 }, { "epoch": 2.2011633765400997, "grad_norm": 0.15107900867543642, "learning_rate": 2.6322354219642977e-05, "loss": 0.4678, "num_tokens": 418622544.0, "step": 6009 }, { "epoch": 2.2015297943480054, "grad_norm": 0.1369852376032004, "learning_rate": 2.6318208019113085e-05, "loss": 0.4515, "num_tokens": 419403202.0, "step": 6010 }, { "epoch": 2.2018962121559107, "grad_norm": 0.13585055588626932, "learning_rate": 2.6314061575459472e-05, "loss": 0.4516, "num_tokens": 420097578.0, "step": 6011 }, { "epoch": 2.2022626299638164, "grad_norm": 0.1403726264428841, "learning_rate": 2.6309914888915594e-05, "loss": 0.4591, "num_tokens": 420857580.0, "step": 6012 }, { "epoch": 2.2026290477717216, "grad_norm": 0.12820631420317785, "learning_rate": 2.6305767959714907e-05, "loss": 0.4221, "num_tokens": 421720506.0, "step": 6013 }, { "epoch": 2.2029954655796273, "grad_norm": 0.1309856860864473, "learning_rate": 2.6301620788090903e-05, "loss": 0.4344, "num_tokens": 422550909.0, "step": 6014 }, { "epoch": 2.2033618833875326, "grad_norm": 0.13656000512933572, "learning_rate": 2.629747337427708e-05, "loss": 0.4475, "num_tokens": 423441329.0, "step": 6015 }, { "epoch": 2.2037283011954383, "grad_norm": 0.1314055418038549, "learning_rate": 2.629332571850694e-05, "loss": 0.4533, "num_tokens": 424339521.0, "step": 6016 }, { "epoch": 2.2040947190033435, "grad_norm": 0.13740133607677163, "learning_rate": 2.628917782101399e-05, "loss": 0.4443, "num_tokens": 425034518.0, "step": 6017 }, { "epoch": 2.2044611368112492, "grad_norm": 0.13105534140680253, "learning_rate": 2.6285029682031786e-05, "loss": 0.4427, "num_tokens": 425816159.0, "step": 6018 }, { "epoch": 2.2048275546191545, "grad_norm": 0.14853995862143254, "learning_rate": 2.628088130179387e-05, "loss": 0.4726, "num_tokens": 426558253.0, "step": 6019 }, { "epoch": 2.20519397242706, "grad_norm": 0.125903565781083, "learning_rate": 2.6276732680533798e-05, "loss": 0.4323, "num_tokens": 427421355.0, "step": 6020 }, { "epoch": 2.2055603902349654, "grad_norm": 0.1311271682921909, "learning_rate": 2.6272583818485152e-05, "loss": 0.4467, "num_tokens": 428193176.0, "step": 6021 }, { "epoch": 2.2059268080428707, "grad_norm": 0.12853936750905745, "learning_rate": 2.626843471588152e-05, "loss": 0.4586, "num_tokens": 429021302.0, "step": 6022 }, { "epoch": 2.2062932258507764, "grad_norm": 0.1372199437780545, "learning_rate": 2.6264285372956502e-05, "loss": 0.4972, "num_tokens": 429777491.0, "step": 6023 }, { "epoch": 2.2066596436586816, "grad_norm": 0.1327377011399211, "learning_rate": 2.6260135789943708e-05, "loss": 0.4818, "num_tokens": 430553952.0, "step": 6024 }, { "epoch": 2.2070260614665873, "grad_norm": 0.13530766502920677, "learning_rate": 2.6255985967076787e-05, "loss": 0.4255, "num_tokens": 431225642.0, "step": 6025 }, { "epoch": 2.2073924792744926, "grad_norm": 0.14272528258405565, "learning_rate": 2.6251835904589372e-05, "loss": 0.4901, "num_tokens": 431965022.0, "step": 6026 }, { "epoch": 2.2077588970823983, "grad_norm": 0.127041760571039, "learning_rate": 2.624768560271511e-05, "loss": 0.4264, "num_tokens": 432814097.0, "step": 6027 }, { "epoch": 2.2081253148903035, "grad_norm": 0.15111923276112946, "learning_rate": 2.624353506168769e-05, "loss": 0.4902, "num_tokens": 433532093.0, "step": 6028 }, { "epoch": 2.208491732698209, "grad_norm": 0.1331277490557015, "learning_rate": 2.6239384281740784e-05, "loss": 0.4829, "num_tokens": 434286423.0, "step": 6029 }, { "epoch": 2.2088581505061144, "grad_norm": 0.1353165214026368, "learning_rate": 2.6235233263108084e-05, "loss": 0.4792, "num_tokens": 435151754.0, "step": 6030 }, { "epoch": 2.20922456831402, "grad_norm": 0.1316268669703551, "learning_rate": 2.6231082006023317e-05, "loss": 0.4387, "num_tokens": 435879952.0, "step": 6031 }, { "epoch": 2.2095909861219254, "grad_norm": 0.11838787527131883, "learning_rate": 2.6226930510720193e-05, "loss": 0.4574, "num_tokens": 436861322.0, "step": 6032 }, { "epoch": 2.209957403929831, "grad_norm": 0.1340817047615259, "learning_rate": 2.6222778777432462e-05, "loss": 0.4587, "num_tokens": 437577401.0, "step": 6033 }, { "epoch": 2.2103238217377363, "grad_norm": 0.12989482863084612, "learning_rate": 2.6218626806393853e-05, "loss": 0.4238, "num_tokens": 438288943.0, "step": 6034 }, { "epoch": 2.210690239545642, "grad_norm": 0.13832938223657046, "learning_rate": 2.6214474597838155e-05, "loss": 0.4714, "num_tokens": 439012571.0, "step": 6035 }, { "epoch": 2.2110566573535473, "grad_norm": 0.13577728477458545, "learning_rate": 2.6210322151999136e-05, "loss": 0.4444, "num_tokens": 439734210.0, "step": 6036 }, { "epoch": 2.211423075161453, "grad_norm": 0.14220054811171426, "learning_rate": 2.6206169469110582e-05, "loss": 0.4688, "num_tokens": 440376150.0, "step": 6037 }, { "epoch": 2.2117894929693582, "grad_norm": 0.13643266507161994, "learning_rate": 2.6202016549406303e-05, "loss": 0.4439, "num_tokens": 441130882.0, "step": 6038 }, { "epoch": 2.212155910777264, "grad_norm": 0.1312925541540522, "learning_rate": 2.619786339312011e-05, "loss": 0.4487, "num_tokens": 441946365.0, "step": 6039 }, { "epoch": 2.212522328585169, "grad_norm": 0.13257828884221698, "learning_rate": 2.6193710000485845e-05, "loss": 0.4574, "num_tokens": 442830973.0, "step": 6040 }, { "epoch": 2.212888746393075, "grad_norm": 0.13865401045575243, "learning_rate": 2.6189556371737347e-05, "loss": 0.4556, "num_tokens": 443658119.0, "step": 6041 }, { "epoch": 2.21325516420098, "grad_norm": 0.15440269336353418, "learning_rate": 2.618540250710847e-05, "loss": 0.505, "num_tokens": 444348183.0, "step": 6042 }, { "epoch": 2.213621582008886, "grad_norm": 0.14648919514802322, "learning_rate": 2.6181248406833083e-05, "loss": 0.4751, "num_tokens": 445110800.0, "step": 6043 }, { "epoch": 2.213987999816791, "grad_norm": 0.14735315447969521, "learning_rate": 2.6177094071145083e-05, "loss": 0.4472, "num_tokens": 445894076.0, "step": 6044 }, { "epoch": 2.2143544176246968, "grad_norm": 0.156546534391791, "learning_rate": 2.6172939500278355e-05, "loss": 0.4834, "num_tokens": 446614090.0, "step": 6045 }, { "epoch": 2.214720835432602, "grad_norm": 0.13608816412035246, "learning_rate": 2.6168784694466815e-05, "loss": 0.451, "num_tokens": 447446105.0, "step": 6046 }, { "epoch": 2.2150872532405073, "grad_norm": 0.13158371306302952, "learning_rate": 2.616462965394439e-05, "loss": 0.4482, "num_tokens": 448270806.0, "step": 6047 }, { "epoch": 2.215453671048413, "grad_norm": 0.13846853382798752, "learning_rate": 2.6160474378945004e-05, "loss": 0.4692, "num_tokens": 449138138.0, "step": 6048 }, { "epoch": 2.2158200888563186, "grad_norm": 0.13605343731187464, "learning_rate": 2.6156318869702623e-05, "loss": 0.4216, "num_tokens": 449953908.0, "step": 6049 }, { "epoch": 2.216186506664224, "grad_norm": 0.13555746631227555, "learning_rate": 2.6152163126451203e-05, "loss": 0.4327, "num_tokens": 450683725.0, "step": 6050 }, { "epoch": 2.216552924472129, "grad_norm": 0.13367310068611513, "learning_rate": 2.614800714942472e-05, "loss": 0.4601, "num_tokens": 451551137.0, "step": 6051 }, { "epoch": 2.216919342280035, "grad_norm": 0.14316475545462248, "learning_rate": 2.614385093885717e-05, "loss": 0.4891, "num_tokens": 452361046.0, "step": 6052 }, { "epoch": 2.21728576008794, "grad_norm": 0.15071745042424634, "learning_rate": 2.6139694494982544e-05, "loss": 0.4658, "num_tokens": 453149994.0, "step": 6053 }, { "epoch": 2.217652177895846, "grad_norm": 0.13249287551649047, "learning_rate": 2.6135537818034868e-05, "loss": 0.441, "num_tokens": 454062837.0, "step": 6054 }, { "epoch": 2.218018595703751, "grad_norm": 0.14939711746283443, "learning_rate": 2.6131380908248165e-05, "loss": 0.4771, "num_tokens": 454966628.0, "step": 6055 }, { "epoch": 2.2183850135116567, "grad_norm": 0.1277543615787214, "learning_rate": 2.612722376585648e-05, "loss": 0.4343, "num_tokens": 455808019.0, "step": 6056 }, { "epoch": 2.218751431319562, "grad_norm": 0.14960203050480517, "learning_rate": 2.6123066391093876e-05, "loss": 0.4433, "num_tokens": 456530603.0, "step": 6057 }, { "epoch": 2.2191178491274677, "grad_norm": 0.14993206440319753, "learning_rate": 2.611890878419441e-05, "loss": 0.4307, "num_tokens": 457264993.0, "step": 6058 }, { "epoch": 2.219484266935373, "grad_norm": 0.14394923919672034, "learning_rate": 2.6114750945392167e-05, "loss": 0.4483, "num_tokens": 457933842.0, "step": 6059 }, { "epoch": 2.2198506847432786, "grad_norm": 0.12390400953737082, "learning_rate": 2.611059287492124e-05, "loss": 0.4335, "num_tokens": 458863806.0, "step": 6060 }, { "epoch": 2.220217102551184, "grad_norm": 0.14984196646603845, "learning_rate": 2.6106434573015743e-05, "loss": 0.4543, "num_tokens": 459600492.0, "step": 6061 }, { "epoch": 2.2205835203590896, "grad_norm": 0.14853060678613822, "learning_rate": 2.6102276039909794e-05, "loss": 0.4758, "num_tokens": 460355185.0, "step": 6062 }, { "epoch": 2.220949938166995, "grad_norm": 0.1368273994957703, "learning_rate": 2.6098117275837526e-05, "loss": 0.4721, "num_tokens": 461160524.0, "step": 6063 }, { "epoch": 2.2213163559749005, "grad_norm": 0.15368058969341186, "learning_rate": 2.6093958281033084e-05, "loss": 0.4612, "num_tokens": 461847809.0, "step": 6064 }, { "epoch": 2.2216827737828058, "grad_norm": 0.1616032442903034, "learning_rate": 2.6089799055730627e-05, "loss": 0.4604, "num_tokens": 462646866.0, "step": 6065 }, { "epoch": 2.2220491915907115, "grad_norm": 0.12871616196320326, "learning_rate": 2.6085639600164334e-05, "loss": 0.4652, "num_tokens": 463418343.0, "step": 6066 }, { "epoch": 2.2224156093986167, "grad_norm": 0.14676640470415797, "learning_rate": 2.608147991456838e-05, "loss": 0.4603, "num_tokens": 464180456.0, "step": 6067 }, { "epoch": 2.2227820272065224, "grad_norm": 0.15677079632268676, "learning_rate": 2.6077319999176972e-05, "loss": 0.4407, "num_tokens": 464823194.0, "step": 6068 }, { "epoch": 2.2231484450144277, "grad_norm": 0.13323982876223844, "learning_rate": 2.6073159854224326e-05, "loss": 0.4531, "num_tokens": 465571175.0, "step": 6069 }, { "epoch": 2.2235148628223333, "grad_norm": 0.13242267649210976, "learning_rate": 2.606899947994465e-05, "loss": 0.415, "num_tokens": 466400444.0, "step": 6070 }, { "epoch": 2.2238812806302386, "grad_norm": 0.15291067062419436, "learning_rate": 2.6064838876572204e-05, "loss": 0.4567, "num_tokens": 467105841.0, "step": 6071 }, { "epoch": 2.2242476984381443, "grad_norm": 0.14146460308411896, "learning_rate": 2.6060678044341215e-05, "loss": 0.4602, "num_tokens": 467935005.0, "step": 6072 }, { "epoch": 2.2246141162460495, "grad_norm": 0.15141788991421076, "learning_rate": 2.6056516983485954e-05, "loss": 0.4613, "num_tokens": 468667976.0, "step": 6073 }, { "epoch": 2.2249805340539552, "grad_norm": 0.1323828994182039, "learning_rate": 2.605235569424071e-05, "loss": 0.4269, "num_tokens": 469449549.0, "step": 6074 }, { "epoch": 2.2253469518618605, "grad_norm": 0.13208388690488443, "learning_rate": 2.6048194176839757e-05, "loss": 0.4448, "num_tokens": 470254899.0, "step": 6075 }, { "epoch": 2.2257133696697657, "grad_norm": 0.14591333575306112, "learning_rate": 2.60440324315174e-05, "loss": 0.4784, "num_tokens": 470959835.0, "step": 6076 }, { "epoch": 2.2260797874776714, "grad_norm": 0.14632520986907202, "learning_rate": 2.603987045850796e-05, "loss": 0.4762, "num_tokens": 471707876.0, "step": 6077 }, { "epoch": 2.2264462052855767, "grad_norm": 0.1355592778517315, "learning_rate": 2.6035708258045752e-05, "loss": 0.4527, "num_tokens": 472471537.0, "step": 6078 }, { "epoch": 2.2268126230934824, "grad_norm": 0.13331683355940604, "learning_rate": 2.6031545830365127e-05, "loss": 0.4799, "num_tokens": 473205286.0, "step": 6079 }, { "epoch": 2.2271790409013876, "grad_norm": 0.1403120045073276, "learning_rate": 2.6027383175700435e-05, "loss": 0.4394, "num_tokens": 473848528.0, "step": 6080 }, { "epoch": 2.2275454587092933, "grad_norm": 0.14666218762176444, "learning_rate": 2.602322029428604e-05, "loss": 0.4626, "num_tokens": 474556939.0, "step": 6081 }, { "epoch": 2.2279118765171986, "grad_norm": 0.14211365682990457, "learning_rate": 2.601905718635632e-05, "loss": 0.4528, "num_tokens": 475272315.0, "step": 6082 }, { "epoch": 2.2282782943251043, "grad_norm": 0.13185365369408625, "learning_rate": 2.601489385214567e-05, "loss": 0.473, "num_tokens": 476049683.0, "step": 6083 }, { "epoch": 2.2286447121330095, "grad_norm": 0.13389645796522617, "learning_rate": 2.6010730291888492e-05, "loss": 0.4626, "num_tokens": 476798394.0, "step": 6084 }, { "epoch": 2.229011129940915, "grad_norm": 0.1274949430573204, "learning_rate": 2.6006566505819205e-05, "loss": 0.4305, "num_tokens": 477594072.0, "step": 6085 }, { "epoch": 2.2293775477488205, "grad_norm": 0.1386137844213178, "learning_rate": 2.6002402494172234e-05, "loss": 0.4401, "num_tokens": 478371884.0, "step": 6086 }, { "epoch": 2.229743965556726, "grad_norm": 0.13367281387673027, "learning_rate": 2.5998238257182024e-05, "loss": 0.4509, "num_tokens": 479113849.0, "step": 6087 }, { "epoch": 2.2301103833646314, "grad_norm": 0.12994902966467406, "learning_rate": 2.599407379508303e-05, "loss": 0.4665, "num_tokens": 479928677.0, "step": 6088 }, { "epoch": 2.230476801172537, "grad_norm": 0.13373092120576502, "learning_rate": 2.598990910810971e-05, "loss": 0.4467, "num_tokens": 480631011.0, "step": 6089 }, { "epoch": 2.2308432189804424, "grad_norm": 0.14150393167598654, "learning_rate": 2.5985744196496564e-05, "loss": 0.4319, "num_tokens": 481313478.0, "step": 6090 }, { "epoch": 2.231209636788348, "grad_norm": 0.14703843216830836, "learning_rate": 2.5981579060478072e-05, "loss": 0.4866, "num_tokens": 481997815.0, "step": 6091 }, { "epoch": 2.2315760545962533, "grad_norm": 0.1317614911549813, "learning_rate": 2.597741370028874e-05, "loss": 0.4488, "num_tokens": 482839715.0, "step": 6092 }, { "epoch": 2.231942472404159, "grad_norm": 0.14189394439307973, "learning_rate": 2.597324811616308e-05, "loss": 0.4497, "num_tokens": 483584149.0, "step": 6093 }, { "epoch": 2.2323088902120642, "grad_norm": 0.14570824879484603, "learning_rate": 2.5969082308335645e-05, "loss": 0.4555, "num_tokens": 484288810.0, "step": 6094 }, { "epoch": 2.23267530801997, "grad_norm": 0.1354231514662541, "learning_rate": 2.5964916277040953e-05, "loss": 0.4273, "num_tokens": 485086858.0, "step": 6095 }, { "epoch": 2.233041725827875, "grad_norm": 0.14000038044790114, "learning_rate": 2.596075002251357e-05, "loss": 0.4711, "num_tokens": 485965408.0, "step": 6096 }, { "epoch": 2.233408143635781, "grad_norm": 0.12127824932297322, "learning_rate": 2.5956583544988064e-05, "loss": 0.4435, "num_tokens": 486825407.0, "step": 6097 }, { "epoch": 2.233774561443686, "grad_norm": 0.14132540542376051, "learning_rate": 2.595241684469902e-05, "loss": 0.4533, "num_tokens": 487551812.0, "step": 6098 }, { "epoch": 2.234140979251592, "grad_norm": 0.1337867035566268, "learning_rate": 2.5948249921881025e-05, "loss": 0.4409, "num_tokens": 488388306.0, "step": 6099 }, { "epoch": 2.234507397059497, "grad_norm": 0.1324874442598288, "learning_rate": 2.594408277676869e-05, "loss": 0.4385, "num_tokens": 489105706.0, "step": 6100 }, { "epoch": 2.2348738148674023, "grad_norm": 0.1304091016039428, "learning_rate": 2.5939915409596636e-05, "loss": 0.457, "num_tokens": 489826632.0, "step": 6101 }, { "epoch": 2.235240232675308, "grad_norm": 0.13237715250655802, "learning_rate": 2.5935747820599483e-05, "loss": 0.4696, "num_tokens": 490651732.0, "step": 6102 }, { "epoch": 2.2356066504832137, "grad_norm": 0.12911970459809233, "learning_rate": 2.593158001001188e-05, "loss": 0.4453, "num_tokens": 491394779.0, "step": 6103 }, { "epoch": 2.235973068291119, "grad_norm": 0.139085266040273, "learning_rate": 2.5927411978068492e-05, "loss": 0.4803, "num_tokens": 492138123.0, "step": 6104 }, { "epoch": 2.236339486099024, "grad_norm": 0.15155073475503686, "learning_rate": 2.592324372500398e-05, "loss": 0.4728, "num_tokens": 492882894.0, "step": 6105 }, { "epoch": 2.23670590390693, "grad_norm": 0.1441198131725778, "learning_rate": 2.5919075251053012e-05, "loss": 0.4682, "num_tokens": 493480325.0, "step": 6106 }, { "epoch": 2.237072321714835, "grad_norm": 0.14043449320836204, "learning_rate": 2.5914906556450307e-05, "loss": 0.4514, "num_tokens": 494116019.0, "step": 6107 }, { "epoch": 2.237438739522741, "grad_norm": 0.14554441041110794, "learning_rate": 2.591073764143056e-05, "loss": 0.4574, "num_tokens": 494852705.0, "step": 6108 }, { "epoch": 2.237805157330646, "grad_norm": 0.1425980820834882, "learning_rate": 2.590656850622848e-05, "loss": 0.4526, "num_tokens": 495579630.0, "step": 6109 }, { "epoch": 2.238171575138552, "grad_norm": 0.14064063736571591, "learning_rate": 2.5902399151078804e-05, "loss": 0.4866, "num_tokens": 496269165.0, "step": 6110 }, { "epoch": 2.238537992946457, "grad_norm": 0.13912451265955128, "learning_rate": 2.5898229576216278e-05, "loss": 0.4659, "num_tokens": 497016859.0, "step": 6111 }, { "epoch": 2.2389044107543628, "grad_norm": 0.1791204169794865, "learning_rate": 2.589405978187566e-05, "loss": 0.4549, "num_tokens": 497848204.0, "step": 6112 }, { "epoch": 2.239270828562268, "grad_norm": 0.1402652791409278, "learning_rate": 2.5889889768291708e-05, "loss": 0.4448, "num_tokens": 498544920.0, "step": 6113 }, { "epoch": 2.2396372463701737, "grad_norm": 0.15833065631600138, "learning_rate": 2.5885719535699205e-05, "loss": 0.4542, "num_tokens": 499275965.0, "step": 6114 }, { "epoch": 2.240003664178079, "grad_norm": 0.1389282376724882, "learning_rate": 2.588154908433295e-05, "loss": 0.4419, "num_tokens": 500086741.0, "step": 6115 }, { "epoch": 2.2403700819859846, "grad_norm": 0.14360849519947813, "learning_rate": 2.587737841442774e-05, "loss": 0.4694, "num_tokens": 500853976.0, "step": 6116 }, { "epoch": 2.24073649979389, "grad_norm": 0.13467412109879828, "learning_rate": 2.58732075262184e-05, "loss": 0.4408, "num_tokens": 501567525.0, "step": 6117 }, { "epoch": 2.2411029176017956, "grad_norm": 0.14181133097401072, "learning_rate": 2.586903641993975e-05, "loss": 0.4967, "num_tokens": 502308081.0, "step": 6118 }, { "epoch": 2.241469335409701, "grad_norm": 0.13956547457285043, "learning_rate": 2.586486509582664e-05, "loss": 0.4387, "num_tokens": 502997689.0, "step": 6119 }, { "epoch": 2.2418357532176065, "grad_norm": 0.14950218204772162, "learning_rate": 2.5860693554113913e-05, "loss": 0.4485, "num_tokens": 503741343.0, "step": 6120 }, { "epoch": 2.242202171025512, "grad_norm": 0.1395381334515538, "learning_rate": 2.585652179503645e-05, "loss": 0.4655, "num_tokens": 504623563.0, "step": 6121 }, { "epoch": 2.2425685888334175, "grad_norm": 0.13903298011899634, "learning_rate": 2.585234981882912e-05, "loss": 0.4608, "num_tokens": 505379486.0, "step": 6122 }, { "epoch": 2.2429350066413227, "grad_norm": 0.1332713377019052, "learning_rate": 2.584817762572681e-05, "loss": 0.4375, "num_tokens": 506153823.0, "step": 6123 }, { "epoch": 2.2433014244492284, "grad_norm": 0.1329817861571565, "learning_rate": 2.5844005215964432e-05, "loss": 0.4309, "num_tokens": 506966510.0, "step": 6124 }, { "epoch": 2.2436678422571337, "grad_norm": 0.15286773848157234, "learning_rate": 2.5839832589776895e-05, "loss": 0.4734, "num_tokens": 507616126.0, "step": 6125 }, { "epoch": 2.2440342600650394, "grad_norm": 0.14821814501025155, "learning_rate": 2.5835659747399137e-05, "loss": 0.4727, "num_tokens": 508258588.0, "step": 6126 }, { "epoch": 2.2444006778729446, "grad_norm": 0.14151887403655083, "learning_rate": 2.583148668906608e-05, "loss": 0.4453, "num_tokens": 509022396.0, "step": 6127 }, { "epoch": 2.2447670956808503, "grad_norm": 0.13965605298530348, "learning_rate": 2.582731341501268e-05, "loss": 0.4615, "num_tokens": 509799356.0, "step": 6128 }, { "epoch": 2.2451335134887556, "grad_norm": 0.13862545376336702, "learning_rate": 2.5823139925473917e-05, "loss": 0.4338, "num_tokens": 510538306.0, "step": 6129 }, { "epoch": 2.245499931296661, "grad_norm": 0.13269414421651765, "learning_rate": 2.5818966220684743e-05, "loss": 0.4572, "num_tokens": 511453623.0, "step": 6130 }, { "epoch": 2.2458663491045665, "grad_norm": 0.12757672182448712, "learning_rate": 2.5814792300880157e-05, "loss": 0.4357, "num_tokens": 512186188.0, "step": 6131 }, { "epoch": 2.2462327669124718, "grad_norm": 0.14595795942392772, "learning_rate": 2.5810618166295172e-05, "loss": 0.4747, "num_tokens": 512977180.0, "step": 6132 }, { "epoch": 2.2465991847203775, "grad_norm": 0.12957472378065604, "learning_rate": 2.580644381716478e-05, "loss": 0.4481, "num_tokens": 513842109.0, "step": 6133 }, { "epoch": 2.2469656025282827, "grad_norm": 0.15051251194591864, "learning_rate": 2.5802269253724007e-05, "loss": 0.4784, "num_tokens": 514506174.0, "step": 6134 }, { "epoch": 2.2473320203361884, "grad_norm": 0.1623208445846545, "learning_rate": 2.57980944762079e-05, "loss": 0.4586, "num_tokens": 515138074.0, "step": 6135 }, { "epoch": 2.2476984381440936, "grad_norm": 0.13521989510423407, "learning_rate": 2.57939194848515e-05, "loss": 0.4429, "num_tokens": 515907272.0, "step": 6136 }, { "epoch": 2.2480648559519993, "grad_norm": 0.14601787475287906, "learning_rate": 2.578974427988987e-05, "loss": 0.4781, "num_tokens": 516601386.0, "step": 6137 }, { "epoch": 2.2484312737599046, "grad_norm": 0.14125430981286458, "learning_rate": 2.5785568861558083e-05, "loss": 0.4831, "num_tokens": 517397860.0, "step": 6138 }, { "epoch": 2.2487976915678103, "grad_norm": 0.1375290546984907, "learning_rate": 2.578139323009122e-05, "loss": 0.4628, "num_tokens": 518220415.0, "step": 6139 }, { "epoch": 2.2491641093757155, "grad_norm": 0.13109503638553596, "learning_rate": 2.577721738572438e-05, "loss": 0.4406, "num_tokens": 519053208.0, "step": 6140 }, { "epoch": 2.2495305271836212, "grad_norm": 0.14654937088595227, "learning_rate": 2.5773041328692678e-05, "loss": 0.4627, "num_tokens": 519858914.0, "step": 6141 }, { "epoch": 2.2498969449915265, "grad_norm": 0.127000856597145, "learning_rate": 2.576886505923122e-05, "loss": 0.43, "num_tokens": 520672306.0, "step": 6142 }, { "epoch": 2.250263362799432, "grad_norm": 0.13611449104914275, "learning_rate": 2.576468857757515e-05, "loss": 0.4526, "num_tokens": 521408997.0, "step": 6143 }, { "epoch": 2.2506297806073374, "grad_norm": 0.13642809530957936, "learning_rate": 2.5760511883959594e-05, "loss": 0.4436, "num_tokens": 522230375.0, "step": 6144 }, { "epoch": 2.250996198415243, "grad_norm": 0.13972031806819113, "learning_rate": 2.5756334978619738e-05, "loss": 0.4519, "num_tokens": 523010786.0, "step": 6145 }, { "epoch": 2.2513626162231484, "grad_norm": 0.12910650937148646, "learning_rate": 2.5752157861790727e-05, "loss": 0.4252, "num_tokens": 523808620.0, "step": 6146 }, { "epoch": 2.251729034031054, "grad_norm": 0.13157056968074354, "learning_rate": 2.5747980533707756e-05, "loss": 0.4505, "num_tokens": 524689185.0, "step": 6147 }, { "epoch": 2.2520954518389593, "grad_norm": 0.13166696338315576, "learning_rate": 2.5743802994605997e-05, "loss": 0.4352, "num_tokens": 525530918.0, "step": 6148 }, { "epoch": 2.252461869646865, "grad_norm": 0.13720115113864595, "learning_rate": 2.5739625244720675e-05, "loss": 0.4531, "num_tokens": 526308236.0, "step": 6149 }, { "epoch": 2.2528282874547703, "grad_norm": 0.13140420881616932, "learning_rate": 2.573544728428699e-05, "loss": 0.4412, "num_tokens": 527179721.0, "step": 6150 }, { "epoch": 2.253194705262676, "grad_norm": 0.15267410068630158, "learning_rate": 2.5731269113540176e-05, "loss": 0.4664, "num_tokens": 527778773.0, "step": 6151 }, { "epoch": 2.253561123070581, "grad_norm": 0.14073692045683814, "learning_rate": 2.572709073271548e-05, "loss": 0.4375, "num_tokens": 528475411.0, "step": 6152 }, { "epoch": 2.253927540878487, "grad_norm": 0.14002187184539824, "learning_rate": 2.572291214204814e-05, "loss": 0.4924, "num_tokens": 529210378.0, "step": 6153 }, { "epoch": 2.254293958686392, "grad_norm": 0.1309797177699661, "learning_rate": 2.5718733341773422e-05, "loss": 0.4428, "num_tokens": 529963504.0, "step": 6154 }, { "epoch": 2.2546603764942974, "grad_norm": 0.13910923430819044, "learning_rate": 2.5714554332126603e-05, "loss": 0.4712, "num_tokens": 530835410.0, "step": 6155 }, { "epoch": 2.255026794302203, "grad_norm": 0.13258090070764922, "learning_rate": 2.5710375113342975e-05, "loss": 0.4614, "num_tokens": 531565868.0, "step": 6156 }, { "epoch": 2.255393212110109, "grad_norm": 0.16191210982125903, "learning_rate": 2.570619568565783e-05, "loss": 0.4345, "num_tokens": 532216037.0, "step": 6157 }, { "epoch": 2.255759629918014, "grad_norm": 0.14591080488291155, "learning_rate": 2.5702016049306475e-05, "loss": 0.4354, "num_tokens": 532918027.0, "step": 6158 }, { "epoch": 2.2561260477259193, "grad_norm": 0.13516139543650657, "learning_rate": 2.5697836204524242e-05, "loss": 0.4458, "num_tokens": 533760963.0, "step": 6159 }, { "epoch": 2.256492465533825, "grad_norm": 0.1577627211966395, "learning_rate": 2.5693656151546457e-05, "loss": 0.4486, "num_tokens": 534506326.0, "step": 6160 }, { "epoch": 2.2568588833417302, "grad_norm": 0.1410450622548865, "learning_rate": 2.5689475890608463e-05, "loss": 0.4616, "num_tokens": 535266688.0, "step": 6161 }, { "epoch": 2.257225301149636, "grad_norm": 0.15922975828675592, "learning_rate": 2.5685295421945623e-05, "loss": 0.4396, "num_tokens": 535890379.0, "step": 6162 }, { "epoch": 2.257591718957541, "grad_norm": 0.1438478111324343, "learning_rate": 2.5681114745793308e-05, "loss": 0.433, "num_tokens": 536605050.0, "step": 6163 }, { "epoch": 2.257958136765447, "grad_norm": 0.18263086149101956, "learning_rate": 2.5676933862386885e-05, "loss": 0.4627, "num_tokens": 537283010.0, "step": 6164 }, { "epoch": 2.258324554573352, "grad_norm": 0.1736916938423404, "learning_rate": 2.5672752771961764e-05, "loss": 0.5093, "num_tokens": 537938398.0, "step": 6165 }, { "epoch": 2.258690972381258, "grad_norm": 0.15603655357082954, "learning_rate": 2.566857147475333e-05, "loss": 0.4573, "num_tokens": 538697689.0, "step": 6166 }, { "epoch": 2.259057390189163, "grad_norm": 0.14647230727136046, "learning_rate": 2.566438997099702e-05, "loss": 0.4785, "num_tokens": 539388770.0, "step": 6167 }, { "epoch": 2.2594238079970688, "grad_norm": 0.13450351571410987, "learning_rate": 2.5660208260928243e-05, "loss": 0.4392, "num_tokens": 540220036.0, "step": 6168 }, { "epoch": 2.259790225804974, "grad_norm": 0.17511045770039513, "learning_rate": 2.565602634478244e-05, "loss": 0.4292, "num_tokens": 540907692.0, "step": 6169 }, { "epoch": 2.2601566436128797, "grad_norm": 0.14472521896246537, "learning_rate": 2.5651844222795074e-05, "loss": 0.4717, "num_tokens": 541744199.0, "step": 6170 }, { "epoch": 2.260523061420785, "grad_norm": 0.13965112376343095, "learning_rate": 2.5647661895201595e-05, "loss": 0.4482, "num_tokens": 542479581.0, "step": 6171 }, { "epoch": 2.2608894792286907, "grad_norm": 0.1563279417445556, "learning_rate": 2.5643479362237474e-05, "loss": 0.4453, "num_tokens": 543163827.0, "step": 6172 }, { "epoch": 2.261255897036596, "grad_norm": 0.16331763032938865, "learning_rate": 2.563929662413821e-05, "loss": 0.447, "num_tokens": 544074737.0, "step": 6173 }, { "epoch": 2.2616223148445016, "grad_norm": 0.12914164877441442, "learning_rate": 2.5635113681139285e-05, "loss": 0.4543, "num_tokens": 544935710.0, "step": 6174 }, { "epoch": 2.261988732652407, "grad_norm": 0.147196168950396, "learning_rate": 2.563093053347621e-05, "loss": 0.4481, "num_tokens": 545691323.0, "step": 6175 }, { "epoch": 2.2623551504603125, "grad_norm": 0.15596525396127264, "learning_rate": 2.562674718138451e-05, "loss": 0.4758, "num_tokens": 546512709.0, "step": 6176 }, { "epoch": 2.262721568268218, "grad_norm": 0.14163544560849206, "learning_rate": 2.5622563625099717e-05, "loss": 0.4653, "num_tokens": 547295225.0, "step": 6177 }, { "epoch": 2.2630879860761235, "grad_norm": 0.166793813983598, "learning_rate": 2.5618379864857364e-05, "loss": 0.5126, "num_tokens": 547940273.0, "step": 6178 }, { "epoch": 2.2634544038840287, "grad_norm": 0.1505328577442566, "learning_rate": 2.5614195900893014e-05, "loss": 0.456, "num_tokens": 548736075.0, "step": 6179 }, { "epoch": 2.263820821691934, "grad_norm": 0.1504093455638006, "learning_rate": 2.561001173344223e-05, "loss": 0.4822, "num_tokens": 549433296.0, "step": 6180 }, { "epoch": 2.2641872394998397, "grad_norm": 0.13490437473666778, "learning_rate": 2.5605827362740594e-05, "loss": 0.4338, "num_tokens": 550208651.0, "step": 6181 }, { "epoch": 2.2645536573077454, "grad_norm": 0.1374017551609824, "learning_rate": 2.560164278902368e-05, "loss": 0.4362, "num_tokens": 550931615.0, "step": 6182 }, { "epoch": 2.2649200751156506, "grad_norm": 0.15609272989436115, "learning_rate": 2.5597458012527097e-05, "loss": 0.4289, "num_tokens": 551593516.0, "step": 6183 }, { "epoch": 2.265286492923556, "grad_norm": 0.12073418331729942, "learning_rate": 2.5593273033486462e-05, "loss": 0.4353, "num_tokens": 552523030.0, "step": 6184 }, { "epoch": 2.2656529107314616, "grad_norm": 0.1274526713943614, "learning_rate": 2.5589087852137388e-05, "loss": 0.4421, "num_tokens": 553377232.0, "step": 6185 }, { "epoch": 2.266019328539367, "grad_norm": 0.14025606246849612, "learning_rate": 2.5584902468715512e-05, "loss": 0.4213, "num_tokens": 554183793.0, "step": 6186 }, { "epoch": 2.2663857463472725, "grad_norm": 0.13603592537429032, "learning_rate": 2.5580716883456483e-05, "loss": 0.4935, "num_tokens": 554992461.0, "step": 6187 }, { "epoch": 2.2667521641551778, "grad_norm": 0.1279969895123762, "learning_rate": 2.5576531096595952e-05, "loss": 0.4156, "num_tokens": 555724272.0, "step": 6188 }, { "epoch": 2.2671185819630835, "grad_norm": 0.13611677013373677, "learning_rate": 2.557234510836959e-05, "loss": 0.4298, "num_tokens": 556553629.0, "step": 6189 }, { "epoch": 2.2674849997709887, "grad_norm": 0.15358948507041, "learning_rate": 2.556815891901308e-05, "loss": 0.4541, "num_tokens": 557202602.0, "step": 6190 }, { "epoch": 2.2678514175788944, "grad_norm": 0.12902520663645037, "learning_rate": 2.5563972528762108e-05, "loss": 0.4365, "num_tokens": 558013953.0, "step": 6191 }, { "epoch": 2.2682178353867997, "grad_norm": 0.14586421447331716, "learning_rate": 2.5559785937852377e-05, "loss": 0.4507, "num_tokens": 558831840.0, "step": 6192 }, { "epoch": 2.2685842531947054, "grad_norm": 0.1690427071559064, "learning_rate": 2.5555599146519597e-05, "loss": 0.486, "num_tokens": 559502078.0, "step": 6193 }, { "epoch": 2.2689506710026106, "grad_norm": 0.1343902606506414, "learning_rate": 2.55514121549995e-05, "loss": 0.4442, "num_tokens": 560285984.0, "step": 6194 }, { "epoch": 2.2693170888105163, "grad_norm": 0.14394341141991515, "learning_rate": 2.5547224963527822e-05, "loss": 0.4651, "num_tokens": 560964644.0, "step": 6195 }, { "epoch": 2.2696835066184216, "grad_norm": 0.13426295241797356, "learning_rate": 2.5543037572340302e-05, "loss": 0.4301, "num_tokens": 561808712.0, "step": 6196 }, { "epoch": 2.2700499244263272, "grad_norm": 0.13505084586968738, "learning_rate": 2.5538849981672704e-05, "loss": 0.466, "num_tokens": 562589235.0, "step": 6197 }, { "epoch": 2.2704163422342325, "grad_norm": 0.13408380074792703, "learning_rate": 2.55346621917608e-05, "loss": 0.4426, "num_tokens": 563411145.0, "step": 6198 }, { "epoch": 2.270782760042138, "grad_norm": 0.17399553888608618, "learning_rate": 2.5530474202840367e-05, "loss": 0.4625, "num_tokens": 564008911.0, "step": 6199 }, { "epoch": 2.2711491778500434, "grad_norm": 0.14911796533102245, "learning_rate": 2.5526286015147194e-05, "loss": 0.4623, "num_tokens": 564776161.0, "step": 6200 }, { "epoch": 2.271515595657949, "grad_norm": 0.14348841119593367, "learning_rate": 2.552209762891709e-05, "loss": 0.44, "num_tokens": 565557840.0, "step": 6201 }, { "epoch": 2.2718820134658544, "grad_norm": 0.16133568647335914, "learning_rate": 2.5517909044385868e-05, "loss": 0.4436, "num_tokens": 566312003.0, "step": 6202 }, { "epoch": 2.27224843127376, "grad_norm": 0.13556609607659392, "learning_rate": 2.5513720261789353e-05, "loss": 0.4599, "num_tokens": 567122007.0, "step": 6203 }, { "epoch": 2.2726148490816653, "grad_norm": 0.12866622807370345, "learning_rate": 2.5509531281363388e-05, "loss": 0.4368, "num_tokens": 567996730.0, "step": 6204 }, { "epoch": 2.2729812668895706, "grad_norm": 0.1454204345778478, "learning_rate": 2.5505342103343806e-05, "loss": 0.4742, "num_tokens": 568784444.0, "step": 6205 }, { "epoch": 2.2733476846974763, "grad_norm": 0.1465124228267172, "learning_rate": 2.550115272796648e-05, "loss": 0.4525, "num_tokens": 569571756.0, "step": 6206 }, { "epoch": 2.273714102505382, "grad_norm": 0.1598727327458713, "learning_rate": 2.5496963155467275e-05, "loss": 0.4718, "num_tokens": 570325174.0, "step": 6207 }, { "epoch": 2.2740805203132872, "grad_norm": 0.1338443427014171, "learning_rate": 2.5492773386082073e-05, "loss": 0.4279, "num_tokens": 571140397.0, "step": 6208 }, { "epoch": 2.2744469381211925, "grad_norm": 0.1260170238039201, "learning_rate": 2.5488583420046768e-05, "loss": 0.4462, "num_tokens": 571991011.0, "step": 6209 }, { "epoch": 2.274813355929098, "grad_norm": 0.1628892141981241, "learning_rate": 2.5484393257597258e-05, "loss": 0.4655, "num_tokens": 572895770.0, "step": 6210 }, { "epoch": 2.275179773737004, "grad_norm": 0.14943830728901183, "learning_rate": 2.5480202898969463e-05, "loss": 0.4867, "num_tokens": 573668599.0, "step": 6211 }, { "epoch": 2.275546191544909, "grad_norm": 0.14878953739448814, "learning_rate": 2.547601234439931e-05, "loss": 0.4842, "num_tokens": 574319670.0, "step": 6212 }, { "epoch": 2.2759126093528144, "grad_norm": 0.1475161897746155, "learning_rate": 2.547182159412273e-05, "loss": 0.449, "num_tokens": 575211848.0, "step": 6213 }, { "epoch": 2.27627902716072, "grad_norm": 0.15183414456041433, "learning_rate": 2.5467630648375666e-05, "loss": 0.4519, "num_tokens": 575905387.0, "step": 6214 }, { "epoch": 2.2766454449686253, "grad_norm": 0.13168099357591959, "learning_rate": 2.5463439507394095e-05, "loss": 0.4505, "num_tokens": 576713858.0, "step": 6215 }, { "epoch": 2.277011862776531, "grad_norm": 0.1306840882856695, "learning_rate": 2.5459248171413962e-05, "loss": 0.4228, "num_tokens": 577459774.0, "step": 6216 }, { "epoch": 2.2773782805844363, "grad_norm": 0.1474052631931734, "learning_rate": 2.5455056640671272e-05, "loss": 0.4702, "num_tokens": 578172218.0, "step": 6217 }, { "epoch": 2.277744698392342, "grad_norm": 0.15788478536282116, "learning_rate": 2.5450864915402003e-05, "loss": 0.4368, "num_tokens": 578885380.0, "step": 6218 }, { "epoch": 2.278111116200247, "grad_norm": 0.144183327556309, "learning_rate": 2.5446672995842153e-05, "loss": 0.4775, "num_tokens": 579652590.0, "step": 6219 }, { "epoch": 2.278477534008153, "grad_norm": 0.15456081680057202, "learning_rate": 2.5442480882227747e-05, "loss": 0.4425, "num_tokens": 580355164.0, "step": 6220 }, { "epoch": 2.278843951816058, "grad_norm": 0.1533608292048287, "learning_rate": 2.5438288574794804e-05, "loss": 0.4496, "num_tokens": 581096763.0, "step": 6221 }, { "epoch": 2.279210369623964, "grad_norm": 0.1654130534180654, "learning_rate": 2.543409607377936e-05, "loss": 0.4956, "num_tokens": 581798564.0, "step": 6222 }, { "epoch": 2.279576787431869, "grad_norm": 0.14373901200369324, "learning_rate": 2.5429903379417455e-05, "loss": 0.4565, "num_tokens": 582609865.0, "step": 6223 }, { "epoch": 2.279943205239775, "grad_norm": 0.12910405480291887, "learning_rate": 2.5425710491945162e-05, "loss": 0.4362, "num_tokens": 583438431.0, "step": 6224 }, { "epoch": 2.28030962304768, "grad_norm": 0.16184349594906522, "learning_rate": 2.5421517411598534e-05, "loss": 0.4725, "num_tokens": 584118893.0, "step": 6225 }, { "epoch": 2.2806760408555857, "grad_norm": 0.14960035993610302, "learning_rate": 2.541732413861365e-05, "loss": 0.4129, "num_tokens": 584907419.0, "step": 6226 }, { "epoch": 2.281042458663491, "grad_norm": 0.1399139347730869, "learning_rate": 2.5413130673226612e-05, "loss": 0.4859, "num_tokens": 585679921.0, "step": 6227 }, { "epoch": 2.2814088764713967, "grad_norm": 0.12965931043979312, "learning_rate": 2.5408937015673506e-05, "loss": 0.4448, "num_tokens": 586517672.0, "step": 6228 }, { "epoch": 2.281775294279302, "grad_norm": 0.15558579162464353, "learning_rate": 2.540474316619046e-05, "loss": 0.4622, "num_tokens": 587347915.0, "step": 6229 }, { "epoch": 2.2821417120872076, "grad_norm": 0.13640592925471334, "learning_rate": 2.5400549125013568e-05, "loss": 0.4467, "num_tokens": 588192292.0, "step": 6230 }, { "epoch": 2.282508129895113, "grad_norm": 0.1381694180395883, "learning_rate": 2.5396354892378992e-05, "loss": 0.4491, "num_tokens": 589019510.0, "step": 6231 }, { "epoch": 2.2828745477030186, "grad_norm": 0.13129444882219954, "learning_rate": 2.5392160468522873e-05, "loss": 0.4256, "num_tokens": 589790866.0, "step": 6232 }, { "epoch": 2.283240965510924, "grad_norm": 0.16613925023007892, "learning_rate": 2.5387965853681342e-05, "loss": 0.4623, "num_tokens": 590529151.0, "step": 6233 }, { "epoch": 2.283607383318829, "grad_norm": 0.1601587533711262, "learning_rate": 2.538377104809059e-05, "loss": 0.4499, "num_tokens": 591234848.0, "step": 6234 }, { "epoch": 2.2839738011267348, "grad_norm": 0.1441099460864269, "learning_rate": 2.537957605198678e-05, "loss": 0.4548, "num_tokens": 591963138.0, "step": 6235 }, { "epoch": 2.2843402189346405, "grad_norm": 0.15596136604586947, "learning_rate": 2.5375380865606096e-05, "loss": 0.4706, "num_tokens": 592782070.0, "step": 6236 }, { "epoch": 2.2847066367425457, "grad_norm": 0.14878176976754207, "learning_rate": 2.537118548918474e-05, "loss": 0.4263, "num_tokens": 593590522.0, "step": 6237 }, { "epoch": 2.285073054550451, "grad_norm": 0.1459970279269907, "learning_rate": 2.5366989922958924e-05, "loss": 0.4888, "num_tokens": 594348919.0, "step": 6238 }, { "epoch": 2.2854394723583566, "grad_norm": 0.1436450953087884, "learning_rate": 2.5362794167164863e-05, "loss": 0.4544, "num_tokens": 595149476.0, "step": 6239 }, { "epoch": 2.285805890166262, "grad_norm": 0.14613853975284133, "learning_rate": 2.535859822203878e-05, "loss": 0.4501, "num_tokens": 595908995.0, "step": 6240 }, { "epoch": 2.2861723079741676, "grad_norm": 0.14176257294797284, "learning_rate": 2.5354402087816925e-05, "loss": 0.4619, "num_tokens": 596608946.0, "step": 6241 }, { "epoch": 2.286538725782073, "grad_norm": 0.13496027216498244, "learning_rate": 2.535020576473555e-05, "loss": 0.4317, "num_tokens": 597308823.0, "step": 6242 }, { "epoch": 2.2869051435899785, "grad_norm": 0.12925486234260053, "learning_rate": 2.5346009253030905e-05, "loss": 0.4504, "num_tokens": 598207830.0, "step": 6243 }, { "epoch": 2.287271561397884, "grad_norm": 0.15783475417828616, "learning_rate": 2.5341812552939274e-05, "loss": 0.4772, "num_tokens": 598879178.0, "step": 6244 }, { "epoch": 2.2876379792057895, "grad_norm": 0.1443807050716467, "learning_rate": 2.5337615664696932e-05, "loss": 0.4796, "num_tokens": 599693191.0, "step": 6245 }, { "epoch": 2.2880043970136947, "grad_norm": 0.1389263558030446, "learning_rate": 2.5333418588540175e-05, "loss": 0.4451, "num_tokens": 600325674.0, "step": 6246 }, { "epoch": 2.2883708148216004, "grad_norm": 0.12616547203332876, "learning_rate": 2.53292213247053e-05, "loss": 0.4421, "num_tokens": 601156836.0, "step": 6247 }, { "epoch": 2.2887372326295057, "grad_norm": 0.13915307400097982, "learning_rate": 2.5325023873428634e-05, "loss": 0.4876, "num_tokens": 601951642.0, "step": 6248 }, { "epoch": 2.2891036504374114, "grad_norm": 0.13794985264177667, "learning_rate": 2.5320826234946493e-05, "loss": 0.4691, "num_tokens": 602689808.0, "step": 6249 }, { "epoch": 2.2894700682453166, "grad_norm": 0.16164043566595476, "learning_rate": 2.5316628409495213e-05, "loss": 0.5032, "num_tokens": 603329385.0, "step": 6250 }, { "epoch": 2.2898364860532223, "grad_norm": 0.15417309272891147, "learning_rate": 2.5312430397311145e-05, "loss": 0.4795, "num_tokens": 604009431.0, "step": 6251 }, { "epoch": 2.2902029038611276, "grad_norm": 0.14641484140470828, "learning_rate": 2.530823219863064e-05, "loss": 0.4414, "num_tokens": 604759874.0, "step": 6252 }, { "epoch": 2.2905693216690333, "grad_norm": 0.14967755353601234, "learning_rate": 2.5304033813690065e-05, "loss": 0.4584, "num_tokens": 605517688.0, "step": 6253 }, { "epoch": 2.2909357394769385, "grad_norm": 0.14606010432710703, "learning_rate": 2.52998352427258e-05, "loss": 0.4426, "num_tokens": 606265433.0, "step": 6254 }, { "epoch": 2.291302157284844, "grad_norm": 0.1347450418960953, "learning_rate": 2.529563648597423e-05, "loss": 0.4456, "num_tokens": 607049029.0, "step": 6255 }, { "epoch": 2.2916685750927495, "grad_norm": 0.140866999209047, "learning_rate": 2.529143754367176e-05, "loss": 0.4684, "num_tokens": 607939143.0, "step": 6256 }, { "epoch": 2.292034992900655, "grad_norm": 0.15102315403683408, "learning_rate": 2.5287238416054796e-05, "loss": 0.4591, "num_tokens": 608590965.0, "step": 6257 }, { "epoch": 2.2924014107085604, "grad_norm": 0.14829114802504254, "learning_rate": 2.528303910335975e-05, "loss": 0.4562, "num_tokens": 609311891.0, "step": 6258 }, { "epoch": 2.2927678285164657, "grad_norm": 0.14863750688026814, "learning_rate": 2.5278839605823066e-05, "loss": 0.4455, "num_tokens": 610034298.0, "step": 6259 }, { "epoch": 2.2931342463243714, "grad_norm": 0.13529179737585076, "learning_rate": 2.5274639923681162e-05, "loss": 0.4477, "num_tokens": 610960496.0, "step": 6260 }, { "epoch": 2.293500664132277, "grad_norm": 0.1419793243653951, "learning_rate": 2.527044005717051e-05, "loss": 0.5021, "num_tokens": 611673210.0, "step": 6261 }, { "epoch": 2.2938670819401823, "grad_norm": 0.15116890388994622, "learning_rate": 2.5266240006527566e-05, "loss": 0.46, "num_tokens": 612334195.0, "step": 6262 }, { "epoch": 2.2942334997480875, "grad_norm": 0.14794894747044104, "learning_rate": 2.5262039771988796e-05, "loss": 0.4739, "num_tokens": 613184909.0, "step": 6263 }, { "epoch": 2.2945999175559932, "grad_norm": 0.13329757892085356, "learning_rate": 2.5257839353790674e-05, "loss": 0.4224, "num_tokens": 613951981.0, "step": 6264 }, { "epoch": 2.294966335363899, "grad_norm": 0.14894495016983875, "learning_rate": 2.5253638752169715e-05, "loss": 0.5, "num_tokens": 614679797.0, "step": 6265 }, { "epoch": 2.295332753171804, "grad_norm": 0.12264696369632516, "learning_rate": 2.52494379673624e-05, "loss": 0.4281, "num_tokens": 615606774.0, "step": 6266 }, { "epoch": 2.2956991709797094, "grad_norm": 0.1401792062149907, "learning_rate": 2.5245236999605257e-05, "loss": 0.4617, "num_tokens": 616338798.0, "step": 6267 }, { "epoch": 2.296065588787615, "grad_norm": 0.14354509674923355, "learning_rate": 2.5241035849134795e-05, "loss": 0.4471, "num_tokens": 617065121.0, "step": 6268 }, { "epoch": 2.2964320065955204, "grad_norm": 0.13776026477414965, "learning_rate": 2.523683451618755e-05, "loss": 0.4151, "num_tokens": 617754370.0, "step": 6269 }, { "epoch": 2.296798424403426, "grad_norm": 0.14394768547646897, "learning_rate": 2.5232633001000078e-05, "loss": 0.4791, "num_tokens": 618466614.0, "step": 6270 }, { "epoch": 2.2971648422113313, "grad_norm": 0.1295221228338573, "learning_rate": 2.5228431303808914e-05, "loss": 0.4594, "num_tokens": 619369543.0, "step": 6271 }, { "epoch": 2.297531260019237, "grad_norm": 0.1488946701970087, "learning_rate": 2.5224229424850636e-05, "loss": 0.4988, "num_tokens": 620135852.0, "step": 6272 }, { "epoch": 2.2978976778271423, "grad_norm": 0.16071588961327757, "learning_rate": 2.5220027364361817e-05, "loss": 0.4692, "num_tokens": 620819158.0, "step": 6273 }, { "epoch": 2.298264095635048, "grad_norm": 0.14584848093748037, "learning_rate": 2.521582512257903e-05, "loss": 0.46, "num_tokens": 621745509.0, "step": 6274 }, { "epoch": 2.298630513442953, "grad_norm": 0.13618926431628198, "learning_rate": 2.5211622699738883e-05, "loss": 0.4787, "num_tokens": 622532197.0, "step": 6275 }, { "epoch": 2.298996931250859, "grad_norm": 0.12556771308100373, "learning_rate": 2.5207420096077978e-05, "loss": 0.4263, "num_tokens": 623423982.0, "step": 6276 }, { "epoch": 2.299363349058764, "grad_norm": 0.14864006678660946, "learning_rate": 2.520321731183292e-05, "loss": 0.4795, "num_tokens": 624192396.0, "step": 6277 }, { "epoch": 2.29972976686667, "grad_norm": 0.14676392633598917, "learning_rate": 2.5199014347240342e-05, "loss": 0.4727, "num_tokens": 624883202.0, "step": 6278 }, { "epoch": 2.300096184674575, "grad_norm": 0.149034542857486, "learning_rate": 2.519481120253689e-05, "loss": 0.4932, "num_tokens": 625642821.0, "step": 6279 }, { "epoch": 2.300462602482481, "grad_norm": 0.13586321879358848, "learning_rate": 2.5190607877959182e-05, "loss": 0.45, "num_tokens": 626512233.0, "step": 6280 }, { "epoch": 2.300829020290386, "grad_norm": 0.16270050080015447, "learning_rate": 2.5186404373743897e-05, "loss": 0.4634, "num_tokens": 627237694.0, "step": 6281 }, { "epoch": 2.3011954380982917, "grad_norm": 0.15006319861083228, "learning_rate": 2.5182200690127698e-05, "loss": 0.5126, "num_tokens": 628049123.0, "step": 6282 }, { "epoch": 2.301561855906197, "grad_norm": 0.13375855676665505, "learning_rate": 2.5177996827347252e-05, "loss": 0.4418, "num_tokens": 628792533.0, "step": 6283 }, { "epoch": 2.3019282737141027, "grad_norm": 0.14932768521264547, "learning_rate": 2.5173792785639245e-05, "loss": 0.4856, "num_tokens": 629480566.0, "step": 6284 }, { "epoch": 2.302294691522008, "grad_norm": 0.1390656757036298, "learning_rate": 2.516958856524038e-05, "loss": 0.4701, "num_tokens": 630234171.0, "step": 6285 }, { "epoch": 2.3026611093299136, "grad_norm": 0.1354087379852127, "learning_rate": 2.516538416638736e-05, "loss": 0.442, "num_tokens": 630989431.0, "step": 6286 }, { "epoch": 2.303027527137819, "grad_norm": 0.13653088607860198, "learning_rate": 2.5161179589316907e-05, "loss": 0.4474, "num_tokens": 631731803.0, "step": 6287 }, { "epoch": 2.303393944945724, "grad_norm": 0.1324336254384749, "learning_rate": 2.5156974834265727e-05, "loss": 0.4508, "num_tokens": 632569953.0, "step": 6288 }, { "epoch": 2.30376036275363, "grad_norm": 0.13908467627596646, "learning_rate": 2.515276990147058e-05, "loss": 0.4424, "num_tokens": 633279327.0, "step": 6289 }, { "epoch": 2.3041267805615355, "grad_norm": 0.13154678005324305, "learning_rate": 2.51485647911682e-05, "loss": 0.4713, "num_tokens": 634094605.0, "step": 6290 }, { "epoch": 2.3044931983694408, "grad_norm": 0.14072003409301215, "learning_rate": 2.5144359503595352e-05, "loss": 0.4483, "num_tokens": 634916826.0, "step": 6291 }, { "epoch": 2.304859616177346, "grad_norm": 0.14011198183514245, "learning_rate": 2.5140154038988782e-05, "loss": 0.4352, "num_tokens": 635660513.0, "step": 6292 }, { "epoch": 2.3052260339852517, "grad_norm": 0.13092839449469548, "learning_rate": 2.513594839758529e-05, "loss": 0.4486, "num_tokens": 636497695.0, "step": 6293 }, { "epoch": 2.305592451793157, "grad_norm": 0.13461791821411281, "learning_rate": 2.5131742579621644e-05, "loss": 0.4378, "num_tokens": 637306422.0, "step": 6294 }, { "epoch": 2.3059588696010627, "grad_norm": 0.1467697368590266, "learning_rate": 2.5127536585334652e-05, "loss": 0.4872, "num_tokens": 637967532.0, "step": 6295 }, { "epoch": 2.306325287408968, "grad_norm": 0.13255289747000612, "learning_rate": 2.5123330414961115e-05, "loss": 0.4619, "num_tokens": 638746297.0, "step": 6296 }, { "epoch": 2.3066917052168736, "grad_norm": 0.12658364841849715, "learning_rate": 2.511912406873785e-05, "loss": 0.4317, "num_tokens": 639608301.0, "step": 6297 }, { "epoch": 2.307058123024779, "grad_norm": 0.14425287072654963, "learning_rate": 2.5114917546901678e-05, "loss": 0.4815, "num_tokens": 640340578.0, "step": 6298 }, { "epoch": 2.3074245408326846, "grad_norm": 0.12622830650692196, "learning_rate": 2.5110710849689436e-05, "loss": 0.4554, "num_tokens": 641128139.0, "step": 6299 }, { "epoch": 2.30779095864059, "grad_norm": 0.15273638145872928, "learning_rate": 2.5106503977337978e-05, "loss": 0.4985, "num_tokens": 641833432.0, "step": 6300 }, { "epoch": 2.3081573764484955, "grad_norm": 0.1450151312956709, "learning_rate": 2.5102296930084153e-05, "loss": 0.4647, "num_tokens": 642646511.0, "step": 6301 }, { "epoch": 2.3085237942564008, "grad_norm": 0.14075125581924447, "learning_rate": 2.5098089708164815e-05, "loss": 0.4375, "num_tokens": 643416378.0, "step": 6302 }, { "epoch": 2.3088902120643064, "grad_norm": 0.12973729960107208, "learning_rate": 2.5093882311816864e-05, "loss": 0.4301, "num_tokens": 644186519.0, "step": 6303 }, { "epoch": 2.3092566298722117, "grad_norm": 0.13653499983530676, "learning_rate": 2.5089674741277167e-05, "loss": 0.4254, "num_tokens": 645024000.0, "step": 6304 }, { "epoch": 2.3096230476801174, "grad_norm": 0.13269385608491158, "learning_rate": 2.5085466996782622e-05, "loss": 0.4141, "num_tokens": 645750878.0, "step": 6305 }, { "epoch": 2.3099894654880226, "grad_norm": 0.13536627922175673, "learning_rate": 2.5081259078570127e-05, "loss": 0.4538, "num_tokens": 646655706.0, "step": 6306 }, { "epoch": 2.3103558832959283, "grad_norm": 0.13305855261038096, "learning_rate": 2.5077050986876614e-05, "loss": 0.4725, "num_tokens": 647426485.0, "step": 6307 }, { "epoch": 2.3107223011038336, "grad_norm": 0.13499520741212723, "learning_rate": 2.5072842721938984e-05, "loss": 0.4455, "num_tokens": 648229570.0, "step": 6308 }, { "epoch": 2.3110887189117393, "grad_norm": 0.14239676860519715, "learning_rate": 2.5068634283994195e-05, "loss": 0.4467, "num_tokens": 648970951.0, "step": 6309 }, { "epoch": 2.3114551367196445, "grad_norm": 0.14583346123755705, "learning_rate": 2.506442567327917e-05, "loss": 0.4927, "num_tokens": 649664188.0, "step": 6310 }, { "epoch": 2.3118215545275502, "grad_norm": 0.1237559120632952, "learning_rate": 2.5060216890030872e-05, "loss": 0.4386, "num_tokens": 650514902.0, "step": 6311 }, { "epoch": 2.3121879723354555, "grad_norm": 0.14717245930964487, "learning_rate": 2.5056007934486264e-05, "loss": 0.4914, "num_tokens": 651220010.0, "step": 6312 }, { "epoch": 2.3125543901433607, "grad_norm": 0.1295513979667455, "learning_rate": 2.5051798806882316e-05, "loss": 0.4612, "num_tokens": 651974825.0, "step": 6313 }, { "epoch": 2.3129208079512664, "grad_norm": 0.13098917655543527, "learning_rate": 2.5047589507456013e-05, "loss": 0.4371, "num_tokens": 652746663.0, "step": 6314 }, { "epoch": 2.313287225759172, "grad_norm": 0.14292632752481493, "learning_rate": 2.504338003644434e-05, "loss": 0.4914, "num_tokens": 653464079.0, "step": 6315 }, { "epoch": 2.3136536435670774, "grad_norm": 0.13416553942187986, "learning_rate": 2.5039170394084305e-05, "loss": 0.4558, "num_tokens": 654227662.0, "step": 6316 }, { "epoch": 2.3140200613749826, "grad_norm": 0.13997694615458398, "learning_rate": 2.503496058061292e-05, "loss": 0.4401, "num_tokens": 654945153.0, "step": 6317 }, { "epoch": 2.3143864791828883, "grad_norm": 0.1356595743553369, "learning_rate": 2.503075059626721e-05, "loss": 0.4565, "num_tokens": 655775823.0, "step": 6318 }, { "epoch": 2.3147528969907936, "grad_norm": 0.1276985521018199, "learning_rate": 2.5026540441284187e-05, "loss": 0.4616, "num_tokens": 656660846.0, "step": 6319 }, { "epoch": 2.3151193147986993, "grad_norm": 0.12778472858483061, "learning_rate": 2.5022330115900912e-05, "loss": 0.4227, "num_tokens": 657459202.0, "step": 6320 }, { "epoch": 2.3154857326066045, "grad_norm": 0.13245463117151063, "learning_rate": 2.501811962035442e-05, "loss": 0.4802, "num_tokens": 658239408.0, "step": 6321 }, { "epoch": 2.31585215041451, "grad_norm": 0.15283158201041472, "learning_rate": 2.5013908954881783e-05, "loss": 0.4478, "num_tokens": 658914641.0, "step": 6322 }, { "epoch": 2.3162185682224155, "grad_norm": 0.1403562675643622, "learning_rate": 2.5009698119720057e-05, "loss": 0.4736, "num_tokens": 659684740.0, "step": 6323 }, { "epoch": 2.316584986030321, "grad_norm": 0.14008517618898025, "learning_rate": 2.5005487115106327e-05, "loss": 0.4507, "num_tokens": 660482430.0, "step": 6324 }, { "epoch": 2.3169514038382264, "grad_norm": 0.14417704330966133, "learning_rate": 2.500127594127769e-05, "loss": 0.4617, "num_tokens": 661271041.0, "step": 6325 }, { "epoch": 2.317317821646132, "grad_norm": 0.13690472847987933, "learning_rate": 2.4997064598471227e-05, "loss": 0.4665, "num_tokens": 662164389.0, "step": 6326 }, { "epoch": 2.3176842394540373, "grad_norm": 0.14393344450055878, "learning_rate": 2.4992853086924046e-05, "loss": 0.4688, "num_tokens": 662839938.0, "step": 6327 }, { "epoch": 2.318050657261943, "grad_norm": 0.13681375613497346, "learning_rate": 2.498864140687328e-05, "loss": 0.4474, "num_tokens": 663631168.0, "step": 6328 }, { "epoch": 2.3184170750698483, "grad_norm": 0.14171987152130153, "learning_rate": 2.498442955855604e-05, "loss": 0.4543, "num_tokens": 664406795.0, "step": 6329 }, { "epoch": 2.318783492877754, "grad_norm": 0.1386355649012679, "learning_rate": 2.4980217542209467e-05, "loss": 0.453, "num_tokens": 665251485.0, "step": 6330 }, { "epoch": 2.3191499106856592, "grad_norm": 0.13966304364984175, "learning_rate": 2.4976005358070703e-05, "loss": 0.4725, "num_tokens": 666021588.0, "step": 6331 }, { "epoch": 2.319516328493565, "grad_norm": 0.12430415205162745, "learning_rate": 2.4971793006376906e-05, "loss": 0.444, "num_tokens": 666841867.0, "step": 6332 }, { "epoch": 2.31988274630147, "grad_norm": 0.15617656335154773, "learning_rate": 2.4967580487365238e-05, "loss": 0.4567, "num_tokens": 667442400.0, "step": 6333 }, { "epoch": 2.320249164109376, "grad_norm": 0.14226357352077962, "learning_rate": 2.4963367801272872e-05, "loss": 0.4821, "num_tokens": 668128950.0, "step": 6334 }, { "epoch": 2.320615581917281, "grad_norm": 0.13893591898039384, "learning_rate": 2.4959154948336997e-05, "loss": 0.4389, "num_tokens": 668809337.0, "step": 6335 }, { "epoch": 2.320981999725187, "grad_norm": 0.12659781889926638, "learning_rate": 2.4954941928794787e-05, "loss": 0.4409, "num_tokens": 669525217.0, "step": 6336 }, { "epoch": 2.321348417533092, "grad_norm": 0.13885319648677058, "learning_rate": 2.4950728742883465e-05, "loss": 0.4736, "num_tokens": 670295613.0, "step": 6337 }, { "epoch": 2.3217148353409978, "grad_norm": 0.14843363336416823, "learning_rate": 2.494651539084023e-05, "loss": 0.4731, "num_tokens": 671055300.0, "step": 6338 }, { "epoch": 2.322081253148903, "grad_norm": 0.1303920419176495, "learning_rate": 2.4942301872902305e-05, "loss": 0.4214, "num_tokens": 671767291.0, "step": 6339 }, { "epoch": 2.3224476709568087, "grad_norm": 0.14963274469743693, "learning_rate": 2.493808818930692e-05, "loss": 0.4754, "num_tokens": 672428782.0, "step": 6340 }, { "epoch": 2.322814088764714, "grad_norm": 0.14392661002413343, "learning_rate": 2.4933874340291303e-05, "loss": 0.4595, "num_tokens": 673093930.0, "step": 6341 }, { "epoch": 2.323180506572619, "grad_norm": 0.13021622214071452, "learning_rate": 2.4929660326092723e-05, "loss": 0.4385, "num_tokens": 673920588.0, "step": 6342 }, { "epoch": 2.323546924380525, "grad_norm": 0.13092074974898743, "learning_rate": 2.4925446146948418e-05, "loss": 0.4789, "num_tokens": 674709545.0, "step": 6343 }, { "epoch": 2.3239133421884306, "grad_norm": 0.13535024584574584, "learning_rate": 2.492123180309567e-05, "loss": 0.4322, "num_tokens": 675394329.0, "step": 6344 }, { "epoch": 2.324279759996336, "grad_norm": 0.14052339929441096, "learning_rate": 2.4917017294771743e-05, "loss": 0.4443, "num_tokens": 676176809.0, "step": 6345 }, { "epoch": 2.324646177804241, "grad_norm": 0.13159286552579938, "learning_rate": 2.4912802622213925e-05, "loss": 0.4564, "num_tokens": 676896054.0, "step": 6346 }, { "epoch": 2.325012595612147, "grad_norm": 0.14309009809390835, "learning_rate": 2.4908587785659517e-05, "loss": 0.4489, "num_tokens": 677620500.0, "step": 6347 }, { "epoch": 2.325379013420052, "grad_norm": 0.13636655139165718, "learning_rate": 2.490437278534582e-05, "loss": 0.465, "num_tokens": 678442476.0, "step": 6348 }, { "epoch": 2.3257454312279577, "grad_norm": 0.1438680483492023, "learning_rate": 2.4900157621510142e-05, "loss": 0.4558, "num_tokens": 679120588.0, "step": 6349 }, { "epoch": 2.326111849035863, "grad_norm": 0.12964564308017976, "learning_rate": 2.4895942294389802e-05, "loss": 0.448, "num_tokens": 679902660.0, "step": 6350 }, { "epoch": 2.3264782668437687, "grad_norm": 0.14390663031513093, "learning_rate": 2.4891726804222146e-05, "loss": 0.4351, "num_tokens": 680652417.0, "step": 6351 }, { "epoch": 2.326844684651674, "grad_norm": 0.1417027281609476, "learning_rate": 2.4887511151244504e-05, "loss": 0.4327, "num_tokens": 681316960.0, "step": 6352 }, { "epoch": 2.3272111024595796, "grad_norm": 0.14169387470573688, "learning_rate": 2.488329533569422e-05, "loss": 0.479, "num_tokens": 682059037.0, "step": 6353 }, { "epoch": 2.327577520267485, "grad_norm": 0.13408550562841484, "learning_rate": 2.487907935780867e-05, "loss": 0.4392, "num_tokens": 682839270.0, "step": 6354 }, { "epoch": 2.3279439380753906, "grad_norm": 0.1644030938576817, "learning_rate": 2.487486321782521e-05, "loss": 0.4545, "num_tokens": 683621805.0, "step": 6355 }, { "epoch": 2.328310355883296, "grad_norm": 0.1397706990187183, "learning_rate": 2.4870646915981224e-05, "loss": 0.4421, "num_tokens": 684384931.0, "step": 6356 }, { "epoch": 2.3286767736912015, "grad_norm": 0.1522279111252054, "learning_rate": 2.4866430452514086e-05, "loss": 0.4615, "num_tokens": 685104231.0, "step": 6357 }, { "epoch": 2.3290431914991068, "grad_norm": 0.14295002373120205, "learning_rate": 2.4862213827661198e-05, "loss": 0.4517, "num_tokens": 685847544.0, "step": 6358 }, { "epoch": 2.3294096093070125, "grad_norm": 0.14426811511611462, "learning_rate": 2.485799704165997e-05, "loss": 0.4606, "num_tokens": 686696978.0, "step": 6359 }, { "epoch": 2.3297760271149177, "grad_norm": 0.13646367693300257, "learning_rate": 2.485378009474781e-05, "loss": 0.4347, "num_tokens": 687393458.0, "step": 6360 }, { "epoch": 2.3301424449228234, "grad_norm": 0.1420149878472766, "learning_rate": 2.4849562987162137e-05, "loss": 0.4494, "num_tokens": 688158660.0, "step": 6361 }, { "epoch": 2.3305088627307287, "grad_norm": 0.15321808560251723, "learning_rate": 2.4845345719140395e-05, "loss": 0.4775, "num_tokens": 688850185.0, "step": 6362 }, { "epoch": 2.3308752805386344, "grad_norm": 0.13197393022782064, "learning_rate": 2.484112829092001e-05, "loss": 0.4597, "num_tokens": 689740947.0, "step": 6363 }, { "epoch": 2.3312416983465396, "grad_norm": 0.14457459872555248, "learning_rate": 2.4836910702738442e-05, "loss": 0.4157, "num_tokens": 690404333.0, "step": 6364 }, { "epoch": 2.3316081161544453, "grad_norm": 0.15925826179161834, "learning_rate": 2.4832692954833148e-05, "loss": 0.4807, "num_tokens": 691144751.0, "step": 6365 }, { "epoch": 2.3319745339623505, "grad_norm": 0.136265643967004, "learning_rate": 2.4828475047441586e-05, "loss": 0.4426, "num_tokens": 691962718.0, "step": 6366 }, { "epoch": 2.332340951770256, "grad_norm": 0.14787959974034956, "learning_rate": 2.4824256980801244e-05, "loss": 0.4839, "num_tokens": 692684729.0, "step": 6367 }, { "epoch": 2.3327073695781615, "grad_norm": 0.1352630355908018, "learning_rate": 2.4820038755149605e-05, "loss": 0.4553, "num_tokens": 693387827.0, "step": 6368 }, { "epoch": 2.333073787386067, "grad_norm": 0.1447141458332079, "learning_rate": 2.481582037072416e-05, "loss": 0.4342, "num_tokens": 694120074.0, "step": 6369 }, { "epoch": 2.3334402051939724, "grad_norm": 0.15406878387750075, "learning_rate": 2.4811601827762413e-05, "loss": 0.4199, "num_tokens": 694849997.0, "step": 6370 }, { "epoch": 2.3338066230018777, "grad_norm": 0.14261176486058436, "learning_rate": 2.480738312650188e-05, "loss": 0.4558, "num_tokens": 695594012.0, "step": 6371 }, { "epoch": 2.3341730408097834, "grad_norm": 0.13818029024550652, "learning_rate": 2.480316426718008e-05, "loss": 0.4708, "num_tokens": 696371486.0, "step": 6372 }, { "epoch": 2.3345394586176886, "grad_norm": 0.14357866156883048, "learning_rate": 2.4798945250034545e-05, "loss": 0.4382, "num_tokens": 697155414.0, "step": 6373 }, { "epoch": 2.3349058764255943, "grad_norm": 0.1436950532709079, "learning_rate": 2.4794726075302804e-05, "loss": 0.467, "num_tokens": 697846352.0, "step": 6374 }, { "epoch": 2.3352722942334996, "grad_norm": 0.14327620496515556, "learning_rate": 2.479050674322242e-05, "loss": 0.4588, "num_tokens": 698539971.0, "step": 6375 }, { "epoch": 2.3356387120414053, "grad_norm": 0.1578004806956001, "learning_rate": 2.4786287254030952e-05, "loss": 0.4261, "num_tokens": 699310315.0, "step": 6376 }, { "epoch": 2.3360051298493105, "grad_norm": 0.127934597937402, "learning_rate": 2.4782067607965946e-05, "loss": 0.4492, "num_tokens": 700120524.0, "step": 6377 }, { "epoch": 2.336371547657216, "grad_norm": 0.13692031890568587, "learning_rate": 2.4777847805264984e-05, "loss": 0.4699, "num_tokens": 700935090.0, "step": 6378 }, { "epoch": 2.3367379654651215, "grad_norm": 0.13128172855506812, "learning_rate": 2.477362784616566e-05, "loss": 0.4478, "num_tokens": 701763971.0, "step": 6379 }, { "epoch": 2.337104383273027, "grad_norm": 0.13362418879730978, "learning_rate": 2.4769407730905556e-05, "loss": 0.4425, "num_tokens": 702517871.0, "step": 6380 }, { "epoch": 2.3374708010809324, "grad_norm": 0.1511861357745523, "learning_rate": 2.4765187459722276e-05, "loss": 0.4948, "num_tokens": 703202419.0, "step": 6381 }, { "epoch": 2.337837218888838, "grad_norm": 0.1338894818500191, "learning_rate": 2.4760967032853432e-05, "loss": 0.454, "num_tokens": 703990582.0, "step": 6382 }, { "epoch": 2.3382036366967434, "grad_norm": 0.1328702206915532, "learning_rate": 2.4756746450536637e-05, "loss": 0.4428, "num_tokens": 704783043.0, "step": 6383 }, { "epoch": 2.338570054504649, "grad_norm": 0.14856167846712368, "learning_rate": 2.4752525713009518e-05, "loss": 0.4485, "num_tokens": 705492235.0, "step": 6384 }, { "epoch": 2.3389364723125543, "grad_norm": 0.16072234946131242, "learning_rate": 2.4748304820509716e-05, "loss": 0.4605, "num_tokens": 706290060.0, "step": 6385 }, { "epoch": 2.33930289012046, "grad_norm": 0.13540222429796006, "learning_rate": 2.4744083773274876e-05, "loss": 0.427, "num_tokens": 707012361.0, "step": 6386 }, { "epoch": 2.3396693079283652, "grad_norm": 0.13728741799305258, "learning_rate": 2.4739862571542647e-05, "loss": 0.4411, "num_tokens": 707844843.0, "step": 6387 }, { "epoch": 2.340035725736271, "grad_norm": 0.14096618196287036, "learning_rate": 2.4735641215550694e-05, "loss": 0.4525, "num_tokens": 708662592.0, "step": 6388 }, { "epoch": 2.340402143544176, "grad_norm": 0.12627205250248186, "learning_rate": 2.4731419705536687e-05, "loss": 0.4289, "num_tokens": 709533207.0, "step": 6389 }, { "epoch": 2.340768561352082, "grad_norm": 0.13915736924785263, "learning_rate": 2.4727198041738305e-05, "loss": 0.4795, "num_tokens": 710326139.0, "step": 6390 }, { "epoch": 2.341134979159987, "grad_norm": 0.14205615336718114, "learning_rate": 2.4722976224393232e-05, "loss": 0.4282, "num_tokens": 711043127.0, "step": 6391 }, { "epoch": 2.3415013969678924, "grad_norm": 0.13198381272742524, "learning_rate": 2.4718754253739174e-05, "loss": 0.4308, "num_tokens": 711815536.0, "step": 6392 }, { "epoch": 2.341867814775798, "grad_norm": 0.13940789880886292, "learning_rate": 2.4714532130013838e-05, "loss": 0.4572, "num_tokens": 712577066.0, "step": 6393 }, { "epoch": 2.342234232583704, "grad_norm": 0.14243961023682428, "learning_rate": 2.4710309853454918e-05, "loss": 0.4632, "num_tokens": 713364296.0, "step": 6394 }, { "epoch": 2.342600650391609, "grad_norm": 0.1490373350678291, "learning_rate": 2.4706087424300154e-05, "loss": 0.4554, "num_tokens": 714132970.0, "step": 6395 }, { "epoch": 2.3429670681995143, "grad_norm": 0.16115642636325356, "learning_rate": 2.4701864842787276e-05, "loss": 0.5139, "num_tokens": 714808051.0, "step": 6396 }, { "epoch": 2.34333348600742, "grad_norm": 0.1345036775476477, "learning_rate": 2.469764210915402e-05, "loss": 0.4572, "num_tokens": 715613743.0, "step": 6397 }, { "epoch": 2.3436999038153257, "grad_norm": 0.14078069251965258, "learning_rate": 2.4693419223638137e-05, "loss": 0.4355, "num_tokens": 716399562.0, "step": 6398 }, { "epoch": 2.344066321623231, "grad_norm": 0.15249567838790315, "learning_rate": 2.4689196186477376e-05, "loss": 0.4553, "num_tokens": 717122563.0, "step": 6399 }, { "epoch": 2.344432739431136, "grad_norm": 0.12641104188218671, "learning_rate": 2.4684972997909517e-05, "loss": 0.4387, "num_tokens": 717964274.0, "step": 6400 }, { "epoch": 2.344799157239042, "grad_norm": 0.16627686544743592, "learning_rate": 2.4680749658172315e-05, "loss": 0.4483, "num_tokens": 718767903.0, "step": 6401 }, { "epoch": 2.345165575046947, "grad_norm": 0.14022755093965072, "learning_rate": 2.467652616750357e-05, "loss": 0.4635, "num_tokens": 719593811.0, "step": 6402 }, { "epoch": 2.345531992854853, "grad_norm": 0.14083452506229113, "learning_rate": 2.4672302526141064e-05, "loss": 0.4403, "num_tokens": 720371940.0, "step": 6403 }, { "epoch": 2.345898410662758, "grad_norm": 0.1508863083788197, "learning_rate": 2.46680787343226e-05, "loss": 0.4617, "num_tokens": 721190444.0, "step": 6404 }, { "epoch": 2.3462648284706638, "grad_norm": 0.1386668780370394, "learning_rate": 2.4663854792285983e-05, "loss": 0.438, "num_tokens": 721984956.0, "step": 6405 }, { "epoch": 2.346631246278569, "grad_norm": 0.13987861720532907, "learning_rate": 2.465963070026903e-05, "loss": 0.4543, "num_tokens": 722689102.0, "step": 6406 }, { "epoch": 2.3469976640864747, "grad_norm": 0.14452866194838393, "learning_rate": 2.4655406458509567e-05, "loss": 0.4748, "num_tokens": 723433751.0, "step": 6407 }, { "epoch": 2.34736408189438, "grad_norm": 0.13350198013745637, "learning_rate": 2.4651182067245423e-05, "loss": 0.4675, "num_tokens": 724307587.0, "step": 6408 }, { "epoch": 2.3477304997022856, "grad_norm": 0.1412968091133851, "learning_rate": 2.4646957526714454e-05, "loss": 0.4338, "num_tokens": 725047461.0, "step": 6409 }, { "epoch": 2.348096917510191, "grad_norm": 0.14589794532019196, "learning_rate": 2.4642732837154484e-05, "loss": 0.4959, "num_tokens": 725791688.0, "step": 6410 }, { "epoch": 2.3484633353180966, "grad_norm": 0.13166172672917809, "learning_rate": 2.4638507998803403e-05, "loss": 0.4187, "num_tokens": 726475561.0, "step": 6411 }, { "epoch": 2.348829753126002, "grad_norm": 0.16259460213197363, "learning_rate": 2.4634283011899047e-05, "loss": 0.5057, "num_tokens": 727204944.0, "step": 6412 }, { "epoch": 2.3491961709339075, "grad_norm": 0.15540042296116952, "learning_rate": 2.463005787667931e-05, "loss": 0.4549, "num_tokens": 727895175.0, "step": 6413 }, { "epoch": 2.349562588741813, "grad_norm": 0.13134957693036145, "learning_rate": 2.462583259338208e-05, "loss": 0.4679, "num_tokens": 728621219.0, "step": 6414 }, { "epoch": 2.3499290065497185, "grad_norm": 0.14366917584307776, "learning_rate": 2.4621607162245228e-05, "loss": 0.47, "num_tokens": 729333856.0, "step": 6415 }, { "epoch": 2.3502954243576237, "grad_norm": 0.14885363508534755, "learning_rate": 2.4617381583506673e-05, "loss": 0.4676, "num_tokens": 730181766.0, "step": 6416 }, { "epoch": 2.3506618421655294, "grad_norm": 0.12155510902087498, "learning_rate": 2.4613155857404317e-05, "loss": 0.4279, "num_tokens": 731000263.0, "step": 6417 }, { "epoch": 2.3510282599734347, "grad_norm": 0.14040902284443832, "learning_rate": 2.4608929984176072e-05, "loss": 0.455, "num_tokens": 731767767.0, "step": 6418 }, { "epoch": 2.3513946777813404, "grad_norm": 0.12836349852617165, "learning_rate": 2.4604703964059873e-05, "loss": 0.4493, "num_tokens": 732676857.0, "step": 6419 }, { "epoch": 2.3517610955892456, "grad_norm": 0.13189942721763506, "learning_rate": 2.460047779729364e-05, "loss": 0.4399, "num_tokens": 733489859.0, "step": 6420 }, { "epoch": 2.352127513397151, "grad_norm": 0.1352495511072968, "learning_rate": 2.459625148411533e-05, "loss": 0.4582, "num_tokens": 734208536.0, "step": 6421 }, { "epoch": 2.3524939312050566, "grad_norm": 0.13607408696583853, "learning_rate": 2.4592025024762882e-05, "loss": 0.4259, "num_tokens": 734965691.0, "step": 6422 }, { "epoch": 2.3528603490129623, "grad_norm": 0.14659150807452898, "learning_rate": 2.458779841947426e-05, "loss": 0.4701, "num_tokens": 735641154.0, "step": 6423 }, { "epoch": 2.3532267668208675, "grad_norm": 0.13309965369459092, "learning_rate": 2.4583571668487428e-05, "loss": 0.4434, "num_tokens": 736346524.0, "step": 6424 }, { "epoch": 2.3535931846287728, "grad_norm": 0.14486774299379448, "learning_rate": 2.4579344772040357e-05, "loss": 0.4601, "num_tokens": 737084148.0, "step": 6425 }, { "epoch": 2.3539596024366785, "grad_norm": 0.13451288620652338, "learning_rate": 2.457511773037104e-05, "loss": 0.4554, "num_tokens": 737847985.0, "step": 6426 }, { "epoch": 2.3543260202445837, "grad_norm": 0.12792261083984077, "learning_rate": 2.4570890543717457e-05, "loss": 0.4536, "num_tokens": 738711057.0, "step": 6427 }, { "epoch": 2.3546924380524894, "grad_norm": 0.13976548242726247, "learning_rate": 2.4566663212317613e-05, "loss": 0.4417, "num_tokens": 739429858.0, "step": 6428 }, { "epoch": 2.3550588558603947, "grad_norm": 0.13135239722527595, "learning_rate": 2.4562435736409512e-05, "loss": 0.4486, "num_tokens": 740199855.0, "step": 6429 }, { "epoch": 2.3554252736683003, "grad_norm": 0.14179374673783654, "learning_rate": 2.4558208116231165e-05, "loss": 0.4718, "num_tokens": 740862037.0, "step": 6430 }, { "epoch": 2.3557916914762056, "grad_norm": 0.14114088380680923, "learning_rate": 2.455398035202061e-05, "loss": 0.4657, "num_tokens": 741714957.0, "step": 6431 }, { "epoch": 2.3561581092841113, "grad_norm": 0.14023719166699025, "learning_rate": 2.4549752444015866e-05, "loss": 0.4603, "num_tokens": 742479903.0, "step": 6432 }, { "epoch": 2.3565245270920165, "grad_norm": 0.1283123455083221, "learning_rate": 2.4545524392454972e-05, "loss": 0.4582, "num_tokens": 743270885.0, "step": 6433 }, { "epoch": 2.3568909448999222, "grad_norm": 0.13852808489936883, "learning_rate": 2.4541296197575982e-05, "loss": 0.4699, "num_tokens": 744065126.0, "step": 6434 }, { "epoch": 2.3572573627078275, "grad_norm": 0.13385441613895355, "learning_rate": 2.4537067859616956e-05, "loss": 0.4467, "num_tokens": 744817268.0, "step": 6435 }, { "epoch": 2.357623780515733, "grad_norm": 0.13120336512982886, "learning_rate": 2.4532839378815943e-05, "loss": 0.4819, "num_tokens": 745665339.0, "step": 6436 }, { "epoch": 2.3579901983236384, "grad_norm": 0.13039851919438297, "learning_rate": 2.4528610755411028e-05, "loss": 0.4279, "num_tokens": 746483951.0, "step": 6437 }, { "epoch": 2.358356616131544, "grad_norm": 0.1315864146071968, "learning_rate": 2.4524381989640285e-05, "loss": 0.4345, "num_tokens": 747289342.0, "step": 6438 }, { "epoch": 2.3587230339394494, "grad_norm": 0.1475860012777032, "learning_rate": 2.4520153081741804e-05, "loss": 0.4596, "num_tokens": 747976186.0, "step": 6439 }, { "epoch": 2.359089451747355, "grad_norm": 0.14392461598575332, "learning_rate": 2.451592403195368e-05, "loss": 0.4566, "num_tokens": 748628389.0, "step": 6440 }, { "epoch": 2.3594558695552603, "grad_norm": 0.1254162295238108, "learning_rate": 2.4511694840514014e-05, "loss": 0.4527, "num_tokens": 749502871.0, "step": 6441 }, { "epoch": 2.359822287363166, "grad_norm": 0.143345882062155, "learning_rate": 2.450746550766093e-05, "loss": 0.4794, "num_tokens": 750210026.0, "step": 6442 }, { "epoch": 2.3601887051710713, "grad_norm": 0.14115219195427367, "learning_rate": 2.450323603363253e-05, "loss": 0.4341, "num_tokens": 750931327.0, "step": 6443 }, { "epoch": 2.360555122978977, "grad_norm": 0.1377090403169478, "learning_rate": 2.4499006418666956e-05, "loss": 0.4233, "num_tokens": 751799599.0, "step": 6444 }, { "epoch": 2.360921540786882, "grad_norm": 0.13749070571092012, "learning_rate": 2.4494776663002343e-05, "loss": 0.4237, "num_tokens": 752505188.0, "step": 6445 }, { "epoch": 2.3612879585947875, "grad_norm": 0.14854985770372373, "learning_rate": 2.4490546766876815e-05, "loss": 0.4708, "num_tokens": 753346338.0, "step": 6446 }, { "epoch": 2.361654376402693, "grad_norm": 0.1454795021693937, "learning_rate": 2.4486316730528546e-05, "loss": 0.429, "num_tokens": 754259142.0, "step": 6447 }, { "epoch": 2.362020794210599, "grad_norm": 0.13727033686641177, "learning_rate": 2.4482086554195696e-05, "loss": 0.4644, "num_tokens": 755091997.0, "step": 6448 }, { "epoch": 2.362387212018504, "grad_norm": 0.13000073765040765, "learning_rate": 2.4477856238116416e-05, "loss": 0.4471, "num_tokens": 755932370.0, "step": 6449 }, { "epoch": 2.3627536298264094, "grad_norm": 0.17817669892891258, "learning_rate": 2.4473625782528895e-05, "loss": 0.4484, "num_tokens": 756746456.0, "step": 6450 }, { "epoch": 2.363120047634315, "grad_norm": 0.14334854839454783, "learning_rate": 2.4469395187671313e-05, "loss": 0.4604, "num_tokens": 757425794.0, "step": 6451 }, { "epoch": 2.3634864654422207, "grad_norm": 0.15331946121200904, "learning_rate": 2.446516445378185e-05, "loss": 0.4617, "num_tokens": 758140679.0, "step": 6452 }, { "epoch": 2.363852883250126, "grad_norm": 0.1488560116484278, "learning_rate": 2.446093358109872e-05, "loss": 0.4479, "num_tokens": 758942738.0, "step": 6453 }, { "epoch": 2.3642193010580312, "grad_norm": 0.137676568130728, "learning_rate": 2.445670256986012e-05, "loss": 0.4417, "num_tokens": 759661315.0, "step": 6454 }, { "epoch": 2.364585718865937, "grad_norm": 0.1407326773675686, "learning_rate": 2.4452471420304275e-05, "loss": 0.4339, "num_tokens": 760472849.0, "step": 6455 }, { "epoch": 2.364952136673842, "grad_norm": 0.13156264729394346, "learning_rate": 2.444824013266939e-05, "loss": 0.4378, "num_tokens": 761235447.0, "step": 6456 }, { "epoch": 2.365318554481748, "grad_norm": 0.12728126377497945, "learning_rate": 2.444400870719371e-05, "loss": 0.4291, "num_tokens": 762015622.0, "step": 6457 }, { "epoch": 2.365684972289653, "grad_norm": 0.14170943023400773, "learning_rate": 2.4439777144115463e-05, "loss": 0.4735, "num_tokens": 762767381.0, "step": 6458 }, { "epoch": 2.366051390097559, "grad_norm": 0.1358802347939507, "learning_rate": 2.4435545443672907e-05, "loss": 0.4803, "num_tokens": 763535973.0, "step": 6459 }, { "epoch": 2.366417807905464, "grad_norm": 0.12496327604827155, "learning_rate": 2.4431313606104278e-05, "loss": 0.4455, "num_tokens": 764317234.0, "step": 6460 }, { "epoch": 2.3667842257133698, "grad_norm": 0.14066409292565124, "learning_rate": 2.442708163164785e-05, "loss": 0.4276, "num_tokens": 765073071.0, "step": 6461 }, { "epoch": 2.367150643521275, "grad_norm": 0.14426857082172856, "learning_rate": 2.4422849520541892e-05, "loss": 0.462, "num_tokens": 765764966.0, "step": 6462 }, { "epoch": 2.3675170613291807, "grad_norm": 0.1452872694666083, "learning_rate": 2.441861727302467e-05, "loss": 0.4821, "num_tokens": 766504875.0, "step": 6463 }, { "epoch": 2.367883479137086, "grad_norm": 0.1420161978014508, "learning_rate": 2.4414384889334475e-05, "loss": 0.4467, "num_tokens": 767167835.0, "step": 6464 }, { "epoch": 2.3682498969449917, "grad_norm": 0.14331405542007783, "learning_rate": 2.44101523697096e-05, "loss": 0.453, "num_tokens": 767931685.0, "step": 6465 }, { "epoch": 2.368616314752897, "grad_norm": 0.15005230179950424, "learning_rate": 2.4405919714388338e-05, "loss": 0.4536, "num_tokens": 768656327.0, "step": 6466 }, { "epoch": 2.3689827325608026, "grad_norm": 0.13222935578960904, "learning_rate": 2.4401686923608997e-05, "loss": 0.431, "num_tokens": 769359131.0, "step": 6467 }, { "epoch": 2.369349150368708, "grad_norm": 0.13327359924951387, "learning_rate": 2.4397453997609902e-05, "loss": 0.4261, "num_tokens": 770163608.0, "step": 6468 }, { "epoch": 2.3697155681766136, "grad_norm": 0.1439557975537942, "learning_rate": 2.4393220936629365e-05, "loss": 0.4397, "num_tokens": 770863526.0, "step": 6469 }, { "epoch": 2.370081985984519, "grad_norm": 0.13708845827179764, "learning_rate": 2.4388987740905713e-05, "loss": 0.4444, "num_tokens": 771578712.0, "step": 6470 }, { "epoch": 2.3704484037924245, "grad_norm": 0.14582778104723676, "learning_rate": 2.438475441067729e-05, "loss": 0.4401, "num_tokens": 772261260.0, "step": 6471 }, { "epoch": 2.3708148216003297, "grad_norm": 0.14953930296383872, "learning_rate": 2.438052094618244e-05, "loss": 0.4836, "num_tokens": 773002277.0, "step": 6472 }, { "epoch": 2.3711812394082354, "grad_norm": 0.13613195749555124, "learning_rate": 2.437628734765952e-05, "loss": 0.4485, "num_tokens": 773868501.0, "step": 6473 }, { "epoch": 2.3715476572161407, "grad_norm": 0.13345466987968446, "learning_rate": 2.437205361534688e-05, "loss": 0.4452, "num_tokens": 774589463.0, "step": 6474 }, { "epoch": 2.371914075024046, "grad_norm": 0.14474672706994796, "learning_rate": 2.4367819749482896e-05, "loss": 0.4699, "num_tokens": 775311248.0, "step": 6475 }, { "epoch": 2.3722804928319516, "grad_norm": 0.14499295020068914, "learning_rate": 2.436358575030594e-05, "loss": 0.4526, "num_tokens": 776042455.0, "step": 6476 }, { "epoch": 2.3726469106398573, "grad_norm": 0.13523940133348264, "learning_rate": 2.4359351618054383e-05, "loss": 0.4428, "num_tokens": 776828603.0, "step": 6477 }, { "epoch": 2.3730133284477626, "grad_norm": 0.13483811719542013, "learning_rate": 2.435511735296664e-05, "loss": 0.4685, "num_tokens": 777627305.0, "step": 6478 }, { "epoch": 2.373379746255668, "grad_norm": 0.1298025297181489, "learning_rate": 2.4350882955281086e-05, "loss": 0.4455, "num_tokens": 778351938.0, "step": 6479 }, { "epoch": 2.3737461640635735, "grad_norm": 0.13781993064536108, "learning_rate": 2.4346648425236133e-05, "loss": 0.434, "num_tokens": 779139396.0, "step": 6480 }, { "epoch": 2.3741125818714788, "grad_norm": 0.1288429375222208, "learning_rate": 2.4342413763070205e-05, "loss": 0.4581, "num_tokens": 779970903.0, "step": 6481 }, { "epoch": 2.3744789996793845, "grad_norm": 0.13649958418317257, "learning_rate": 2.4338178969021707e-05, "loss": 0.4581, "num_tokens": 780735047.0, "step": 6482 }, { "epoch": 2.3748454174872897, "grad_norm": 0.1395586963225769, "learning_rate": 2.433394404332908e-05, "loss": 0.4536, "num_tokens": 781421923.0, "step": 6483 }, { "epoch": 2.3752118352951954, "grad_norm": 0.14266894606946973, "learning_rate": 2.432970898623074e-05, "loss": 0.4757, "num_tokens": 782115602.0, "step": 6484 }, { "epoch": 2.3755782531031007, "grad_norm": 0.13468188827684158, "learning_rate": 2.4325473797965144e-05, "loss": 0.4527, "num_tokens": 782890611.0, "step": 6485 }, { "epoch": 2.3759446709110064, "grad_norm": 0.12690795835356034, "learning_rate": 2.4321238478770734e-05, "loss": 0.4541, "num_tokens": 783784927.0, "step": 6486 }, { "epoch": 2.3763110887189116, "grad_norm": 0.13601683690275662, "learning_rate": 2.4317003028885974e-05, "loss": 0.4478, "num_tokens": 784559320.0, "step": 6487 }, { "epoch": 2.3766775065268173, "grad_norm": 0.14515748060527656, "learning_rate": 2.4312767448549318e-05, "loss": 0.4508, "num_tokens": 785279653.0, "step": 6488 }, { "epoch": 2.3770439243347226, "grad_norm": 0.13145188207082753, "learning_rate": 2.430853173799925e-05, "loss": 0.4732, "num_tokens": 786104997.0, "step": 6489 }, { "epoch": 2.3774103421426283, "grad_norm": 0.1278366068266417, "learning_rate": 2.4304295897474237e-05, "loss": 0.4314, "num_tokens": 786944668.0, "step": 6490 }, { "epoch": 2.3777767599505335, "grad_norm": 0.13702460116059811, "learning_rate": 2.4300059927212777e-05, "loss": 0.4469, "num_tokens": 787679534.0, "step": 6491 }, { "epoch": 2.378143177758439, "grad_norm": 0.13549681159078075, "learning_rate": 2.429582382745336e-05, "loss": 0.4625, "num_tokens": 788449767.0, "step": 6492 }, { "epoch": 2.3785095955663444, "grad_norm": 0.13563280859613228, "learning_rate": 2.4291587598434477e-05, "loss": 0.4528, "num_tokens": 789226747.0, "step": 6493 }, { "epoch": 2.37887601337425, "grad_norm": 0.141482388520899, "learning_rate": 2.428735124039465e-05, "loss": 0.4662, "num_tokens": 789880239.0, "step": 6494 }, { "epoch": 2.3792424311821554, "grad_norm": 0.13668611938266978, "learning_rate": 2.4283114753572382e-05, "loss": 0.4691, "num_tokens": 790620703.0, "step": 6495 }, { "epoch": 2.379608848990061, "grad_norm": 0.13387495990121723, "learning_rate": 2.4278878138206204e-05, "loss": 0.4783, "num_tokens": 791409792.0, "step": 6496 }, { "epoch": 2.3799752667979663, "grad_norm": 0.12667280417344987, "learning_rate": 2.4274641394534647e-05, "loss": 0.457, "num_tokens": 792314114.0, "step": 6497 }, { "epoch": 2.380341684605872, "grad_norm": 0.13541679001117193, "learning_rate": 2.427040452279624e-05, "loss": 0.4278, "num_tokens": 793131376.0, "step": 6498 }, { "epoch": 2.3807081024137773, "grad_norm": 0.12683565708710393, "learning_rate": 2.4266167523229532e-05, "loss": 0.4783, "num_tokens": 793963983.0, "step": 6499 }, { "epoch": 2.3810745202216825, "grad_norm": 0.14396347664479375, "learning_rate": 2.4261930396073085e-05, "loss": 0.4735, "num_tokens": 794599095.0, "step": 6500 }, { "epoch": 2.3814409380295882, "grad_norm": 0.1284493015323148, "learning_rate": 2.425769314156543e-05, "loss": 0.4137, "num_tokens": 795369537.0, "step": 6501 }, { "epoch": 2.381807355837494, "grad_norm": 0.14034710300468423, "learning_rate": 2.4253455759945158e-05, "loss": 0.4581, "num_tokens": 796133332.0, "step": 6502 }, { "epoch": 2.382173773645399, "grad_norm": 0.1418169406236149, "learning_rate": 2.424921825145084e-05, "loss": 0.4506, "num_tokens": 796814472.0, "step": 6503 }, { "epoch": 2.3825401914533044, "grad_norm": 0.13055105150097113, "learning_rate": 2.4244980616321045e-05, "loss": 0.4461, "num_tokens": 797648918.0, "step": 6504 }, { "epoch": 2.38290660926121, "grad_norm": 0.1432078559493527, "learning_rate": 2.424074285479437e-05, "loss": 0.4452, "num_tokens": 798400098.0, "step": 6505 }, { "epoch": 2.3832730270691154, "grad_norm": 0.13126592225804917, "learning_rate": 2.4236504967109397e-05, "loss": 0.4663, "num_tokens": 799207388.0, "step": 6506 }, { "epoch": 2.383639444877021, "grad_norm": 0.13928738341601832, "learning_rate": 2.423226695350474e-05, "loss": 0.4607, "num_tokens": 799922675.0, "step": 6507 }, { "epoch": 2.3840058626849263, "grad_norm": 0.13882094656944804, "learning_rate": 2.4228028814219005e-05, "loss": 0.4466, "num_tokens": 800736001.0, "step": 6508 }, { "epoch": 2.384372280492832, "grad_norm": 0.1302299008619895, "learning_rate": 2.4223790549490808e-05, "loss": 0.4493, "num_tokens": 801627641.0, "step": 6509 }, { "epoch": 2.3847386983007373, "grad_norm": 0.13328706528589607, "learning_rate": 2.421955215955876e-05, "loss": 0.4663, "num_tokens": 802450060.0, "step": 6510 }, { "epoch": 2.385105116108643, "grad_norm": 0.1560386839089837, "learning_rate": 2.4215313644661506e-05, "loss": 0.5022, "num_tokens": 803015266.0, "step": 6511 }, { "epoch": 2.385471533916548, "grad_norm": 0.12682684432521862, "learning_rate": 2.4211075005037682e-05, "loss": 0.4677, "num_tokens": 803819885.0, "step": 6512 }, { "epoch": 2.385837951724454, "grad_norm": 0.13843186882753866, "learning_rate": 2.4206836240925923e-05, "loss": 0.4581, "num_tokens": 804707936.0, "step": 6513 }, { "epoch": 2.386204369532359, "grad_norm": 0.1440093595530175, "learning_rate": 2.4202597352564884e-05, "loss": 0.4452, "num_tokens": 805388155.0, "step": 6514 }, { "epoch": 2.386570787340265, "grad_norm": 0.14415613152430226, "learning_rate": 2.4198358340193216e-05, "loss": 0.464, "num_tokens": 806032132.0, "step": 6515 }, { "epoch": 2.38693720514817, "grad_norm": 0.14209514408672613, "learning_rate": 2.41941192040496e-05, "loss": 0.4632, "num_tokens": 806805597.0, "step": 6516 }, { "epoch": 2.387303622956076, "grad_norm": 0.12807031158851223, "learning_rate": 2.41898799443727e-05, "loss": 0.4548, "num_tokens": 807578280.0, "step": 6517 }, { "epoch": 2.387670040763981, "grad_norm": 0.13816958284424938, "learning_rate": 2.4185640561401183e-05, "loss": 0.475, "num_tokens": 808321608.0, "step": 6518 }, { "epoch": 2.3880364585718867, "grad_norm": 0.14165668341768478, "learning_rate": 2.4181401055373757e-05, "loss": 0.4633, "num_tokens": 809153866.0, "step": 6519 }, { "epoch": 2.388402876379792, "grad_norm": 0.14969524727297592, "learning_rate": 2.41771614265291e-05, "loss": 0.4587, "num_tokens": 809960241.0, "step": 6520 }, { "epoch": 2.3887692941876977, "grad_norm": 0.1345289666794581, "learning_rate": 2.417292167510591e-05, "loss": 0.4448, "num_tokens": 810670184.0, "step": 6521 }, { "epoch": 2.389135711995603, "grad_norm": 0.14087182543602045, "learning_rate": 2.41686818013429e-05, "loss": 0.4497, "num_tokens": 811452979.0, "step": 6522 }, { "epoch": 2.3895021298035086, "grad_norm": 0.1411965128920239, "learning_rate": 2.4164441805478786e-05, "loss": 0.463, "num_tokens": 812297240.0, "step": 6523 }, { "epoch": 2.389868547611414, "grad_norm": 0.14717747722651653, "learning_rate": 2.4160201687752273e-05, "loss": 0.4724, "num_tokens": 813043189.0, "step": 6524 }, { "epoch": 2.3902349654193196, "grad_norm": 0.13231082642285177, "learning_rate": 2.4155961448402106e-05, "loss": 0.461, "num_tokens": 813843364.0, "step": 6525 }, { "epoch": 2.390601383227225, "grad_norm": 0.1680494831464939, "learning_rate": 2.4151721087667004e-05, "loss": 0.4909, "num_tokens": 814476382.0, "step": 6526 }, { "epoch": 2.3909678010351305, "grad_norm": 0.1373482440631755, "learning_rate": 2.4147480605785723e-05, "loss": 0.4631, "num_tokens": 815350789.0, "step": 6527 }, { "epoch": 2.3913342188430358, "grad_norm": 0.14908284051980022, "learning_rate": 2.4143240002996998e-05, "loss": 0.4427, "num_tokens": 816004740.0, "step": 6528 }, { "epoch": 2.391700636650941, "grad_norm": 0.13927663552975975, "learning_rate": 2.4138999279539586e-05, "loss": 0.4662, "num_tokens": 816850089.0, "step": 6529 }, { "epoch": 2.3920670544588467, "grad_norm": 0.15356988724864268, "learning_rate": 2.413475843565226e-05, "loss": 0.496, "num_tokens": 817524461.0, "step": 6530 }, { "epoch": 2.3924334722667524, "grad_norm": 0.15082491174902632, "learning_rate": 2.4130517471573774e-05, "loss": 0.4531, "num_tokens": 818217962.0, "step": 6531 }, { "epoch": 2.3927998900746577, "grad_norm": 0.1529162908217174, "learning_rate": 2.4126276387542897e-05, "loss": 0.4606, "num_tokens": 818837830.0, "step": 6532 }, { "epoch": 2.393166307882563, "grad_norm": 0.1541908907743856, "learning_rate": 2.412203518379843e-05, "loss": 0.4502, "num_tokens": 819623298.0, "step": 6533 }, { "epoch": 2.3935327256904686, "grad_norm": 0.13998376305540947, "learning_rate": 2.4117793860579158e-05, "loss": 0.465, "num_tokens": 820364253.0, "step": 6534 }, { "epoch": 2.393899143498374, "grad_norm": 0.13388862070832167, "learning_rate": 2.4113552418123856e-05, "loss": 0.4472, "num_tokens": 821140110.0, "step": 6535 }, { "epoch": 2.3942655613062795, "grad_norm": 0.14018646188772382, "learning_rate": 2.4109310856671346e-05, "loss": 0.4501, "num_tokens": 821911286.0, "step": 6536 }, { "epoch": 2.394631979114185, "grad_norm": 0.13336651308427697, "learning_rate": 2.410506917646042e-05, "loss": 0.4731, "num_tokens": 822677985.0, "step": 6537 }, { "epoch": 2.3949983969220905, "grad_norm": 0.14156347861739585, "learning_rate": 2.410082737772992e-05, "loss": 0.471, "num_tokens": 823475378.0, "step": 6538 }, { "epoch": 2.3953648147299957, "grad_norm": 0.1499267082908866, "learning_rate": 2.4096585460718636e-05, "loss": 0.4673, "num_tokens": 824245400.0, "step": 6539 }, { "epoch": 2.3957312325379014, "grad_norm": 0.14903106502614102, "learning_rate": 2.4092343425665417e-05, "loss": 0.4797, "num_tokens": 824989918.0, "step": 6540 }, { "epoch": 2.3960976503458067, "grad_norm": 0.15803201479369638, "learning_rate": 2.4088101272809095e-05, "loss": 0.5151, "num_tokens": 825677875.0, "step": 6541 }, { "epoch": 2.3964640681537124, "grad_norm": 0.12929379979668498, "learning_rate": 2.4083859002388504e-05, "loss": 0.4427, "num_tokens": 826502719.0, "step": 6542 }, { "epoch": 2.3968304859616176, "grad_norm": 0.13251443433784377, "learning_rate": 2.4079616614642495e-05, "loss": 0.4287, "num_tokens": 827406623.0, "step": 6543 }, { "epoch": 2.3971969037695233, "grad_norm": 0.13540524868557768, "learning_rate": 2.407537410980993e-05, "loss": 0.4299, "num_tokens": 828285022.0, "step": 6544 }, { "epoch": 2.3975633215774286, "grad_norm": 0.14479364150010102, "learning_rate": 2.407113148812966e-05, "loss": 0.4483, "num_tokens": 829007254.0, "step": 6545 }, { "epoch": 2.3979297393853343, "grad_norm": 0.13752110398585818, "learning_rate": 2.4066888749840557e-05, "loss": 0.4749, "num_tokens": 829771175.0, "step": 6546 }, { "epoch": 2.3982961571932395, "grad_norm": 0.14080305583602107, "learning_rate": 2.4062645895181504e-05, "loss": 0.4119, "num_tokens": 830517533.0, "step": 6547 }, { "epoch": 2.398662575001145, "grad_norm": 0.14263921954825626, "learning_rate": 2.4058402924391374e-05, "loss": 0.4808, "num_tokens": 831227119.0, "step": 6548 }, { "epoch": 2.3990289928090505, "grad_norm": 0.137109388803585, "learning_rate": 2.405415983770905e-05, "loss": 0.4498, "num_tokens": 831988677.0, "step": 6549 }, { "epoch": 2.399395410616956, "grad_norm": 0.1324368460437444, "learning_rate": 2.404991663537344e-05, "loss": 0.4343, "num_tokens": 832773622.0, "step": 6550 }, { "epoch": 2.3997618284248614, "grad_norm": 0.1477482545088786, "learning_rate": 2.404567331762343e-05, "loss": 0.4548, "num_tokens": 833526854.0, "step": 6551 }, { "epoch": 2.400128246232767, "grad_norm": 0.12831808241467965, "learning_rate": 2.4041429884697937e-05, "loss": 0.418, "num_tokens": 834361072.0, "step": 6552 }, { "epoch": 2.4004946640406724, "grad_norm": 0.1368218690614581, "learning_rate": 2.4037186336835868e-05, "loss": 0.4442, "num_tokens": 835152190.0, "step": 6553 }, { "epoch": 2.4008610818485776, "grad_norm": 0.143203151726524, "learning_rate": 2.4032942674276142e-05, "loss": 0.4755, "num_tokens": 835877024.0, "step": 6554 }, { "epoch": 2.4012274996564833, "grad_norm": 0.1290588788790547, "learning_rate": 2.4028698897257702e-05, "loss": 0.4456, "num_tokens": 836710972.0, "step": 6555 }, { "epoch": 2.401593917464389, "grad_norm": 0.14526485988771262, "learning_rate": 2.4024455006019464e-05, "loss": 0.4828, "num_tokens": 837402342.0, "step": 6556 }, { "epoch": 2.4019603352722942, "grad_norm": 0.12601423167936737, "learning_rate": 2.4020211000800373e-05, "loss": 0.4419, "num_tokens": 838239016.0, "step": 6557 }, { "epoch": 2.4023267530801995, "grad_norm": 0.13195372945229178, "learning_rate": 2.401596688183938e-05, "loss": 0.4519, "num_tokens": 839049268.0, "step": 6558 }, { "epoch": 2.402693170888105, "grad_norm": 0.13365325380366436, "learning_rate": 2.4011722649375426e-05, "loss": 0.4462, "num_tokens": 839844830.0, "step": 6559 }, { "epoch": 2.4030595886960104, "grad_norm": 0.13599882999178808, "learning_rate": 2.400747830364748e-05, "loss": 0.4364, "num_tokens": 840591481.0, "step": 6560 }, { "epoch": 2.403426006503916, "grad_norm": 0.14269459068736742, "learning_rate": 2.4003233844894505e-05, "loss": 0.475, "num_tokens": 841321434.0, "step": 6561 }, { "epoch": 2.4037924243118214, "grad_norm": 0.1491130132814392, "learning_rate": 2.3998989273355467e-05, "loss": 0.4511, "num_tokens": 842019892.0, "step": 6562 }, { "epoch": 2.404158842119727, "grad_norm": 0.14578274490980178, "learning_rate": 2.399474458926935e-05, "loss": 0.4734, "num_tokens": 842898036.0, "step": 6563 }, { "epoch": 2.4045252599276323, "grad_norm": 0.13827810870941507, "learning_rate": 2.399049979287514e-05, "loss": 0.4504, "num_tokens": 843621704.0, "step": 6564 }, { "epoch": 2.404891677735538, "grad_norm": 0.1427803021483017, "learning_rate": 2.3986254884411812e-05, "loss": 0.4758, "num_tokens": 844293871.0, "step": 6565 }, { "epoch": 2.4052580955434433, "grad_norm": 0.13683378123151455, "learning_rate": 2.3982009864118377e-05, "loss": 0.4582, "num_tokens": 845031080.0, "step": 6566 }, { "epoch": 2.405624513351349, "grad_norm": 0.13807079490291188, "learning_rate": 2.3977764732233844e-05, "loss": 0.4553, "num_tokens": 845817080.0, "step": 6567 }, { "epoch": 2.405990931159254, "grad_norm": 0.14086239818211058, "learning_rate": 2.3973519488997208e-05, "loss": 0.4538, "num_tokens": 846570028.0, "step": 6568 }, { "epoch": 2.40635734896716, "grad_norm": 0.13759001782737956, "learning_rate": 2.3969274134647495e-05, "loss": 0.4329, "num_tokens": 847228925.0, "step": 6569 }, { "epoch": 2.406723766775065, "grad_norm": 0.15155610217756676, "learning_rate": 2.3965028669423716e-05, "loss": 0.4808, "num_tokens": 847852713.0, "step": 6570 }, { "epoch": 2.407090184582971, "grad_norm": 0.14521918728731306, "learning_rate": 2.3960783093564906e-05, "loss": 0.445, "num_tokens": 848594695.0, "step": 6571 }, { "epoch": 2.407456602390876, "grad_norm": 0.13110163118009166, "learning_rate": 2.3956537407310102e-05, "loss": 0.4669, "num_tokens": 849411712.0, "step": 6572 }, { "epoch": 2.407823020198782, "grad_norm": 0.1263867814602781, "learning_rate": 2.395229161089834e-05, "loss": 0.4476, "num_tokens": 850267068.0, "step": 6573 }, { "epoch": 2.408189438006687, "grad_norm": 0.1436054461353936, "learning_rate": 2.394804570456867e-05, "loss": 0.4456, "num_tokens": 850996467.0, "step": 6574 }, { "epoch": 2.4085558558145927, "grad_norm": 0.14691490747799274, "learning_rate": 2.394379968856014e-05, "loss": 0.4918, "num_tokens": 851715417.0, "step": 6575 }, { "epoch": 2.408922273622498, "grad_norm": 0.123856052585297, "learning_rate": 2.393955356311181e-05, "loss": 0.4367, "num_tokens": 852572949.0, "step": 6576 }, { "epoch": 2.4092886914304037, "grad_norm": 0.1336416199234444, "learning_rate": 2.393530732846275e-05, "loss": 0.4004, "num_tokens": 853268981.0, "step": 6577 }, { "epoch": 2.409655109238309, "grad_norm": 0.14313420691588088, "learning_rate": 2.3931060984852037e-05, "loss": 0.4746, "num_tokens": 854032274.0, "step": 6578 }, { "epoch": 2.4100215270462146, "grad_norm": 0.13835407033413716, "learning_rate": 2.3926814532518732e-05, "loss": 0.4565, "num_tokens": 854805897.0, "step": 6579 }, { "epoch": 2.41038794485412, "grad_norm": 0.13267260349454937, "learning_rate": 2.392256797170193e-05, "loss": 0.4609, "num_tokens": 855692780.0, "step": 6580 }, { "epoch": 2.4107543626620256, "grad_norm": 0.14793468028107143, "learning_rate": 2.391832130264072e-05, "loss": 0.4606, "num_tokens": 856431144.0, "step": 6581 }, { "epoch": 2.411120780469931, "grad_norm": 0.13343920016769234, "learning_rate": 2.3914074525574194e-05, "loss": 0.4622, "num_tokens": 857222293.0, "step": 6582 }, { "epoch": 2.411487198277836, "grad_norm": 0.150448863900927, "learning_rate": 2.3909827640741458e-05, "loss": 0.4684, "num_tokens": 857953636.0, "step": 6583 }, { "epoch": 2.411853616085742, "grad_norm": 0.14511401485529316, "learning_rate": 2.3905580648381616e-05, "loss": 0.4751, "num_tokens": 858785246.0, "step": 6584 }, { "epoch": 2.4122200338936475, "grad_norm": 0.13824902640553327, "learning_rate": 2.3901333548733786e-05, "loss": 0.4504, "num_tokens": 859506012.0, "step": 6585 }, { "epoch": 2.4125864517015527, "grad_norm": 0.13299499570088646, "learning_rate": 2.389708634203709e-05, "loss": 0.4505, "num_tokens": 860368134.0, "step": 6586 }, { "epoch": 2.412952869509458, "grad_norm": 0.1439998497309875, "learning_rate": 2.3892839028530637e-05, "loss": 0.4619, "num_tokens": 861171778.0, "step": 6587 }, { "epoch": 2.4133192873173637, "grad_norm": 0.13589167614405837, "learning_rate": 2.3888591608453587e-05, "loss": 0.4314, "num_tokens": 861829905.0, "step": 6588 }, { "epoch": 2.413685705125269, "grad_norm": 0.13913675824736024, "learning_rate": 2.3884344082045063e-05, "loss": 0.4378, "num_tokens": 862653235.0, "step": 6589 }, { "epoch": 2.4140521229331746, "grad_norm": 0.13872487930585192, "learning_rate": 2.388009644954421e-05, "loss": 0.4689, "num_tokens": 863367581.0, "step": 6590 }, { "epoch": 2.41441854074108, "grad_norm": 0.12979276306553927, "learning_rate": 2.3875848711190175e-05, "loss": 0.4338, "num_tokens": 864189281.0, "step": 6591 }, { "epoch": 2.4147849585489856, "grad_norm": 0.13209688356547897, "learning_rate": 2.387160086722212e-05, "loss": 0.4713, "num_tokens": 864931032.0, "step": 6592 }, { "epoch": 2.415151376356891, "grad_norm": 0.14173364907052685, "learning_rate": 2.3867352917879206e-05, "loss": 0.464, "num_tokens": 865616848.0, "step": 6593 }, { "epoch": 2.4155177941647965, "grad_norm": 0.13222118045835832, "learning_rate": 2.3863104863400595e-05, "loss": 0.4692, "num_tokens": 866345135.0, "step": 6594 }, { "epoch": 2.4158842119727018, "grad_norm": 0.13739758066930036, "learning_rate": 2.3858856704025474e-05, "loss": 0.4718, "num_tokens": 867137108.0, "step": 6595 }, { "epoch": 2.4162506297806075, "grad_norm": 0.15245976846493717, "learning_rate": 2.3854608439993005e-05, "loss": 0.4684, "num_tokens": 867790021.0, "step": 6596 }, { "epoch": 2.4166170475885127, "grad_norm": 0.12441254649219727, "learning_rate": 2.385036007154239e-05, "loss": 0.4697, "num_tokens": 868600719.0, "step": 6597 }, { "epoch": 2.4169834653964184, "grad_norm": 0.12674810559210062, "learning_rate": 2.3846111598912815e-05, "loss": 0.4137, "num_tokens": 869368721.0, "step": 6598 }, { "epoch": 2.4173498832043236, "grad_norm": 0.13647411213952018, "learning_rate": 2.3841863022343474e-05, "loss": 0.4765, "num_tokens": 870168490.0, "step": 6599 }, { "epoch": 2.4177163010122293, "grad_norm": 0.13059755937199752, "learning_rate": 2.3837614342073573e-05, "loss": 0.4211, "num_tokens": 871027441.0, "step": 6600 }, { "epoch": 2.4180827188201346, "grad_norm": 0.13449219708427165, "learning_rate": 2.3833365558342315e-05, "loss": 0.4329, "num_tokens": 871740784.0, "step": 6601 }, { "epoch": 2.4184491366280403, "grad_norm": 0.12401418068514644, "learning_rate": 2.382911667138893e-05, "loss": 0.4674, "num_tokens": 872639724.0, "step": 6602 }, { "epoch": 2.4188155544359455, "grad_norm": 0.1404793494492926, "learning_rate": 2.3824867681452632e-05, "loss": 0.4607, "num_tokens": 873403664.0, "step": 6603 }, { "epoch": 2.4191819722438512, "grad_norm": 0.13866866686441442, "learning_rate": 2.3820618588772635e-05, "loss": 0.4949, "num_tokens": 874165623.0, "step": 6604 }, { "epoch": 2.4195483900517565, "grad_norm": 0.1298156031470973, "learning_rate": 2.3816369393588194e-05, "loss": 0.4201, "num_tokens": 874993768.0, "step": 6605 }, { "epoch": 2.419914807859662, "grad_norm": 0.12766231368887246, "learning_rate": 2.3812120096138527e-05, "loss": 0.4332, "num_tokens": 875803436.0, "step": 6606 }, { "epoch": 2.4202812256675674, "grad_norm": 0.13925764412969951, "learning_rate": 2.3807870696662885e-05, "loss": 0.4636, "num_tokens": 876465173.0, "step": 6607 }, { "epoch": 2.4206476434754727, "grad_norm": 0.13554082647653348, "learning_rate": 2.380362119540052e-05, "loss": 0.4734, "num_tokens": 877245193.0, "step": 6608 }, { "epoch": 2.4210140612833784, "grad_norm": 0.1288178710056068, "learning_rate": 2.379937159259069e-05, "loss": 0.4288, "num_tokens": 878034351.0, "step": 6609 }, { "epoch": 2.421380479091284, "grad_norm": 0.1355085876753675, "learning_rate": 2.379512188847265e-05, "loss": 0.5064, "num_tokens": 878773951.0, "step": 6610 }, { "epoch": 2.4217468968991893, "grad_norm": 0.12976295235855134, "learning_rate": 2.3790872083285663e-05, "loss": 0.4453, "num_tokens": 879563678.0, "step": 6611 }, { "epoch": 2.4221133147070946, "grad_norm": 0.13596142566280756, "learning_rate": 2.3786622177269014e-05, "loss": 0.4548, "num_tokens": 880385904.0, "step": 6612 }, { "epoch": 2.4224797325150003, "grad_norm": 0.15011769066127603, "learning_rate": 2.3782372170661975e-05, "loss": 0.4942, "num_tokens": 880988931.0, "step": 6613 }, { "epoch": 2.4228461503229055, "grad_norm": 0.1463076821768097, "learning_rate": 2.377812206370383e-05, "loss": 0.4616, "num_tokens": 881693645.0, "step": 6614 }, { "epoch": 2.423212568130811, "grad_norm": 0.1333350732761946, "learning_rate": 2.377387185663386e-05, "loss": 0.4413, "num_tokens": 882513376.0, "step": 6615 }, { "epoch": 2.4235789859387165, "grad_norm": 0.13549297401666555, "learning_rate": 2.3769621549691377e-05, "loss": 0.4709, "num_tokens": 883318910.0, "step": 6616 }, { "epoch": 2.423945403746622, "grad_norm": 0.1338425193205137, "learning_rate": 2.3765371143115664e-05, "loss": 0.4357, "num_tokens": 883995708.0, "step": 6617 }, { "epoch": 2.4243118215545274, "grad_norm": 0.14539751394469808, "learning_rate": 2.3761120637146043e-05, "loss": 0.4227, "num_tokens": 884764043.0, "step": 6618 }, { "epoch": 2.424678239362433, "grad_norm": 0.12330920794067543, "learning_rate": 2.3756870032021824e-05, "loss": 0.4308, "num_tokens": 885638230.0, "step": 6619 }, { "epoch": 2.4250446571703383, "grad_norm": 0.12246603195042365, "learning_rate": 2.3752619327982313e-05, "loss": 0.4265, "num_tokens": 886461005.0, "step": 6620 }, { "epoch": 2.425411074978244, "grad_norm": 0.1365950002192731, "learning_rate": 2.3748368525266835e-05, "loss": 0.4342, "num_tokens": 887119114.0, "step": 6621 }, { "epoch": 2.4257774927861493, "grad_norm": 0.14920041220330307, "learning_rate": 2.3744117624114735e-05, "loss": 0.451, "num_tokens": 887740392.0, "step": 6622 }, { "epoch": 2.426143910594055, "grad_norm": 0.14388148049103863, "learning_rate": 2.3739866624765325e-05, "loss": 0.4672, "num_tokens": 888442010.0, "step": 6623 }, { "epoch": 2.4265103284019602, "grad_norm": 0.1383154871538921, "learning_rate": 2.373561552745796e-05, "loss": 0.4436, "num_tokens": 889181451.0, "step": 6624 }, { "epoch": 2.426876746209866, "grad_norm": 0.1420677100391978, "learning_rate": 2.373136433243198e-05, "loss": 0.3995, "num_tokens": 889924465.0, "step": 6625 }, { "epoch": 2.427243164017771, "grad_norm": 0.15291728371389166, "learning_rate": 2.372711303992674e-05, "loss": 0.4945, "num_tokens": 890653431.0, "step": 6626 }, { "epoch": 2.427609581825677, "grad_norm": 0.1405387458775623, "learning_rate": 2.372286165018159e-05, "loss": 0.4614, "num_tokens": 891333981.0, "step": 6627 }, { "epoch": 2.427975999633582, "grad_norm": 0.14937270036583594, "learning_rate": 2.3718610163435892e-05, "loss": 0.4766, "num_tokens": 892091122.0, "step": 6628 }, { "epoch": 2.428342417441488, "grad_norm": 0.14845369325041954, "learning_rate": 2.3714358579929015e-05, "loss": 0.4595, "num_tokens": 892814970.0, "step": 6629 }, { "epoch": 2.428708835249393, "grad_norm": 0.14069066554046464, "learning_rate": 2.3710106899900338e-05, "loss": 0.4611, "num_tokens": 893524000.0, "step": 6630 }, { "epoch": 2.4290752530572988, "grad_norm": 0.13222114684284922, "learning_rate": 2.370585512358923e-05, "loss": 0.4814, "num_tokens": 894346926.0, "step": 6631 }, { "epoch": 2.429441670865204, "grad_norm": 0.14276093078468438, "learning_rate": 2.3701603251235073e-05, "loss": 0.4618, "num_tokens": 895199230.0, "step": 6632 }, { "epoch": 2.4298080886731093, "grad_norm": 0.12753698980330558, "learning_rate": 2.3697351283077266e-05, "loss": 0.4572, "num_tokens": 896007782.0, "step": 6633 }, { "epoch": 2.430174506481015, "grad_norm": 0.13824545420598194, "learning_rate": 2.36930992193552e-05, "loss": 0.4603, "num_tokens": 896739931.0, "step": 6634 }, { "epoch": 2.4305409242889207, "grad_norm": 0.13851786898491913, "learning_rate": 2.3688847060308266e-05, "loss": 0.4303, "num_tokens": 897448155.0, "step": 6635 }, { "epoch": 2.430907342096826, "grad_norm": 0.13670836486478824, "learning_rate": 2.3684594806175883e-05, "loss": 0.4762, "num_tokens": 898217884.0, "step": 6636 }, { "epoch": 2.431273759904731, "grad_norm": 0.13934906797505053, "learning_rate": 2.368034245719745e-05, "loss": 0.4604, "num_tokens": 898982780.0, "step": 6637 }, { "epoch": 2.431640177712637, "grad_norm": 0.12616976613799272, "learning_rate": 2.3676090013612383e-05, "loss": 0.4562, "num_tokens": 899798603.0, "step": 6638 }, { "epoch": 2.4320065955205425, "grad_norm": 0.14122721168334984, "learning_rate": 2.3671837475660114e-05, "loss": 0.4949, "num_tokens": 900596150.0, "step": 6639 }, { "epoch": 2.432373013328448, "grad_norm": 0.13120077039759123, "learning_rate": 2.3667584843580058e-05, "loss": 0.4385, "num_tokens": 901376325.0, "step": 6640 }, { "epoch": 2.432739431136353, "grad_norm": 0.13677707110134557, "learning_rate": 2.366333211761165e-05, "loss": 0.4742, "num_tokens": 902174091.0, "step": 6641 }, { "epoch": 2.4331058489442587, "grad_norm": 0.13485602026855079, "learning_rate": 2.365907929799433e-05, "loss": 0.4514, "num_tokens": 902874057.0, "step": 6642 }, { "epoch": 2.433472266752164, "grad_norm": 0.13680482253890627, "learning_rate": 2.365482638496754e-05, "loss": 0.456, "num_tokens": 903635668.0, "step": 6643 }, { "epoch": 2.4338386845600697, "grad_norm": 0.12958626039139465, "learning_rate": 2.3650573378770725e-05, "loss": 0.4503, "num_tokens": 904437895.0, "step": 6644 }, { "epoch": 2.434205102367975, "grad_norm": 0.1500258707037051, "learning_rate": 2.3646320279643332e-05, "loss": 0.4481, "num_tokens": 905150174.0, "step": 6645 }, { "epoch": 2.4345715201758806, "grad_norm": 0.14594524984075646, "learning_rate": 2.364206708782483e-05, "loss": 0.4433, "num_tokens": 905847038.0, "step": 6646 }, { "epoch": 2.434937937983786, "grad_norm": 0.12894342737942568, "learning_rate": 2.3637813803554676e-05, "loss": 0.4611, "num_tokens": 906667649.0, "step": 6647 }, { "epoch": 2.4353043557916916, "grad_norm": 0.12651234044899543, "learning_rate": 2.363356042707234e-05, "loss": 0.441, "num_tokens": 907463199.0, "step": 6648 }, { "epoch": 2.435670773599597, "grad_norm": 0.15486265814775213, "learning_rate": 2.3629306958617297e-05, "loss": 0.4491, "num_tokens": 908079981.0, "step": 6649 }, { "epoch": 2.4360371914075025, "grad_norm": 0.14411088179410944, "learning_rate": 2.3625053398429025e-05, "loss": 0.4815, "num_tokens": 908791867.0, "step": 6650 }, { "epoch": 2.4364036092154078, "grad_norm": 0.12978221556177524, "learning_rate": 2.3620799746746997e-05, "loss": 0.4716, "num_tokens": 909621918.0, "step": 6651 }, { "epoch": 2.4367700270233135, "grad_norm": 0.12971548436434588, "learning_rate": 2.3616546003810722e-05, "loss": 0.4696, "num_tokens": 910473195.0, "step": 6652 }, { "epoch": 2.4371364448312187, "grad_norm": 0.13941738370099002, "learning_rate": 2.3612292169859687e-05, "loss": 0.463, "num_tokens": 911243045.0, "step": 6653 }, { "epoch": 2.4375028626391244, "grad_norm": 0.1321799965630477, "learning_rate": 2.3608038245133377e-05, "loss": 0.4606, "num_tokens": 912038561.0, "step": 6654 }, { "epoch": 2.4378692804470297, "grad_norm": 0.127132414778859, "learning_rate": 2.3603784229871317e-05, "loss": 0.4586, "num_tokens": 912888364.0, "step": 6655 }, { "epoch": 2.4382356982549354, "grad_norm": 0.1330949096904294, "learning_rate": 2.3599530124313e-05, "loss": 0.4595, "num_tokens": 913754251.0, "step": 6656 }, { "epoch": 2.4386021160628406, "grad_norm": 0.13599173572347303, "learning_rate": 2.3595275928697958e-05, "loss": 0.4548, "num_tokens": 914541101.0, "step": 6657 }, { "epoch": 2.4389685338707463, "grad_norm": 0.14954733576614232, "learning_rate": 2.35910216432657e-05, "loss": 0.4826, "num_tokens": 915260637.0, "step": 6658 }, { "epoch": 2.4393349516786516, "grad_norm": 0.1527569861879725, "learning_rate": 2.358676726825574e-05, "loss": 0.4723, "num_tokens": 915914145.0, "step": 6659 }, { "epoch": 2.4397013694865572, "grad_norm": 0.13909105623307597, "learning_rate": 2.358251280390763e-05, "loss": 0.4485, "num_tokens": 916737453.0, "step": 6660 }, { "epoch": 2.4400677872944625, "grad_norm": 0.14351479823423607, "learning_rate": 2.3578258250460896e-05, "loss": 0.4705, "num_tokens": 917512320.0, "step": 6661 }, { "epoch": 2.4404342051023677, "grad_norm": 0.13274520603859205, "learning_rate": 2.3574003608155067e-05, "loss": 0.4624, "num_tokens": 918317167.0, "step": 6662 }, { "epoch": 2.4408006229102734, "grad_norm": 0.1392279603734575, "learning_rate": 2.3569748877229696e-05, "loss": 0.4512, "num_tokens": 919028297.0, "step": 6663 }, { "epoch": 2.441167040718179, "grad_norm": 0.14855226326027507, "learning_rate": 2.356549405792434e-05, "loss": 0.482, "num_tokens": 919771644.0, "step": 6664 }, { "epoch": 2.4415334585260844, "grad_norm": 0.1312051028982427, "learning_rate": 2.3561239150478548e-05, "loss": 0.4441, "num_tokens": 920612609.0, "step": 6665 }, { "epoch": 2.4418998763339896, "grad_norm": 0.139437379235759, "learning_rate": 2.355698415513188e-05, "loss": 0.4453, "num_tokens": 921470700.0, "step": 6666 }, { "epoch": 2.4422662941418953, "grad_norm": 0.1348351363941482, "learning_rate": 2.3552729072123895e-05, "loss": 0.4468, "num_tokens": 922196273.0, "step": 6667 }, { "epoch": 2.4426327119498006, "grad_norm": 0.13862548041740197, "learning_rate": 2.3548473901694168e-05, "loss": 0.4338, "num_tokens": 922950303.0, "step": 6668 }, { "epoch": 2.4429991297577063, "grad_norm": 0.1488474025608506, "learning_rate": 2.3544218644082278e-05, "loss": 0.442, "num_tokens": 923679092.0, "step": 6669 }, { "epoch": 2.4433655475656115, "grad_norm": 0.14085324401956698, "learning_rate": 2.3539963299527795e-05, "loss": 0.431, "num_tokens": 924479966.0, "step": 6670 }, { "epoch": 2.4437319653735172, "grad_norm": 0.13241215691291172, "learning_rate": 2.3535707868270318e-05, "loss": 0.4568, "num_tokens": 925306872.0, "step": 6671 }, { "epoch": 2.4440983831814225, "grad_norm": 0.13725190600969706, "learning_rate": 2.3531452350549415e-05, "loss": 0.4445, "num_tokens": 925996553.0, "step": 6672 }, { "epoch": 2.444464800989328, "grad_norm": 0.1463522022764861, "learning_rate": 2.352719674660469e-05, "loss": 0.4569, "num_tokens": 926762823.0, "step": 6673 }, { "epoch": 2.4448312187972334, "grad_norm": 0.13473344037776924, "learning_rate": 2.3522941056675754e-05, "loss": 0.4305, "num_tokens": 927536498.0, "step": 6674 }, { "epoch": 2.445197636605139, "grad_norm": 0.13566634954992707, "learning_rate": 2.35186852810022e-05, "loss": 0.4314, "num_tokens": 928350079.0, "step": 6675 }, { "epoch": 2.4455640544130444, "grad_norm": 0.13725177874049252, "learning_rate": 2.3514429419823626e-05, "loss": 0.4753, "num_tokens": 929108685.0, "step": 6676 }, { "epoch": 2.44593047222095, "grad_norm": 0.13423173291362572, "learning_rate": 2.3510173473379663e-05, "loss": 0.4667, "num_tokens": 929947615.0, "step": 6677 }, { "epoch": 2.4462968900288553, "grad_norm": 0.13281058157179515, "learning_rate": 2.3505917441909927e-05, "loss": 0.4472, "num_tokens": 930766648.0, "step": 6678 }, { "epoch": 2.446663307836761, "grad_norm": 0.15701976372789345, "learning_rate": 2.350166132565403e-05, "loss": 0.4952, "num_tokens": 931458240.0, "step": 6679 }, { "epoch": 2.4470297256446663, "grad_norm": 0.14191826419315182, "learning_rate": 2.349740512485161e-05, "loss": 0.4643, "num_tokens": 932251383.0, "step": 6680 }, { "epoch": 2.447396143452572, "grad_norm": 0.14517002706383889, "learning_rate": 2.3493148839742292e-05, "loss": 0.4326, "num_tokens": 932993743.0, "step": 6681 }, { "epoch": 2.447762561260477, "grad_norm": 0.14260678433686885, "learning_rate": 2.3488892470565725e-05, "loss": 0.4449, "num_tokens": 933615918.0, "step": 6682 }, { "epoch": 2.448128979068383, "grad_norm": 0.14353603939909856, "learning_rate": 2.3484636017561538e-05, "loss": 0.4352, "num_tokens": 934322108.0, "step": 6683 }, { "epoch": 2.448495396876288, "grad_norm": 0.14305265755040242, "learning_rate": 2.3480379480969382e-05, "loss": 0.4499, "num_tokens": 934990970.0, "step": 6684 }, { "epoch": 2.448861814684194, "grad_norm": 0.14231282248187319, "learning_rate": 2.347612286102892e-05, "loss": 0.4667, "num_tokens": 935711485.0, "step": 6685 }, { "epoch": 2.449228232492099, "grad_norm": 0.12730440006411028, "learning_rate": 2.3471866157979788e-05, "loss": 0.4169, "num_tokens": 936557811.0, "step": 6686 }, { "epoch": 2.4495946503000043, "grad_norm": 0.1329521273704206, "learning_rate": 2.3467609372061655e-05, "loss": 0.4607, "num_tokens": 937303979.0, "step": 6687 }, { "epoch": 2.44996106810791, "grad_norm": 0.13426372068256168, "learning_rate": 2.3463352503514197e-05, "loss": 0.4459, "num_tokens": 938050284.0, "step": 6688 }, { "epoch": 2.4503274859158157, "grad_norm": 0.1452998093086837, "learning_rate": 2.3459095552577075e-05, "loss": 0.4713, "num_tokens": 938871893.0, "step": 6689 }, { "epoch": 2.450693903723721, "grad_norm": 0.1298081515692192, "learning_rate": 2.3454838519489957e-05, "loss": 0.455, "num_tokens": 939725285.0, "step": 6690 }, { "epoch": 2.4510603215316262, "grad_norm": 0.13572974954717268, "learning_rate": 2.345058140449254e-05, "loss": 0.4504, "num_tokens": 940545792.0, "step": 6691 }, { "epoch": 2.451426739339532, "grad_norm": 0.14285754486692773, "learning_rate": 2.344632420782449e-05, "loss": 0.4889, "num_tokens": 941375000.0, "step": 6692 }, { "epoch": 2.451793157147437, "grad_norm": 0.13124739032751304, "learning_rate": 2.3442066929725505e-05, "loss": 0.463, "num_tokens": 942228726.0, "step": 6693 }, { "epoch": 2.452159574955343, "grad_norm": 0.12653477981081404, "learning_rate": 2.3437809570435283e-05, "loss": 0.4621, "num_tokens": 943035652.0, "step": 6694 }, { "epoch": 2.452525992763248, "grad_norm": 0.1401444518303184, "learning_rate": 2.3433552130193505e-05, "loss": 0.4617, "num_tokens": 943794060.0, "step": 6695 }, { "epoch": 2.452892410571154, "grad_norm": 0.13116015434724643, "learning_rate": 2.3429294609239896e-05, "loss": 0.437, "num_tokens": 944654507.0, "step": 6696 }, { "epoch": 2.453258828379059, "grad_norm": 0.1313832592905412, "learning_rate": 2.342503700781414e-05, "loss": 0.4331, "num_tokens": 945400824.0, "step": 6697 }, { "epoch": 2.4536252461869648, "grad_norm": 0.14907574172570887, "learning_rate": 2.3420779326155966e-05, "loss": 0.4752, "num_tokens": 946191593.0, "step": 6698 }, { "epoch": 2.45399166399487, "grad_norm": 0.13758259599688294, "learning_rate": 2.3416521564505087e-05, "loss": 0.4417, "num_tokens": 946893004.0, "step": 6699 }, { "epoch": 2.4543580818027757, "grad_norm": 0.14930302897143669, "learning_rate": 2.3412263723101214e-05, "loss": 0.4861, "num_tokens": 947684320.0, "step": 6700 }, { "epoch": 2.454724499610681, "grad_norm": 0.13276000760220222, "learning_rate": 2.3408005802184074e-05, "loss": 0.4504, "num_tokens": 948483875.0, "step": 6701 }, { "epoch": 2.4550909174185866, "grad_norm": 0.13347470205676026, "learning_rate": 2.340374780199341e-05, "loss": 0.4531, "num_tokens": 949152681.0, "step": 6702 }, { "epoch": 2.455457335226492, "grad_norm": 0.13561894762067975, "learning_rate": 2.339948972276894e-05, "loss": 0.4601, "num_tokens": 949902035.0, "step": 6703 }, { "epoch": 2.4558237530343976, "grad_norm": 0.13626372903330222, "learning_rate": 2.3395231564750413e-05, "loss": 0.4397, "num_tokens": 950743405.0, "step": 6704 }, { "epoch": 2.456190170842303, "grad_norm": 0.13656473821931767, "learning_rate": 2.3390973328177565e-05, "loss": 0.4642, "num_tokens": 951565361.0, "step": 6705 }, { "epoch": 2.4565565886502085, "grad_norm": 0.13716639390722285, "learning_rate": 2.3386715013290142e-05, "loss": 0.4273, "num_tokens": 952218913.0, "step": 6706 }, { "epoch": 2.456923006458114, "grad_norm": 0.14011208960880078, "learning_rate": 2.33824566203279e-05, "loss": 0.4821, "num_tokens": 952895670.0, "step": 6707 }, { "epoch": 2.4572894242660195, "grad_norm": 0.1452659573410118, "learning_rate": 2.33781981495306e-05, "loss": 0.4852, "num_tokens": 953620959.0, "step": 6708 }, { "epoch": 2.4576558420739247, "grad_norm": 0.14751357889466044, "learning_rate": 2.337393960113799e-05, "loss": 0.4485, "num_tokens": 954359614.0, "step": 6709 }, { "epoch": 2.4580222598818304, "grad_norm": 0.1348774580556258, "learning_rate": 2.3369680975389846e-05, "loss": 0.4915, "num_tokens": 955249748.0, "step": 6710 }, { "epoch": 2.4583886776897357, "grad_norm": 0.1330586564219695, "learning_rate": 2.3365422272525934e-05, "loss": 0.4535, "num_tokens": 955970994.0, "step": 6711 }, { "epoch": 2.4587550954976414, "grad_norm": 0.16038487307889224, "learning_rate": 2.336116349278602e-05, "loss": 0.508, "num_tokens": 956615560.0, "step": 6712 }, { "epoch": 2.4591215133055466, "grad_norm": 0.13844827158655076, "learning_rate": 2.3356904636409895e-05, "loss": 0.4599, "num_tokens": 957407176.0, "step": 6713 }, { "epoch": 2.4594879311134523, "grad_norm": 0.12913769012561271, "learning_rate": 2.3352645703637324e-05, "loss": 0.4388, "num_tokens": 958257295.0, "step": 6714 }, { "epoch": 2.4598543489213576, "grad_norm": 0.1356455181407049, "learning_rate": 2.334838669470811e-05, "loss": 0.4475, "num_tokens": 959012941.0, "step": 6715 }, { "epoch": 2.460220766729263, "grad_norm": 0.13079446513597717, "learning_rate": 2.334412760986204e-05, "loss": 0.4568, "num_tokens": 959792531.0, "step": 6716 }, { "epoch": 2.4605871845371685, "grad_norm": 0.13557914428876397, "learning_rate": 2.33398684493389e-05, "loss": 0.4463, "num_tokens": 960558849.0, "step": 6717 }, { "epoch": 2.460953602345074, "grad_norm": 0.12933265012841347, "learning_rate": 2.3335609213378498e-05, "loss": 0.4511, "num_tokens": 961431728.0, "step": 6718 }, { "epoch": 2.4613200201529795, "grad_norm": 0.1289038852714497, "learning_rate": 2.333134990222063e-05, "loss": 0.4587, "num_tokens": 962178018.0, "step": 6719 }, { "epoch": 2.4616864379608847, "grad_norm": 0.14467099191107535, "learning_rate": 2.3327090516105115e-05, "loss": 0.5105, "num_tokens": 962953337.0, "step": 6720 }, { "epoch": 2.4620528557687904, "grad_norm": 0.14665482970743396, "learning_rate": 2.332283105527175e-05, "loss": 0.4525, "num_tokens": 963633475.0, "step": 6721 }, { "epoch": 2.4624192735766957, "grad_norm": 0.13689894747178746, "learning_rate": 2.331857151996037e-05, "loss": 0.4353, "num_tokens": 964323637.0, "step": 6722 }, { "epoch": 2.4627856913846013, "grad_norm": 0.13024044381329, "learning_rate": 2.3314311910410776e-05, "loss": 0.4301, "num_tokens": 965082036.0, "step": 6723 }, { "epoch": 2.4631521091925066, "grad_norm": 0.1331019214104805, "learning_rate": 2.33100522268628e-05, "loss": 0.4296, "num_tokens": 965795622.0, "step": 6724 }, { "epoch": 2.4635185270004123, "grad_norm": 0.14391859986409158, "learning_rate": 2.330579246955628e-05, "loss": 0.4561, "num_tokens": 966543163.0, "step": 6725 }, { "epoch": 2.4638849448083175, "grad_norm": 0.1278755691981016, "learning_rate": 2.3301532638731036e-05, "loss": 0.4378, "num_tokens": 967340660.0, "step": 6726 }, { "epoch": 2.4642513626162232, "grad_norm": 0.14133550562136554, "learning_rate": 2.3297272734626913e-05, "loss": 0.4458, "num_tokens": 967995689.0, "step": 6727 }, { "epoch": 2.4646177804241285, "grad_norm": 0.13548821962526858, "learning_rate": 2.329301275748374e-05, "loss": 0.4673, "num_tokens": 968740035.0, "step": 6728 }, { "epoch": 2.464984198232034, "grad_norm": 0.14706803579272196, "learning_rate": 2.3288752707541385e-05, "loss": 0.4646, "num_tokens": 969458942.0, "step": 6729 }, { "epoch": 2.4653506160399394, "grad_norm": 0.13767801404714583, "learning_rate": 2.3284492585039672e-05, "loss": 0.4838, "num_tokens": 970146293.0, "step": 6730 }, { "epoch": 2.465717033847845, "grad_norm": 0.14335010883116817, "learning_rate": 2.328023239021847e-05, "loss": 0.4518, "num_tokens": 970914302.0, "step": 6731 }, { "epoch": 2.4660834516557504, "grad_norm": 0.1538677372797875, "learning_rate": 2.3275972123317628e-05, "loss": 0.4536, "num_tokens": 971615606.0, "step": 6732 }, { "epoch": 2.466449869463656, "grad_norm": 0.14237412911606773, "learning_rate": 2.327171178457702e-05, "loss": 0.4536, "num_tokens": 972373459.0, "step": 6733 }, { "epoch": 2.4668162872715613, "grad_norm": 0.14462627621275184, "learning_rate": 2.32674513742365e-05, "loss": 0.4358, "num_tokens": 973140557.0, "step": 6734 }, { "epoch": 2.467182705079467, "grad_norm": 0.15593981134926901, "learning_rate": 2.3263190892535934e-05, "loss": 0.4867, "num_tokens": 973940919.0, "step": 6735 }, { "epoch": 2.4675491228873723, "grad_norm": 0.1498192379134472, "learning_rate": 2.3258930339715212e-05, "loss": 0.4629, "num_tokens": 974772557.0, "step": 6736 }, { "epoch": 2.467915540695278, "grad_norm": 0.14193811599946335, "learning_rate": 2.3254669716014194e-05, "loss": 0.4522, "num_tokens": 975484866.0, "step": 6737 }, { "epoch": 2.468281958503183, "grad_norm": 0.13168072135657022, "learning_rate": 2.3250409021672776e-05, "loss": 0.4593, "num_tokens": 976256226.0, "step": 6738 }, { "epoch": 2.468648376311089, "grad_norm": 0.15095066367535115, "learning_rate": 2.3246148256930835e-05, "loss": 0.4707, "num_tokens": 977029872.0, "step": 6739 }, { "epoch": 2.469014794118994, "grad_norm": 0.14071821503150136, "learning_rate": 2.324188742202826e-05, "loss": 0.4408, "num_tokens": 977742213.0, "step": 6740 }, { "epoch": 2.4693812119268994, "grad_norm": 0.1321326830175617, "learning_rate": 2.323762651720495e-05, "loss": 0.4537, "num_tokens": 978564331.0, "step": 6741 }, { "epoch": 2.469747629734805, "grad_norm": 0.13776107293136688, "learning_rate": 2.3233365542700797e-05, "loss": 0.4537, "num_tokens": 979343348.0, "step": 6742 }, { "epoch": 2.470114047542711, "grad_norm": 0.14133983117606613, "learning_rate": 2.322910449875571e-05, "loss": 0.4417, "num_tokens": 980070884.0, "step": 6743 }, { "epoch": 2.470480465350616, "grad_norm": 0.13352739230229105, "learning_rate": 2.3224843385609587e-05, "loss": 0.484, "num_tokens": 980814153.0, "step": 6744 }, { "epoch": 2.4708468831585213, "grad_norm": 0.12895276599302097, "learning_rate": 2.322058220350234e-05, "loss": 0.433, "num_tokens": 981594036.0, "step": 6745 }, { "epoch": 2.471213300966427, "grad_norm": 0.13787209532930678, "learning_rate": 2.321632095267388e-05, "loss": 0.4678, "num_tokens": 982429479.0, "step": 6746 }, { "epoch": 2.4715797187743322, "grad_norm": 0.13447270775021064, "learning_rate": 2.3212059633364125e-05, "loss": 0.4505, "num_tokens": 983197471.0, "step": 6747 }, { "epoch": 2.471946136582238, "grad_norm": 0.1315875556715126, "learning_rate": 2.3207798245813e-05, "loss": 0.4692, "num_tokens": 983995589.0, "step": 6748 }, { "epoch": 2.472312554390143, "grad_norm": 0.14405936640217598, "learning_rate": 2.3203536790260423e-05, "loss": 0.4742, "num_tokens": 984669531.0, "step": 6749 }, { "epoch": 2.472678972198049, "grad_norm": 0.14416880122107126, "learning_rate": 2.3199275266946328e-05, "loss": 0.4725, "num_tokens": 985399995.0, "step": 6750 }, { "epoch": 2.473045390005954, "grad_norm": 0.12594126888773058, "learning_rate": 2.3195013676110643e-05, "loss": 0.4241, "num_tokens": 986199468.0, "step": 6751 }, { "epoch": 2.47341180781386, "grad_norm": 0.130452446935899, "learning_rate": 2.31907520179933e-05, "loss": 0.4647, "num_tokens": 986970115.0, "step": 6752 }, { "epoch": 2.473778225621765, "grad_norm": 0.1328138095537514, "learning_rate": 2.318649029283425e-05, "loss": 0.4422, "num_tokens": 987672957.0, "step": 6753 }, { "epoch": 2.4741446434296708, "grad_norm": 0.14757991006117535, "learning_rate": 2.3182228500873434e-05, "loss": 0.4422, "num_tokens": 988372909.0, "step": 6754 }, { "epoch": 2.474511061237576, "grad_norm": 0.12496738721840424, "learning_rate": 2.317796664235079e-05, "loss": 0.4394, "num_tokens": 989149034.0, "step": 6755 }, { "epoch": 2.4748774790454817, "grad_norm": 0.12804468946081912, "learning_rate": 2.317370471750628e-05, "loss": 0.4301, "num_tokens": 989968893.0, "step": 6756 }, { "epoch": 2.475243896853387, "grad_norm": 0.1457255037964242, "learning_rate": 2.3169442726579855e-05, "loss": 0.4613, "num_tokens": 990639274.0, "step": 6757 }, { "epoch": 2.4756103146612927, "grad_norm": 0.13799292056271162, "learning_rate": 2.3165180669811474e-05, "loss": 0.426, "num_tokens": 991424862.0, "step": 6758 }, { "epoch": 2.475976732469198, "grad_norm": 0.12805260873353633, "learning_rate": 2.316091854744109e-05, "loss": 0.4303, "num_tokens": 992266819.0, "step": 6759 }, { "epoch": 2.4763431502771036, "grad_norm": 0.14272216413595545, "learning_rate": 2.3156656359708687e-05, "loss": 0.4708, "num_tokens": 993083780.0, "step": 6760 }, { "epoch": 2.476709568085009, "grad_norm": 0.1299480400393473, "learning_rate": 2.3152394106854225e-05, "loss": 0.4433, "num_tokens": 993913058.0, "step": 6761 }, { "epoch": 2.4770759858929146, "grad_norm": 0.14394384687943046, "learning_rate": 2.314813178911767e-05, "loss": 0.4326, "num_tokens": 994762698.0, "step": 6762 }, { "epoch": 2.47744240370082, "grad_norm": 0.13949475035279119, "learning_rate": 2.314386940673901e-05, "loss": 0.4775, "num_tokens": 995440672.0, "step": 6763 }, { "epoch": 2.4778088215087255, "grad_norm": 0.1373228076569564, "learning_rate": 2.313960695995822e-05, "loss": 0.4386, "num_tokens": 996149065.0, "step": 6764 }, { "epoch": 2.4781752393166308, "grad_norm": 0.13538004218120134, "learning_rate": 2.3135344449015285e-05, "loss": 0.468, "num_tokens": 996925251.0, "step": 6765 }, { "epoch": 2.4785416571245364, "grad_norm": 0.12913497980958982, "learning_rate": 2.3131081874150203e-05, "loss": 0.4359, "num_tokens": 997757781.0, "step": 6766 }, { "epoch": 2.4789080749324417, "grad_norm": 0.1428170497547513, "learning_rate": 2.3126819235602952e-05, "loss": 0.4693, "num_tokens": 998532849.0, "step": 6767 }, { "epoch": 2.4792744927403474, "grad_norm": 0.1368853782120202, "learning_rate": 2.3122556533613526e-05, "loss": 0.4542, "num_tokens": 999295868.0, "step": 6768 }, { "epoch": 2.4796409105482526, "grad_norm": 0.12993712661094375, "learning_rate": 2.3118293768421936e-05, "loss": 0.4619, "num_tokens": 1000068968.0, "step": 6769 }, { "epoch": 2.480007328356158, "grad_norm": 0.13745728298738322, "learning_rate": 2.3114030940268176e-05, "loss": 0.4538, "num_tokens": 1000727390.0, "step": 6770 }, { "epoch": 2.4803737461640636, "grad_norm": 0.13170203678071798, "learning_rate": 2.310976804939225e-05, "loss": 0.4492, "num_tokens": 1001504986.0, "step": 6771 }, { "epoch": 2.4807401639719693, "grad_norm": 0.1485189256589938, "learning_rate": 2.3105505096034174e-05, "loss": 0.4622, "num_tokens": 1002272095.0, "step": 6772 }, { "epoch": 2.4811065817798745, "grad_norm": 0.1240480903877364, "learning_rate": 2.3101242080433954e-05, "loss": 0.4352, "num_tokens": 1003072097.0, "step": 6773 }, { "epoch": 2.48147299958778, "grad_norm": 0.12822403705607682, "learning_rate": 2.3096979002831613e-05, "loss": 0.4298, "num_tokens": 1003908865.0, "step": 6774 }, { "epoch": 2.4818394173956855, "grad_norm": 0.1451157455879365, "learning_rate": 2.309271586346716e-05, "loss": 0.4699, "num_tokens": 1004699017.0, "step": 6775 }, { "epoch": 2.4822058352035907, "grad_norm": 0.14111689655626222, "learning_rate": 2.3088452662580637e-05, "loss": 0.4858, "num_tokens": 1005454483.0, "step": 6776 }, { "epoch": 2.4825722530114964, "grad_norm": 0.15565245950634193, "learning_rate": 2.3084189400412052e-05, "loss": 0.4626, "num_tokens": 1006224518.0, "step": 6777 }, { "epoch": 2.4829386708194017, "grad_norm": 0.1405334271255355, "learning_rate": 2.3079926077201443e-05, "loss": 0.4341, "num_tokens": 1007086615.0, "step": 6778 }, { "epoch": 2.4833050886273074, "grad_norm": 0.14651342564630898, "learning_rate": 2.307566269318884e-05, "loss": 0.4704, "num_tokens": 1007722681.0, "step": 6779 }, { "epoch": 2.4836715064352126, "grad_norm": 0.13090565525622627, "learning_rate": 2.307139924861429e-05, "loss": 0.4275, "num_tokens": 1008522882.0, "step": 6780 }, { "epoch": 2.4840379242431183, "grad_norm": 0.16653085780728516, "learning_rate": 2.3067135743717822e-05, "loss": 0.4943, "num_tokens": 1009258573.0, "step": 6781 }, { "epoch": 2.4844043420510236, "grad_norm": 0.1392265936890518, "learning_rate": 2.3062872178739486e-05, "loss": 0.4592, "num_tokens": 1010097122.0, "step": 6782 }, { "epoch": 2.4847707598589293, "grad_norm": 0.12900623213464316, "learning_rate": 2.3058608553919325e-05, "loss": 0.4363, "num_tokens": 1010940933.0, "step": 6783 }, { "epoch": 2.4851371776668345, "grad_norm": 0.13447634443519785, "learning_rate": 2.305434486949739e-05, "loss": 0.4523, "num_tokens": 1011859042.0, "step": 6784 }, { "epoch": 2.48550359547474, "grad_norm": 0.15703852376754876, "learning_rate": 2.3050081125713743e-05, "loss": 0.4473, "num_tokens": 1012547755.0, "step": 6785 }, { "epoch": 2.4858700132826455, "grad_norm": 0.13259268751805317, "learning_rate": 2.304581732280843e-05, "loss": 0.451, "num_tokens": 1013333334.0, "step": 6786 }, { "epoch": 2.486236431090551, "grad_norm": 0.14177037797508413, "learning_rate": 2.3041553461021518e-05, "loss": 0.4235, "num_tokens": 1014119746.0, "step": 6787 }, { "epoch": 2.4866028488984564, "grad_norm": 0.14096335281230485, "learning_rate": 2.3037289540593073e-05, "loss": 0.4868, "num_tokens": 1014875429.0, "step": 6788 }, { "epoch": 2.486969266706362, "grad_norm": 0.13996391325862378, "learning_rate": 2.3033025561763153e-05, "loss": 0.4618, "num_tokens": 1015624538.0, "step": 6789 }, { "epoch": 2.4873356845142673, "grad_norm": 0.13975698624191923, "learning_rate": 2.3028761524771834e-05, "loss": 0.4452, "num_tokens": 1016289103.0, "step": 6790 }, { "epoch": 2.487702102322173, "grad_norm": 0.1298737239380731, "learning_rate": 2.30244974298592e-05, "loss": 0.4495, "num_tokens": 1017142284.0, "step": 6791 }, { "epoch": 2.4880685201300783, "grad_norm": 0.13929666045070838, "learning_rate": 2.3020233277265312e-05, "loss": 0.4596, "num_tokens": 1017933398.0, "step": 6792 }, { "epoch": 2.488434937937984, "grad_norm": 0.14493308545990713, "learning_rate": 2.301596906723025e-05, "loss": 0.4788, "num_tokens": 1018727252.0, "step": 6793 }, { "epoch": 2.4888013557458892, "grad_norm": 0.1617698948759057, "learning_rate": 2.301170479999412e-05, "loss": 0.4804, "num_tokens": 1019280019.0, "step": 6794 }, { "epoch": 2.4891677735537945, "grad_norm": 0.12618000716344205, "learning_rate": 2.300744047579698e-05, "loss": 0.4304, "num_tokens": 1020114267.0, "step": 6795 }, { "epoch": 2.4895341913617, "grad_norm": 0.1572876922364208, "learning_rate": 2.300317609487894e-05, "loss": 0.4556, "num_tokens": 1020837296.0, "step": 6796 }, { "epoch": 2.489900609169606, "grad_norm": 0.13481058767032514, "learning_rate": 2.299891165748008e-05, "loss": 0.4716, "num_tokens": 1021602862.0, "step": 6797 }, { "epoch": 2.490267026977511, "grad_norm": 0.14904347341785146, "learning_rate": 2.2994647163840506e-05, "loss": 0.4836, "num_tokens": 1022308314.0, "step": 6798 }, { "epoch": 2.4906334447854164, "grad_norm": 0.13765413931153062, "learning_rate": 2.299038261420031e-05, "loss": 0.4737, "num_tokens": 1023081794.0, "step": 6799 }, { "epoch": 2.490999862593322, "grad_norm": 0.14488358526153589, "learning_rate": 2.29861180087996e-05, "loss": 0.467, "num_tokens": 1023738257.0, "step": 6800 }, { "epoch": 2.4913662804012273, "grad_norm": 0.14790497889572593, "learning_rate": 2.2981853347878484e-05, "loss": 0.4428, "num_tokens": 1024530228.0, "step": 6801 }, { "epoch": 2.491732698209133, "grad_norm": 0.13206016525649722, "learning_rate": 2.2977588631677062e-05, "loss": 0.4624, "num_tokens": 1025308158.0, "step": 6802 }, { "epoch": 2.4920991160170383, "grad_norm": 0.13702354003483036, "learning_rate": 2.2973323860435453e-05, "loss": 0.4831, "num_tokens": 1026034902.0, "step": 6803 }, { "epoch": 2.492465533824944, "grad_norm": 0.1374257684649163, "learning_rate": 2.296905903439377e-05, "loss": 0.486, "num_tokens": 1026820605.0, "step": 6804 }, { "epoch": 2.492831951632849, "grad_norm": 0.13005548846070075, "learning_rate": 2.2964794153792134e-05, "loss": 0.4509, "num_tokens": 1027672966.0, "step": 6805 }, { "epoch": 2.493198369440755, "grad_norm": 0.14991401068644364, "learning_rate": 2.2960529218870658e-05, "loss": 0.4819, "num_tokens": 1028265975.0, "step": 6806 }, { "epoch": 2.49356478724866, "grad_norm": 0.1306231709460124, "learning_rate": 2.2956264229869472e-05, "loss": 0.4696, "num_tokens": 1029028604.0, "step": 6807 }, { "epoch": 2.493931205056566, "grad_norm": 0.14183737628739282, "learning_rate": 2.2951999187028714e-05, "loss": 0.472, "num_tokens": 1029750731.0, "step": 6808 }, { "epoch": 2.494297622864471, "grad_norm": 0.14489835697321576, "learning_rate": 2.2947734090588497e-05, "loss": 0.4737, "num_tokens": 1030509183.0, "step": 6809 }, { "epoch": 2.494664040672377, "grad_norm": 0.14295796187168153, "learning_rate": 2.2943468940788955e-05, "loss": 0.435, "num_tokens": 1031283477.0, "step": 6810 }, { "epoch": 2.495030458480282, "grad_norm": 0.12663771546884, "learning_rate": 2.2939203737870238e-05, "loss": 0.4307, "num_tokens": 1032027583.0, "step": 6811 }, { "epoch": 2.4953968762881877, "grad_norm": 0.13844585559647463, "learning_rate": 2.2934938482072478e-05, "loss": 0.4511, "num_tokens": 1032812301.0, "step": 6812 }, { "epoch": 2.495763294096093, "grad_norm": 0.13781820086242214, "learning_rate": 2.293067317363582e-05, "loss": 0.4494, "num_tokens": 1033676465.0, "step": 6813 }, { "epoch": 2.4961297119039987, "grad_norm": 0.14503662727992117, "learning_rate": 2.29264078128004e-05, "loss": 0.4434, "num_tokens": 1034339580.0, "step": 6814 }, { "epoch": 2.496496129711904, "grad_norm": 0.1363886744646412, "learning_rate": 2.292214239980638e-05, "loss": 0.4642, "num_tokens": 1035065508.0, "step": 6815 }, { "epoch": 2.4968625475198096, "grad_norm": 0.13645100723309006, "learning_rate": 2.2917876934893903e-05, "loss": 0.4485, "num_tokens": 1035845492.0, "step": 6816 }, { "epoch": 2.497228965327715, "grad_norm": 0.13546901376883405, "learning_rate": 2.291361141830312e-05, "loss": 0.4331, "num_tokens": 1036616521.0, "step": 6817 }, { "epoch": 2.4975953831356206, "grad_norm": 0.14156920150262506, "learning_rate": 2.2909345850274198e-05, "loss": 0.4671, "num_tokens": 1037333511.0, "step": 6818 }, { "epoch": 2.497961800943526, "grad_norm": 0.13559435763881977, "learning_rate": 2.290508023104729e-05, "loss": 0.4643, "num_tokens": 1038066574.0, "step": 6819 }, { "epoch": 2.498328218751431, "grad_norm": 0.13352756602881674, "learning_rate": 2.290081456086256e-05, "loss": 0.4572, "num_tokens": 1038850192.0, "step": 6820 }, { "epoch": 2.4986946365593368, "grad_norm": 0.12819925331724288, "learning_rate": 2.289654883996017e-05, "loss": 0.477, "num_tokens": 1039592752.0, "step": 6821 }, { "epoch": 2.4990610543672425, "grad_norm": 0.13684593539233905, "learning_rate": 2.28922830685803e-05, "loss": 0.472, "num_tokens": 1040307508.0, "step": 6822 }, { "epoch": 2.4994274721751477, "grad_norm": 0.14397586487448902, "learning_rate": 2.288801724696311e-05, "loss": 0.4746, "num_tokens": 1041075165.0, "step": 6823 }, { "epoch": 2.499793889983053, "grad_norm": 0.1310771968760252, "learning_rate": 2.288375137534878e-05, "loss": 0.4795, "num_tokens": 1042042814.0, "step": 6824 }, { "epoch": 2.5001603077909587, "grad_norm": 0.14616061345052853, "learning_rate": 2.2879485453977485e-05, "loss": 0.4812, "num_tokens": 1042780245.0, "step": 6825 }, { "epoch": 2.5005267255988644, "grad_norm": 0.1538527521555314, "learning_rate": 2.287521948308941e-05, "loss": 0.4577, "num_tokens": 1043463956.0, "step": 6826 }, { "epoch": 2.5008931434067696, "grad_norm": 0.1338461493121117, "learning_rate": 2.287095346292473e-05, "loss": 0.4724, "num_tokens": 1044358947.0, "step": 6827 }, { "epoch": 2.501259561214675, "grad_norm": 0.1320462383532263, "learning_rate": 2.286668739372363e-05, "loss": 0.479, "num_tokens": 1045109176.0, "step": 6828 }, { "epoch": 2.5016259790225805, "grad_norm": 0.12827464555975707, "learning_rate": 2.286242127572631e-05, "loss": 0.4379, "num_tokens": 1045835320.0, "step": 6829 }, { "epoch": 2.501992396830486, "grad_norm": 0.1259892774533055, "learning_rate": 2.2858155109172943e-05, "loss": 0.4428, "num_tokens": 1046648865.0, "step": 6830 }, { "epoch": 2.5023588146383915, "grad_norm": 0.1399063696200348, "learning_rate": 2.2853888894303737e-05, "loss": 0.4681, "num_tokens": 1047481180.0, "step": 6831 }, { "epoch": 2.5027252324462967, "grad_norm": 0.12649199128191213, "learning_rate": 2.284962263135889e-05, "loss": 0.4251, "num_tokens": 1048222980.0, "step": 6832 }, { "epoch": 2.5030916502542024, "grad_norm": 0.1445828827086879, "learning_rate": 2.2845356320578595e-05, "loss": 0.4422, "num_tokens": 1048886352.0, "step": 6833 }, { "epoch": 2.5034580680621077, "grad_norm": 0.1367374718759476, "learning_rate": 2.2841089962203042e-05, "loss": 0.4369, "num_tokens": 1049520485.0, "step": 6834 }, { "epoch": 2.5038244858700134, "grad_norm": 0.14144982709903967, "learning_rate": 2.2836823556472455e-05, "loss": 0.4529, "num_tokens": 1050225339.0, "step": 6835 }, { "epoch": 2.5041909036779186, "grad_norm": 0.13266115325789055, "learning_rate": 2.2832557103627032e-05, "loss": 0.4567, "num_tokens": 1050947082.0, "step": 6836 }, { "epoch": 2.5045573214858243, "grad_norm": 0.1571937157542309, "learning_rate": 2.2828290603906985e-05, "loss": 0.4761, "num_tokens": 1051617428.0, "step": 6837 }, { "epoch": 2.5049237392937296, "grad_norm": 0.1596603862511143, "learning_rate": 2.2824024057552524e-05, "loss": 0.4591, "num_tokens": 1052280746.0, "step": 6838 }, { "epoch": 2.5052901571016353, "grad_norm": 0.12963160494901052, "learning_rate": 2.2819757464803865e-05, "loss": 0.4449, "num_tokens": 1053058409.0, "step": 6839 }, { "epoch": 2.5056565749095405, "grad_norm": 0.13325800682025365, "learning_rate": 2.2815490825901235e-05, "loss": 0.484, "num_tokens": 1053940096.0, "step": 6840 }, { "epoch": 2.506022992717446, "grad_norm": 0.15135700504101526, "learning_rate": 2.2811224141084835e-05, "loss": 0.4747, "num_tokens": 1054678346.0, "step": 6841 }, { "epoch": 2.5063894105253515, "grad_norm": 0.14169995919734468, "learning_rate": 2.28069574105949e-05, "loss": 0.4615, "num_tokens": 1055467046.0, "step": 6842 }, { "epoch": 2.506755828333257, "grad_norm": 0.1377893903767369, "learning_rate": 2.2802690634671664e-05, "loss": 0.4548, "num_tokens": 1056242558.0, "step": 6843 }, { "epoch": 2.5071222461411624, "grad_norm": 0.1280866195501303, "learning_rate": 2.2798423813555333e-05, "loss": 0.4305, "num_tokens": 1057040947.0, "step": 6844 }, { "epoch": 2.5074886639490677, "grad_norm": 0.13931796194484491, "learning_rate": 2.2794156947486155e-05, "loss": 0.4487, "num_tokens": 1057777133.0, "step": 6845 }, { "epoch": 2.5078550817569734, "grad_norm": 0.13592569492253423, "learning_rate": 2.2789890036704363e-05, "loss": 0.4615, "num_tokens": 1058538383.0, "step": 6846 }, { "epoch": 2.508221499564879, "grad_norm": 0.13388220434156045, "learning_rate": 2.2785623081450177e-05, "loss": 0.4785, "num_tokens": 1059346211.0, "step": 6847 }, { "epoch": 2.5085879173727843, "grad_norm": 0.12696870260832194, "learning_rate": 2.278135608196385e-05, "loss": 0.4434, "num_tokens": 1060126531.0, "step": 6848 }, { "epoch": 2.5089543351806896, "grad_norm": 0.12256128942434359, "learning_rate": 2.277708903848562e-05, "loss": 0.4328, "num_tokens": 1060944505.0, "step": 6849 }, { "epoch": 2.5093207529885952, "grad_norm": 0.13500938352752817, "learning_rate": 2.2772821951255724e-05, "loss": 0.478, "num_tokens": 1061791999.0, "step": 6850 }, { "epoch": 2.509687170796501, "grad_norm": 0.1464941346286275, "learning_rate": 2.2768554820514414e-05, "loss": 0.4757, "num_tokens": 1062631953.0, "step": 6851 }, { "epoch": 2.510053588604406, "grad_norm": 0.13275255686630866, "learning_rate": 2.276428764650193e-05, "loss": 0.4398, "num_tokens": 1063421795.0, "step": 6852 }, { "epoch": 2.5104200064123114, "grad_norm": 0.1279533244989163, "learning_rate": 2.2760020429458532e-05, "loss": 0.4364, "num_tokens": 1064199966.0, "step": 6853 }, { "epoch": 2.510786424220217, "grad_norm": 0.14350574504794114, "learning_rate": 2.2755753169624475e-05, "loss": 0.4433, "num_tokens": 1065069462.0, "step": 6854 }, { "epoch": 2.511152842028123, "grad_norm": 0.1506425488094057, "learning_rate": 2.275148586724e-05, "loss": 0.5095, "num_tokens": 1065753902.0, "step": 6855 }, { "epoch": 2.511519259836028, "grad_norm": 0.13945054828091755, "learning_rate": 2.274721852254537e-05, "loss": 0.4583, "num_tokens": 1066417242.0, "step": 6856 }, { "epoch": 2.5118856776439333, "grad_norm": 0.13160164863394058, "learning_rate": 2.2742951135780857e-05, "loss": 0.4854, "num_tokens": 1067238022.0, "step": 6857 }, { "epoch": 2.512252095451839, "grad_norm": 0.13465479063932018, "learning_rate": 2.273868370718671e-05, "loss": 0.4709, "num_tokens": 1068045508.0, "step": 6858 }, { "epoch": 2.5126185132597443, "grad_norm": 0.1520412756122026, "learning_rate": 2.273441623700319e-05, "loss": 0.4848, "num_tokens": 1068664316.0, "step": 6859 }, { "epoch": 2.51298493106765, "grad_norm": 0.13580614839509733, "learning_rate": 2.2730148725470583e-05, "loss": 0.4673, "num_tokens": 1069503057.0, "step": 6860 }, { "epoch": 2.5133513488755552, "grad_norm": 0.1557035562309876, "learning_rate": 2.272588117282914e-05, "loss": 0.4703, "num_tokens": 1070189152.0, "step": 6861 }, { "epoch": 2.513717766683461, "grad_norm": 0.1388138395158523, "learning_rate": 2.2721613579319143e-05, "loss": 0.4696, "num_tokens": 1070969368.0, "step": 6862 }, { "epoch": 2.514084184491366, "grad_norm": 0.14054399609642262, "learning_rate": 2.271734594518086e-05, "loss": 0.4753, "num_tokens": 1071814274.0, "step": 6863 }, { "epoch": 2.514450602299272, "grad_norm": 0.1355610721283745, "learning_rate": 2.2713078270654573e-05, "loss": 0.4866, "num_tokens": 1072665886.0, "step": 6864 }, { "epoch": 2.514817020107177, "grad_norm": 0.13215590272817918, "learning_rate": 2.2708810555980554e-05, "loss": 0.4568, "num_tokens": 1073509438.0, "step": 6865 }, { "epoch": 2.515183437915083, "grad_norm": 0.15089826562033914, "learning_rate": 2.2704542801399095e-05, "loss": 0.4542, "num_tokens": 1074246085.0, "step": 6866 }, { "epoch": 2.515549855722988, "grad_norm": 0.13766385259854383, "learning_rate": 2.2700275007150464e-05, "loss": 0.437, "num_tokens": 1075011248.0, "step": 6867 }, { "epoch": 2.5159162735308938, "grad_norm": 0.14443390411603887, "learning_rate": 2.2696007173474953e-05, "loss": 0.4653, "num_tokens": 1075781123.0, "step": 6868 }, { "epoch": 2.516282691338799, "grad_norm": 0.13226550127386283, "learning_rate": 2.2691739300612862e-05, "loss": 0.4467, "num_tokens": 1076594459.0, "step": 6869 }, { "epoch": 2.5166491091467043, "grad_norm": 0.12532990687986337, "learning_rate": 2.2687471388804464e-05, "loss": 0.4407, "num_tokens": 1077502491.0, "step": 6870 }, { "epoch": 2.51701552695461, "grad_norm": 0.14556978699642684, "learning_rate": 2.2683203438290054e-05, "loss": 0.4618, "num_tokens": 1078187526.0, "step": 6871 }, { "epoch": 2.5173819447625156, "grad_norm": 0.1439405544211467, "learning_rate": 2.267893544930992e-05, "loss": 0.4562, "num_tokens": 1078913099.0, "step": 6872 }, { "epoch": 2.517748362570421, "grad_norm": 0.14575082657409133, "learning_rate": 2.267466742210438e-05, "loss": 0.4373, "num_tokens": 1079639628.0, "step": 6873 }, { "epoch": 2.518114780378326, "grad_norm": 0.14663577016321788, "learning_rate": 2.2670399356913717e-05, "loss": 0.4325, "num_tokens": 1080331549.0, "step": 6874 }, { "epoch": 2.518481198186232, "grad_norm": 0.1467783357331525, "learning_rate": 2.266613125397823e-05, "loss": 0.4719, "num_tokens": 1081128569.0, "step": 6875 }, { "epoch": 2.5188476159941375, "grad_norm": 0.13643359492691776, "learning_rate": 2.2661863113538227e-05, "loss": 0.4713, "num_tokens": 1081988250.0, "step": 6876 }, { "epoch": 2.519214033802043, "grad_norm": 0.13868348702373318, "learning_rate": 2.265759493583401e-05, "loss": 0.4168, "num_tokens": 1082780744.0, "step": 6877 }, { "epoch": 2.519580451609948, "grad_norm": 0.13948675290009965, "learning_rate": 2.2653326721105887e-05, "loss": 0.4716, "num_tokens": 1083632213.0, "step": 6878 }, { "epoch": 2.5199468694178537, "grad_norm": 0.14788150894289778, "learning_rate": 2.2649058469594166e-05, "loss": 0.4465, "num_tokens": 1084305259.0, "step": 6879 }, { "epoch": 2.5203132872257594, "grad_norm": 0.14288839504589074, "learning_rate": 2.2644790181539164e-05, "loss": 0.449, "num_tokens": 1085039842.0, "step": 6880 }, { "epoch": 2.5206797050336647, "grad_norm": 0.14292074931402823, "learning_rate": 2.2640521857181187e-05, "loss": 0.4334, "num_tokens": 1085725304.0, "step": 6881 }, { "epoch": 2.52104612284157, "grad_norm": 0.1560045470823233, "learning_rate": 2.263625349676055e-05, "loss": 0.4703, "num_tokens": 1086403262.0, "step": 6882 }, { "epoch": 2.5214125406494756, "grad_norm": 0.14105734207887782, "learning_rate": 2.2631985100517574e-05, "loss": 0.4503, "num_tokens": 1087180157.0, "step": 6883 }, { "epoch": 2.521778958457381, "grad_norm": 0.12830124447182228, "learning_rate": 2.262771666869258e-05, "loss": 0.4615, "num_tokens": 1087962549.0, "step": 6884 }, { "epoch": 2.5221453762652866, "grad_norm": 0.1398597027583639, "learning_rate": 2.2623448201525878e-05, "loss": 0.4515, "num_tokens": 1088733197.0, "step": 6885 }, { "epoch": 2.522511794073192, "grad_norm": 0.1441897922506102, "learning_rate": 2.261917969925781e-05, "loss": 0.4557, "num_tokens": 1089373615.0, "step": 6886 }, { "epoch": 2.5228782118810975, "grad_norm": 0.12891835278608038, "learning_rate": 2.261491116212869e-05, "loss": 0.4398, "num_tokens": 1090213270.0, "step": 6887 }, { "epoch": 2.5232446296890028, "grad_norm": 0.16073567199493316, "learning_rate": 2.261064259037884e-05, "loss": 0.469, "num_tokens": 1090873543.0, "step": 6888 }, { "epoch": 2.5236110474969085, "grad_norm": 0.14958867635955744, "learning_rate": 2.2606373984248592e-05, "loss": 0.4608, "num_tokens": 1091635949.0, "step": 6889 }, { "epoch": 2.5239774653048137, "grad_norm": 0.13111994266310717, "learning_rate": 2.2602105343978286e-05, "loss": 0.4674, "num_tokens": 1092429109.0, "step": 6890 }, { "epoch": 2.5243438831127194, "grad_norm": 0.12895488979080635, "learning_rate": 2.2597836669808248e-05, "loss": 0.4695, "num_tokens": 1093212300.0, "step": 6891 }, { "epoch": 2.5247103009206247, "grad_norm": 0.1361456526230467, "learning_rate": 2.259356796197881e-05, "loss": 0.4445, "num_tokens": 1093992837.0, "step": 6892 }, { "epoch": 2.5250767187285303, "grad_norm": 0.12840055347509405, "learning_rate": 2.2589299220730313e-05, "loss": 0.4443, "num_tokens": 1094794594.0, "step": 6893 }, { "epoch": 2.5254431365364356, "grad_norm": 0.1334684235476421, "learning_rate": 2.25850304463031e-05, "loss": 0.4486, "num_tokens": 1095469595.0, "step": 6894 }, { "epoch": 2.5258095543443413, "grad_norm": 0.14286251455453255, "learning_rate": 2.2580761638937504e-05, "loss": 0.4563, "num_tokens": 1096173596.0, "step": 6895 }, { "epoch": 2.5261759721522465, "grad_norm": 0.1276344768632967, "learning_rate": 2.2576492798873867e-05, "loss": 0.4669, "num_tokens": 1096946183.0, "step": 6896 }, { "epoch": 2.5265423899601522, "grad_norm": 0.13468471732441026, "learning_rate": 2.2572223926352538e-05, "loss": 0.4699, "num_tokens": 1097690386.0, "step": 6897 }, { "epoch": 2.5269088077680575, "grad_norm": 0.14448556854410624, "learning_rate": 2.2567955021613863e-05, "loss": 0.4423, "num_tokens": 1098506246.0, "step": 6898 }, { "epoch": 2.5272752255759627, "grad_norm": 0.12701253873452573, "learning_rate": 2.2563686084898184e-05, "loss": 0.4514, "num_tokens": 1099261459.0, "step": 6899 }, { "epoch": 2.5276416433838684, "grad_norm": 0.12564884021552986, "learning_rate": 2.2559417116445857e-05, "loss": 0.4293, "num_tokens": 1100009676.0, "step": 6900 }, { "epoch": 2.528008061191774, "grad_norm": 0.13242312832233247, "learning_rate": 2.2555148116497234e-05, "loss": 0.4437, "num_tokens": 1100786244.0, "step": 6901 }, { "epoch": 2.5283744789996794, "grad_norm": 0.15314836088931952, "learning_rate": 2.2550879085292657e-05, "loss": 0.4453, "num_tokens": 1101492924.0, "step": 6902 }, { "epoch": 2.5287408968075846, "grad_norm": 0.14361073897724008, "learning_rate": 2.2546610023072492e-05, "loss": 0.4656, "num_tokens": 1102205366.0, "step": 6903 }, { "epoch": 2.5291073146154903, "grad_norm": 0.1352950896912083, "learning_rate": 2.25423409300771e-05, "loss": 0.4478, "num_tokens": 1102933694.0, "step": 6904 }, { "epoch": 2.529473732423396, "grad_norm": 0.16353233845203627, "learning_rate": 2.2538071806546837e-05, "loss": 0.4728, "num_tokens": 1103559305.0, "step": 6905 }, { "epoch": 2.5298401502313013, "grad_norm": 0.15158407166524412, "learning_rate": 2.2533802652722037e-05, "loss": 0.4767, "num_tokens": 1104307582.0, "step": 6906 }, { "epoch": 2.5302065680392065, "grad_norm": 0.12426816370201246, "learning_rate": 2.2529533468843103e-05, "loss": 0.4237, "num_tokens": 1105079666.0, "step": 6907 }, { "epoch": 2.530572985847112, "grad_norm": 0.15592863671218246, "learning_rate": 2.2525264255150374e-05, "loss": 0.4679, "num_tokens": 1105896930.0, "step": 6908 }, { "epoch": 2.530939403655018, "grad_norm": 0.13088328686828854, "learning_rate": 2.2520995011884218e-05, "loss": 0.4185, "num_tokens": 1106746836.0, "step": 6909 }, { "epoch": 2.531305821462923, "grad_norm": 0.1298983302188532, "learning_rate": 2.2516725739285002e-05, "loss": 0.4421, "num_tokens": 1107571728.0, "step": 6910 }, { "epoch": 2.5316722392708284, "grad_norm": 0.13102577956357442, "learning_rate": 2.2512456437593095e-05, "loss": 0.4641, "num_tokens": 1108308920.0, "step": 6911 }, { "epoch": 2.532038657078734, "grad_norm": 0.15014184325498459, "learning_rate": 2.2508187107048878e-05, "loss": 0.4672, "num_tokens": 1109139906.0, "step": 6912 }, { "epoch": 2.5324050748866394, "grad_norm": 0.1403880297156823, "learning_rate": 2.250391774789271e-05, "loss": 0.4504, "num_tokens": 1109951370.0, "step": 6913 }, { "epoch": 2.532771492694545, "grad_norm": 0.13704627584714033, "learning_rate": 2.2499648360364965e-05, "loss": 0.4538, "num_tokens": 1110616213.0, "step": 6914 }, { "epoch": 2.5331379105024503, "grad_norm": 0.14466796342429625, "learning_rate": 2.249537894470603e-05, "loss": 0.4569, "num_tokens": 1111234063.0, "step": 6915 }, { "epoch": 2.533504328310356, "grad_norm": 0.15254534577891662, "learning_rate": 2.249110950115626e-05, "loss": 0.4652, "num_tokens": 1111912889.0, "step": 6916 }, { "epoch": 2.5338707461182612, "grad_norm": 0.13384382019036742, "learning_rate": 2.248684002995605e-05, "loss": 0.451, "num_tokens": 1112728970.0, "step": 6917 }, { "epoch": 2.534237163926167, "grad_norm": 0.14110721790548095, "learning_rate": 2.248257053134578e-05, "loss": 0.4728, "num_tokens": 1113447726.0, "step": 6918 }, { "epoch": 2.534603581734072, "grad_norm": 0.13813365857942672, "learning_rate": 2.2478301005565824e-05, "loss": 0.4512, "num_tokens": 1114222334.0, "step": 6919 }, { "epoch": 2.534969999541978, "grad_norm": 0.1460485385528562, "learning_rate": 2.2474031452856567e-05, "loss": 0.4596, "num_tokens": 1115032604.0, "step": 6920 }, { "epoch": 2.535336417349883, "grad_norm": 0.1289077251847517, "learning_rate": 2.2469761873458403e-05, "loss": 0.4258, "num_tokens": 1115803755.0, "step": 6921 }, { "epoch": 2.535702835157789, "grad_norm": 0.13529196007706862, "learning_rate": 2.2465492267611696e-05, "loss": 0.4513, "num_tokens": 1116608777.0, "step": 6922 }, { "epoch": 2.536069252965694, "grad_norm": 0.14479738560984917, "learning_rate": 2.2461222635556855e-05, "loss": 0.4764, "num_tokens": 1117390788.0, "step": 6923 }, { "epoch": 2.5364356707735993, "grad_norm": 0.14146445654235068, "learning_rate": 2.2456952977534257e-05, "loss": 0.4609, "num_tokens": 1118137437.0, "step": 6924 }, { "epoch": 2.536802088581505, "grad_norm": 0.1481228497783708, "learning_rate": 2.2452683293784294e-05, "loss": 0.4576, "num_tokens": 1118772784.0, "step": 6925 }, { "epoch": 2.5371685063894107, "grad_norm": 0.14330464694715062, "learning_rate": 2.2448413584547363e-05, "loss": 0.4729, "num_tokens": 1119467647.0, "step": 6926 }, { "epoch": 2.537534924197316, "grad_norm": 0.17367343331607152, "learning_rate": 2.2444143850063857e-05, "loss": 0.4584, "num_tokens": 1120093907.0, "step": 6927 }, { "epoch": 2.537901342005221, "grad_norm": 0.13790548869012562, "learning_rate": 2.2439874090574155e-05, "loss": 0.4511, "num_tokens": 1120815924.0, "step": 6928 }, { "epoch": 2.538267759813127, "grad_norm": 0.15157657881725137, "learning_rate": 2.243560430631867e-05, "loss": 0.4848, "num_tokens": 1121511653.0, "step": 6929 }, { "epoch": 2.5386341776210326, "grad_norm": 0.15155437195358182, "learning_rate": 2.2431334497537795e-05, "loss": 0.4543, "num_tokens": 1122303996.0, "step": 6930 }, { "epoch": 2.539000595428938, "grad_norm": 0.14079849758459045, "learning_rate": 2.242706466447193e-05, "loss": 0.4645, "num_tokens": 1123089929.0, "step": 6931 }, { "epoch": 2.539367013236843, "grad_norm": 0.13223219006249404, "learning_rate": 2.242279480736147e-05, "loss": 0.4451, "num_tokens": 1123911570.0, "step": 6932 }, { "epoch": 2.539733431044749, "grad_norm": 0.12670239651601134, "learning_rate": 2.2418524926446824e-05, "loss": 0.4588, "num_tokens": 1124769221.0, "step": 6933 }, { "epoch": 2.5400998488526545, "grad_norm": 0.15585271698560998, "learning_rate": 2.2414255021968386e-05, "loss": 0.4646, "num_tokens": 1125485971.0, "step": 6934 }, { "epoch": 2.5404662666605597, "grad_norm": 0.12926771881341156, "learning_rate": 2.2409985094166563e-05, "loss": 0.4655, "num_tokens": 1126364626.0, "step": 6935 }, { "epoch": 2.540832684468465, "grad_norm": 0.1417998963064916, "learning_rate": 2.2405715143281765e-05, "loss": 0.4479, "num_tokens": 1127082894.0, "step": 6936 }, { "epoch": 2.5411991022763707, "grad_norm": 0.12881426125972456, "learning_rate": 2.240144516955439e-05, "loss": 0.4334, "num_tokens": 1127821315.0, "step": 6937 }, { "epoch": 2.541565520084276, "grad_norm": 0.1336746287779632, "learning_rate": 2.239717517322486e-05, "loss": 0.461, "num_tokens": 1128606184.0, "step": 6938 }, { "epoch": 2.5419319378921816, "grad_norm": 0.12856024072873712, "learning_rate": 2.239290515453357e-05, "loss": 0.4243, "num_tokens": 1129363338.0, "step": 6939 }, { "epoch": 2.542298355700087, "grad_norm": 0.13181763807585017, "learning_rate": 2.2388635113720936e-05, "loss": 0.4638, "num_tokens": 1130226746.0, "step": 6940 }, { "epoch": 2.5426647735079926, "grad_norm": 0.13741046307066213, "learning_rate": 2.238436505102738e-05, "loss": 0.432, "num_tokens": 1130912532.0, "step": 6941 }, { "epoch": 2.543031191315898, "grad_norm": 0.15308401688768142, "learning_rate": 2.2380094966693295e-05, "loss": 0.454, "num_tokens": 1131679876.0, "step": 6942 }, { "epoch": 2.5433976091238035, "grad_norm": 0.1455245970697392, "learning_rate": 2.237582486095911e-05, "loss": 0.5027, "num_tokens": 1132439234.0, "step": 6943 }, { "epoch": 2.5437640269317088, "grad_norm": 0.13281918815227123, "learning_rate": 2.2371554734065225e-05, "loss": 0.4339, "num_tokens": 1133239962.0, "step": 6944 }, { "epoch": 2.5441304447396145, "grad_norm": 0.1426134476392032, "learning_rate": 2.2367284586252084e-05, "loss": 0.4422, "num_tokens": 1133923884.0, "step": 6945 }, { "epoch": 2.5444968625475197, "grad_norm": 0.1483781808176232, "learning_rate": 2.2363014417760086e-05, "loss": 0.4394, "num_tokens": 1134641634.0, "step": 6946 }, { "epoch": 2.5448632803554254, "grad_norm": 0.12969093142567362, "learning_rate": 2.2358744228829644e-05, "loss": 0.447, "num_tokens": 1135486983.0, "step": 6947 }, { "epoch": 2.5452296981633307, "grad_norm": 0.14777447144417524, "learning_rate": 2.235447401970119e-05, "loss": 0.464, "num_tokens": 1136147477.0, "step": 6948 }, { "epoch": 2.5455961159712364, "grad_norm": 0.13039231675189905, "learning_rate": 2.235020379061515e-05, "loss": 0.4453, "num_tokens": 1136905481.0, "step": 6949 }, { "epoch": 2.5459625337791416, "grad_norm": 0.13808174980005666, "learning_rate": 2.234593354181193e-05, "loss": 0.466, "num_tokens": 1137584636.0, "step": 6950 }, { "epoch": 2.5463289515870473, "grad_norm": 0.13310237412853235, "learning_rate": 2.234166327353196e-05, "loss": 0.4334, "num_tokens": 1138428302.0, "step": 6951 }, { "epoch": 2.5466953693949526, "grad_norm": 0.13696185898458488, "learning_rate": 2.233739298601567e-05, "loss": 0.4788, "num_tokens": 1139140241.0, "step": 6952 }, { "epoch": 2.547061787202858, "grad_norm": 0.1334225000111771, "learning_rate": 2.2333122679503486e-05, "loss": 0.4533, "num_tokens": 1139882734.0, "step": 6953 }, { "epoch": 2.5474282050107635, "grad_norm": 0.1569686100781746, "learning_rate": 2.2328852354235827e-05, "loss": 0.4851, "num_tokens": 1140695608.0, "step": 6954 }, { "epoch": 2.547794622818669, "grad_norm": 0.13599927698334552, "learning_rate": 2.2324582010453126e-05, "loss": 0.4597, "num_tokens": 1141515488.0, "step": 6955 }, { "epoch": 2.5481610406265744, "grad_norm": 0.12822427124322805, "learning_rate": 2.2320311648395804e-05, "loss": 0.4151, "num_tokens": 1142297844.0, "step": 6956 }, { "epoch": 2.5485274584344797, "grad_norm": 0.14260439207693218, "learning_rate": 2.2316041268304303e-05, "loss": 0.4359, "num_tokens": 1142935791.0, "step": 6957 }, { "epoch": 2.5488938762423854, "grad_norm": 0.1444608142369517, "learning_rate": 2.2311770870419053e-05, "loss": 0.4209, "num_tokens": 1143649695.0, "step": 6958 }, { "epoch": 2.549260294050291, "grad_norm": 0.14408476207147888, "learning_rate": 2.230750045498048e-05, "loss": 0.4604, "num_tokens": 1144404972.0, "step": 6959 }, { "epoch": 2.5496267118581963, "grad_norm": 0.13105167806213833, "learning_rate": 2.230323002222902e-05, "loss": 0.4407, "num_tokens": 1145189878.0, "step": 6960 }, { "epoch": 2.5499931296661016, "grad_norm": 0.1409527036510074, "learning_rate": 2.229895957240509e-05, "loss": 0.4564, "num_tokens": 1145903011.0, "step": 6961 }, { "epoch": 2.5503595474740073, "grad_norm": 0.14720353262437239, "learning_rate": 2.2294689105749158e-05, "loss": 0.4852, "num_tokens": 1146671963.0, "step": 6962 }, { "epoch": 2.550725965281913, "grad_norm": 0.12217482221386401, "learning_rate": 2.2290418622501635e-05, "loss": 0.4189, "num_tokens": 1147542714.0, "step": 6963 }, { "epoch": 2.5510923830898182, "grad_norm": 0.12948592864577563, "learning_rate": 2.2286148122902964e-05, "loss": 0.4541, "num_tokens": 1148277939.0, "step": 6964 }, { "epoch": 2.5514588008977235, "grad_norm": 0.13418548134868583, "learning_rate": 2.2281877607193584e-05, "loss": 0.4444, "num_tokens": 1149083056.0, "step": 6965 }, { "epoch": 2.551825218705629, "grad_norm": 0.1161784881736862, "learning_rate": 2.2277607075613927e-05, "loss": 0.433, "num_tokens": 1150062602.0, "step": 6966 }, { "epoch": 2.5521916365135344, "grad_norm": 0.14384333602755592, "learning_rate": 2.2273336528404453e-05, "loss": 0.4668, "num_tokens": 1150725824.0, "step": 6967 }, { "epoch": 2.55255805432144, "grad_norm": 0.1422756450980454, "learning_rate": 2.2269065965805575e-05, "loss": 0.4699, "num_tokens": 1151425611.0, "step": 6968 }, { "epoch": 2.5529244721293454, "grad_norm": 0.1289000998202319, "learning_rate": 2.226479538805775e-05, "loss": 0.4652, "num_tokens": 1152181179.0, "step": 6969 }, { "epoch": 2.553290889937251, "grad_norm": 0.1320858322800496, "learning_rate": 2.2260524795401417e-05, "loss": 0.4491, "num_tokens": 1152927670.0, "step": 6970 }, { "epoch": 2.5536573077451563, "grad_norm": 0.14402582132312067, "learning_rate": 2.225625418807702e-05, "loss": 0.4851, "num_tokens": 1153641098.0, "step": 6971 }, { "epoch": 2.554023725553062, "grad_norm": 0.1338713584632846, "learning_rate": 2.2251983566325e-05, "loss": 0.4461, "num_tokens": 1154350736.0, "step": 6972 }, { "epoch": 2.5543901433609673, "grad_norm": 0.1335838622816861, "learning_rate": 2.2247712930385797e-05, "loss": 0.4482, "num_tokens": 1155078882.0, "step": 6973 }, { "epoch": 2.554756561168873, "grad_norm": 0.14798647834731876, "learning_rate": 2.2243442280499873e-05, "loss": 0.4986, "num_tokens": 1155887327.0, "step": 6974 }, { "epoch": 2.555122978976778, "grad_norm": 0.12984590703327145, "learning_rate": 2.2239171616907654e-05, "loss": 0.4501, "num_tokens": 1156675784.0, "step": 6975 }, { "epoch": 2.555489396784684, "grad_norm": 0.1386775749855245, "learning_rate": 2.2234900939849604e-05, "loss": 0.4631, "num_tokens": 1157388728.0, "step": 6976 }, { "epoch": 2.555855814592589, "grad_norm": 0.14588393756119614, "learning_rate": 2.2230630249566165e-05, "loss": 0.4694, "num_tokens": 1158111088.0, "step": 6977 }, { "epoch": 2.5562222324004944, "grad_norm": 0.13941175277930853, "learning_rate": 2.222635954629777e-05, "loss": 0.4639, "num_tokens": 1158876466.0, "step": 6978 }, { "epoch": 2.5565886502084, "grad_norm": 0.13127385183380375, "learning_rate": 2.222208883028489e-05, "loss": 0.4495, "num_tokens": 1159666083.0, "step": 6979 }, { "epoch": 2.556955068016306, "grad_norm": 0.13162141405618724, "learning_rate": 2.2217818101767965e-05, "loss": 0.4408, "num_tokens": 1160522275.0, "step": 6980 }, { "epoch": 2.557321485824211, "grad_norm": 0.12934026456813133, "learning_rate": 2.2213547360987446e-05, "loss": 0.4283, "num_tokens": 1161331978.0, "step": 6981 }, { "epoch": 2.5576879036321163, "grad_norm": 0.1432563104020258, "learning_rate": 2.2209276608183787e-05, "loss": 0.4605, "num_tokens": 1162029981.0, "step": 6982 }, { "epoch": 2.558054321440022, "grad_norm": 0.1327529464573578, "learning_rate": 2.2205005843597434e-05, "loss": 0.4417, "num_tokens": 1162761443.0, "step": 6983 }, { "epoch": 2.5584207392479277, "grad_norm": 0.13666727671416276, "learning_rate": 2.220073506746885e-05, "loss": 0.4533, "num_tokens": 1163481852.0, "step": 6984 }, { "epoch": 2.558787157055833, "grad_norm": 0.144400100804607, "learning_rate": 2.2196464280038472e-05, "loss": 0.4242, "num_tokens": 1164221547.0, "step": 6985 }, { "epoch": 2.559153574863738, "grad_norm": 0.14531954709933487, "learning_rate": 2.2192193481546767e-05, "loss": 0.4565, "num_tokens": 1164962872.0, "step": 6986 }, { "epoch": 2.559519992671644, "grad_norm": 0.13308872191705803, "learning_rate": 2.218792267223419e-05, "loss": 0.4421, "num_tokens": 1165740702.0, "step": 6987 }, { "epoch": 2.5598864104795496, "grad_norm": 0.1340854535620357, "learning_rate": 2.218365185234119e-05, "loss": 0.4463, "num_tokens": 1166486081.0, "step": 6988 }, { "epoch": 2.560252828287455, "grad_norm": 0.12699890103784384, "learning_rate": 2.217938102210822e-05, "loss": 0.4307, "num_tokens": 1167247608.0, "step": 6989 }, { "epoch": 2.56061924609536, "grad_norm": 0.13571462520982347, "learning_rate": 2.217511018177574e-05, "loss": 0.4611, "num_tokens": 1167960089.0, "step": 6990 }, { "epoch": 2.5609856639032658, "grad_norm": 0.13441410758379427, "learning_rate": 2.2170839331584215e-05, "loss": 0.4747, "num_tokens": 1168734921.0, "step": 6991 }, { "epoch": 2.561352081711171, "grad_norm": 0.13305107989000386, "learning_rate": 2.2166568471774093e-05, "loss": 0.4642, "num_tokens": 1169551552.0, "step": 6992 }, { "epoch": 2.5617184995190767, "grad_norm": 0.13309966930564343, "learning_rate": 2.2162297602585837e-05, "loss": 0.4653, "num_tokens": 1170266678.0, "step": 6993 }, { "epoch": 2.562084917326982, "grad_norm": 0.14523093399778375, "learning_rate": 2.2158026724259894e-05, "loss": 0.4988, "num_tokens": 1170982523.0, "step": 6994 }, { "epoch": 2.5624513351348877, "grad_norm": 0.13027373382419952, "learning_rate": 2.2153755837036733e-05, "loss": 0.4485, "num_tokens": 1171779480.0, "step": 6995 }, { "epoch": 2.562817752942793, "grad_norm": 0.14388646535250557, "learning_rate": 2.214948494115682e-05, "loss": 0.4516, "num_tokens": 1172474186.0, "step": 6996 }, { "epoch": 2.5631841707506986, "grad_norm": 0.13379774955460194, "learning_rate": 2.2145214036860598e-05, "loss": 0.4519, "num_tokens": 1173268117.0, "step": 6997 }, { "epoch": 2.563550588558604, "grad_norm": 0.12605844900643215, "learning_rate": 2.2140943124388544e-05, "loss": 0.4671, "num_tokens": 1174083391.0, "step": 6998 }, { "epoch": 2.5639170063665095, "grad_norm": 0.13205582852128664, "learning_rate": 2.2136672203981103e-05, "loss": 0.4348, "num_tokens": 1174863181.0, "step": 6999 }, { "epoch": 2.564283424174415, "grad_norm": 0.13947099753486125, "learning_rate": 2.213240127587875e-05, "loss": 0.4607, "num_tokens": 1175677673.0, "step": 7000 }, { "epoch": 2.5646498419823205, "grad_norm": 0.12602412808610491, "learning_rate": 2.2128130340321942e-05, "loss": 0.4274, "num_tokens": 1176513540.0, "step": 7001 }, { "epoch": 2.5650162597902257, "grad_norm": 0.13621964932542524, "learning_rate": 2.2123859397551137e-05, "loss": 0.4657, "num_tokens": 1177287652.0, "step": 7002 }, { "epoch": 2.5653826775981314, "grad_norm": 0.139133106114329, "learning_rate": 2.2119588447806798e-05, "loss": 0.4301, "num_tokens": 1178091952.0, "step": 7003 }, { "epoch": 2.5657490954060367, "grad_norm": 0.13877589176904875, "learning_rate": 2.2115317491329397e-05, "loss": 0.4545, "num_tokens": 1178818665.0, "step": 7004 }, { "epoch": 2.5661155132139424, "grad_norm": 0.14381772902546014, "learning_rate": 2.2111046528359384e-05, "loss": 0.4425, "num_tokens": 1179540048.0, "step": 7005 }, { "epoch": 2.5664819310218476, "grad_norm": 0.13477087387651265, "learning_rate": 2.2106775559137238e-05, "loss": 0.43, "num_tokens": 1180246813.0, "step": 7006 }, { "epoch": 2.566848348829753, "grad_norm": 0.1407343782656642, "learning_rate": 2.2102504583903417e-05, "loss": 0.4749, "num_tokens": 1181023867.0, "step": 7007 }, { "epoch": 2.5672147666376586, "grad_norm": 0.12713192396119496, "learning_rate": 2.209823360289838e-05, "loss": 0.4696, "num_tokens": 1181877020.0, "step": 7008 }, { "epoch": 2.5675811844455643, "grad_norm": 0.14639948844010828, "learning_rate": 2.209396261636259e-05, "loss": 0.4557, "num_tokens": 1182718453.0, "step": 7009 }, { "epoch": 2.5679476022534695, "grad_norm": 0.13832403229974563, "learning_rate": 2.2089691624536523e-05, "loss": 0.4662, "num_tokens": 1183481483.0, "step": 7010 }, { "epoch": 2.5683140200613748, "grad_norm": 0.14621335468061472, "learning_rate": 2.2085420627660635e-05, "loss": 0.5037, "num_tokens": 1184240190.0, "step": 7011 }, { "epoch": 2.5686804378692805, "grad_norm": 0.13875832490600984, "learning_rate": 2.2081149625975402e-05, "loss": 0.4551, "num_tokens": 1184973102.0, "step": 7012 }, { "epoch": 2.569046855677186, "grad_norm": 0.1441132701770875, "learning_rate": 2.2076878619721276e-05, "loss": 0.4685, "num_tokens": 1185655219.0, "step": 7013 }, { "epoch": 2.5694132734850914, "grad_norm": 0.13211251160765522, "learning_rate": 2.207260760913873e-05, "loss": 0.4604, "num_tokens": 1186403991.0, "step": 7014 }, { "epoch": 2.5697796912929967, "grad_norm": 0.14372568178053263, "learning_rate": 2.2068336594468236e-05, "loss": 0.4447, "num_tokens": 1187058670.0, "step": 7015 }, { "epoch": 2.5701461091009024, "grad_norm": 0.15244758985812573, "learning_rate": 2.2064065575950253e-05, "loss": 0.4697, "num_tokens": 1187762506.0, "step": 7016 }, { "epoch": 2.5705125269088076, "grad_norm": 0.13136597083803347, "learning_rate": 2.2059794553825254e-05, "loss": 0.4846, "num_tokens": 1188615862.0, "step": 7017 }, { "epoch": 2.5708789447167133, "grad_norm": 0.1521205615296265, "learning_rate": 2.2055523528333696e-05, "loss": 0.4561, "num_tokens": 1189351564.0, "step": 7018 }, { "epoch": 2.5712453625246185, "grad_norm": 0.12848436217995976, "learning_rate": 2.205125249971606e-05, "loss": 0.4522, "num_tokens": 1190144973.0, "step": 7019 }, { "epoch": 2.5716117803325242, "grad_norm": 0.12942925485738044, "learning_rate": 2.2046981468212802e-05, "loss": 0.4623, "num_tokens": 1190983636.0, "step": 7020 }, { "epoch": 2.5719781981404295, "grad_norm": 0.144371218421329, "learning_rate": 2.2042710434064394e-05, "loss": 0.4871, "num_tokens": 1191677299.0, "step": 7021 }, { "epoch": 2.572344615948335, "grad_norm": 0.1399890265065794, "learning_rate": 2.2038439397511304e-05, "loss": 0.452, "num_tokens": 1192471325.0, "step": 7022 }, { "epoch": 2.5727110337562404, "grad_norm": 0.133762759922726, "learning_rate": 2.2034168358793994e-05, "loss": 0.4892, "num_tokens": 1193291409.0, "step": 7023 }, { "epoch": 2.573077451564146, "grad_norm": 0.13680890297709883, "learning_rate": 2.2029897318152943e-05, "loss": 0.4819, "num_tokens": 1194097986.0, "step": 7024 }, { "epoch": 2.5734438693720514, "grad_norm": 0.13242215244165173, "learning_rate": 2.2025626275828614e-05, "loss": 0.4402, "num_tokens": 1194848775.0, "step": 7025 }, { "epoch": 2.573810287179957, "grad_norm": 0.1421725717335405, "learning_rate": 2.2021355232061473e-05, "loss": 0.4392, "num_tokens": 1195511370.0, "step": 7026 }, { "epoch": 2.5741767049878623, "grad_norm": 0.13974921305259055, "learning_rate": 2.2017084187091986e-05, "loss": 0.4366, "num_tokens": 1196238673.0, "step": 7027 }, { "epoch": 2.574543122795768, "grad_norm": 0.13699573408125176, "learning_rate": 2.2012813141160635e-05, "loss": 0.458, "num_tokens": 1196972602.0, "step": 7028 }, { "epoch": 2.5749095406036733, "grad_norm": 0.1391952966459009, "learning_rate": 2.2008542094507873e-05, "loss": 0.4894, "num_tokens": 1197751866.0, "step": 7029 }, { "epoch": 2.575275958411579, "grad_norm": 0.13775820357498805, "learning_rate": 2.200427104737417e-05, "loss": 0.4367, "num_tokens": 1198502141.0, "step": 7030 }, { "epoch": 2.575642376219484, "grad_norm": 0.14123092215111582, "learning_rate": 2.2000000000000003e-05, "loss": 0.4669, "num_tokens": 1199259486.0, "step": 7031 }, { "epoch": 2.5760087940273895, "grad_norm": 0.13240952927720834, "learning_rate": 2.199572895262584e-05, "loss": 0.4715, "num_tokens": 1200035238.0, "step": 7032 }, { "epoch": 2.576375211835295, "grad_norm": 0.13831774984153164, "learning_rate": 2.1991457905492136e-05, "loss": 0.4415, "num_tokens": 1200726651.0, "step": 7033 }, { "epoch": 2.576741629643201, "grad_norm": 0.1499398771108092, "learning_rate": 2.198718685883937e-05, "loss": 0.4779, "num_tokens": 1201424041.0, "step": 7034 }, { "epoch": 2.577108047451106, "grad_norm": 0.1397385863549479, "learning_rate": 2.1982915812908016e-05, "loss": 0.4443, "num_tokens": 1202137287.0, "step": 7035 }, { "epoch": 2.5774744652590114, "grad_norm": 0.14508655774442894, "learning_rate": 2.1978644767938536e-05, "loss": 0.4845, "num_tokens": 1202869224.0, "step": 7036 }, { "epoch": 2.577840883066917, "grad_norm": 0.13263614521506947, "learning_rate": 2.1974373724171388e-05, "loss": 0.4171, "num_tokens": 1203662937.0, "step": 7037 }, { "epoch": 2.5782073008748227, "grad_norm": 0.1395504968911793, "learning_rate": 2.1970102681847062e-05, "loss": 0.4521, "num_tokens": 1204423858.0, "step": 7038 }, { "epoch": 2.578573718682728, "grad_norm": 0.13446961341488775, "learning_rate": 2.196583164120601e-05, "loss": 0.443, "num_tokens": 1205084139.0, "step": 7039 }, { "epoch": 2.5789401364906333, "grad_norm": 0.1284167912237113, "learning_rate": 2.19615606024887e-05, "loss": 0.4355, "num_tokens": 1205896100.0, "step": 7040 }, { "epoch": 2.579306554298539, "grad_norm": 0.1441071238800367, "learning_rate": 2.1957289565935615e-05, "loss": 0.4485, "num_tokens": 1206701643.0, "step": 7041 }, { "epoch": 2.5796729721064446, "grad_norm": 0.12825384221691644, "learning_rate": 2.195301853178721e-05, "loss": 0.4321, "num_tokens": 1207479382.0, "step": 7042 }, { "epoch": 2.58003938991435, "grad_norm": 0.13691857585409453, "learning_rate": 2.1948747500283948e-05, "loss": 0.4563, "num_tokens": 1208273543.0, "step": 7043 }, { "epoch": 2.580405807722255, "grad_norm": 0.14034725164695416, "learning_rate": 2.194447647166631e-05, "loss": 0.4331, "num_tokens": 1209016162.0, "step": 7044 }, { "epoch": 2.580772225530161, "grad_norm": 0.13488044004253202, "learning_rate": 2.1940205446174748e-05, "loss": 0.4481, "num_tokens": 1209786752.0, "step": 7045 }, { "epoch": 2.581138643338066, "grad_norm": 0.1372411425431604, "learning_rate": 2.193593442404975e-05, "loss": 0.4396, "num_tokens": 1210434760.0, "step": 7046 }, { "epoch": 2.581505061145972, "grad_norm": 0.15242548030688827, "learning_rate": 2.1931663405531766e-05, "loss": 0.4629, "num_tokens": 1211221444.0, "step": 7047 }, { "epoch": 2.581871478953877, "grad_norm": 0.15273803635924643, "learning_rate": 2.1927392390861274e-05, "loss": 0.4872, "num_tokens": 1211900493.0, "step": 7048 }, { "epoch": 2.5822378967617827, "grad_norm": 0.12950880517785773, "learning_rate": 2.1923121380278736e-05, "loss": 0.4345, "num_tokens": 1212706360.0, "step": 7049 }, { "epoch": 2.582604314569688, "grad_norm": 0.13911950523530486, "learning_rate": 2.191885037402461e-05, "loss": 0.4512, "num_tokens": 1213415934.0, "step": 7050 }, { "epoch": 2.5829707323775937, "grad_norm": 0.14774245430139438, "learning_rate": 2.1914579372339367e-05, "loss": 0.4707, "num_tokens": 1214089118.0, "step": 7051 }, { "epoch": 2.583337150185499, "grad_norm": 0.14737461730883533, "learning_rate": 2.1910308375463486e-05, "loss": 0.4633, "num_tokens": 1214772667.0, "step": 7052 }, { "epoch": 2.5837035679934046, "grad_norm": 0.1564897280198181, "learning_rate": 2.1906037383637418e-05, "loss": 0.4899, "num_tokens": 1215427499.0, "step": 7053 }, { "epoch": 2.58406998580131, "grad_norm": 0.1315677494154151, "learning_rate": 2.190176639710163e-05, "loss": 0.4732, "num_tokens": 1216263561.0, "step": 7054 }, { "epoch": 2.5844364036092156, "grad_norm": 0.13446105074620116, "learning_rate": 2.1897495416096595e-05, "loss": 0.4773, "num_tokens": 1217124652.0, "step": 7055 }, { "epoch": 2.584802821417121, "grad_norm": 0.13163322745189873, "learning_rate": 2.1893224440862764e-05, "loss": 0.4597, "num_tokens": 1217921949.0, "step": 7056 }, { "epoch": 2.585169239225026, "grad_norm": 0.13675248137412443, "learning_rate": 2.1888953471640618e-05, "loss": 0.4822, "num_tokens": 1218745105.0, "step": 7057 }, { "epoch": 2.5855356570329318, "grad_norm": 0.12869185309337886, "learning_rate": 2.1884682508670608e-05, "loss": 0.4499, "num_tokens": 1219549033.0, "step": 7058 }, { "epoch": 2.5859020748408375, "grad_norm": 0.13374961088936022, "learning_rate": 2.1880411552193208e-05, "loss": 0.4717, "num_tokens": 1220295227.0, "step": 7059 }, { "epoch": 2.5862684926487427, "grad_norm": 0.14907041104939842, "learning_rate": 2.187614060244887e-05, "loss": 0.4935, "num_tokens": 1221026590.0, "step": 7060 }, { "epoch": 2.586634910456648, "grad_norm": 0.14369371430022965, "learning_rate": 2.187186965967807e-05, "loss": 0.4414, "num_tokens": 1221670581.0, "step": 7061 }, { "epoch": 2.5870013282645536, "grad_norm": 0.13284560164436773, "learning_rate": 2.186759872412125e-05, "loss": 0.4684, "num_tokens": 1222443646.0, "step": 7062 }, { "epoch": 2.5873677460724593, "grad_norm": 0.14172389493367962, "learning_rate": 2.18633277960189e-05, "loss": 0.469, "num_tokens": 1223162960.0, "step": 7063 }, { "epoch": 2.5877341638803646, "grad_norm": 0.13425149276416706, "learning_rate": 2.1859056875611465e-05, "loss": 0.4498, "num_tokens": 1223877970.0, "step": 7064 }, { "epoch": 2.58810058168827, "grad_norm": 0.13240405061812976, "learning_rate": 2.1854785963139404e-05, "loss": 0.4509, "num_tokens": 1224724847.0, "step": 7065 }, { "epoch": 2.5884669994961755, "grad_norm": 0.13414937978541994, "learning_rate": 2.185051505884319e-05, "loss": 0.4551, "num_tokens": 1225437914.0, "step": 7066 }, { "epoch": 2.5888334173040812, "grad_norm": 0.1344095662669802, "learning_rate": 2.184624416296327e-05, "loss": 0.4772, "num_tokens": 1226147626.0, "step": 7067 }, { "epoch": 2.5891998351119865, "grad_norm": 0.13332973184781682, "learning_rate": 2.184197327574011e-05, "loss": 0.4622, "num_tokens": 1226879792.0, "step": 7068 }, { "epoch": 2.5895662529198917, "grad_norm": 0.1436777683925612, "learning_rate": 2.1837702397414172e-05, "loss": 0.4348, "num_tokens": 1227592375.0, "step": 7069 }, { "epoch": 2.5899326707277974, "grad_norm": 0.13062824803148237, "learning_rate": 2.1833431528225916e-05, "loss": 0.4823, "num_tokens": 1228469063.0, "step": 7070 }, { "epoch": 2.5902990885357027, "grad_norm": 0.13441428775687703, "learning_rate": 2.182916066841579e-05, "loss": 0.4874, "num_tokens": 1229246095.0, "step": 7071 }, { "epoch": 2.5906655063436084, "grad_norm": 0.12804794488445037, "learning_rate": 2.1824889818224264e-05, "loss": 0.4283, "num_tokens": 1230007972.0, "step": 7072 }, { "epoch": 2.5910319241515136, "grad_norm": 0.13760939176620882, "learning_rate": 2.1820618977891784e-05, "loss": 0.4661, "num_tokens": 1230839927.0, "step": 7073 }, { "epoch": 2.5913983419594193, "grad_norm": 0.1383472383587958, "learning_rate": 2.181634814765882e-05, "loss": 0.4381, "num_tokens": 1231467812.0, "step": 7074 }, { "epoch": 2.5917647597673246, "grad_norm": 0.13747166462934754, "learning_rate": 2.1812077327765814e-05, "loss": 0.4445, "num_tokens": 1232192049.0, "step": 7075 }, { "epoch": 2.5921311775752303, "grad_norm": 0.12858878265575469, "learning_rate": 2.1807806518453242e-05, "loss": 0.4356, "num_tokens": 1232966456.0, "step": 7076 }, { "epoch": 2.5924975953831355, "grad_norm": 0.14674998478620882, "learning_rate": 2.1803535719961533e-05, "loss": 0.4749, "num_tokens": 1233711800.0, "step": 7077 }, { "epoch": 2.592864013191041, "grad_norm": 0.13983958037877917, "learning_rate": 2.1799264932531163e-05, "loss": 0.4399, "num_tokens": 1234377274.0, "step": 7078 }, { "epoch": 2.5932304309989465, "grad_norm": 0.1475405166878735, "learning_rate": 2.1794994156402568e-05, "loss": 0.4697, "num_tokens": 1235086613.0, "step": 7079 }, { "epoch": 2.593596848806852, "grad_norm": 0.15244432115058623, "learning_rate": 2.1790723391816222e-05, "loss": 0.4831, "num_tokens": 1235762710.0, "step": 7080 }, { "epoch": 2.5939632666147574, "grad_norm": 0.14169110716942834, "learning_rate": 2.178645263901256e-05, "loss": 0.4743, "num_tokens": 1236495044.0, "step": 7081 }, { "epoch": 2.594329684422663, "grad_norm": 0.1311288447665503, "learning_rate": 2.178218189823204e-05, "loss": 0.4785, "num_tokens": 1237291838.0, "step": 7082 }, { "epoch": 2.5946961022305683, "grad_norm": 0.1478614804397478, "learning_rate": 2.1777911169715123e-05, "loss": 0.4745, "num_tokens": 1238046092.0, "step": 7083 }, { "epoch": 2.595062520038474, "grad_norm": 0.13791795664223602, "learning_rate": 2.1773640453702235e-05, "loss": 0.4483, "num_tokens": 1238882465.0, "step": 7084 }, { "epoch": 2.5954289378463793, "grad_norm": 0.1261177708068094, "learning_rate": 2.1769369750433844e-05, "loss": 0.4428, "num_tokens": 1239686917.0, "step": 7085 }, { "epoch": 2.5957953556542845, "grad_norm": 0.13252192529043397, "learning_rate": 2.17650990601504e-05, "loss": 0.4688, "num_tokens": 1240394259.0, "step": 7086 }, { "epoch": 2.5961617734621902, "grad_norm": 0.14786561601680506, "learning_rate": 2.176082838309235e-05, "loss": 0.4659, "num_tokens": 1241108066.0, "step": 7087 }, { "epoch": 2.596528191270096, "grad_norm": 0.1334199379821037, "learning_rate": 2.1756557719500133e-05, "loss": 0.4437, "num_tokens": 1241898161.0, "step": 7088 }, { "epoch": 2.596894609078001, "grad_norm": 0.14494465396948183, "learning_rate": 2.175228706961421e-05, "loss": 0.4822, "num_tokens": 1242691787.0, "step": 7089 }, { "epoch": 2.5972610268859064, "grad_norm": 0.14176862160735604, "learning_rate": 2.1748016433675007e-05, "loss": 0.4777, "num_tokens": 1243423874.0, "step": 7090 }, { "epoch": 2.597627444693812, "grad_norm": 0.14514481719973696, "learning_rate": 2.1743745811922987e-05, "loss": 0.4948, "num_tokens": 1244172147.0, "step": 7091 }, { "epoch": 2.597993862501718, "grad_norm": 0.14580285941275453, "learning_rate": 2.173947520459859e-05, "loss": 0.4802, "num_tokens": 1244820750.0, "step": 7092 }, { "epoch": 2.598360280309623, "grad_norm": 0.11678354542538286, "learning_rate": 2.173520461194226e-05, "loss": 0.4526, "num_tokens": 1245744499.0, "step": 7093 }, { "epoch": 2.5987266981175283, "grad_norm": 0.13324730657211237, "learning_rate": 2.173093403419443e-05, "loss": 0.4721, "num_tokens": 1246570121.0, "step": 7094 }, { "epoch": 2.599093115925434, "grad_norm": 0.13831704441066525, "learning_rate": 2.172666347159556e-05, "loss": 0.4567, "num_tokens": 1247324677.0, "step": 7095 }, { "epoch": 2.5994595337333397, "grad_norm": 0.12973544544558216, "learning_rate": 2.172239292438607e-05, "loss": 0.4258, "num_tokens": 1248048578.0, "step": 7096 }, { "epoch": 2.599825951541245, "grad_norm": 0.12456455032704197, "learning_rate": 2.171812239280642e-05, "loss": 0.451, "num_tokens": 1248877078.0, "step": 7097 }, { "epoch": 2.60019236934915, "grad_norm": 0.14000773134120964, "learning_rate": 2.171385187709704e-05, "loss": 0.4798, "num_tokens": 1249544834.0, "step": 7098 }, { "epoch": 2.600558787157056, "grad_norm": 0.13668658378770657, "learning_rate": 2.170958137749837e-05, "loss": 0.4528, "num_tokens": 1250325004.0, "step": 7099 }, { "epoch": 2.600925204964961, "grad_norm": 0.13811042100454918, "learning_rate": 2.1705310894250854e-05, "loss": 0.4742, "num_tokens": 1251017493.0, "step": 7100 }, { "epoch": 2.601291622772867, "grad_norm": 0.12756665199612627, "learning_rate": 2.170104042759491e-05, "loss": 0.4699, "num_tokens": 1251859734.0, "step": 7101 }, { "epoch": 2.601658040580772, "grad_norm": 0.12332920801721546, "learning_rate": 2.169676997777099e-05, "loss": 0.4334, "num_tokens": 1252739525.0, "step": 7102 }, { "epoch": 2.602024458388678, "grad_norm": 0.12928543178437576, "learning_rate": 2.1692499545019528e-05, "loss": 0.4607, "num_tokens": 1253541172.0, "step": 7103 }, { "epoch": 2.602390876196583, "grad_norm": 0.13164360332214292, "learning_rate": 2.1688229129580956e-05, "loss": 0.4704, "num_tokens": 1254331245.0, "step": 7104 }, { "epoch": 2.6027572940044887, "grad_norm": 0.13076824727032463, "learning_rate": 2.16839587316957e-05, "loss": 0.449, "num_tokens": 1255123667.0, "step": 7105 }, { "epoch": 2.603123711812394, "grad_norm": 0.1371129143123894, "learning_rate": 2.1679688351604192e-05, "loss": 0.4653, "num_tokens": 1255875318.0, "step": 7106 }, { "epoch": 2.6034901296202997, "grad_norm": 0.1375010012393536, "learning_rate": 2.1675417989546883e-05, "loss": 0.4901, "num_tokens": 1256716222.0, "step": 7107 }, { "epoch": 2.603856547428205, "grad_norm": 0.14220142386448842, "learning_rate": 2.1671147645764185e-05, "loss": 0.4581, "num_tokens": 1257417061.0, "step": 7108 }, { "epoch": 2.6042229652361106, "grad_norm": 0.13779355616268107, "learning_rate": 2.1666877320496523e-05, "loss": 0.4428, "num_tokens": 1258218980.0, "step": 7109 }, { "epoch": 2.604589383044016, "grad_norm": 0.12608275639520802, "learning_rate": 2.1662607013984335e-05, "loss": 0.4121, "num_tokens": 1258972790.0, "step": 7110 }, { "epoch": 2.604955800851921, "grad_norm": 0.12778545508713177, "learning_rate": 2.1658336726468054e-05, "loss": 0.4563, "num_tokens": 1259845831.0, "step": 7111 }, { "epoch": 2.605322218659827, "grad_norm": 0.14826884126286613, "learning_rate": 2.165406645818808e-05, "loss": 0.4893, "num_tokens": 1260521675.0, "step": 7112 }, { "epoch": 2.6056886364677325, "grad_norm": 0.14443401295056754, "learning_rate": 2.164979620938486e-05, "loss": 0.4465, "num_tokens": 1261253746.0, "step": 7113 }, { "epoch": 2.6060550542756378, "grad_norm": 0.13244436663756926, "learning_rate": 2.164552598029882e-05, "loss": 0.4346, "num_tokens": 1262053143.0, "step": 7114 }, { "epoch": 2.606421472083543, "grad_norm": 0.13334076405936704, "learning_rate": 2.1641255771170358e-05, "loss": 0.4526, "num_tokens": 1262810247.0, "step": 7115 }, { "epoch": 2.6067878898914487, "grad_norm": 0.15510745171271262, "learning_rate": 2.1636985582239926e-05, "loss": 0.4637, "num_tokens": 1263485015.0, "step": 7116 }, { "epoch": 2.6071543076993544, "grad_norm": 0.1417714788727032, "learning_rate": 2.1632715413747922e-05, "loss": 0.4536, "num_tokens": 1264254432.0, "step": 7117 }, { "epoch": 2.6075207255072597, "grad_norm": 0.13275398140626565, "learning_rate": 2.1628445265934777e-05, "loss": 0.4771, "num_tokens": 1265081331.0, "step": 7118 }, { "epoch": 2.607887143315165, "grad_norm": 0.15956888982788367, "learning_rate": 2.1624175139040896e-05, "loss": 0.4978, "num_tokens": 1265654047.0, "step": 7119 }, { "epoch": 2.6082535611230706, "grad_norm": 0.12651833094116657, "learning_rate": 2.1619905033306714e-05, "loss": 0.4225, "num_tokens": 1266382236.0, "step": 7120 }, { "epoch": 2.6086199789309763, "grad_norm": 0.13882651318663503, "learning_rate": 2.1615634948972634e-05, "loss": 0.4751, "num_tokens": 1267242502.0, "step": 7121 }, { "epoch": 2.6089863967388816, "grad_norm": 0.1483097948015622, "learning_rate": 2.1611364886279073e-05, "loss": 0.4552, "num_tokens": 1267849285.0, "step": 7122 }, { "epoch": 2.609352814546787, "grad_norm": 0.13824869233820816, "learning_rate": 2.1607094845466437e-05, "loss": 0.463, "num_tokens": 1268598557.0, "step": 7123 }, { "epoch": 2.6097192323546925, "grad_norm": 0.13277524779137917, "learning_rate": 2.1602824826775146e-05, "loss": 0.4623, "num_tokens": 1269416489.0, "step": 7124 }, { "epoch": 2.6100856501625977, "grad_norm": 0.13175109767295837, "learning_rate": 2.1598554830445617e-05, "loss": 0.4408, "num_tokens": 1270183232.0, "step": 7125 }, { "epoch": 2.6104520679705034, "grad_norm": 0.13280790260701128, "learning_rate": 2.1594284856718244e-05, "loss": 0.449, "num_tokens": 1270972856.0, "step": 7126 }, { "epoch": 2.6108184857784087, "grad_norm": 0.1365765836965413, "learning_rate": 2.1590014905833446e-05, "loss": 0.4887, "num_tokens": 1271811697.0, "step": 7127 }, { "epoch": 2.6111849035863144, "grad_norm": 0.13401440793248628, "learning_rate": 2.1585744978031623e-05, "loss": 0.4425, "num_tokens": 1272576759.0, "step": 7128 }, { "epoch": 2.6115513213942196, "grad_norm": 0.13257462771558765, "learning_rate": 2.1581475073553185e-05, "loss": 0.4518, "num_tokens": 1273310565.0, "step": 7129 }, { "epoch": 2.6119177392021253, "grad_norm": 0.13330049855544052, "learning_rate": 2.1577205192638536e-05, "loss": 0.4868, "num_tokens": 1274036411.0, "step": 7130 }, { "epoch": 2.6122841570100306, "grad_norm": 0.1330735705251725, "learning_rate": 2.1572935335528077e-05, "loss": 0.4231, "num_tokens": 1274856581.0, "step": 7131 }, { "epoch": 2.6126505748179363, "grad_norm": 0.1327419488973742, "learning_rate": 2.156866550246221e-05, "loss": 0.453, "num_tokens": 1275675908.0, "step": 7132 }, { "epoch": 2.6130169926258415, "grad_norm": 0.15255183814311543, "learning_rate": 2.1564395693681335e-05, "loss": 0.4653, "num_tokens": 1276381658.0, "step": 7133 }, { "epoch": 2.6133834104337472, "grad_norm": 0.14362810920092925, "learning_rate": 2.1560125909425847e-05, "loss": 0.4951, "num_tokens": 1277193121.0, "step": 7134 }, { "epoch": 2.6137498282416525, "grad_norm": 0.14870432959097862, "learning_rate": 2.1555856149936156e-05, "loss": 0.4615, "num_tokens": 1277899940.0, "step": 7135 }, { "epoch": 2.614116246049558, "grad_norm": 0.1498751849107055, "learning_rate": 2.1551586415452643e-05, "loss": 0.4349, "num_tokens": 1278711607.0, "step": 7136 }, { "epoch": 2.6144826638574634, "grad_norm": 0.13184363756926792, "learning_rate": 2.1547316706215712e-05, "loss": 0.4912, "num_tokens": 1279521398.0, "step": 7137 }, { "epoch": 2.614849081665369, "grad_norm": 0.13848255948412716, "learning_rate": 2.1543047022465752e-05, "loss": 0.4816, "num_tokens": 1280312494.0, "step": 7138 }, { "epoch": 2.6152154994732744, "grad_norm": 0.16737685540836902, "learning_rate": 2.153877736444315e-05, "loss": 0.4722, "num_tokens": 1281095960.0, "step": 7139 }, { "epoch": 2.6155819172811796, "grad_norm": 0.1379868496679684, "learning_rate": 2.1534507732388303e-05, "loss": 0.4491, "num_tokens": 1281807803.0, "step": 7140 }, { "epoch": 2.6159483350890853, "grad_norm": 0.12741982165037663, "learning_rate": 2.1530238126541606e-05, "loss": 0.4559, "num_tokens": 1282635411.0, "step": 7141 }, { "epoch": 2.616314752896991, "grad_norm": 0.13784519220197694, "learning_rate": 2.152596854714344e-05, "loss": 0.4545, "num_tokens": 1283334763.0, "step": 7142 }, { "epoch": 2.6166811707048963, "grad_norm": 0.1271988148576599, "learning_rate": 2.1521698994434182e-05, "loss": 0.4434, "num_tokens": 1284159688.0, "step": 7143 }, { "epoch": 2.6170475885128015, "grad_norm": 0.13916960486858737, "learning_rate": 2.1517429468654227e-05, "loss": 0.4438, "num_tokens": 1284882048.0, "step": 7144 }, { "epoch": 2.617414006320707, "grad_norm": 0.13242827335347546, "learning_rate": 2.151315997004395e-05, "loss": 0.4781, "num_tokens": 1285669219.0, "step": 7145 }, { "epoch": 2.617780424128613, "grad_norm": 0.12535923940882443, "learning_rate": 2.150889049884375e-05, "loss": 0.4711, "num_tokens": 1286601988.0, "step": 7146 }, { "epoch": 2.618146841936518, "grad_norm": 0.13746799879923113, "learning_rate": 2.150462105529398e-05, "loss": 0.4646, "num_tokens": 1287274097.0, "step": 7147 }, { "epoch": 2.6185132597444234, "grad_norm": 0.14658769137197405, "learning_rate": 2.150035163963504e-05, "loss": 0.4732, "num_tokens": 1287985173.0, "step": 7148 }, { "epoch": 2.618879677552329, "grad_norm": 0.1257468118401747, "learning_rate": 2.1496082252107298e-05, "loss": 0.4478, "num_tokens": 1288792568.0, "step": 7149 }, { "epoch": 2.619246095360235, "grad_norm": 0.13769844067837334, "learning_rate": 2.1491812892951128e-05, "loss": 0.4402, "num_tokens": 1289514642.0, "step": 7150 }, { "epoch": 2.61961251316814, "grad_norm": 0.128783126923153, "learning_rate": 2.1487543562406904e-05, "loss": 0.4572, "num_tokens": 1290339640.0, "step": 7151 }, { "epoch": 2.6199789309760453, "grad_norm": 0.1399428211346556, "learning_rate": 2.1483274260715007e-05, "loss": 0.4732, "num_tokens": 1291030938.0, "step": 7152 }, { "epoch": 2.620345348783951, "grad_norm": 0.13650945441580137, "learning_rate": 2.147900498811579e-05, "loss": 0.5045, "num_tokens": 1291867997.0, "step": 7153 }, { "epoch": 2.6207117665918562, "grad_norm": 0.1416827868853199, "learning_rate": 2.1474735744849636e-05, "loss": 0.4692, "num_tokens": 1292616359.0, "step": 7154 }, { "epoch": 2.621078184399762, "grad_norm": 0.14004949098331773, "learning_rate": 2.147046653115691e-05, "loss": 0.4528, "num_tokens": 1293336072.0, "step": 7155 }, { "epoch": 2.621444602207667, "grad_norm": 0.13916094855656372, "learning_rate": 2.1466197347277965e-05, "loss": 0.4716, "num_tokens": 1294157924.0, "step": 7156 }, { "epoch": 2.621811020015573, "grad_norm": 0.14350517534175747, "learning_rate": 2.1461928193453176e-05, "loss": 0.4864, "num_tokens": 1294838711.0, "step": 7157 }, { "epoch": 2.622177437823478, "grad_norm": 0.13753880105364658, "learning_rate": 2.1457659069922906e-05, "loss": 0.4323, "num_tokens": 1295562349.0, "step": 7158 }, { "epoch": 2.622543855631384, "grad_norm": 0.142773515892326, "learning_rate": 2.145338997692751e-05, "loss": 0.4649, "num_tokens": 1296198014.0, "step": 7159 }, { "epoch": 2.622910273439289, "grad_norm": 0.12936466678760866, "learning_rate": 2.144912091470735e-05, "loss": 0.452, "num_tokens": 1296975575.0, "step": 7160 }, { "epoch": 2.6232766912471948, "grad_norm": 0.13322171010759795, "learning_rate": 2.144485188350278e-05, "loss": 0.4831, "num_tokens": 1297789830.0, "step": 7161 }, { "epoch": 2.6236431090551, "grad_norm": 0.13632201937205657, "learning_rate": 2.1440582883554145e-05, "loss": 0.4414, "num_tokens": 1298550725.0, "step": 7162 }, { "epoch": 2.6240095268630057, "grad_norm": 0.12685024738487932, "learning_rate": 2.143631391510182e-05, "loss": 0.442, "num_tokens": 1299343958.0, "step": 7163 }, { "epoch": 2.624375944670911, "grad_norm": 0.13724786390039212, "learning_rate": 2.1432044978386142e-05, "loss": 0.4908, "num_tokens": 1300122141.0, "step": 7164 }, { "epoch": 2.624742362478816, "grad_norm": 0.13019030526522082, "learning_rate": 2.142777607364747e-05, "loss": 0.4694, "num_tokens": 1300904919.0, "step": 7165 }, { "epoch": 2.625108780286722, "grad_norm": 0.1407699332464683, "learning_rate": 2.1423507201126145e-05, "loss": 0.465, "num_tokens": 1301624582.0, "step": 7166 }, { "epoch": 2.6254751980946276, "grad_norm": 0.12884518013659274, "learning_rate": 2.1419238361062505e-05, "loss": 0.4267, "num_tokens": 1302327512.0, "step": 7167 }, { "epoch": 2.625841615902533, "grad_norm": 0.15341678579198864, "learning_rate": 2.1414969553696906e-05, "loss": 0.4178, "num_tokens": 1303016131.0, "step": 7168 }, { "epoch": 2.626208033710438, "grad_norm": 0.13680412273463372, "learning_rate": 2.1410700779269696e-05, "loss": 0.4952, "num_tokens": 1303821537.0, "step": 7169 }, { "epoch": 2.626574451518344, "grad_norm": 0.14688342931126736, "learning_rate": 2.1406432038021198e-05, "loss": 0.4864, "num_tokens": 1304508634.0, "step": 7170 }, { "epoch": 2.6269408693262495, "grad_norm": 0.13040296767643442, "learning_rate": 2.140216333019176e-05, "loss": 0.4126, "num_tokens": 1305274597.0, "step": 7171 }, { "epoch": 2.6273072871341547, "grad_norm": 0.13554661192649273, "learning_rate": 2.1397894656021726e-05, "loss": 0.463, "num_tokens": 1306241260.0, "step": 7172 }, { "epoch": 2.62767370494206, "grad_norm": 0.13533928983870158, "learning_rate": 2.1393626015751414e-05, "loss": 0.4427, "num_tokens": 1306984609.0, "step": 7173 }, { "epoch": 2.6280401227499657, "grad_norm": 0.14021571854455747, "learning_rate": 2.1389357409621167e-05, "loss": 0.4779, "num_tokens": 1307783637.0, "step": 7174 }, { "epoch": 2.6284065405578714, "grad_norm": 0.13662924699192824, "learning_rate": 2.138508883787132e-05, "loss": 0.4653, "num_tokens": 1308497514.0, "step": 7175 }, { "epoch": 2.6287729583657766, "grad_norm": 0.14755951693115543, "learning_rate": 2.13808203007422e-05, "loss": 0.4449, "num_tokens": 1309210914.0, "step": 7176 }, { "epoch": 2.629139376173682, "grad_norm": 0.12583428293430346, "learning_rate": 2.1376551798474124e-05, "loss": 0.464, "num_tokens": 1310128185.0, "step": 7177 }, { "epoch": 2.6295057939815876, "grad_norm": 0.1196308617341384, "learning_rate": 2.1372283331307426e-05, "loss": 0.433, "num_tokens": 1310993700.0, "step": 7178 }, { "epoch": 2.629872211789493, "grad_norm": 0.1490957673198513, "learning_rate": 2.136801489948243e-05, "loss": 0.5007, "num_tokens": 1311718203.0, "step": 7179 }, { "epoch": 2.6302386295973985, "grad_norm": 0.1488127662759392, "learning_rate": 2.136374650323946e-05, "loss": 0.479, "num_tokens": 1312527502.0, "step": 7180 }, { "epoch": 2.6306050474053038, "grad_norm": 0.13765659302827768, "learning_rate": 2.135947814281882e-05, "loss": 0.4557, "num_tokens": 1313276113.0, "step": 7181 }, { "epoch": 2.6309714652132095, "grad_norm": 0.13482852114192242, "learning_rate": 2.135520981846085e-05, "loss": 0.4271, "num_tokens": 1314006958.0, "step": 7182 }, { "epoch": 2.6313378830211147, "grad_norm": 0.13912799418520766, "learning_rate": 2.1350941530405843e-05, "loss": 0.4469, "num_tokens": 1314846886.0, "step": 7183 }, { "epoch": 2.6317043008290204, "grad_norm": 0.14438433872350545, "learning_rate": 2.1346673278894122e-05, "loss": 0.455, "num_tokens": 1315576141.0, "step": 7184 }, { "epoch": 2.6320707186369257, "grad_norm": 0.13550172197290974, "learning_rate": 2.1342405064165996e-05, "loss": 0.4248, "num_tokens": 1316324427.0, "step": 7185 }, { "epoch": 2.6324371364448313, "grad_norm": 0.14060777538003638, "learning_rate": 2.133813688646178e-05, "loss": 0.4481, "num_tokens": 1317039952.0, "step": 7186 }, { "epoch": 2.6328035542527366, "grad_norm": 0.14577640393217292, "learning_rate": 2.1333868746021776e-05, "loss": 0.4517, "num_tokens": 1317718798.0, "step": 7187 }, { "epoch": 2.6331699720606423, "grad_norm": 0.13510140280030422, "learning_rate": 2.1329600643086295e-05, "loss": 0.4302, "num_tokens": 1318512588.0, "step": 7188 }, { "epoch": 2.6335363898685475, "grad_norm": 0.13590673541043005, "learning_rate": 2.1325332577895623e-05, "loss": 0.4498, "num_tokens": 1319209150.0, "step": 7189 }, { "epoch": 2.6339028076764532, "grad_norm": 0.1345909776780119, "learning_rate": 2.132106455069008e-05, "loss": 0.4501, "num_tokens": 1319987079.0, "step": 7190 }, { "epoch": 2.6342692254843585, "grad_norm": 0.15642564850294405, "learning_rate": 2.1316796561709955e-05, "loss": 0.4864, "num_tokens": 1320818265.0, "step": 7191 }, { "epoch": 2.634635643292264, "grad_norm": 0.1441217215029071, "learning_rate": 2.131252861119555e-05, "loss": 0.4758, "num_tokens": 1321605519.0, "step": 7192 }, { "epoch": 2.6350020611001694, "grad_norm": 0.12085928642297086, "learning_rate": 2.130826069938715e-05, "loss": 0.4452, "num_tokens": 1322547475.0, "step": 7193 }, { "epoch": 2.6353684789080747, "grad_norm": 0.1311905422718158, "learning_rate": 2.130399282652505e-05, "loss": 0.4492, "num_tokens": 1323283074.0, "step": 7194 }, { "epoch": 2.6357348967159804, "grad_norm": 0.12701323179334564, "learning_rate": 2.1299724992849535e-05, "loss": 0.4305, "num_tokens": 1324107860.0, "step": 7195 }, { "epoch": 2.636101314523886, "grad_norm": 0.12807562119917623, "learning_rate": 2.129545719860091e-05, "loss": 0.4304, "num_tokens": 1324981396.0, "step": 7196 }, { "epoch": 2.6364677323317913, "grad_norm": 0.13754593369915177, "learning_rate": 2.1291189444019452e-05, "loss": 0.4667, "num_tokens": 1325687812.0, "step": 7197 }, { "epoch": 2.6368341501396966, "grad_norm": 0.1265027613201718, "learning_rate": 2.1286921729345433e-05, "loss": 0.4757, "num_tokens": 1326467327.0, "step": 7198 }, { "epoch": 2.6372005679476023, "grad_norm": 0.13653088770123142, "learning_rate": 2.128265405481915e-05, "loss": 0.4639, "num_tokens": 1327186677.0, "step": 7199 }, { "epoch": 2.637566985755508, "grad_norm": 0.13057666831347567, "learning_rate": 2.1278386420680863e-05, "loss": 0.4351, "num_tokens": 1327979921.0, "step": 7200 }, { "epoch": 2.637933403563413, "grad_norm": 0.12647529549718764, "learning_rate": 2.1274118827170868e-05, "loss": 0.4323, "num_tokens": 1328761447.0, "step": 7201 }, { "epoch": 2.6382998213713185, "grad_norm": 0.12097995307009916, "learning_rate": 2.1269851274529426e-05, "loss": 0.4344, "num_tokens": 1329603685.0, "step": 7202 }, { "epoch": 2.638666239179224, "grad_norm": 0.1376506541198903, "learning_rate": 2.1265583762996815e-05, "loss": 0.4711, "num_tokens": 1330298635.0, "step": 7203 }, { "epoch": 2.6390326569871294, "grad_norm": 0.11789698387777432, "learning_rate": 2.1261316292813303e-05, "loss": 0.4463, "num_tokens": 1331233011.0, "step": 7204 }, { "epoch": 2.639399074795035, "grad_norm": 0.13792757145679682, "learning_rate": 2.1257048864219156e-05, "loss": 0.468, "num_tokens": 1332018969.0, "step": 7205 }, { "epoch": 2.6397654926029404, "grad_norm": 0.12169730922822887, "learning_rate": 2.125278147745463e-05, "loss": 0.4414, "num_tokens": 1332875031.0, "step": 7206 }, { "epoch": 2.640131910410846, "grad_norm": 0.1349975422460918, "learning_rate": 2.1248514132760007e-05, "loss": 0.4315, "num_tokens": 1333545966.0, "step": 7207 }, { "epoch": 2.6404983282187513, "grad_norm": 0.1252585084237483, "learning_rate": 2.124424683037553e-05, "loss": 0.431, "num_tokens": 1334291063.0, "step": 7208 }, { "epoch": 2.640864746026657, "grad_norm": 0.12299051959715775, "learning_rate": 2.1239979570541474e-05, "loss": 0.4314, "num_tokens": 1335035869.0, "step": 7209 }, { "epoch": 2.6412311638345622, "grad_norm": 0.11773125023149979, "learning_rate": 2.1235712353498074e-05, "loss": 0.4387, "num_tokens": 1335920057.0, "step": 7210 }, { "epoch": 2.641597581642468, "grad_norm": 0.12536698814508737, "learning_rate": 2.123144517948559e-05, "loss": 0.4589, "num_tokens": 1336747243.0, "step": 7211 }, { "epoch": 2.641963999450373, "grad_norm": 0.12198788333774128, "learning_rate": 2.122717804874428e-05, "loss": 0.4268, "num_tokens": 1337559875.0, "step": 7212 }, { "epoch": 2.642330417258279, "grad_norm": 0.13347459469290315, "learning_rate": 2.1222910961514387e-05, "loss": 0.4609, "num_tokens": 1338289507.0, "step": 7213 }, { "epoch": 2.642696835066184, "grad_norm": 0.12767663210811084, "learning_rate": 2.1218643918036156e-05, "loss": 0.4338, "num_tokens": 1339029502.0, "step": 7214 }, { "epoch": 2.64306325287409, "grad_norm": 0.12486770801281846, "learning_rate": 2.1214376918549832e-05, "loss": 0.4806, "num_tokens": 1339922221.0, "step": 7215 }, { "epoch": 2.643429670681995, "grad_norm": 0.13695647215162549, "learning_rate": 2.121010996329565e-05, "loss": 0.4486, "num_tokens": 1340696119.0, "step": 7216 }, { "epoch": 2.6437960884899008, "grad_norm": 0.12898477883008588, "learning_rate": 2.1205843052513848e-05, "loss": 0.4598, "num_tokens": 1341467171.0, "step": 7217 }, { "epoch": 2.644162506297806, "grad_norm": 0.13038716219957702, "learning_rate": 2.1201576186444672e-05, "loss": 0.4833, "num_tokens": 1342229548.0, "step": 7218 }, { "epoch": 2.6445289241057113, "grad_norm": 0.1387006581045581, "learning_rate": 2.119730936532834e-05, "loss": 0.4627, "num_tokens": 1342993379.0, "step": 7219 }, { "epoch": 2.644895341913617, "grad_norm": 0.12905958022484076, "learning_rate": 2.11930425894051e-05, "loss": 0.4605, "num_tokens": 1343745509.0, "step": 7220 }, { "epoch": 2.6452617597215227, "grad_norm": 0.12409000793688399, "learning_rate": 2.1188775858915177e-05, "loss": 0.4404, "num_tokens": 1344549718.0, "step": 7221 }, { "epoch": 2.645628177529428, "grad_norm": 0.13219988054231943, "learning_rate": 2.1184509174098774e-05, "loss": 0.4639, "num_tokens": 1345383585.0, "step": 7222 }, { "epoch": 2.645994595337333, "grad_norm": 0.1305179367126214, "learning_rate": 2.1180242535196137e-05, "loss": 0.4573, "num_tokens": 1346173647.0, "step": 7223 }, { "epoch": 2.646361013145239, "grad_norm": 0.13427950991215362, "learning_rate": 2.1175975942447482e-05, "loss": 0.4525, "num_tokens": 1347036831.0, "step": 7224 }, { "epoch": 2.6467274309531446, "grad_norm": 0.13909271570691897, "learning_rate": 2.1171709396093024e-05, "loss": 0.4917, "num_tokens": 1347736461.0, "step": 7225 }, { "epoch": 2.64709384876105, "grad_norm": 0.12669366030084503, "learning_rate": 2.1167442896372977e-05, "loss": 0.4598, "num_tokens": 1348638934.0, "step": 7226 }, { "epoch": 2.647460266568955, "grad_norm": 0.1288340150820287, "learning_rate": 2.1163176443527554e-05, "loss": 0.4494, "num_tokens": 1349424244.0, "step": 7227 }, { "epoch": 2.6478266843768608, "grad_norm": 0.14642487032565013, "learning_rate": 2.1158910037796964e-05, "loss": 0.4908, "num_tokens": 1350155442.0, "step": 7228 }, { "epoch": 2.6481931021847664, "grad_norm": 0.14460772355250218, "learning_rate": 2.1154643679421417e-05, "loss": 0.4851, "num_tokens": 1350817711.0, "step": 7229 }, { "epoch": 2.6485595199926717, "grad_norm": 0.1366372352732889, "learning_rate": 2.1150377368641116e-05, "loss": 0.4396, "num_tokens": 1351549856.0, "step": 7230 }, { "epoch": 2.648925937800577, "grad_norm": 0.14032877583913228, "learning_rate": 2.114611110569627e-05, "loss": 0.4692, "num_tokens": 1352351177.0, "step": 7231 }, { "epoch": 2.6492923556084826, "grad_norm": 0.12559578584887332, "learning_rate": 2.114184489082706e-05, "loss": 0.4495, "num_tokens": 1353167369.0, "step": 7232 }, { "epoch": 2.649658773416388, "grad_norm": 0.13660354605918276, "learning_rate": 2.11375787242737e-05, "loss": 0.4469, "num_tokens": 1353970111.0, "step": 7233 }, { "epoch": 2.6500251912242936, "grad_norm": 0.1508943011247, "learning_rate": 2.113331260627637e-05, "loss": 0.4717, "num_tokens": 1354746046.0, "step": 7234 }, { "epoch": 2.650391609032199, "grad_norm": 0.13110133001950075, "learning_rate": 2.1129046537075277e-05, "loss": 0.4339, "num_tokens": 1355518978.0, "step": 7235 }, { "epoch": 2.6507580268401045, "grad_norm": 0.11915047311325024, "learning_rate": 2.1124780516910597e-05, "loss": 0.4326, "num_tokens": 1356403910.0, "step": 7236 }, { "epoch": 2.65112444464801, "grad_norm": 0.14064814060009007, "learning_rate": 2.112051454602252e-05, "loss": 0.441, "num_tokens": 1357120818.0, "step": 7237 }, { "epoch": 2.6514908624559155, "grad_norm": 0.12490341502618636, "learning_rate": 2.111624862465123e-05, "loss": 0.4583, "num_tokens": 1357975351.0, "step": 7238 }, { "epoch": 2.6518572802638207, "grad_norm": 0.14283422239417412, "learning_rate": 2.1111982753036893e-05, "loss": 0.4473, "num_tokens": 1358669659.0, "step": 7239 }, { "epoch": 2.6522236980717264, "grad_norm": 0.14523749798289198, "learning_rate": 2.1107716931419707e-05, "loss": 0.5033, "num_tokens": 1359373169.0, "step": 7240 }, { "epoch": 2.6525901158796317, "grad_norm": 0.13735663827005304, "learning_rate": 2.1103451160039836e-05, "loss": 0.4657, "num_tokens": 1360165382.0, "step": 7241 }, { "epoch": 2.6529565336875374, "grad_norm": 0.14588159401345832, "learning_rate": 2.1099185439137447e-05, "loss": 0.5037, "num_tokens": 1360802584.0, "step": 7242 }, { "epoch": 2.6533229514954426, "grad_norm": 0.13139603363829944, "learning_rate": 2.109491976895272e-05, "loss": 0.4243, "num_tokens": 1361511109.0, "step": 7243 }, { "epoch": 2.653689369303348, "grad_norm": 0.12995666473608555, "learning_rate": 2.1090654149725815e-05, "loss": 0.4241, "num_tokens": 1362262769.0, "step": 7244 }, { "epoch": 2.6540557871112536, "grad_norm": 0.13462837985245613, "learning_rate": 2.1086388581696884e-05, "loss": 0.4437, "num_tokens": 1363043046.0, "step": 7245 }, { "epoch": 2.6544222049191593, "grad_norm": 0.14606116416774206, "learning_rate": 2.1082123065106102e-05, "loss": 0.4601, "num_tokens": 1363776529.0, "step": 7246 }, { "epoch": 2.6547886227270645, "grad_norm": 0.13882956311000932, "learning_rate": 2.107785760019363e-05, "loss": 0.4618, "num_tokens": 1364506363.0, "step": 7247 }, { "epoch": 2.6551550405349698, "grad_norm": 0.13205156458299594, "learning_rate": 2.1073592187199607e-05, "loss": 0.4657, "num_tokens": 1365202937.0, "step": 7248 }, { "epoch": 2.6555214583428755, "grad_norm": 0.1352227152433364, "learning_rate": 2.106932682636419e-05, "loss": 0.4793, "num_tokens": 1366090318.0, "step": 7249 }, { "epoch": 2.655887876150781, "grad_norm": 0.13530487700491642, "learning_rate": 2.1065061517927524e-05, "loss": 0.4594, "num_tokens": 1366913924.0, "step": 7250 }, { "epoch": 2.6562542939586864, "grad_norm": 0.14834110453208874, "learning_rate": 2.106079626212976e-05, "loss": 0.4683, "num_tokens": 1367612459.0, "step": 7251 }, { "epoch": 2.6566207117665916, "grad_norm": 0.12888318701155338, "learning_rate": 2.1056531059211048e-05, "loss": 0.452, "num_tokens": 1368357355.0, "step": 7252 }, { "epoch": 2.6569871295744973, "grad_norm": 0.14288136226540282, "learning_rate": 2.105226590941151e-05, "loss": 0.4544, "num_tokens": 1369034288.0, "step": 7253 }, { "epoch": 2.657353547382403, "grad_norm": 0.12044949173975814, "learning_rate": 2.1048000812971295e-05, "loss": 0.4299, "num_tokens": 1369778371.0, "step": 7254 }, { "epoch": 2.6577199651903083, "grad_norm": 0.13770863113231926, "learning_rate": 2.1043735770130533e-05, "loss": 0.4622, "num_tokens": 1370536201.0, "step": 7255 }, { "epoch": 2.6580863829982135, "grad_norm": 0.14414397129362988, "learning_rate": 2.1039470781129348e-05, "loss": 0.4547, "num_tokens": 1371308249.0, "step": 7256 }, { "epoch": 2.6584528008061192, "grad_norm": 0.1351000068237587, "learning_rate": 2.103520584620787e-05, "loss": 0.4756, "num_tokens": 1372095993.0, "step": 7257 }, { "epoch": 2.6588192186140245, "grad_norm": 0.13355168852588647, "learning_rate": 2.103094096560624e-05, "loss": 0.461, "num_tokens": 1372941145.0, "step": 7258 }, { "epoch": 2.65918563642193, "grad_norm": 0.12919627385712587, "learning_rate": 2.102667613956456e-05, "loss": 0.4374, "num_tokens": 1373741803.0, "step": 7259 }, { "epoch": 2.6595520542298354, "grad_norm": 0.1281809701741728, "learning_rate": 2.1022411368322947e-05, "loss": 0.4422, "num_tokens": 1374492765.0, "step": 7260 }, { "epoch": 2.659918472037741, "grad_norm": 0.1317969832626912, "learning_rate": 2.101814665212152e-05, "loss": 0.4514, "num_tokens": 1375318456.0, "step": 7261 }, { "epoch": 2.6602848898456464, "grad_norm": 0.1416760955857347, "learning_rate": 2.1013881991200408e-05, "loss": 0.4666, "num_tokens": 1375994880.0, "step": 7262 }, { "epoch": 2.660651307653552, "grad_norm": 0.1336470256016558, "learning_rate": 2.1009617385799695e-05, "loss": 0.4607, "num_tokens": 1376747816.0, "step": 7263 }, { "epoch": 2.6610177254614573, "grad_norm": 0.12946213423865874, "learning_rate": 2.1005352836159503e-05, "loss": 0.4341, "num_tokens": 1377596235.0, "step": 7264 }, { "epoch": 2.661384143269363, "grad_norm": 0.12612153130214815, "learning_rate": 2.1001088342519932e-05, "loss": 0.4633, "num_tokens": 1378335909.0, "step": 7265 }, { "epoch": 2.6617505610772683, "grad_norm": 0.1339747929129884, "learning_rate": 2.0996823905121072e-05, "loss": 0.4464, "num_tokens": 1379125307.0, "step": 7266 }, { "epoch": 2.662116978885174, "grad_norm": 0.1461385420529819, "learning_rate": 2.0992559524203023e-05, "loss": 0.4619, "num_tokens": 1379880783.0, "step": 7267 }, { "epoch": 2.662483396693079, "grad_norm": 0.12384907398648584, "learning_rate": 2.098829520000589e-05, "loss": 0.4468, "num_tokens": 1380667685.0, "step": 7268 }, { "epoch": 2.662849814500985, "grad_norm": 0.13469417391337263, "learning_rate": 2.0984030932769754e-05, "loss": 0.4501, "num_tokens": 1381419850.0, "step": 7269 }, { "epoch": 2.66321623230889, "grad_norm": 0.1435003585926067, "learning_rate": 2.0979766722734697e-05, "loss": 0.4692, "num_tokens": 1382195694.0, "step": 7270 }, { "epoch": 2.663582650116796, "grad_norm": 0.14293267937768897, "learning_rate": 2.097550257014081e-05, "loss": 0.4559, "num_tokens": 1382886053.0, "step": 7271 }, { "epoch": 2.663949067924701, "grad_norm": 0.1351799354319628, "learning_rate": 2.0971238475228164e-05, "loss": 0.4832, "num_tokens": 1383622634.0, "step": 7272 }, { "epoch": 2.6643154857326063, "grad_norm": 0.13695078971241745, "learning_rate": 2.0966974438236852e-05, "loss": 0.4584, "num_tokens": 1384387277.0, "step": 7273 }, { "epoch": 2.664681903540512, "grad_norm": 0.13352013680166352, "learning_rate": 2.0962710459406936e-05, "loss": 0.4463, "num_tokens": 1385154131.0, "step": 7274 }, { "epoch": 2.6650483213484177, "grad_norm": 0.12922955140000345, "learning_rate": 2.095844653897849e-05, "loss": 0.4533, "num_tokens": 1386036327.0, "step": 7275 }, { "epoch": 2.665414739156323, "grad_norm": 0.12356884208793398, "learning_rate": 2.095418267719158e-05, "loss": 0.4615, "num_tokens": 1386899284.0, "step": 7276 }, { "epoch": 2.6657811569642282, "grad_norm": 0.14134528630473564, "learning_rate": 2.094991887428627e-05, "loss": 0.4913, "num_tokens": 1387664525.0, "step": 7277 }, { "epoch": 2.666147574772134, "grad_norm": 0.13555852833308477, "learning_rate": 2.0945655130502612e-05, "loss": 0.4402, "num_tokens": 1388337948.0, "step": 7278 }, { "epoch": 2.6665139925800396, "grad_norm": 0.13265026608592695, "learning_rate": 2.094139144608068e-05, "loss": 0.459, "num_tokens": 1389157058.0, "step": 7279 }, { "epoch": 2.666880410387945, "grad_norm": 0.1322893656061972, "learning_rate": 2.093712782126052e-05, "loss": 0.4318, "num_tokens": 1389886381.0, "step": 7280 }, { "epoch": 2.66724682819585, "grad_norm": 0.12873648352341768, "learning_rate": 2.0932864256282187e-05, "loss": 0.4476, "num_tokens": 1390662946.0, "step": 7281 }, { "epoch": 2.667613246003756, "grad_norm": 0.13729598884114771, "learning_rate": 2.092860075138572e-05, "loss": 0.444, "num_tokens": 1391357868.0, "step": 7282 }, { "epoch": 2.6679796638116615, "grad_norm": 0.12243976515950572, "learning_rate": 2.092433730681116e-05, "loss": 0.4558, "num_tokens": 1392255850.0, "step": 7283 }, { "epoch": 2.6683460816195668, "grad_norm": 0.1282384980541456, "learning_rate": 2.092007392279856e-05, "loss": 0.4419, "num_tokens": 1393088670.0, "step": 7284 }, { "epoch": 2.668712499427472, "grad_norm": 0.1477992032243478, "learning_rate": 2.0915810599587957e-05, "loss": 0.4597, "num_tokens": 1393811787.0, "step": 7285 }, { "epoch": 2.6690789172353777, "grad_norm": 0.14618119626528822, "learning_rate": 2.0911547337419376e-05, "loss": 0.4333, "num_tokens": 1394358655.0, "step": 7286 }, { "epoch": 2.669445335043283, "grad_norm": 0.13330406936032055, "learning_rate": 2.0907284136532843e-05, "loss": 0.4419, "num_tokens": 1395146217.0, "step": 7287 }, { "epoch": 2.6698117528511887, "grad_norm": 0.13781905656253202, "learning_rate": 2.0903020997168396e-05, "loss": 0.4624, "num_tokens": 1395912431.0, "step": 7288 }, { "epoch": 2.670178170659094, "grad_norm": 0.14182329880432998, "learning_rate": 2.0898757919566052e-05, "loss": 0.488, "num_tokens": 1396718411.0, "step": 7289 }, { "epoch": 2.6705445884669996, "grad_norm": 0.15599718892767658, "learning_rate": 2.089449490396583e-05, "loss": 0.472, "num_tokens": 1397315484.0, "step": 7290 }, { "epoch": 2.670911006274905, "grad_norm": 0.12879155106326445, "learning_rate": 2.0890231950607753e-05, "loss": 0.4492, "num_tokens": 1398126873.0, "step": 7291 }, { "epoch": 2.6712774240828105, "grad_norm": 0.13485681437687025, "learning_rate": 2.0885969059731837e-05, "loss": 0.4528, "num_tokens": 1398836067.0, "step": 7292 }, { "epoch": 2.671643841890716, "grad_norm": 0.12965748449872194, "learning_rate": 2.0881706231578076e-05, "loss": 0.4211, "num_tokens": 1399617213.0, "step": 7293 }, { "epoch": 2.6720102596986215, "grad_norm": 0.1321741052722627, "learning_rate": 2.087744346638648e-05, "loss": 0.4497, "num_tokens": 1400346958.0, "step": 7294 }, { "epoch": 2.6723766775065267, "grad_norm": 0.1339806121209014, "learning_rate": 2.0873180764397057e-05, "loss": 0.473, "num_tokens": 1401260332.0, "step": 7295 }, { "epoch": 2.6727430953144324, "grad_norm": 0.13042393695015977, "learning_rate": 2.0868918125849803e-05, "loss": 0.441, "num_tokens": 1401997675.0, "step": 7296 }, { "epoch": 2.6731095131223377, "grad_norm": 0.14442421795386517, "learning_rate": 2.0864655550984718e-05, "loss": 0.4689, "num_tokens": 1402780922.0, "step": 7297 }, { "epoch": 2.673475930930243, "grad_norm": 0.13831879230385055, "learning_rate": 2.0860393040041787e-05, "loss": 0.4591, "num_tokens": 1403508491.0, "step": 7298 }, { "epoch": 2.6738423487381486, "grad_norm": 0.1433527774009548, "learning_rate": 2.0856130593261003e-05, "loss": 0.4651, "num_tokens": 1404242589.0, "step": 7299 }, { "epoch": 2.6742087665460543, "grad_norm": 0.14761423947321645, "learning_rate": 2.085186821088234e-05, "loss": 0.4574, "num_tokens": 1404931234.0, "step": 7300 }, { "epoch": 2.6745751843539596, "grad_norm": 0.13551814213652721, "learning_rate": 2.0847605893145784e-05, "loss": 0.4444, "num_tokens": 1405654274.0, "step": 7301 }, { "epoch": 2.674941602161865, "grad_norm": 0.13353736823022777, "learning_rate": 2.084334364029132e-05, "loss": 0.445, "num_tokens": 1406563720.0, "step": 7302 }, { "epoch": 2.6753080199697705, "grad_norm": 0.1318799692703322, "learning_rate": 2.0839081452558916e-05, "loss": 0.4284, "num_tokens": 1407298219.0, "step": 7303 }, { "epoch": 2.675674437777676, "grad_norm": 0.1347012227891173, "learning_rate": 2.0834819330188538e-05, "loss": 0.4725, "num_tokens": 1408113213.0, "step": 7304 }, { "epoch": 2.6760408555855815, "grad_norm": 0.13150639272005474, "learning_rate": 2.0830557273420147e-05, "loss": 0.4421, "num_tokens": 1408930033.0, "step": 7305 }, { "epoch": 2.6764072733934867, "grad_norm": 0.1358343021970922, "learning_rate": 2.082629528249372e-05, "loss": 0.4239, "num_tokens": 1409692324.0, "step": 7306 }, { "epoch": 2.6767736912013924, "grad_norm": 0.14064707414597613, "learning_rate": 2.0822033357649215e-05, "loss": 0.4521, "num_tokens": 1410456427.0, "step": 7307 }, { "epoch": 2.677140109009298, "grad_norm": 0.15143997864838957, "learning_rate": 2.0817771499126575e-05, "loss": 0.4672, "num_tokens": 1411187036.0, "step": 7308 }, { "epoch": 2.6775065268172034, "grad_norm": 0.1430479532395346, "learning_rate": 2.081350970716576e-05, "loss": 0.4486, "num_tokens": 1411866938.0, "step": 7309 }, { "epoch": 2.6778729446251086, "grad_norm": 0.12754416634582844, "learning_rate": 2.080924798200671e-05, "loss": 0.4232, "num_tokens": 1412703227.0, "step": 7310 }, { "epoch": 2.6782393624330143, "grad_norm": 0.14276544530264187, "learning_rate": 2.0804986323889363e-05, "loss": 0.4517, "num_tokens": 1413465831.0, "step": 7311 }, { "epoch": 2.6786057802409196, "grad_norm": 0.1364322472563259, "learning_rate": 2.0800724733053678e-05, "loss": 0.4744, "num_tokens": 1414265155.0, "step": 7312 }, { "epoch": 2.6789721980488252, "grad_norm": 0.13830060243801606, "learning_rate": 2.0796463209739587e-05, "loss": 0.4771, "num_tokens": 1415119754.0, "step": 7313 }, { "epoch": 2.6793386158567305, "grad_norm": 0.12183408950709719, "learning_rate": 2.079220175418701e-05, "loss": 0.458, "num_tokens": 1415987880.0, "step": 7314 }, { "epoch": 2.679705033664636, "grad_norm": 0.1494011639294332, "learning_rate": 2.078794036663588e-05, "loss": 0.4558, "num_tokens": 1416923193.0, "step": 7315 }, { "epoch": 2.6800714514725414, "grad_norm": 0.13962443074708258, "learning_rate": 2.078367904732613e-05, "loss": 0.4767, "num_tokens": 1417657553.0, "step": 7316 }, { "epoch": 2.680437869280447, "grad_norm": 0.12796734023711626, "learning_rate": 2.0779417796497667e-05, "loss": 0.4496, "num_tokens": 1418473066.0, "step": 7317 }, { "epoch": 2.6808042870883524, "grad_norm": 0.13815419922201244, "learning_rate": 2.0775156614390415e-05, "loss": 0.4486, "num_tokens": 1419256201.0, "step": 7318 }, { "epoch": 2.681170704896258, "grad_norm": 0.14473059126735247, "learning_rate": 2.0770895501244297e-05, "loss": 0.4642, "num_tokens": 1420024312.0, "step": 7319 }, { "epoch": 2.6815371227041633, "grad_norm": 0.13091167560887085, "learning_rate": 2.0766634457299212e-05, "loss": 0.448, "num_tokens": 1420789090.0, "step": 7320 }, { "epoch": 2.681903540512069, "grad_norm": 0.13350628658239122, "learning_rate": 2.0762373482795057e-05, "loss": 0.4611, "num_tokens": 1421523370.0, "step": 7321 }, { "epoch": 2.6822699583199743, "grad_norm": 0.12978316116917782, "learning_rate": 2.0758112577971742e-05, "loss": 0.4314, "num_tokens": 1422390355.0, "step": 7322 }, { "epoch": 2.68263637612788, "grad_norm": 0.127571389584046, "learning_rate": 2.0753851743069174e-05, "loss": 0.4552, "num_tokens": 1423366212.0, "step": 7323 }, { "epoch": 2.6830027939357852, "grad_norm": 0.1394353113239653, "learning_rate": 2.0749590978327233e-05, "loss": 0.4647, "num_tokens": 1424148864.0, "step": 7324 }, { "epoch": 2.683369211743691, "grad_norm": 0.1305843278761736, "learning_rate": 2.0745330283985812e-05, "loss": 0.4623, "num_tokens": 1425000245.0, "step": 7325 }, { "epoch": 2.683735629551596, "grad_norm": 0.134759638574138, "learning_rate": 2.07410696602848e-05, "loss": 0.4522, "num_tokens": 1425752734.0, "step": 7326 }, { "epoch": 2.6841020473595014, "grad_norm": 0.15025594651060514, "learning_rate": 2.073680910746408e-05, "loss": 0.4793, "num_tokens": 1426603613.0, "step": 7327 }, { "epoch": 2.684468465167407, "grad_norm": 0.13593424147690122, "learning_rate": 2.073254862576351e-05, "loss": 0.469, "num_tokens": 1427363003.0, "step": 7328 }, { "epoch": 2.684834882975313, "grad_norm": 0.1294362580000416, "learning_rate": 2.0728288215422986e-05, "loss": 0.452, "num_tokens": 1428222373.0, "step": 7329 }, { "epoch": 2.685201300783218, "grad_norm": 0.13481157414923503, "learning_rate": 2.0724027876682377e-05, "loss": 0.4421, "num_tokens": 1429045341.0, "step": 7330 }, { "epoch": 2.6855677185911233, "grad_norm": 0.1326897195515678, "learning_rate": 2.0719767609781544e-05, "loss": 0.4594, "num_tokens": 1429755896.0, "step": 7331 }, { "epoch": 2.685934136399029, "grad_norm": 0.13059472740892444, "learning_rate": 2.0715507414960334e-05, "loss": 0.4969, "num_tokens": 1430542531.0, "step": 7332 }, { "epoch": 2.6863005542069347, "grad_norm": 0.13066656076921723, "learning_rate": 2.0711247292458624e-05, "loss": 0.4565, "num_tokens": 1431447095.0, "step": 7333 }, { "epoch": 2.68666697201484, "grad_norm": 0.12878792368006178, "learning_rate": 2.070698724251626e-05, "loss": 0.4352, "num_tokens": 1432200829.0, "step": 7334 }, { "epoch": 2.687033389822745, "grad_norm": 0.13389669753273342, "learning_rate": 2.0702727265373093e-05, "loss": 0.4379, "num_tokens": 1433007557.0, "step": 7335 }, { "epoch": 2.687399807630651, "grad_norm": 0.13166352298105483, "learning_rate": 2.069846736126897e-05, "loss": 0.4464, "num_tokens": 1433800663.0, "step": 7336 }, { "epoch": 2.6877662254385566, "grad_norm": 0.14956368508924459, "learning_rate": 2.069420753044373e-05, "loss": 0.4953, "num_tokens": 1434556783.0, "step": 7337 }, { "epoch": 2.688132643246462, "grad_norm": 0.14375440181019986, "learning_rate": 2.0689947773137205e-05, "loss": 0.4686, "num_tokens": 1435414903.0, "step": 7338 }, { "epoch": 2.688499061054367, "grad_norm": 0.12797518910632913, "learning_rate": 2.0685688089589226e-05, "loss": 0.4426, "num_tokens": 1436135301.0, "step": 7339 }, { "epoch": 2.688865478862273, "grad_norm": 0.14021037933687094, "learning_rate": 2.068142848003964e-05, "loss": 0.4597, "num_tokens": 1436850209.0, "step": 7340 }, { "epoch": 2.689231896670178, "grad_norm": 0.13851196354035145, "learning_rate": 2.0677168944728255e-05, "loss": 0.4577, "num_tokens": 1437622989.0, "step": 7341 }, { "epoch": 2.6895983144780837, "grad_norm": 0.14971199167704227, "learning_rate": 2.0672909483894898e-05, "loss": 0.486, "num_tokens": 1438302780.0, "step": 7342 }, { "epoch": 2.689964732285989, "grad_norm": 0.1328975798383989, "learning_rate": 2.0668650097779377e-05, "loss": 0.4619, "num_tokens": 1438956588.0, "step": 7343 }, { "epoch": 2.6903311500938947, "grad_norm": 0.13574645794203366, "learning_rate": 2.066439078662151e-05, "loss": 0.4684, "num_tokens": 1439718446.0, "step": 7344 }, { "epoch": 2.6906975679018, "grad_norm": 0.1298009691011112, "learning_rate": 2.066013155066111e-05, "loss": 0.4747, "num_tokens": 1440485836.0, "step": 7345 }, { "epoch": 2.6910639857097056, "grad_norm": 0.133965302887791, "learning_rate": 2.0655872390137968e-05, "loss": 0.4427, "num_tokens": 1441364381.0, "step": 7346 }, { "epoch": 2.691430403517611, "grad_norm": 0.1309514960079107, "learning_rate": 2.0651613305291897e-05, "loss": 0.4238, "num_tokens": 1442136252.0, "step": 7347 }, { "epoch": 2.6917968213255166, "grad_norm": 0.13590658353370708, "learning_rate": 2.0647354296362682e-05, "loss": 0.4545, "num_tokens": 1442844999.0, "step": 7348 }, { "epoch": 2.692163239133422, "grad_norm": 0.1518883058662801, "learning_rate": 2.0643095363590118e-05, "loss": 0.4937, "num_tokens": 1443494674.0, "step": 7349 }, { "epoch": 2.6925296569413275, "grad_norm": 0.12284304190725417, "learning_rate": 2.063883650721398e-05, "loss": 0.4287, "num_tokens": 1444418667.0, "step": 7350 }, { "epoch": 2.6928960747492328, "grad_norm": 0.14628232486301967, "learning_rate": 2.0634577727474075e-05, "loss": 0.4906, "num_tokens": 1445166965.0, "step": 7351 }, { "epoch": 2.693262492557138, "grad_norm": 0.1416237565545407, "learning_rate": 2.063031902461016e-05, "loss": 0.4661, "num_tokens": 1445928863.0, "step": 7352 }, { "epoch": 2.6936289103650437, "grad_norm": 0.13559123349331775, "learning_rate": 2.0626060398862014e-05, "loss": 0.463, "num_tokens": 1446753310.0, "step": 7353 }, { "epoch": 2.6939953281729494, "grad_norm": 0.12532246872529046, "learning_rate": 2.0621801850469413e-05, "loss": 0.4297, "num_tokens": 1447632523.0, "step": 7354 }, { "epoch": 2.6943617459808546, "grad_norm": 0.14468177809182162, "learning_rate": 2.06175433796721e-05, "loss": 0.487, "num_tokens": 1448372156.0, "step": 7355 }, { "epoch": 2.69472816378876, "grad_norm": 0.12668644382673006, "learning_rate": 2.061328498670986e-05, "loss": 0.4485, "num_tokens": 1449203868.0, "step": 7356 }, { "epoch": 2.6950945815966656, "grad_norm": 0.13037724547973775, "learning_rate": 2.0609026671822444e-05, "loss": 0.418, "num_tokens": 1449954898.0, "step": 7357 }, { "epoch": 2.6954609994045713, "grad_norm": 0.13117977055516447, "learning_rate": 2.0604768435249596e-05, "loss": 0.4149, "num_tokens": 1450711696.0, "step": 7358 }, { "epoch": 2.6958274172124765, "grad_norm": 0.1504902377998185, "learning_rate": 2.0600510277231066e-05, "loss": 0.4965, "num_tokens": 1451420260.0, "step": 7359 }, { "epoch": 2.696193835020382, "grad_norm": 0.13223603238082318, "learning_rate": 2.0596252198006603e-05, "loss": 0.4462, "num_tokens": 1452164209.0, "step": 7360 }, { "epoch": 2.6965602528282875, "grad_norm": 0.13697307085495516, "learning_rate": 2.0591994197815928e-05, "loss": 0.4631, "num_tokens": 1452866662.0, "step": 7361 }, { "epoch": 2.696926670636193, "grad_norm": 0.15310926856759627, "learning_rate": 2.05877362768988e-05, "loss": 0.5059, "num_tokens": 1453545747.0, "step": 7362 }, { "epoch": 2.6972930884440984, "grad_norm": 0.13503522885865935, "learning_rate": 2.0583478435494922e-05, "loss": 0.4278, "num_tokens": 1454241198.0, "step": 7363 }, { "epoch": 2.6976595062520037, "grad_norm": 0.12827426994794194, "learning_rate": 2.057922067384404e-05, "loss": 0.4531, "num_tokens": 1455038637.0, "step": 7364 }, { "epoch": 2.6980259240599094, "grad_norm": 0.14500413059672315, "learning_rate": 2.057496299218587e-05, "loss": 0.4615, "num_tokens": 1455793680.0, "step": 7365 }, { "epoch": 2.6983923418678146, "grad_norm": 0.1278821523721432, "learning_rate": 2.057070539076011e-05, "loss": 0.4485, "num_tokens": 1456615945.0, "step": 7366 }, { "epoch": 2.6987587596757203, "grad_norm": 0.1481586261123296, "learning_rate": 2.0566447869806497e-05, "loss": 0.4823, "num_tokens": 1457271513.0, "step": 7367 }, { "epoch": 2.6991251774836256, "grad_norm": 0.12920856671412312, "learning_rate": 2.0562190429564726e-05, "loss": 0.4685, "num_tokens": 1458086285.0, "step": 7368 }, { "epoch": 2.6994915952915313, "grad_norm": 0.13172809301746213, "learning_rate": 2.0557933070274504e-05, "loss": 0.4522, "num_tokens": 1458845460.0, "step": 7369 }, { "epoch": 2.6998580130994365, "grad_norm": 0.14609583264984688, "learning_rate": 2.0553675792175517e-05, "loss": 0.4483, "num_tokens": 1459634770.0, "step": 7370 }, { "epoch": 2.700224430907342, "grad_norm": 0.1378124805380078, "learning_rate": 2.0549418595507475e-05, "loss": 0.4351, "num_tokens": 1460340332.0, "step": 7371 }, { "epoch": 2.7005908487152475, "grad_norm": 0.13628870948496496, "learning_rate": 2.0545161480510045e-05, "loss": 0.4548, "num_tokens": 1461009784.0, "step": 7372 }, { "epoch": 2.700957266523153, "grad_norm": 0.13271576408282165, "learning_rate": 2.0540904447422928e-05, "loss": 0.4592, "num_tokens": 1461892842.0, "step": 7373 }, { "epoch": 2.7013236843310584, "grad_norm": 0.15359327540086523, "learning_rate": 2.0536647496485805e-05, "loss": 0.5015, "num_tokens": 1462613185.0, "step": 7374 }, { "epoch": 2.701690102138964, "grad_norm": 0.14644527569397725, "learning_rate": 2.0532390627938347e-05, "loss": 0.5083, "num_tokens": 1463224432.0, "step": 7375 }, { "epoch": 2.7020565199468694, "grad_norm": 0.13698207927182257, "learning_rate": 2.0528133842020218e-05, "loss": 0.4426, "num_tokens": 1463978558.0, "step": 7376 }, { "epoch": 2.702422937754775, "grad_norm": 0.14346208256107162, "learning_rate": 2.0523877138971085e-05, "loss": 0.4755, "num_tokens": 1464589989.0, "step": 7377 }, { "epoch": 2.7027893555626803, "grad_norm": 0.13175966053859126, "learning_rate": 2.051962051903062e-05, "loss": 0.4725, "num_tokens": 1465445226.0, "step": 7378 }, { "epoch": 2.703155773370586, "grad_norm": 0.13762388859897035, "learning_rate": 2.051536398243847e-05, "loss": 0.4397, "num_tokens": 1466149741.0, "step": 7379 }, { "epoch": 2.7035221911784912, "grad_norm": 0.13083804965422038, "learning_rate": 2.051110752943428e-05, "loss": 0.4641, "num_tokens": 1466896650.0, "step": 7380 }, { "epoch": 2.7038886089863965, "grad_norm": 0.13300916315669, "learning_rate": 2.0506851160257713e-05, "loss": 0.4494, "num_tokens": 1467698806.0, "step": 7381 }, { "epoch": 2.704255026794302, "grad_norm": 0.12197920223590769, "learning_rate": 2.0502594875148403e-05, "loss": 0.4502, "num_tokens": 1468474206.0, "step": 7382 }, { "epoch": 2.704621444602208, "grad_norm": 0.13358752951310243, "learning_rate": 2.0498338674345976e-05, "loss": 0.4348, "num_tokens": 1469223941.0, "step": 7383 }, { "epoch": 2.704987862410113, "grad_norm": 0.13371131365936817, "learning_rate": 2.049408255809008e-05, "loss": 0.4328, "num_tokens": 1470015224.0, "step": 7384 }, { "epoch": 2.7053542802180184, "grad_norm": 0.1319894220503693, "learning_rate": 2.0489826526620346e-05, "loss": 0.4509, "num_tokens": 1470724585.0, "step": 7385 }, { "epoch": 2.705720698025924, "grad_norm": 0.1446749714435505, "learning_rate": 2.0485570580176383e-05, "loss": 0.4673, "num_tokens": 1471482108.0, "step": 7386 }, { "epoch": 2.7060871158338298, "grad_norm": 0.13018622464471946, "learning_rate": 2.0481314718997813e-05, "loss": 0.443, "num_tokens": 1472368256.0, "step": 7387 }, { "epoch": 2.706453533641735, "grad_norm": 0.12991401556972074, "learning_rate": 2.047705894332426e-05, "loss": 0.4572, "num_tokens": 1473143680.0, "step": 7388 }, { "epoch": 2.7068199514496403, "grad_norm": 0.13891899863466323, "learning_rate": 2.0472803253395313e-05, "loss": 0.4782, "num_tokens": 1473885147.0, "step": 7389 }, { "epoch": 2.707186369257546, "grad_norm": 0.1396732248826592, "learning_rate": 2.0468547649450597e-05, "loss": 0.4462, "num_tokens": 1474679514.0, "step": 7390 }, { "epoch": 2.707552787065451, "grad_norm": 0.13105153958052673, "learning_rate": 2.0464292131729695e-05, "loss": 0.4628, "num_tokens": 1475441257.0, "step": 7391 }, { "epoch": 2.707919204873357, "grad_norm": 0.13402311008864284, "learning_rate": 2.046003670047221e-05, "loss": 0.4812, "num_tokens": 1476178515.0, "step": 7392 }, { "epoch": 2.708285622681262, "grad_norm": 0.1331877035846869, "learning_rate": 2.045578135591773e-05, "loss": 0.4576, "num_tokens": 1476878290.0, "step": 7393 }, { "epoch": 2.708652040489168, "grad_norm": 0.12785563032994157, "learning_rate": 2.0451526098305834e-05, "loss": 0.4437, "num_tokens": 1477648097.0, "step": 7394 }, { "epoch": 2.709018458297073, "grad_norm": 0.1288199293716917, "learning_rate": 2.044727092787611e-05, "loss": 0.4522, "num_tokens": 1478356345.0, "step": 7395 }, { "epoch": 2.709384876104979, "grad_norm": 0.1426393342246121, "learning_rate": 2.0443015844868133e-05, "loss": 0.458, "num_tokens": 1479141679.0, "step": 7396 }, { "epoch": 2.709751293912884, "grad_norm": 0.12765294319064951, "learning_rate": 2.043876084952146e-05, "loss": 0.4385, "num_tokens": 1479893456.0, "step": 7397 }, { "epoch": 2.7101177117207897, "grad_norm": 0.13421111629284846, "learning_rate": 2.0434505942075668e-05, "loss": 0.4299, "num_tokens": 1480653917.0, "step": 7398 }, { "epoch": 2.710484129528695, "grad_norm": 0.1369483749139226, "learning_rate": 2.0430251122770313e-05, "loss": 0.4178, "num_tokens": 1481557557.0, "step": 7399 }, { "epoch": 2.7108505473366007, "grad_norm": 0.13725274763388337, "learning_rate": 2.042599639184494e-05, "loss": 0.4549, "num_tokens": 1482269081.0, "step": 7400 }, { "epoch": 2.711216965144506, "grad_norm": 0.13156935159886102, "learning_rate": 2.0421741749539116e-05, "loss": 0.4652, "num_tokens": 1483061418.0, "step": 7401 }, { "epoch": 2.7115833829524116, "grad_norm": 0.16025985066994103, "learning_rate": 2.041748719609238e-05, "loss": 0.4784, "num_tokens": 1483722768.0, "step": 7402 }, { "epoch": 2.711949800760317, "grad_norm": 0.1388011847426898, "learning_rate": 2.041323273174427e-05, "loss": 0.4557, "num_tokens": 1484376944.0, "step": 7403 }, { "epoch": 2.7123162185682226, "grad_norm": 0.1336591670580878, "learning_rate": 2.0408978356734316e-05, "loss": 0.4371, "num_tokens": 1485166873.0, "step": 7404 }, { "epoch": 2.712682636376128, "grad_norm": 0.12764735793204215, "learning_rate": 2.0404724071302048e-05, "loss": 0.4607, "num_tokens": 1486018592.0, "step": 7405 }, { "epoch": 2.713049054184033, "grad_norm": 0.1258263811116159, "learning_rate": 2.0400469875687002e-05, "loss": 0.4785, "num_tokens": 1486868859.0, "step": 7406 }, { "epoch": 2.7134154719919388, "grad_norm": 0.13837706204822786, "learning_rate": 2.0396215770128696e-05, "loss": 0.4811, "num_tokens": 1487599246.0, "step": 7407 }, { "epoch": 2.7137818897998445, "grad_norm": 0.13101520464364835, "learning_rate": 2.039196175486663e-05, "loss": 0.4333, "num_tokens": 1488314343.0, "step": 7408 }, { "epoch": 2.7141483076077497, "grad_norm": 0.12846846893924901, "learning_rate": 2.038770783014033e-05, "loss": 0.4426, "num_tokens": 1489084115.0, "step": 7409 }, { "epoch": 2.714514725415655, "grad_norm": 0.12788376711471414, "learning_rate": 2.038345399618929e-05, "loss": 0.4374, "num_tokens": 1489877983.0, "step": 7410 }, { "epoch": 2.7148811432235607, "grad_norm": 0.13322969555979872, "learning_rate": 2.0379200253253005e-05, "loss": 0.4479, "num_tokens": 1490591120.0, "step": 7411 }, { "epoch": 2.7152475610314664, "grad_norm": 0.1234105984793159, "learning_rate": 2.0374946601570988e-05, "loss": 0.4625, "num_tokens": 1491369265.0, "step": 7412 }, { "epoch": 2.7156139788393716, "grad_norm": 0.1372618308316587, "learning_rate": 2.0370693041382715e-05, "loss": 0.4814, "num_tokens": 1492135902.0, "step": 7413 }, { "epoch": 2.715980396647277, "grad_norm": 0.1302795625294742, "learning_rate": 2.0366439572927667e-05, "loss": 0.4436, "num_tokens": 1492903377.0, "step": 7414 }, { "epoch": 2.7163468144551826, "grad_norm": 0.1338453368768347, "learning_rate": 2.0362186196445336e-05, "loss": 0.4795, "num_tokens": 1493766346.0, "step": 7415 }, { "epoch": 2.7167132322630883, "grad_norm": 0.1279690625218325, "learning_rate": 2.0357932912175173e-05, "loss": 0.4651, "num_tokens": 1494478640.0, "step": 7416 }, { "epoch": 2.7170796500709935, "grad_norm": 0.13043289465506244, "learning_rate": 2.0353679720356677e-05, "loss": 0.437, "num_tokens": 1495190381.0, "step": 7417 }, { "epoch": 2.7174460678788988, "grad_norm": 0.14155746251007423, "learning_rate": 2.0349426621229288e-05, "loss": 0.4604, "num_tokens": 1495943313.0, "step": 7418 }, { "epoch": 2.7178124856868044, "grad_norm": 0.1411553126063352, "learning_rate": 2.0345173615032472e-05, "loss": 0.4606, "num_tokens": 1496782626.0, "step": 7419 }, { "epoch": 2.7181789034947097, "grad_norm": 0.13896959610076587, "learning_rate": 2.034092070200568e-05, "loss": 0.4701, "num_tokens": 1497447374.0, "step": 7420 }, { "epoch": 2.7185453213026154, "grad_norm": 0.1353052085417453, "learning_rate": 2.0336667882388358e-05, "loss": 0.444, "num_tokens": 1498145764.0, "step": 7421 }, { "epoch": 2.7189117391105206, "grad_norm": 0.12931470443284454, "learning_rate": 2.0332415156419944e-05, "loss": 0.4552, "num_tokens": 1498915264.0, "step": 7422 }, { "epoch": 2.7192781569184263, "grad_norm": 0.15057539225166255, "learning_rate": 2.0328162524339895e-05, "loss": 0.4697, "num_tokens": 1499587628.0, "step": 7423 }, { "epoch": 2.7196445747263316, "grad_norm": 0.1422395738564826, "learning_rate": 2.0323909986387626e-05, "loss": 0.4543, "num_tokens": 1500323481.0, "step": 7424 }, { "epoch": 2.7200109925342373, "grad_norm": 0.12074761914942689, "learning_rate": 2.031965754280256e-05, "loss": 0.4287, "num_tokens": 1501191215.0, "step": 7425 }, { "epoch": 2.7203774103421425, "grad_norm": 0.14010178118158823, "learning_rate": 2.031540519382413e-05, "loss": 0.4504, "num_tokens": 1501946260.0, "step": 7426 }, { "epoch": 2.7207438281500482, "grad_norm": 0.14546020288345562, "learning_rate": 2.031115293969174e-05, "loss": 0.4715, "num_tokens": 1502630082.0, "step": 7427 }, { "epoch": 2.7211102459579535, "grad_norm": 0.12075771899382162, "learning_rate": 2.0306900780644806e-05, "loss": 0.4553, "num_tokens": 1503536213.0, "step": 7428 }, { "epoch": 2.721476663765859, "grad_norm": 0.131993296796106, "learning_rate": 2.030264871692274e-05, "loss": 0.4437, "num_tokens": 1504362058.0, "step": 7429 }, { "epoch": 2.7218430815737644, "grad_norm": 0.1347247289210911, "learning_rate": 2.0298396748764936e-05, "loss": 0.4589, "num_tokens": 1505236078.0, "step": 7430 }, { "epoch": 2.7222094993816697, "grad_norm": 0.13150861161754482, "learning_rate": 2.029414487641078e-05, "loss": 0.4418, "num_tokens": 1506056380.0, "step": 7431 }, { "epoch": 2.7225759171895754, "grad_norm": 0.12602875014192044, "learning_rate": 2.0289893100099675e-05, "loss": 0.4451, "num_tokens": 1506841403.0, "step": 7432 }, { "epoch": 2.722942334997481, "grad_norm": 0.12986153983290885, "learning_rate": 2.028564142007099e-05, "loss": 0.4684, "num_tokens": 1507629213.0, "step": 7433 }, { "epoch": 2.7233087528053863, "grad_norm": 0.14057191404891586, "learning_rate": 2.0281389836564117e-05, "loss": 0.4759, "num_tokens": 1508388774.0, "step": 7434 }, { "epoch": 2.7236751706132916, "grad_norm": 0.137859934559246, "learning_rate": 2.027713834981842e-05, "loss": 0.4602, "num_tokens": 1509184875.0, "step": 7435 }, { "epoch": 2.7240415884211973, "grad_norm": 0.13554777246731278, "learning_rate": 2.027288696007327e-05, "loss": 0.4623, "num_tokens": 1509936834.0, "step": 7436 }, { "epoch": 2.724408006229103, "grad_norm": 0.11869542353654995, "learning_rate": 2.026863566756803e-05, "loss": 0.4083, "num_tokens": 1510770394.0, "step": 7437 }, { "epoch": 2.724774424037008, "grad_norm": 0.1386218709985569, "learning_rate": 2.0264384472542048e-05, "loss": 0.4283, "num_tokens": 1511515543.0, "step": 7438 }, { "epoch": 2.7251408418449135, "grad_norm": 0.14730566390319078, "learning_rate": 2.0260133375234677e-05, "loss": 0.4717, "num_tokens": 1512245566.0, "step": 7439 }, { "epoch": 2.725507259652819, "grad_norm": 0.14413769271436944, "learning_rate": 2.0255882375885278e-05, "loss": 0.4886, "num_tokens": 1513042608.0, "step": 7440 }, { "epoch": 2.725873677460725, "grad_norm": 0.13737936340902357, "learning_rate": 2.0251631474733174e-05, "loss": 0.4447, "num_tokens": 1513727376.0, "step": 7441 }, { "epoch": 2.72624009526863, "grad_norm": 0.13559419334101888, "learning_rate": 2.02473806720177e-05, "loss": 0.4343, "num_tokens": 1514409063.0, "step": 7442 }, { "epoch": 2.7266065130765353, "grad_norm": 0.1380835382720335, "learning_rate": 2.0243129967978192e-05, "loss": 0.4384, "num_tokens": 1515175244.0, "step": 7443 }, { "epoch": 2.726972930884441, "grad_norm": 0.13760333133691222, "learning_rate": 2.023887936285396e-05, "loss": 0.4581, "num_tokens": 1515959203.0, "step": 7444 }, { "epoch": 2.7273393486923463, "grad_norm": 0.13866505776001373, "learning_rate": 2.0234628856884338e-05, "loss": 0.44, "num_tokens": 1516685422.0, "step": 7445 }, { "epoch": 2.727705766500252, "grad_norm": 0.1410716827391726, "learning_rate": 2.0230378450308632e-05, "loss": 0.4837, "num_tokens": 1517442598.0, "step": 7446 }, { "epoch": 2.7280721843081572, "grad_norm": 0.13651930334016027, "learning_rate": 2.0226128143366143e-05, "loss": 0.4778, "num_tokens": 1518211273.0, "step": 7447 }, { "epoch": 2.728438602116063, "grad_norm": 0.1338515001961901, "learning_rate": 2.022187793629618e-05, "loss": 0.4432, "num_tokens": 1518905205.0, "step": 7448 }, { "epoch": 2.728805019923968, "grad_norm": 0.16081097168060854, "learning_rate": 2.0217627829338027e-05, "loss": 0.4662, "num_tokens": 1519637920.0, "step": 7449 }, { "epoch": 2.729171437731874, "grad_norm": 0.1416254273629818, "learning_rate": 2.0213377822730988e-05, "loss": 0.4683, "num_tokens": 1520414101.0, "step": 7450 }, { "epoch": 2.729537855539779, "grad_norm": 0.12847673983447222, "learning_rate": 2.020912791671434e-05, "loss": 0.464, "num_tokens": 1521211988.0, "step": 7451 }, { "epoch": 2.729904273347685, "grad_norm": 0.14488769540240864, "learning_rate": 2.020487811152736e-05, "loss": 0.4669, "num_tokens": 1521947507.0, "step": 7452 }, { "epoch": 2.73027069115559, "grad_norm": 0.15346843367991464, "learning_rate": 2.020062840740932e-05, "loss": 0.4859, "num_tokens": 1522715553.0, "step": 7453 }, { "epoch": 2.7306371089634958, "grad_norm": 0.15517858672273435, "learning_rate": 2.0196378804599492e-05, "loss": 0.4931, "num_tokens": 1523327426.0, "step": 7454 }, { "epoch": 2.731003526771401, "grad_norm": 0.13264597834129568, "learning_rate": 2.019212930333712e-05, "loss": 0.4568, "num_tokens": 1524156823.0, "step": 7455 }, { "epoch": 2.7313699445793067, "grad_norm": 0.12656199171772747, "learning_rate": 2.0187879903861475e-05, "loss": 0.4256, "num_tokens": 1524978337.0, "step": 7456 }, { "epoch": 2.731736362387212, "grad_norm": 0.1421295132198035, "learning_rate": 2.0183630606411815e-05, "loss": 0.4527, "num_tokens": 1525757567.0, "step": 7457 }, { "epoch": 2.7321027801951177, "grad_norm": 0.13138284433615294, "learning_rate": 2.017938141122737e-05, "loss": 0.4288, "num_tokens": 1526584134.0, "step": 7458 }, { "epoch": 2.732469198003023, "grad_norm": 0.1236879595104926, "learning_rate": 2.017513231854738e-05, "loss": 0.4681, "num_tokens": 1527460102.0, "step": 7459 }, { "epoch": 2.732835615810928, "grad_norm": 0.1368138635966651, "learning_rate": 2.017088332861107e-05, "loss": 0.4693, "num_tokens": 1528184733.0, "step": 7460 }, { "epoch": 2.733202033618834, "grad_norm": 0.13376259496758616, "learning_rate": 2.0166634441657687e-05, "loss": 0.4543, "num_tokens": 1529009951.0, "step": 7461 }, { "epoch": 2.7335684514267395, "grad_norm": 0.1396652991562071, "learning_rate": 2.0162385657926436e-05, "loss": 0.4795, "num_tokens": 1529776282.0, "step": 7462 }, { "epoch": 2.733934869234645, "grad_norm": 0.1411609624462668, "learning_rate": 2.0158136977656535e-05, "loss": 0.4848, "num_tokens": 1530465118.0, "step": 7463 }, { "epoch": 2.73430128704255, "grad_norm": 0.14605043428024383, "learning_rate": 2.0153888401087198e-05, "loss": 0.461, "num_tokens": 1531149823.0, "step": 7464 }, { "epoch": 2.7346677048504557, "grad_norm": 0.1467045524433222, "learning_rate": 2.014963992845762e-05, "loss": 0.4274, "num_tokens": 1531856208.0, "step": 7465 }, { "epoch": 2.7350341226583614, "grad_norm": 0.1391055946639217, "learning_rate": 2.0145391560006997e-05, "loss": 0.4734, "num_tokens": 1532555761.0, "step": 7466 }, { "epoch": 2.7354005404662667, "grad_norm": 0.15333732427064145, "learning_rate": 2.0141143295974532e-05, "loss": 0.4986, "num_tokens": 1533324694.0, "step": 7467 }, { "epoch": 2.735766958274172, "grad_norm": 0.13064454499778708, "learning_rate": 2.013689513659941e-05, "loss": 0.4356, "num_tokens": 1534069022.0, "step": 7468 }, { "epoch": 2.7361333760820776, "grad_norm": 0.1336343047382969, "learning_rate": 2.0132647082120803e-05, "loss": 0.46, "num_tokens": 1534785389.0, "step": 7469 }, { "epoch": 2.7364997938899833, "grad_norm": 0.12869846484330333, "learning_rate": 2.0128399132777892e-05, "loss": 0.4469, "num_tokens": 1535515660.0, "step": 7470 }, { "epoch": 2.7368662116978886, "grad_norm": 0.147556096409624, "learning_rate": 2.0124151288809838e-05, "loss": 0.4456, "num_tokens": 1536275823.0, "step": 7471 }, { "epoch": 2.737232629505794, "grad_norm": 0.12500297414357683, "learning_rate": 2.0119903550455798e-05, "loss": 0.4315, "num_tokens": 1537092874.0, "step": 7472 }, { "epoch": 2.7375990473136995, "grad_norm": 0.14915334856958026, "learning_rate": 2.0115655917954943e-05, "loss": 0.4835, "num_tokens": 1537917074.0, "step": 7473 }, { "epoch": 2.7379654651216048, "grad_norm": 0.15679048122694766, "learning_rate": 2.011140839154642e-05, "loss": 0.4679, "num_tokens": 1538622045.0, "step": 7474 }, { "epoch": 2.7383318829295105, "grad_norm": 0.12429760765053544, "learning_rate": 2.010716097146937e-05, "loss": 0.4356, "num_tokens": 1539394013.0, "step": 7475 }, { "epoch": 2.7386983007374157, "grad_norm": 0.13579933973911482, "learning_rate": 2.0102913657962924e-05, "loss": 0.4309, "num_tokens": 1540174258.0, "step": 7476 }, { "epoch": 2.7390647185453214, "grad_norm": 0.13620458053546705, "learning_rate": 2.0098666451266216e-05, "loss": 0.4756, "num_tokens": 1541020878.0, "step": 7477 }, { "epoch": 2.7394311363532267, "grad_norm": 0.13286203411735023, "learning_rate": 2.009441935161839e-05, "loss": 0.4645, "num_tokens": 1541798588.0, "step": 7478 }, { "epoch": 2.7397975541611324, "grad_norm": 0.1398667533314717, "learning_rate": 2.0090172359258555e-05, "loss": 0.4637, "num_tokens": 1542580960.0, "step": 7479 }, { "epoch": 2.7401639719690376, "grad_norm": 0.1264507511691734, "learning_rate": 2.0085925474425812e-05, "loss": 0.4634, "num_tokens": 1543486858.0, "step": 7480 }, { "epoch": 2.7405303897769433, "grad_norm": 0.14266475813278023, "learning_rate": 2.008167869735929e-05, "loss": 0.4514, "num_tokens": 1544219087.0, "step": 7481 }, { "epoch": 2.7408968075848485, "grad_norm": 0.13938262156122413, "learning_rate": 2.007743202829808e-05, "loss": 0.4437, "num_tokens": 1544852465.0, "step": 7482 }, { "epoch": 2.7412632253927542, "grad_norm": 0.13145922335040838, "learning_rate": 2.0073185467481277e-05, "loss": 0.4527, "num_tokens": 1545715809.0, "step": 7483 }, { "epoch": 2.7416296432006595, "grad_norm": 0.1420962483074854, "learning_rate": 2.006893901514797e-05, "loss": 0.4836, "num_tokens": 1546394829.0, "step": 7484 }, { "epoch": 2.7419960610085647, "grad_norm": 0.13462113147780935, "learning_rate": 2.006469267153725e-05, "loss": 0.4405, "num_tokens": 1547168267.0, "step": 7485 }, { "epoch": 2.7423624788164704, "grad_norm": 0.1360569988615451, "learning_rate": 2.0060446436888194e-05, "loss": 0.4645, "num_tokens": 1547855597.0, "step": 7486 }, { "epoch": 2.742728896624376, "grad_norm": 0.13380096931498806, "learning_rate": 2.0056200311439872e-05, "loss": 0.4562, "num_tokens": 1548621089.0, "step": 7487 }, { "epoch": 2.7430953144322814, "grad_norm": 0.14064155107179302, "learning_rate": 2.0051954295431337e-05, "loss": 0.4378, "num_tokens": 1549395075.0, "step": 7488 }, { "epoch": 2.7434617322401866, "grad_norm": 0.13934361455456876, "learning_rate": 2.0047708389101667e-05, "loss": 0.4466, "num_tokens": 1550165743.0, "step": 7489 }, { "epoch": 2.7438281500480923, "grad_norm": 0.1299611403484151, "learning_rate": 2.0043462592689904e-05, "loss": 0.4276, "num_tokens": 1550909133.0, "step": 7490 }, { "epoch": 2.744194567855998, "grad_norm": 0.14604348247341156, "learning_rate": 2.00392169064351e-05, "loss": 0.4826, "num_tokens": 1551598358.0, "step": 7491 }, { "epoch": 2.7445609856639033, "grad_norm": 0.12941639918379738, "learning_rate": 2.0034971330576293e-05, "loss": 0.4646, "num_tokens": 1552421449.0, "step": 7492 }, { "epoch": 2.7449274034718085, "grad_norm": 0.12939324756105686, "learning_rate": 2.0030725865352514e-05, "loss": 0.442, "num_tokens": 1553172029.0, "step": 7493 }, { "epoch": 2.745293821279714, "grad_norm": 0.12828299908597046, "learning_rate": 2.0026480511002794e-05, "loss": 0.4295, "num_tokens": 1554017616.0, "step": 7494 }, { "epoch": 2.74566023908762, "grad_norm": 0.1423658454155346, "learning_rate": 2.002223526776616e-05, "loss": 0.4418, "num_tokens": 1554750101.0, "step": 7495 }, { "epoch": 2.746026656895525, "grad_norm": 0.14285231191600545, "learning_rate": 2.0017990135881625e-05, "loss": 0.4745, "num_tokens": 1555489757.0, "step": 7496 }, { "epoch": 2.7463930747034304, "grad_norm": 0.132867773705677, "learning_rate": 2.0013745115588194e-05, "loss": 0.4542, "num_tokens": 1556298262.0, "step": 7497 }, { "epoch": 2.746759492511336, "grad_norm": 0.1332251292208229, "learning_rate": 2.0009500207124874e-05, "loss": 0.4768, "num_tokens": 1557086266.0, "step": 7498 }, { "epoch": 2.7471259103192414, "grad_norm": 0.13369393150879516, "learning_rate": 2.0005255410730657e-05, "loss": 0.4294, "num_tokens": 1557861432.0, "step": 7499 }, { "epoch": 2.747492328127147, "grad_norm": 0.1340982912593514, "learning_rate": 2.0001010726644535e-05, "loss": 0.4304, "num_tokens": 1558567809.0, "step": 7500 }, { "epoch": 2.7478587459350523, "grad_norm": 0.1444338594272758, "learning_rate": 1.9996766155105504e-05, "loss": 0.488, "num_tokens": 1559364361.0, "step": 7501 }, { "epoch": 2.748225163742958, "grad_norm": 0.13304776290852713, "learning_rate": 1.9992521696352524e-05, "loss": 0.4442, "num_tokens": 1560097242.0, "step": 7502 }, { "epoch": 2.7485915815508632, "grad_norm": 0.138340480418553, "learning_rate": 1.9988277350624576e-05, "loss": 0.4549, "num_tokens": 1560758811.0, "step": 7503 }, { "epoch": 2.748957999358769, "grad_norm": 0.14249773926040987, "learning_rate": 1.998403311816063e-05, "loss": 0.4558, "num_tokens": 1561428960.0, "step": 7504 }, { "epoch": 2.749324417166674, "grad_norm": 0.12699597691971246, "learning_rate": 1.997978899919963e-05, "loss": 0.4345, "num_tokens": 1562303047.0, "step": 7505 }, { "epoch": 2.74969083497458, "grad_norm": 0.21387698050502127, "learning_rate": 1.9975544993980542e-05, "loss": 0.4527, "num_tokens": 1563099801.0, "step": 7506 }, { "epoch": 2.750057252782485, "grad_norm": 0.12172338877349011, "learning_rate": 1.99713011027423e-05, "loss": 0.4245, "num_tokens": 1563950774.0, "step": 7507 }, { "epoch": 2.750423670590391, "grad_norm": 0.14414580428236554, "learning_rate": 1.996705732572386e-05, "loss": 0.4908, "num_tokens": 1564673506.0, "step": 7508 }, { "epoch": 2.750790088398296, "grad_norm": 0.13397444947055187, "learning_rate": 1.9962813663164145e-05, "loss": 0.4584, "num_tokens": 1565398827.0, "step": 7509 }, { "epoch": 2.751156506206202, "grad_norm": 0.13488653120376012, "learning_rate": 1.9958570115302072e-05, "loss": 0.4522, "num_tokens": 1566170868.0, "step": 7510 }, { "epoch": 2.751522924014107, "grad_norm": 0.14510768762160353, "learning_rate": 1.995432668237658e-05, "loss": 0.4814, "num_tokens": 1566992636.0, "step": 7511 }, { "epoch": 2.7518893418220127, "grad_norm": 0.1389656491262852, "learning_rate": 1.9950083364626573e-05, "loss": 0.4745, "num_tokens": 1567765507.0, "step": 7512 }, { "epoch": 2.752255759629918, "grad_norm": 0.1316868118457223, "learning_rate": 1.9945840162290958e-05, "loss": 0.4459, "num_tokens": 1568461061.0, "step": 7513 }, { "epoch": 2.7526221774378232, "grad_norm": 0.13224689978410106, "learning_rate": 1.9941597075608635e-05, "loss": 0.4547, "num_tokens": 1569289693.0, "step": 7514 }, { "epoch": 2.752988595245729, "grad_norm": 0.13433319094142776, "learning_rate": 1.9937354104818505e-05, "loss": 0.4169, "num_tokens": 1569995344.0, "step": 7515 }, { "epoch": 2.7533550130536346, "grad_norm": 0.1311353314826369, "learning_rate": 1.9933111250159442e-05, "loss": 0.4558, "num_tokens": 1570731360.0, "step": 7516 }, { "epoch": 2.75372143086154, "grad_norm": 0.1550650823759261, "learning_rate": 1.992886851187035e-05, "loss": 0.4798, "num_tokens": 1571449364.0, "step": 7517 }, { "epoch": 2.754087848669445, "grad_norm": 0.1445166675399481, "learning_rate": 1.9924625890190076e-05, "loss": 0.4961, "num_tokens": 1572102191.0, "step": 7518 }, { "epoch": 2.754454266477351, "grad_norm": 0.15310331257386825, "learning_rate": 1.992038338535751e-05, "loss": 0.4573, "num_tokens": 1572845024.0, "step": 7519 }, { "epoch": 2.7548206842852565, "grad_norm": 0.13955937492053203, "learning_rate": 1.9916140997611505e-05, "loss": 0.4502, "num_tokens": 1573625123.0, "step": 7520 }, { "epoch": 2.7551871020931618, "grad_norm": 0.13156097990816226, "learning_rate": 1.9911898727190907e-05, "loss": 0.4442, "num_tokens": 1574379856.0, "step": 7521 }, { "epoch": 2.755553519901067, "grad_norm": 0.15269940646430705, "learning_rate": 1.9907656574334585e-05, "loss": 0.467, "num_tokens": 1575014534.0, "step": 7522 }, { "epoch": 2.7559199377089727, "grad_norm": 0.13297579524384137, "learning_rate": 1.9903414539281366e-05, "loss": 0.4307, "num_tokens": 1575844062.0, "step": 7523 }, { "epoch": 2.7562863555168784, "grad_norm": 0.1340437312523839, "learning_rate": 1.989917262227009e-05, "loss": 0.4433, "num_tokens": 1576664904.0, "step": 7524 }, { "epoch": 2.7566527733247836, "grad_norm": 0.1371502550140457, "learning_rate": 1.989493082353958e-05, "loss": 0.47, "num_tokens": 1577410593.0, "step": 7525 }, { "epoch": 2.757019191132689, "grad_norm": 0.13785240972032073, "learning_rate": 1.9890689143328667e-05, "loss": 0.468, "num_tokens": 1578204582.0, "step": 7526 }, { "epoch": 2.7573856089405946, "grad_norm": 0.14519585440041577, "learning_rate": 1.988644758187615e-05, "loss": 0.5067, "num_tokens": 1578908904.0, "step": 7527 }, { "epoch": 2.7577520267485, "grad_norm": 0.14653470079130743, "learning_rate": 1.9882206139420855e-05, "loss": 0.4522, "num_tokens": 1579716864.0, "step": 7528 }, { "epoch": 2.7581184445564055, "grad_norm": 0.13671348907811398, "learning_rate": 1.9877964816201575e-05, "loss": 0.4566, "num_tokens": 1580531034.0, "step": 7529 }, { "epoch": 2.758484862364311, "grad_norm": 0.14266899675383504, "learning_rate": 1.987372361245711e-05, "loss": 0.4451, "num_tokens": 1581271376.0, "step": 7530 }, { "epoch": 2.7588512801722165, "grad_norm": 0.13352250850060923, "learning_rate": 1.9869482528426238e-05, "loss": 0.4414, "num_tokens": 1582078084.0, "step": 7531 }, { "epoch": 2.7592176979801217, "grad_norm": 0.13115705159565497, "learning_rate": 1.9865241564347742e-05, "loss": 0.4429, "num_tokens": 1582906198.0, "step": 7532 }, { "epoch": 2.7595841157880274, "grad_norm": 0.13920254289777664, "learning_rate": 1.9861000720460416e-05, "loss": 0.4595, "num_tokens": 1583705708.0, "step": 7533 }, { "epoch": 2.7599505335959327, "grad_norm": 0.1288479893534187, "learning_rate": 1.985675999700301e-05, "loss": 0.4484, "num_tokens": 1584530181.0, "step": 7534 }, { "epoch": 2.7603169514038384, "grad_norm": 0.15992801593237535, "learning_rate": 1.9852519394214286e-05, "loss": 0.4736, "num_tokens": 1585196234.0, "step": 7535 }, { "epoch": 2.7606833692117436, "grad_norm": 0.14446087211963152, "learning_rate": 1.9848278912333e-05, "loss": 0.5083, "num_tokens": 1585918476.0, "step": 7536 }, { "epoch": 2.7610497870196493, "grad_norm": 0.1290658731182773, "learning_rate": 1.9844038551597907e-05, "loss": 0.4318, "num_tokens": 1586819549.0, "step": 7537 }, { "epoch": 2.7614162048275546, "grad_norm": 0.13721291963633386, "learning_rate": 1.9839798312247733e-05, "loss": 0.4551, "num_tokens": 1587628893.0, "step": 7538 }, { "epoch": 2.76178262263546, "grad_norm": 0.13297210995377717, "learning_rate": 1.9835558194521223e-05, "loss": 0.4366, "num_tokens": 1588433989.0, "step": 7539 }, { "epoch": 2.7621490404433655, "grad_norm": 0.1501465189366655, "learning_rate": 1.9831318198657107e-05, "loss": 0.4934, "num_tokens": 1589164516.0, "step": 7540 }, { "epoch": 2.762515458251271, "grad_norm": 0.13770213764823686, "learning_rate": 1.9827078324894095e-05, "loss": 0.4469, "num_tokens": 1589888307.0, "step": 7541 }, { "epoch": 2.7628818760591765, "grad_norm": 0.13255901265372025, "learning_rate": 1.982283857347091e-05, "loss": 0.4427, "num_tokens": 1590614051.0, "step": 7542 }, { "epoch": 2.7632482938670817, "grad_norm": 0.13069922211133633, "learning_rate": 1.9818598944626242e-05, "loss": 0.459, "num_tokens": 1591442529.0, "step": 7543 }, { "epoch": 2.7636147116749874, "grad_norm": 0.13474977618803335, "learning_rate": 1.981435943859882e-05, "loss": 0.4462, "num_tokens": 1592217792.0, "step": 7544 }, { "epoch": 2.763981129482893, "grad_norm": 0.1401284447403429, "learning_rate": 1.9810120055627307e-05, "loss": 0.5021, "num_tokens": 1592954836.0, "step": 7545 }, { "epoch": 2.7643475472907983, "grad_norm": 0.12639671955043147, "learning_rate": 1.9805880795950404e-05, "loss": 0.4467, "num_tokens": 1593761234.0, "step": 7546 }, { "epoch": 2.7647139650987036, "grad_norm": 0.13255822541211368, "learning_rate": 1.980164165980679e-05, "loss": 0.4503, "num_tokens": 1594557999.0, "step": 7547 }, { "epoch": 2.7650803829066093, "grad_norm": 0.13761897314808635, "learning_rate": 1.9797402647435125e-05, "loss": 0.4598, "num_tokens": 1595227025.0, "step": 7548 }, { "epoch": 2.765446800714515, "grad_norm": 0.14663471671478773, "learning_rate": 1.9793163759074082e-05, "loss": 0.4698, "num_tokens": 1595921382.0, "step": 7549 }, { "epoch": 2.7658132185224202, "grad_norm": 0.13104571886155186, "learning_rate": 1.9788924994962327e-05, "loss": 0.4194, "num_tokens": 1596627456.0, "step": 7550 }, { "epoch": 2.7661796363303255, "grad_norm": 0.1288682537944233, "learning_rate": 1.97846863553385e-05, "loss": 0.4357, "num_tokens": 1597337500.0, "step": 7551 }, { "epoch": 2.766546054138231, "grad_norm": 0.1284545743136657, "learning_rate": 1.9780447840441245e-05, "loss": 0.4627, "num_tokens": 1598172917.0, "step": 7552 }, { "epoch": 2.7669124719461364, "grad_norm": 0.12342222743145354, "learning_rate": 1.9776209450509208e-05, "loss": 0.426, "num_tokens": 1598949025.0, "step": 7553 }, { "epoch": 2.767278889754042, "grad_norm": 0.1362621987716605, "learning_rate": 1.9771971185781004e-05, "loss": 0.4269, "num_tokens": 1599657599.0, "step": 7554 }, { "epoch": 2.7676453075619474, "grad_norm": 0.14451200766448286, "learning_rate": 1.9767733046495262e-05, "loss": 0.4697, "num_tokens": 1600285523.0, "step": 7555 }, { "epoch": 2.768011725369853, "grad_norm": 0.1273143984456294, "learning_rate": 1.9763495032890606e-05, "loss": 0.4391, "num_tokens": 1601214532.0, "step": 7556 }, { "epoch": 2.7683781431777583, "grad_norm": 0.12567955485446525, "learning_rate": 1.9759257145205643e-05, "loss": 0.4325, "num_tokens": 1601911843.0, "step": 7557 }, { "epoch": 2.768744560985664, "grad_norm": 0.13038567989964264, "learning_rate": 1.975501938367896e-05, "loss": 0.4822, "num_tokens": 1602716475.0, "step": 7558 }, { "epoch": 2.7691109787935693, "grad_norm": 0.13334053050327108, "learning_rate": 1.9750781748549167e-05, "loss": 0.463, "num_tokens": 1603540712.0, "step": 7559 }, { "epoch": 2.769477396601475, "grad_norm": 0.1385047458342151, "learning_rate": 1.9746544240054838e-05, "loss": 0.4671, "num_tokens": 1604300382.0, "step": 7560 }, { "epoch": 2.76984381440938, "grad_norm": 0.1377904823353336, "learning_rate": 1.9742306858434572e-05, "loss": 0.4244, "num_tokens": 1605037307.0, "step": 7561 }, { "epoch": 2.770210232217286, "grad_norm": 0.14788736549090029, "learning_rate": 1.9738069603926924e-05, "loss": 0.4494, "num_tokens": 1605688782.0, "step": 7562 }, { "epoch": 2.770576650025191, "grad_norm": 0.140553306283654, "learning_rate": 1.973383247677047e-05, "loss": 0.4442, "num_tokens": 1606415987.0, "step": 7563 }, { "epoch": 2.770943067833097, "grad_norm": 0.14018102593093273, "learning_rate": 1.9729595477203768e-05, "loss": 0.4532, "num_tokens": 1607254914.0, "step": 7564 }, { "epoch": 2.771309485641002, "grad_norm": 0.13723051162395158, "learning_rate": 1.9725358605465362e-05, "loss": 0.4755, "num_tokens": 1608001747.0, "step": 7565 }, { "epoch": 2.771675903448908, "grad_norm": 0.13998378145552254, "learning_rate": 1.9721121861793798e-05, "loss": 0.439, "num_tokens": 1608864646.0, "step": 7566 }, { "epoch": 2.772042321256813, "grad_norm": 0.13182270719559946, "learning_rate": 1.971688524642762e-05, "loss": 0.4437, "num_tokens": 1609696140.0, "step": 7567 }, { "epoch": 2.7724087390647183, "grad_norm": 0.13156236513292546, "learning_rate": 1.971264875960536e-05, "loss": 0.4599, "num_tokens": 1610527510.0, "step": 7568 }, { "epoch": 2.772775156872624, "grad_norm": 0.13190080553422381, "learning_rate": 1.9708412401565525e-05, "loss": 0.4618, "num_tokens": 1611348591.0, "step": 7569 }, { "epoch": 2.7731415746805297, "grad_norm": 0.144011001399487, "learning_rate": 1.970417617254665e-05, "loss": 0.4379, "num_tokens": 1612065291.0, "step": 7570 }, { "epoch": 2.773507992488435, "grad_norm": 0.13747342760260847, "learning_rate": 1.9699940072787225e-05, "loss": 0.4185, "num_tokens": 1612683532.0, "step": 7571 }, { "epoch": 2.77387441029634, "grad_norm": 0.1284310228446507, "learning_rate": 1.9695704102525765e-05, "loss": 0.4333, "num_tokens": 1613551456.0, "step": 7572 }, { "epoch": 2.774240828104246, "grad_norm": 0.1307624319816685, "learning_rate": 1.9691468262000754e-05, "loss": 0.4242, "num_tokens": 1614343610.0, "step": 7573 }, { "epoch": 2.7746072459121516, "grad_norm": 0.12937971147200134, "learning_rate": 1.9687232551450684e-05, "loss": 0.4385, "num_tokens": 1615062468.0, "step": 7574 }, { "epoch": 2.774973663720057, "grad_norm": 0.11647504908030797, "learning_rate": 1.9682996971114035e-05, "loss": 0.416, "num_tokens": 1615943359.0, "step": 7575 }, { "epoch": 2.775340081527962, "grad_norm": 0.1357642212279759, "learning_rate": 1.967876152122927e-05, "loss": 0.4534, "num_tokens": 1616634102.0, "step": 7576 }, { "epoch": 2.7757064993358678, "grad_norm": 0.12471236088940568, "learning_rate": 1.967452620203486e-05, "loss": 0.4526, "num_tokens": 1617520579.0, "step": 7577 }, { "epoch": 2.776072917143773, "grad_norm": 0.12098153101492762, "learning_rate": 1.967029101376927e-05, "loss": 0.4686, "num_tokens": 1618405362.0, "step": 7578 }, { "epoch": 2.7764393349516787, "grad_norm": 0.12499984613935666, "learning_rate": 1.9666055956670925e-05, "loss": 0.439, "num_tokens": 1619219687.0, "step": 7579 }, { "epoch": 2.776805752759584, "grad_norm": 0.1276007943613099, "learning_rate": 1.9661821030978298e-05, "loss": 0.4328, "num_tokens": 1620032006.0, "step": 7580 }, { "epoch": 2.7771721705674897, "grad_norm": 0.13717209666387653, "learning_rate": 1.96575862369298e-05, "loss": 0.4857, "num_tokens": 1620783756.0, "step": 7581 }, { "epoch": 2.777538588375395, "grad_norm": 0.1327493261915098, "learning_rate": 1.9653351574763862e-05, "loss": 0.4557, "num_tokens": 1621515887.0, "step": 7582 }, { "epoch": 2.7779050061833006, "grad_norm": 0.13608556357641474, "learning_rate": 1.9649117044718916e-05, "loss": 0.4605, "num_tokens": 1622285836.0, "step": 7583 }, { "epoch": 2.778271423991206, "grad_norm": 0.13526591752662406, "learning_rate": 1.9644882647033364e-05, "loss": 0.4175, "num_tokens": 1622988335.0, "step": 7584 }, { "epoch": 2.7786378417991116, "grad_norm": 0.14154179355437174, "learning_rate": 1.9640648381945626e-05, "loss": 0.4474, "num_tokens": 1623639615.0, "step": 7585 }, { "epoch": 2.779004259607017, "grad_norm": 0.12397386631513427, "learning_rate": 1.9636414249694073e-05, "loss": 0.4442, "num_tokens": 1624483918.0, "step": 7586 }, { "epoch": 2.7793706774149225, "grad_norm": 0.12886374032956271, "learning_rate": 1.963218025051712e-05, "loss": 0.4658, "num_tokens": 1625237707.0, "step": 7587 }, { "epoch": 2.7797370952228277, "grad_norm": 0.13650791957883274, "learning_rate": 1.9627946384653126e-05, "loss": 0.4361, "num_tokens": 1625939870.0, "step": 7588 }, { "epoch": 2.7801035130307334, "grad_norm": 0.12776776004302662, "learning_rate": 1.962371265234049e-05, "loss": 0.4427, "num_tokens": 1626738506.0, "step": 7589 }, { "epoch": 2.7804699308386387, "grad_norm": 0.1411938482650863, "learning_rate": 1.9619479053817563e-05, "loss": 0.492, "num_tokens": 1627420306.0, "step": 7590 }, { "epoch": 2.7808363486465444, "grad_norm": 0.12883176893303164, "learning_rate": 1.9615245589322717e-05, "loss": 0.4218, "num_tokens": 1628291721.0, "step": 7591 }, { "epoch": 2.7812027664544496, "grad_norm": 0.13228115825061393, "learning_rate": 1.9611012259094292e-05, "loss": 0.4499, "num_tokens": 1628971401.0, "step": 7592 }, { "epoch": 2.781569184262355, "grad_norm": 0.13618716075934723, "learning_rate": 1.9606779063370647e-05, "loss": 0.4693, "num_tokens": 1629668683.0, "step": 7593 }, { "epoch": 2.7819356020702606, "grad_norm": 0.13408993876982972, "learning_rate": 1.9602546002390107e-05, "loss": 0.4607, "num_tokens": 1630407323.0, "step": 7594 }, { "epoch": 2.7823020198781663, "grad_norm": 0.13663946930332632, "learning_rate": 1.959831307639101e-05, "loss": 0.4712, "num_tokens": 1631122803.0, "step": 7595 }, { "epoch": 2.7826684376860715, "grad_norm": 0.13173657587341092, "learning_rate": 1.959408028561167e-05, "loss": 0.4416, "num_tokens": 1631870356.0, "step": 7596 }, { "epoch": 2.783034855493977, "grad_norm": 0.12517015621689462, "learning_rate": 1.958984763029041e-05, "loss": 0.4447, "num_tokens": 1632657075.0, "step": 7597 }, { "epoch": 2.7834012733018825, "grad_norm": 0.12539845348638842, "learning_rate": 1.9585615110665538e-05, "loss": 0.457, "num_tokens": 1633495851.0, "step": 7598 }, { "epoch": 2.783767691109788, "grad_norm": 0.13460082629752818, "learning_rate": 1.9581382726975337e-05, "loss": 0.4855, "num_tokens": 1634308650.0, "step": 7599 }, { "epoch": 2.7841341089176934, "grad_norm": 0.13194234043687778, "learning_rate": 1.9577150479458114e-05, "loss": 0.4272, "num_tokens": 1635091170.0, "step": 7600 }, { "epoch": 2.7845005267255987, "grad_norm": 0.12592357240439914, "learning_rate": 1.9572918368352155e-05, "loss": 0.4671, "num_tokens": 1635896458.0, "step": 7601 }, { "epoch": 2.7848669445335044, "grad_norm": 0.12817101463695757, "learning_rate": 1.9568686393895728e-05, "loss": 0.4555, "num_tokens": 1636681517.0, "step": 7602 }, { "epoch": 2.78523336234141, "grad_norm": 0.1330351463791439, "learning_rate": 1.95644545563271e-05, "loss": 0.4544, "num_tokens": 1637434080.0, "step": 7603 }, { "epoch": 2.7855997801493153, "grad_norm": 0.13959971683800343, "learning_rate": 1.956022285588454e-05, "loss": 0.4585, "num_tokens": 1638214386.0, "step": 7604 }, { "epoch": 2.7859661979572206, "grad_norm": 0.13447400500087367, "learning_rate": 1.95559912928063e-05, "loss": 0.4261, "num_tokens": 1639024689.0, "step": 7605 }, { "epoch": 2.7863326157651263, "grad_norm": 0.13154147902761637, "learning_rate": 1.9551759867330617e-05, "loss": 0.4739, "num_tokens": 1639769325.0, "step": 7606 }, { "epoch": 2.7866990335730315, "grad_norm": 0.1372776882587874, "learning_rate": 1.9547528579695734e-05, "loss": 0.4521, "num_tokens": 1640494231.0, "step": 7607 }, { "epoch": 2.787065451380937, "grad_norm": 0.13037930299121414, "learning_rate": 1.9543297430139887e-05, "loss": 0.442, "num_tokens": 1641251498.0, "step": 7608 }, { "epoch": 2.7874318691888424, "grad_norm": 0.1382884045740459, "learning_rate": 1.9539066418901287e-05, "loss": 0.4285, "num_tokens": 1642015914.0, "step": 7609 }, { "epoch": 2.787798286996748, "grad_norm": 0.1490631477664015, "learning_rate": 1.9534835546218155e-05, "loss": 0.498, "num_tokens": 1642733334.0, "step": 7610 }, { "epoch": 2.7881647048046534, "grad_norm": 0.1355903184510411, "learning_rate": 1.9530604812328696e-05, "loss": 0.4439, "num_tokens": 1643548472.0, "step": 7611 }, { "epoch": 2.788531122612559, "grad_norm": 0.12857856410044308, "learning_rate": 1.9526374217471114e-05, "loss": 0.4755, "num_tokens": 1644395793.0, "step": 7612 }, { "epoch": 2.7888975404204643, "grad_norm": 0.14497622833654833, "learning_rate": 1.9522143761883586e-05, "loss": 0.4499, "num_tokens": 1645099675.0, "step": 7613 }, { "epoch": 2.78926395822837, "grad_norm": 0.14445491783525544, "learning_rate": 1.951791344580431e-05, "loss": 0.4338, "num_tokens": 1645843396.0, "step": 7614 }, { "epoch": 2.7896303760362753, "grad_norm": 0.14083066878803735, "learning_rate": 1.951368326947145e-05, "loss": 0.4465, "num_tokens": 1646585635.0, "step": 7615 }, { "epoch": 2.789996793844181, "grad_norm": 0.13174528742589647, "learning_rate": 1.9509453233123188e-05, "loss": 0.4757, "num_tokens": 1647419374.0, "step": 7616 }, { "epoch": 2.7903632116520862, "grad_norm": 0.1431118197431694, "learning_rate": 1.950522333699767e-05, "loss": 0.4345, "num_tokens": 1648121904.0, "step": 7617 }, { "epoch": 2.7907296294599915, "grad_norm": 0.1663466369315282, "learning_rate": 1.9500993581333053e-05, "loss": 0.455, "num_tokens": 1648856600.0, "step": 7618 }, { "epoch": 2.791096047267897, "grad_norm": 0.1344280003310632, "learning_rate": 1.9496763966367477e-05, "loss": 0.4568, "num_tokens": 1649733168.0, "step": 7619 }, { "epoch": 2.791462465075803, "grad_norm": 0.12974719459956344, "learning_rate": 1.9492534492339076e-05, "loss": 0.4405, "num_tokens": 1650502527.0, "step": 7620 }, { "epoch": 2.791828882883708, "grad_norm": 0.15717551212182085, "learning_rate": 1.9488305159485982e-05, "loss": 0.5026, "num_tokens": 1651141977.0, "step": 7621 }, { "epoch": 2.7921953006916134, "grad_norm": 0.1358624871254952, "learning_rate": 1.9484075968046326e-05, "loss": 0.4153, "num_tokens": 1651916821.0, "step": 7622 }, { "epoch": 2.792561718499519, "grad_norm": 0.1620689272970484, "learning_rate": 1.94798469182582e-05, "loss": 0.437, "num_tokens": 1652612745.0, "step": 7623 }, { "epoch": 2.7929281363074248, "grad_norm": 0.13447021609849813, "learning_rate": 1.9475618010359724e-05, "loss": 0.4118, "num_tokens": 1653340147.0, "step": 7624 }, { "epoch": 2.79329455411533, "grad_norm": 0.14131527860529283, "learning_rate": 1.9471389244588984e-05, "loss": 0.4768, "num_tokens": 1654118845.0, "step": 7625 }, { "epoch": 2.7936609719232353, "grad_norm": 0.1467230119121569, "learning_rate": 1.946716062118407e-05, "loss": 0.4486, "num_tokens": 1654902424.0, "step": 7626 }, { "epoch": 2.794027389731141, "grad_norm": 0.13166362299257592, "learning_rate": 1.9462932140383053e-05, "loss": 0.4195, "num_tokens": 1655685637.0, "step": 7627 }, { "epoch": 2.7943938075390466, "grad_norm": 0.14166604759669618, "learning_rate": 1.945870380242402e-05, "loss": 0.4783, "num_tokens": 1656438423.0, "step": 7628 }, { "epoch": 2.794760225346952, "grad_norm": 0.13915601053125887, "learning_rate": 1.9454475607545037e-05, "loss": 0.4638, "num_tokens": 1657210426.0, "step": 7629 }, { "epoch": 2.795126643154857, "grad_norm": 0.14172752927254587, "learning_rate": 1.9450247555984143e-05, "loss": 0.4239, "num_tokens": 1657909833.0, "step": 7630 }, { "epoch": 2.795493060962763, "grad_norm": 0.14342620266832729, "learning_rate": 1.94460196479794e-05, "loss": 0.4486, "num_tokens": 1658600712.0, "step": 7631 }, { "epoch": 2.795859478770668, "grad_norm": 0.1399857947680502, "learning_rate": 1.9441791883768837e-05, "loss": 0.4543, "num_tokens": 1659336681.0, "step": 7632 }, { "epoch": 2.796225896578574, "grad_norm": 0.1358514101286568, "learning_rate": 1.9437564263590497e-05, "loss": 0.4784, "num_tokens": 1660125201.0, "step": 7633 }, { "epoch": 2.796592314386479, "grad_norm": 0.13419903730407795, "learning_rate": 1.9433336787682396e-05, "loss": 0.4722, "num_tokens": 1660960520.0, "step": 7634 }, { "epoch": 2.7969587321943847, "grad_norm": 0.13963848752527463, "learning_rate": 1.942910945628255e-05, "loss": 0.4641, "num_tokens": 1661643696.0, "step": 7635 }, { "epoch": 2.79732515000229, "grad_norm": 0.13214061561719462, "learning_rate": 1.942488226962897e-05, "loss": 0.4472, "num_tokens": 1662452270.0, "step": 7636 }, { "epoch": 2.7976915678101957, "grad_norm": 0.13374685407382272, "learning_rate": 1.9420655227959645e-05, "loss": 0.4673, "num_tokens": 1663180631.0, "step": 7637 }, { "epoch": 2.798057985618101, "grad_norm": 0.11918157238694586, "learning_rate": 1.9416428331512574e-05, "loss": 0.4083, "num_tokens": 1663997606.0, "step": 7638 }, { "epoch": 2.7984244034260066, "grad_norm": 0.1386273370200822, "learning_rate": 1.9412201580525743e-05, "loss": 0.4636, "num_tokens": 1664693418.0, "step": 7639 }, { "epoch": 2.798790821233912, "grad_norm": 0.14105222300880682, "learning_rate": 1.940797497523712e-05, "loss": 0.4683, "num_tokens": 1665422180.0, "step": 7640 }, { "epoch": 2.7991572390418176, "grad_norm": 0.13323217998014655, "learning_rate": 1.9403748515884678e-05, "loss": 0.4587, "num_tokens": 1666187941.0, "step": 7641 }, { "epoch": 2.799523656849723, "grad_norm": 0.1320949329634383, "learning_rate": 1.9399522202706365e-05, "loss": 0.4697, "num_tokens": 1666949447.0, "step": 7642 }, { "epoch": 2.7998900746576285, "grad_norm": 0.14708271716822394, "learning_rate": 1.9395296035940136e-05, "loss": 0.4516, "num_tokens": 1667686580.0, "step": 7643 }, { "epoch": 2.8002564924655338, "grad_norm": 0.13887870571181346, "learning_rate": 1.9391070015823933e-05, "loss": 0.497, "num_tokens": 1668382150.0, "step": 7644 }, { "epoch": 2.8006229102734395, "grad_norm": 0.14560807369882228, "learning_rate": 1.938684414259569e-05, "loss": 0.485, "num_tokens": 1669094548.0, "step": 7645 }, { "epoch": 2.8009893280813447, "grad_norm": 0.1430971801856981, "learning_rate": 1.9382618416493336e-05, "loss": 0.4639, "num_tokens": 1669867078.0, "step": 7646 }, { "epoch": 2.80135574588925, "grad_norm": 0.1362155181456397, "learning_rate": 1.9378392837754778e-05, "loss": 0.4594, "num_tokens": 1670679882.0, "step": 7647 }, { "epoch": 2.8017221636971557, "grad_norm": 0.14835081715297566, "learning_rate": 1.937416740661793e-05, "loss": 0.4826, "num_tokens": 1671284646.0, "step": 7648 }, { "epoch": 2.8020885815050613, "grad_norm": 0.13472807837186768, "learning_rate": 1.936994212332069e-05, "loss": 0.4327, "num_tokens": 1672008017.0, "step": 7649 }, { "epoch": 2.8024549993129666, "grad_norm": 0.13837367841525566, "learning_rate": 1.9365716988100955e-05, "loss": 0.4784, "num_tokens": 1672730916.0, "step": 7650 }, { "epoch": 2.802821417120872, "grad_norm": 0.13320240542072057, "learning_rate": 1.9361492001196606e-05, "loss": 0.4595, "num_tokens": 1673478445.0, "step": 7651 }, { "epoch": 2.8031878349287775, "grad_norm": 0.1313941995928138, "learning_rate": 1.9357267162845518e-05, "loss": 0.4595, "num_tokens": 1674190948.0, "step": 7652 }, { "epoch": 2.8035542527366832, "grad_norm": 0.13653759816414054, "learning_rate": 1.935304247328556e-05, "loss": 0.468, "num_tokens": 1674923644.0, "step": 7653 }, { "epoch": 2.8039206705445885, "grad_norm": 0.13208482502258437, "learning_rate": 1.934881793275458e-05, "loss": 0.4661, "num_tokens": 1675728540.0, "step": 7654 }, { "epoch": 2.8042870883524937, "grad_norm": 0.13135949712187397, "learning_rate": 1.9344593541490435e-05, "loss": 0.479, "num_tokens": 1676555616.0, "step": 7655 }, { "epoch": 2.8046535061603994, "grad_norm": 0.13894514100203814, "learning_rate": 1.934036929973097e-05, "loss": 0.4616, "num_tokens": 1677253121.0, "step": 7656 }, { "epoch": 2.805019923968305, "grad_norm": 0.13515421106386422, "learning_rate": 1.9336145207714026e-05, "loss": 0.4231, "num_tokens": 1678014405.0, "step": 7657 }, { "epoch": 2.8053863417762104, "grad_norm": 0.13152463497684652, "learning_rate": 1.9331921265677404e-05, "loss": 0.4349, "num_tokens": 1678788618.0, "step": 7658 }, { "epoch": 2.8057527595841156, "grad_norm": 0.1249021724726556, "learning_rate": 1.932769747385894e-05, "loss": 0.4638, "num_tokens": 1679645488.0, "step": 7659 }, { "epoch": 2.8061191773920213, "grad_norm": 0.13974657956756745, "learning_rate": 1.9323473832496434e-05, "loss": 0.4709, "num_tokens": 1680435059.0, "step": 7660 }, { "epoch": 2.8064855951999266, "grad_norm": 0.13572697790486488, "learning_rate": 1.9319250341827688e-05, "loss": 0.4505, "num_tokens": 1681116906.0, "step": 7661 }, { "epoch": 2.8068520130078323, "grad_norm": 0.12297276374307858, "learning_rate": 1.9315027002090492e-05, "loss": 0.4516, "num_tokens": 1681923880.0, "step": 7662 }, { "epoch": 2.8072184308157375, "grad_norm": 0.1301173259132913, "learning_rate": 1.931080381352263e-05, "loss": 0.4859, "num_tokens": 1682766616.0, "step": 7663 }, { "epoch": 2.807584848623643, "grad_norm": 0.12354890152363188, "learning_rate": 1.9306580776361872e-05, "loss": 0.4427, "num_tokens": 1683524766.0, "step": 7664 }, { "epoch": 2.8079512664315485, "grad_norm": 0.13148532259649645, "learning_rate": 1.9302357890845983e-05, "loss": 0.4601, "num_tokens": 1684324894.0, "step": 7665 }, { "epoch": 2.808317684239454, "grad_norm": 0.12680340518394012, "learning_rate": 1.9298135157212727e-05, "loss": 0.4306, "num_tokens": 1685039189.0, "step": 7666 }, { "epoch": 2.8086841020473594, "grad_norm": 0.1340896302489441, "learning_rate": 1.929391257569985e-05, "loss": 0.5181, "num_tokens": 1685805638.0, "step": 7667 }, { "epoch": 2.809050519855265, "grad_norm": 0.12332969801075896, "learning_rate": 1.9289690146545087e-05, "loss": 0.4544, "num_tokens": 1686599344.0, "step": 7668 }, { "epoch": 2.8094169376631704, "grad_norm": 0.13356960380557226, "learning_rate": 1.9285467869986175e-05, "loss": 0.4841, "num_tokens": 1687390902.0, "step": 7669 }, { "epoch": 2.809783355471076, "grad_norm": 0.12733438345534737, "learning_rate": 1.9281245746260832e-05, "loss": 0.4387, "num_tokens": 1688189776.0, "step": 7670 }, { "epoch": 2.8101497732789813, "grad_norm": 0.12317370149332109, "learning_rate": 1.9277023775606767e-05, "loss": 0.4613, "num_tokens": 1689046012.0, "step": 7671 }, { "epoch": 2.8105161910868866, "grad_norm": 0.12310094118543617, "learning_rate": 1.9272801958261698e-05, "loss": 0.4447, "num_tokens": 1689812075.0, "step": 7672 }, { "epoch": 2.8108826088947922, "grad_norm": 0.14173875803036906, "learning_rate": 1.926858029446332e-05, "loss": 0.4443, "num_tokens": 1690671498.0, "step": 7673 }, { "epoch": 2.811249026702698, "grad_norm": 0.12889690995961672, "learning_rate": 1.926435878444931e-05, "loss": 0.4651, "num_tokens": 1691464088.0, "step": 7674 }, { "epoch": 2.811615444510603, "grad_norm": 0.15564878880571187, "learning_rate": 1.926013742845736e-05, "loss": 0.4851, "num_tokens": 1692141369.0, "step": 7675 }, { "epoch": 2.8119818623185084, "grad_norm": 0.12712511919230857, "learning_rate": 1.9255916226725126e-05, "loss": 0.4548, "num_tokens": 1692996682.0, "step": 7676 }, { "epoch": 2.812348280126414, "grad_norm": 0.12572428784075243, "learning_rate": 1.9251695179490287e-05, "loss": 0.4585, "num_tokens": 1693782350.0, "step": 7677 }, { "epoch": 2.81271469793432, "grad_norm": 0.13166779180584842, "learning_rate": 1.9247474286990487e-05, "loss": 0.4527, "num_tokens": 1694489793.0, "step": 7678 }, { "epoch": 2.813081115742225, "grad_norm": 0.14024833310682935, "learning_rate": 1.924325354946337e-05, "loss": 0.4844, "num_tokens": 1695173495.0, "step": 7679 }, { "epoch": 2.8134475335501303, "grad_norm": 0.13027386605313132, "learning_rate": 1.923903296714658e-05, "loss": 0.4668, "num_tokens": 1696065640.0, "step": 7680 }, { "epoch": 2.813813951358036, "grad_norm": 0.13051257396798868, "learning_rate": 1.9234812540277733e-05, "loss": 0.415, "num_tokens": 1696776632.0, "step": 7681 }, { "epoch": 2.8141803691659417, "grad_norm": 0.1196033460871834, "learning_rate": 1.923059226909445e-05, "loss": 0.4144, "num_tokens": 1697639929.0, "step": 7682 }, { "epoch": 2.814546786973847, "grad_norm": 0.1300061748757246, "learning_rate": 1.9226372153834342e-05, "loss": 0.4416, "num_tokens": 1698382684.0, "step": 7683 }, { "epoch": 2.814913204781752, "grad_norm": 0.12602953918285542, "learning_rate": 1.9222152194735022e-05, "loss": 0.4784, "num_tokens": 1699192598.0, "step": 7684 }, { "epoch": 2.815279622589658, "grad_norm": 0.12832782554922798, "learning_rate": 1.9217932392034063e-05, "loss": 0.4334, "num_tokens": 1699993510.0, "step": 7685 }, { "epoch": 2.815646040397563, "grad_norm": 0.1312664077391287, "learning_rate": 1.9213712745969064e-05, "loss": 0.4181, "num_tokens": 1700815586.0, "step": 7686 }, { "epoch": 2.816012458205469, "grad_norm": 0.12887197905646083, "learning_rate": 1.9209493256777578e-05, "loss": 0.4695, "num_tokens": 1701702206.0, "step": 7687 }, { "epoch": 2.816378876013374, "grad_norm": 0.12124953711153265, "learning_rate": 1.92052739246972e-05, "loss": 0.4223, "num_tokens": 1702483072.0, "step": 7688 }, { "epoch": 2.81674529382128, "grad_norm": 0.12648259251703042, "learning_rate": 1.920105474996546e-05, "loss": 0.4047, "num_tokens": 1703212534.0, "step": 7689 }, { "epoch": 2.817111711629185, "grad_norm": 0.1293674832838872, "learning_rate": 1.9196835732819927e-05, "loss": 0.4505, "num_tokens": 1704010394.0, "step": 7690 }, { "epoch": 2.8174781294370908, "grad_norm": 0.13266055031095045, "learning_rate": 1.919261687349813e-05, "loss": 0.4579, "num_tokens": 1704814415.0, "step": 7691 }, { "epoch": 2.817844547244996, "grad_norm": 0.1253966302896342, "learning_rate": 1.9188398172237596e-05, "loss": 0.4199, "num_tokens": 1705637780.0, "step": 7692 }, { "epoch": 2.8182109650529017, "grad_norm": 0.14675080642590396, "learning_rate": 1.9184179629275845e-05, "loss": 0.4761, "num_tokens": 1706349141.0, "step": 7693 }, { "epoch": 2.818577382860807, "grad_norm": 0.13934356622959554, "learning_rate": 1.9179961244850404e-05, "loss": 0.494, "num_tokens": 1707040290.0, "step": 7694 }, { "epoch": 2.8189438006687126, "grad_norm": 0.13245998400240566, "learning_rate": 1.9175743019198765e-05, "loss": 0.4759, "num_tokens": 1707836754.0, "step": 7695 }, { "epoch": 2.819310218476618, "grad_norm": 0.12097471674282735, "learning_rate": 1.917152495255842e-05, "loss": 0.4388, "num_tokens": 1708706737.0, "step": 7696 }, { "epoch": 2.8196766362845236, "grad_norm": 0.12390458332984065, "learning_rate": 1.9167307045166864e-05, "loss": 0.4709, "num_tokens": 1709558714.0, "step": 7697 }, { "epoch": 2.820043054092429, "grad_norm": 0.13425003844918604, "learning_rate": 1.916308929726156e-05, "loss": 0.4272, "num_tokens": 1710288594.0, "step": 7698 }, { "epoch": 2.8204094719003345, "grad_norm": 0.13457868312033827, "learning_rate": 1.9158871709079996e-05, "loss": 0.4484, "num_tokens": 1710995024.0, "step": 7699 }, { "epoch": 2.82077588970824, "grad_norm": 0.12790300714797415, "learning_rate": 1.9154654280859607e-05, "loss": 0.4404, "num_tokens": 1711795619.0, "step": 7700 }, { "epoch": 2.821142307516145, "grad_norm": 0.13997277072171135, "learning_rate": 1.9150437012837865e-05, "loss": 0.4644, "num_tokens": 1712494933.0, "step": 7701 }, { "epoch": 2.8215087253240507, "grad_norm": 0.13743624660191914, "learning_rate": 1.9146219905252197e-05, "loss": 0.5076, "num_tokens": 1713210240.0, "step": 7702 }, { "epoch": 2.8218751431319564, "grad_norm": 0.13244339862544713, "learning_rate": 1.914200295834004e-05, "loss": 0.4492, "num_tokens": 1713925472.0, "step": 7703 }, { "epoch": 2.8222415609398617, "grad_norm": 0.12031946907347935, "learning_rate": 1.9137786172338804e-05, "loss": 0.45, "num_tokens": 1714770451.0, "step": 7704 }, { "epoch": 2.822607978747767, "grad_norm": 0.1278312927042214, "learning_rate": 1.9133569547485923e-05, "loss": 0.449, "num_tokens": 1715527517.0, "step": 7705 }, { "epoch": 2.8229743965556726, "grad_norm": 0.12533948785315888, "learning_rate": 1.9129353084018785e-05, "loss": 0.4569, "num_tokens": 1716343012.0, "step": 7706 }, { "epoch": 2.8233408143635783, "grad_norm": 0.1318136881758918, "learning_rate": 1.9125136782174797e-05, "loss": 0.4604, "num_tokens": 1717091526.0, "step": 7707 }, { "epoch": 2.8237072321714836, "grad_norm": 0.12357710361789487, "learning_rate": 1.9120920642191337e-05, "loss": 0.4626, "num_tokens": 1717906613.0, "step": 7708 }, { "epoch": 2.824073649979389, "grad_norm": 0.12419811439282878, "learning_rate": 1.9116704664305783e-05, "loss": 0.4593, "num_tokens": 1718767242.0, "step": 7709 }, { "epoch": 2.8244400677872945, "grad_norm": 0.13298631221367063, "learning_rate": 1.9112488848755498e-05, "loss": 0.4505, "num_tokens": 1719529168.0, "step": 7710 }, { "epoch": 2.8248064855952, "grad_norm": 0.13071848838169298, "learning_rate": 1.910827319577786e-05, "loss": 0.4771, "num_tokens": 1720371363.0, "step": 7711 }, { "epoch": 2.8251729034031055, "grad_norm": 0.12044543911541195, "learning_rate": 1.9104057705610203e-05, "loss": 0.4495, "num_tokens": 1721276265.0, "step": 7712 }, { "epoch": 2.8255393212110107, "grad_norm": 0.13200136325395576, "learning_rate": 1.909984237848987e-05, "loss": 0.477, "num_tokens": 1722113721.0, "step": 7713 }, { "epoch": 2.8259057390189164, "grad_norm": 0.13630065789834556, "learning_rate": 1.9095627214654193e-05, "loss": 0.4733, "num_tokens": 1722866970.0, "step": 7714 }, { "epoch": 2.8262721568268216, "grad_norm": 0.12412359921943253, "learning_rate": 1.9091412214340486e-05, "loss": 0.4246, "num_tokens": 1723655709.0, "step": 7715 }, { "epoch": 2.8266385746347273, "grad_norm": 0.13542767752850982, "learning_rate": 1.9087197377786077e-05, "loss": 0.4458, "num_tokens": 1724348169.0, "step": 7716 }, { "epoch": 2.8270049924426326, "grad_norm": 0.15318008639039996, "learning_rate": 1.9082982705228262e-05, "loss": 0.466, "num_tokens": 1724985080.0, "step": 7717 }, { "epoch": 2.8273714102505383, "grad_norm": 0.1335958874127259, "learning_rate": 1.907876819690434e-05, "loss": 0.4496, "num_tokens": 1725702106.0, "step": 7718 }, { "epoch": 2.8277378280584435, "grad_norm": 0.13613931693871725, "learning_rate": 1.9074553853051587e-05, "loss": 0.4513, "num_tokens": 1726440966.0, "step": 7719 }, { "epoch": 2.8281042458663492, "grad_norm": 0.13965990111336585, "learning_rate": 1.9070339673907286e-05, "loss": 0.4979, "num_tokens": 1727263870.0, "step": 7720 }, { "epoch": 2.8284706636742545, "grad_norm": 0.13350965937850476, "learning_rate": 1.9066125659708692e-05, "loss": 0.4518, "num_tokens": 1728010475.0, "step": 7721 }, { "epoch": 2.82883708148216, "grad_norm": 0.15252658470685576, "learning_rate": 1.9061911810693094e-05, "loss": 0.4648, "num_tokens": 1728701583.0, "step": 7722 }, { "epoch": 2.8292034992900654, "grad_norm": 0.1272950556565107, "learning_rate": 1.90576981270977e-05, "loss": 0.4288, "num_tokens": 1729555513.0, "step": 7723 }, { "epoch": 2.829569917097971, "grad_norm": 0.1277956246460483, "learning_rate": 1.9053484609159776e-05, "loss": 0.4462, "num_tokens": 1730319118.0, "step": 7724 }, { "epoch": 2.8299363349058764, "grad_norm": 0.13645112461818976, "learning_rate": 1.9049271257116544e-05, "loss": 0.4595, "num_tokens": 1731084956.0, "step": 7725 }, { "epoch": 2.8303027527137816, "grad_norm": 0.14645138743770414, "learning_rate": 1.904505807120521e-05, "loss": 0.4631, "num_tokens": 1731667284.0, "step": 7726 }, { "epoch": 2.8306691705216873, "grad_norm": 0.1274654959977172, "learning_rate": 1.904084505166301e-05, "loss": 0.4264, "num_tokens": 1732445422.0, "step": 7727 }, { "epoch": 2.831035588329593, "grad_norm": 0.14026808880474093, "learning_rate": 1.903663219872713e-05, "loss": 0.4333, "num_tokens": 1733064065.0, "step": 7728 }, { "epoch": 2.8314020061374983, "grad_norm": 0.14109110600781166, "learning_rate": 1.903241951263477e-05, "loss": 0.4701, "num_tokens": 1733846710.0, "step": 7729 }, { "epoch": 2.8317684239454035, "grad_norm": 0.1344374173021544, "learning_rate": 1.9028206993623097e-05, "loss": 0.4636, "num_tokens": 1734700049.0, "step": 7730 }, { "epoch": 2.832134841753309, "grad_norm": 0.12811231013510035, "learning_rate": 1.9023994641929303e-05, "loss": 0.4607, "num_tokens": 1735500362.0, "step": 7731 }, { "epoch": 2.832501259561215, "grad_norm": 0.13436390218380995, "learning_rate": 1.9019782457790538e-05, "loss": 0.4507, "num_tokens": 1736211421.0, "step": 7732 }, { "epoch": 2.83286767736912, "grad_norm": 0.14673999876075355, "learning_rate": 1.9015570441443966e-05, "loss": 0.4799, "num_tokens": 1736840302.0, "step": 7733 }, { "epoch": 2.8332340951770254, "grad_norm": 0.1302977026403142, "learning_rate": 1.9011358593126728e-05, "loss": 0.4377, "num_tokens": 1737671095.0, "step": 7734 }, { "epoch": 2.833600512984931, "grad_norm": 0.1361250553448687, "learning_rate": 1.900714691307596e-05, "loss": 0.4769, "num_tokens": 1738504933.0, "step": 7735 }, { "epoch": 2.833966930792837, "grad_norm": 0.1400861554701726, "learning_rate": 1.9002935401528785e-05, "loss": 0.4642, "num_tokens": 1739272907.0, "step": 7736 }, { "epoch": 2.834333348600742, "grad_norm": 0.1477608155445935, "learning_rate": 1.8998724058722318e-05, "loss": 0.4248, "num_tokens": 1739881419.0, "step": 7737 }, { "epoch": 2.8346997664086473, "grad_norm": 0.1367784372849773, "learning_rate": 1.8994512884893672e-05, "loss": 0.4439, "num_tokens": 1740632778.0, "step": 7738 }, { "epoch": 2.835066184216553, "grad_norm": 0.11882848818236887, "learning_rate": 1.899030188027995e-05, "loss": 0.4368, "num_tokens": 1741493605.0, "step": 7739 }, { "epoch": 2.8354326020244582, "grad_norm": 0.12199892575440566, "learning_rate": 1.8986091045118226e-05, "loss": 0.4548, "num_tokens": 1742318992.0, "step": 7740 }, { "epoch": 2.835799019832364, "grad_norm": 0.14511995462234803, "learning_rate": 1.8981880379645585e-05, "loss": 0.4863, "num_tokens": 1743090660.0, "step": 7741 }, { "epoch": 2.836165437640269, "grad_norm": 0.14178500873261185, "learning_rate": 1.89776698840991e-05, "loss": 0.4659, "num_tokens": 1743892345.0, "step": 7742 }, { "epoch": 2.836531855448175, "grad_norm": 0.1275369332510265, "learning_rate": 1.8973459558715815e-05, "loss": 0.4578, "num_tokens": 1744656786.0, "step": 7743 }, { "epoch": 2.83689827325608, "grad_norm": 0.13317429376913462, "learning_rate": 1.8969249403732798e-05, "loss": 0.4501, "num_tokens": 1745478961.0, "step": 7744 }, { "epoch": 2.837264691063986, "grad_norm": 0.13320650944643753, "learning_rate": 1.8965039419387085e-05, "loss": 0.4367, "num_tokens": 1746312055.0, "step": 7745 }, { "epoch": 2.837631108871891, "grad_norm": 0.13719227925287306, "learning_rate": 1.8960829605915704e-05, "loss": 0.4743, "num_tokens": 1747043511.0, "step": 7746 }, { "epoch": 2.8379975266797968, "grad_norm": 0.13805129985960815, "learning_rate": 1.8956619963555668e-05, "loss": 0.4899, "num_tokens": 1747975046.0, "step": 7747 }, { "epoch": 2.838363944487702, "grad_norm": 0.13252950631543986, "learning_rate": 1.895241049254399e-05, "loss": 0.4681, "num_tokens": 1748722533.0, "step": 7748 }, { "epoch": 2.8387303622956077, "grad_norm": 0.12683414082605907, "learning_rate": 1.8948201193117686e-05, "loss": 0.4454, "num_tokens": 1749508028.0, "step": 7749 }, { "epoch": 2.839096780103513, "grad_norm": 0.1241918289690995, "learning_rate": 1.8943992065513745e-05, "loss": 0.4383, "num_tokens": 1750438880.0, "step": 7750 }, { "epoch": 2.8394631979114187, "grad_norm": 0.13284255616642837, "learning_rate": 1.8939783109969134e-05, "loss": 0.4615, "num_tokens": 1751204529.0, "step": 7751 }, { "epoch": 2.839829615719324, "grad_norm": 0.13998103682047477, "learning_rate": 1.8935574326720836e-05, "loss": 0.4503, "num_tokens": 1751933170.0, "step": 7752 }, { "epoch": 2.8401960335272296, "grad_norm": 0.12937136613885317, "learning_rate": 1.8931365716005818e-05, "loss": 0.4195, "num_tokens": 1752793069.0, "step": 7753 }, { "epoch": 2.840562451335135, "grad_norm": 0.13480096813490494, "learning_rate": 1.8927157278061018e-05, "loss": 0.4497, "num_tokens": 1753543591.0, "step": 7754 }, { "epoch": 2.84092886914304, "grad_norm": 0.12817527671146178, "learning_rate": 1.8922949013123392e-05, "loss": 0.4476, "num_tokens": 1754382044.0, "step": 7755 }, { "epoch": 2.841295286950946, "grad_norm": 0.1350320351030257, "learning_rate": 1.8918740921429875e-05, "loss": 0.4274, "num_tokens": 1755148525.0, "step": 7756 }, { "epoch": 2.8416617047588515, "grad_norm": 0.12335906206777081, "learning_rate": 1.8914533003217387e-05, "loss": 0.4272, "num_tokens": 1755973402.0, "step": 7757 }, { "epoch": 2.8420281225667567, "grad_norm": 0.12449701662949933, "learning_rate": 1.8910325258722842e-05, "loss": 0.4629, "num_tokens": 1756815627.0, "step": 7758 }, { "epoch": 2.842394540374662, "grad_norm": 0.13815828974757172, "learning_rate": 1.8906117688183138e-05, "loss": 0.4532, "num_tokens": 1757518556.0, "step": 7759 }, { "epoch": 2.8427609581825677, "grad_norm": 0.12079845674784224, "learning_rate": 1.8901910291835187e-05, "loss": 0.4373, "num_tokens": 1758379993.0, "step": 7760 }, { "epoch": 2.8431273759904734, "grad_norm": 0.1321680453925721, "learning_rate": 1.8897703069915853e-05, "loss": 0.4205, "num_tokens": 1759113329.0, "step": 7761 }, { "epoch": 2.8434937937983786, "grad_norm": 0.14403059348709435, "learning_rate": 1.8893496022662025e-05, "loss": 0.4507, "num_tokens": 1759755294.0, "step": 7762 }, { "epoch": 2.843860211606284, "grad_norm": 0.14299803613148696, "learning_rate": 1.888928915031057e-05, "loss": 0.4635, "num_tokens": 1760488871.0, "step": 7763 }, { "epoch": 2.8442266294141896, "grad_norm": 0.13930370343090218, "learning_rate": 1.8885082453098334e-05, "loss": 0.4967, "num_tokens": 1761273322.0, "step": 7764 }, { "epoch": 2.844593047222095, "grad_norm": 0.13257593581067695, "learning_rate": 1.8880875931262153e-05, "loss": 0.4471, "num_tokens": 1762036631.0, "step": 7765 }, { "epoch": 2.8449594650300005, "grad_norm": 0.13249219749745714, "learning_rate": 1.887666958503889e-05, "loss": 0.4681, "num_tokens": 1762796248.0, "step": 7766 }, { "epoch": 2.8453258828379058, "grad_norm": 0.12900549940965433, "learning_rate": 1.8872463414665353e-05, "loss": 0.4355, "num_tokens": 1763599791.0, "step": 7767 }, { "epoch": 2.8456923006458115, "grad_norm": 0.13530564211799104, "learning_rate": 1.886825742037836e-05, "loss": 0.4579, "num_tokens": 1764241080.0, "step": 7768 }, { "epoch": 2.8460587184537167, "grad_norm": 0.14358400659819467, "learning_rate": 1.886405160241472e-05, "loss": 0.4736, "num_tokens": 1764874147.0, "step": 7769 }, { "epoch": 2.8464251362616224, "grad_norm": 0.13784290733346993, "learning_rate": 1.885984596101122e-05, "loss": 0.4642, "num_tokens": 1765568296.0, "step": 7770 }, { "epoch": 2.8467915540695277, "grad_norm": 0.13673928096179402, "learning_rate": 1.885564049640466e-05, "loss": 0.4427, "num_tokens": 1766272499.0, "step": 7771 }, { "epoch": 2.8471579718774334, "grad_norm": 0.1509576765203207, "learning_rate": 1.8851435208831805e-05, "loss": 0.4812, "num_tokens": 1766920202.0, "step": 7772 }, { "epoch": 2.8475243896853386, "grad_norm": 0.12089764631843448, "learning_rate": 1.8847230098529427e-05, "loss": 0.4562, "num_tokens": 1767820228.0, "step": 7773 }, { "epoch": 2.8478908074932443, "grad_norm": 0.1303438076205084, "learning_rate": 1.8843025165734282e-05, "loss": 0.4473, "num_tokens": 1768623886.0, "step": 7774 }, { "epoch": 2.8482572253011496, "grad_norm": 0.12394703942122035, "learning_rate": 1.883882041068311e-05, "loss": 0.4366, "num_tokens": 1769437518.0, "step": 7775 }, { "epoch": 2.8486236431090552, "grad_norm": 0.14277737877443483, "learning_rate": 1.883461583361264e-05, "loss": 0.4694, "num_tokens": 1770131562.0, "step": 7776 }, { "epoch": 2.8489900609169605, "grad_norm": 0.14077242428085693, "learning_rate": 1.8830411434759625e-05, "loss": 0.4904, "num_tokens": 1770893569.0, "step": 7777 }, { "epoch": 2.849356478724866, "grad_norm": 0.1353044346777397, "learning_rate": 1.8826207214360758e-05, "loss": 0.5022, "num_tokens": 1771665728.0, "step": 7778 }, { "epoch": 2.8497228965327714, "grad_norm": 0.1239995314045144, "learning_rate": 1.8822003172652757e-05, "loss": 0.4555, "num_tokens": 1772485647.0, "step": 7779 }, { "epoch": 2.8500893143406767, "grad_norm": 0.13924739354910912, "learning_rate": 1.8817799309872315e-05, "loss": 0.4415, "num_tokens": 1773138331.0, "step": 7780 }, { "epoch": 2.8504557321485824, "grad_norm": 0.13389412667333236, "learning_rate": 1.881359562625611e-05, "loss": 0.4266, "num_tokens": 1773845988.0, "step": 7781 }, { "epoch": 2.850822149956488, "grad_norm": 0.1442294798167338, "learning_rate": 1.8809392122040817e-05, "loss": 0.4841, "num_tokens": 1774591615.0, "step": 7782 }, { "epoch": 2.8511885677643933, "grad_norm": 0.13126518634407272, "learning_rate": 1.8805188797463124e-05, "loss": 0.4356, "num_tokens": 1775313573.0, "step": 7783 }, { "epoch": 2.8515549855722986, "grad_norm": 0.12552702304666197, "learning_rate": 1.880098565275966e-05, "loss": 0.4302, "num_tokens": 1776055908.0, "step": 7784 }, { "epoch": 2.8519214033802043, "grad_norm": 0.13173808302895815, "learning_rate": 1.8796782688167083e-05, "loss": 0.4622, "num_tokens": 1776908207.0, "step": 7785 }, { "epoch": 2.85228782118811, "grad_norm": 0.13126926482570117, "learning_rate": 1.8792579903922034e-05, "loss": 0.448, "num_tokens": 1777694083.0, "step": 7786 }, { "epoch": 2.8526542389960152, "grad_norm": 0.1451612510345252, "learning_rate": 1.878837730026112e-05, "loss": 0.4959, "num_tokens": 1778296509.0, "step": 7787 }, { "epoch": 2.8530206568039205, "grad_norm": 0.13217644026457812, "learning_rate": 1.8784174877420974e-05, "loss": 0.4438, "num_tokens": 1779038389.0, "step": 7788 }, { "epoch": 2.853387074611826, "grad_norm": 0.1352104813922188, "learning_rate": 1.877997263563819e-05, "loss": 0.4565, "num_tokens": 1779801449.0, "step": 7789 }, { "epoch": 2.853753492419732, "grad_norm": 0.13672781291116454, "learning_rate": 1.877577057514937e-05, "loss": 0.4892, "num_tokens": 1780522520.0, "step": 7790 }, { "epoch": 2.854119910227637, "grad_norm": 0.12463693642638955, "learning_rate": 1.8771568696191095e-05, "loss": 0.4537, "num_tokens": 1781356193.0, "step": 7791 }, { "epoch": 2.8544863280355424, "grad_norm": 0.12869858021454536, "learning_rate": 1.8767366998999934e-05, "loss": 0.4678, "num_tokens": 1782143775.0, "step": 7792 }, { "epoch": 2.854852745843448, "grad_norm": 0.1341375513946897, "learning_rate": 1.876316548381245e-05, "loss": 0.435, "num_tokens": 1782821614.0, "step": 7793 }, { "epoch": 2.8552191636513533, "grad_norm": 36.72411001619315, "learning_rate": 1.8758964150865214e-05, "loss": 0.4584, "num_tokens": 1783712652.0, "step": 7794 }, { "epoch": 2.855585581459259, "grad_norm": 0.14464376069486687, "learning_rate": 1.8754763000394752e-05, "loss": 0.4168, "num_tokens": 1784383847.0, "step": 7795 }, { "epoch": 2.8559519992671643, "grad_norm": 0.13486784027970114, "learning_rate": 1.8750562032637605e-05, "loss": 0.4564, "num_tokens": 1785125813.0, "step": 7796 }, { "epoch": 2.85631841707507, "grad_norm": 0.1317885594651598, "learning_rate": 1.8746361247830294e-05, "loss": 0.4367, "num_tokens": 1785905285.0, "step": 7797 }, { "epoch": 2.856684834882975, "grad_norm": 0.13860969381950647, "learning_rate": 1.8742160646209325e-05, "loss": 0.4742, "num_tokens": 1786586725.0, "step": 7798 }, { "epoch": 2.857051252690881, "grad_norm": 0.1260093893582304, "learning_rate": 1.8737960228011206e-05, "loss": 0.4614, "num_tokens": 1787428733.0, "step": 7799 }, { "epoch": 2.857417670498786, "grad_norm": 0.13930826715987107, "learning_rate": 1.873375999347244e-05, "loss": 0.4459, "num_tokens": 1788101972.0, "step": 7800 }, { "epoch": 2.857784088306692, "grad_norm": 0.12608685806141529, "learning_rate": 1.8729559942829496e-05, "loss": 0.4465, "num_tokens": 1788873170.0, "step": 7801 }, { "epoch": 2.858150506114597, "grad_norm": 0.12758423148053696, "learning_rate": 1.872536007631884e-05, "loss": 0.444, "num_tokens": 1789627995.0, "step": 7802 }, { "epoch": 2.858516923922503, "grad_norm": 0.12275467460002988, "learning_rate": 1.8721160394176947e-05, "loss": 0.4321, "num_tokens": 1790409369.0, "step": 7803 }, { "epoch": 2.858883341730408, "grad_norm": 0.12524681141664387, "learning_rate": 1.8716960896640253e-05, "loss": 0.4492, "num_tokens": 1791209740.0, "step": 7804 }, { "epoch": 2.8592497595383133, "grad_norm": 0.1276350758840091, "learning_rate": 1.871276158394521e-05, "loss": 0.4174, "num_tokens": 1792016098.0, "step": 7805 }, { "epoch": 2.859616177346219, "grad_norm": 0.1452740448693303, "learning_rate": 1.870856245632824e-05, "loss": 0.4717, "num_tokens": 1792737313.0, "step": 7806 }, { "epoch": 2.8599825951541247, "grad_norm": 0.13867716421736612, "learning_rate": 1.8704363514025775e-05, "loss": 0.4483, "num_tokens": 1793515218.0, "step": 7807 }, { "epoch": 2.86034901296203, "grad_norm": 0.12859820800575025, "learning_rate": 1.8700164757274213e-05, "loss": 0.4496, "num_tokens": 1794350999.0, "step": 7808 }, { "epoch": 2.860715430769935, "grad_norm": 0.12379848876790137, "learning_rate": 1.869596618630994e-05, "loss": 0.4219, "num_tokens": 1795204373.0, "step": 7809 }, { "epoch": 2.861081848577841, "grad_norm": 0.1282197688470598, "learning_rate": 1.8691767801369366e-05, "loss": 0.434, "num_tokens": 1796020762.0, "step": 7810 }, { "epoch": 2.8614482663857466, "grad_norm": 0.12697182672355672, "learning_rate": 1.8687569602688867e-05, "loss": 0.4514, "num_tokens": 1796788035.0, "step": 7811 }, { "epoch": 2.861814684193652, "grad_norm": 0.12706860684224683, "learning_rate": 1.8683371590504796e-05, "loss": 0.4314, "num_tokens": 1797586793.0, "step": 7812 }, { "epoch": 2.862181102001557, "grad_norm": 0.13261326334385318, "learning_rate": 1.867917376505352e-05, "loss": 0.4554, "num_tokens": 1798348077.0, "step": 7813 }, { "epoch": 2.8625475198094628, "grad_norm": 0.13113752413721938, "learning_rate": 1.867497612657138e-05, "loss": 0.4733, "num_tokens": 1799111618.0, "step": 7814 }, { "epoch": 2.8629139376173685, "grad_norm": 0.11384985565469767, "learning_rate": 1.8670778675294704e-05, "loss": 0.4428, "num_tokens": 1799975759.0, "step": 7815 }, { "epoch": 2.8632803554252737, "grad_norm": 0.13363113170266272, "learning_rate": 1.8666581411459834e-05, "loss": 0.4574, "num_tokens": 1800696703.0, "step": 7816 }, { "epoch": 2.863646773233179, "grad_norm": 0.1354934709020129, "learning_rate": 1.8662384335303073e-05, "loss": 0.4425, "num_tokens": 1801393725.0, "step": 7817 }, { "epoch": 2.8640131910410846, "grad_norm": 0.131785552371478, "learning_rate": 1.8658187447060735e-05, "loss": 0.4897, "num_tokens": 1802163835.0, "step": 7818 }, { "epoch": 2.86437960884899, "grad_norm": 0.13183728555573299, "learning_rate": 1.8653990746969097e-05, "loss": 0.4295, "num_tokens": 1802887383.0, "step": 7819 }, { "epoch": 2.8647460266568956, "grad_norm": 0.12231708129602656, "learning_rate": 1.864979423526445e-05, "loss": 0.4317, "num_tokens": 1803758002.0, "step": 7820 }, { "epoch": 2.865112444464801, "grad_norm": 0.12904520406324746, "learning_rate": 1.8645597912183078e-05, "loss": 0.4562, "num_tokens": 1804534307.0, "step": 7821 }, { "epoch": 2.8654788622727065, "grad_norm": 0.12606528283475538, "learning_rate": 1.8641401777961222e-05, "loss": 0.4444, "num_tokens": 1805347503.0, "step": 7822 }, { "epoch": 2.865845280080612, "grad_norm": 0.1275551810199355, "learning_rate": 1.8637205832835142e-05, "loss": 0.4706, "num_tokens": 1806118562.0, "step": 7823 }, { "epoch": 2.8662116978885175, "grad_norm": 0.13059219929764657, "learning_rate": 1.8633010077041086e-05, "loss": 0.4526, "num_tokens": 1806839136.0, "step": 7824 }, { "epoch": 2.8665781156964227, "grad_norm": 0.1254425259414537, "learning_rate": 1.862881451081527e-05, "loss": 0.464, "num_tokens": 1807681074.0, "step": 7825 }, { "epoch": 2.8669445335043284, "grad_norm": 0.11846479173474626, "learning_rate": 1.862461913439391e-05, "loss": 0.4568, "num_tokens": 1808554125.0, "step": 7826 }, { "epoch": 2.8673109513122337, "grad_norm": 0.12927082580816548, "learning_rate": 1.8620423948013225e-05, "loss": 0.4468, "num_tokens": 1809367404.0, "step": 7827 }, { "epoch": 2.8676773691201394, "grad_norm": 0.13125383238614938, "learning_rate": 1.8616228951909415e-05, "loss": 0.4535, "num_tokens": 1810200038.0, "step": 7828 }, { "epoch": 2.8680437869280446, "grad_norm": 0.1353426540199925, "learning_rate": 1.8612034146318664e-05, "loss": 0.4624, "num_tokens": 1810966137.0, "step": 7829 }, { "epoch": 2.8684102047359503, "grad_norm": 0.12857006962195386, "learning_rate": 1.860783953147714e-05, "loss": 0.4795, "num_tokens": 1811782496.0, "step": 7830 }, { "epoch": 2.8687766225438556, "grad_norm": 0.13811330897393745, "learning_rate": 1.8603645107621003e-05, "loss": 0.4855, "num_tokens": 1812493047.0, "step": 7831 }, { "epoch": 2.8691430403517613, "grad_norm": 0.12000773833805863, "learning_rate": 1.8599450874986434e-05, "loss": 0.448, "num_tokens": 1813313777.0, "step": 7832 }, { "epoch": 2.8695094581596665, "grad_norm": 0.1258556932037321, "learning_rate": 1.8595256833809548e-05, "loss": 0.4652, "num_tokens": 1814127012.0, "step": 7833 }, { "epoch": 2.8698758759675718, "grad_norm": 0.14212228539981744, "learning_rate": 1.8591062984326497e-05, "loss": 0.4549, "num_tokens": 1814907204.0, "step": 7834 }, { "epoch": 2.8702422937754775, "grad_norm": 0.12872515411793004, "learning_rate": 1.8586869326773397e-05, "loss": 0.4584, "num_tokens": 1815788821.0, "step": 7835 }, { "epoch": 2.870608711583383, "grad_norm": 0.15533238623338744, "learning_rate": 1.8582675861386357e-05, "loss": 0.4469, "num_tokens": 1816428076.0, "step": 7836 }, { "epoch": 2.8709751293912884, "grad_norm": 0.12824512311880923, "learning_rate": 1.8578482588401468e-05, "loss": 0.4416, "num_tokens": 1817171377.0, "step": 7837 }, { "epoch": 2.8713415471991937, "grad_norm": 0.13717757377291576, "learning_rate": 1.8574289508054844e-05, "loss": 0.4773, "num_tokens": 1817898968.0, "step": 7838 }, { "epoch": 2.8717079650070994, "grad_norm": 0.12677717854501655, "learning_rate": 1.8570096620582547e-05, "loss": 0.4607, "num_tokens": 1818823334.0, "step": 7839 }, { "epoch": 2.872074382815005, "grad_norm": 0.13103034382763434, "learning_rate": 1.8565903926220644e-05, "loss": 0.4742, "num_tokens": 1819691002.0, "step": 7840 }, { "epoch": 2.8724408006229103, "grad_norm": 0.14023088637423098, "learning_rate": 1.8561711425205206e-05, "loss": 0.4665, "num_tokens": 1820425831.0, "step": 7841 }, { "epoch": 2.8728072184308155, "grad_norm": 0.1400568461261186, "learning_rate": 1.8557519117772256e-05, "loss": 0.4128, "num_tokens": 1821126744.0, "step": 7842 }, { "epoch": 2.8731736362387212, "grad_norm": 0.12879947308781936, "learning_rate": 1.8553327004157852e-05, "loss": 0.4502, "num_tokens": 1821922257.0, "step": 7843 }, { "epoch": 2.873540054046627, "grad_norm": 0.13545051817184187, "learning_rate": 1.8549135084598006e-05, "loss": 0.454, "num_tokens": 1822699967.0, "step": 7844 }, { "epoch": 2.873906471854532, "grad_norm": 0.13250351262154508, "learning_rate": 1.8544943359328734e-05, "loss": 0.4648, "num_tokens": 1823496757.0, "step": 7845 }, { "epoch": 2.8742728896624374, "grad_norm": 0.12281873936769787, "learning_rate": 1.8540751828586043e-05, "loss": 0.4574, "num_tokens": 1824293088.0, "step": 7846 }, { "epoch": 2.874639307470343, "grad_norm": 0.14263537221007438, "learning_rate": 1.8536560492605917e-05, "loss": 0.4426, "num_tokens": 1825007198.0, "step": 7847 }, { "epoch": 2.8750057252782484, "grad_norm": 0.135706132216994, "learning_rate": 1.8532369351624333e-05, "loss": 0.4346, "num_tokens": 1825797612.0, "step": 7848 }, { "epoch": 2.875372143086154, "grad_norm": 0.12622823463084665, "learning_rate": 1.852817840587728e-05, "loss": 0.4401, "num_tokens": 1826565229.0, "step": 7849 }, { "epoch": 2.8757385608940593, "grad_norm": 0.1286752737727122, "learning_rate": 1.8523987655600698e-05, "loss": 0.4437, "num_tokens": 1827331812.0, "step": 7850 }, { "epoch": 2.876104978701965, "grad_norm": 0.13400097104201258, "learning_rate": 1.8519797101030543e-05, "loss": 0.4343, "num_tokens": 1827992236.0, "step": 7851 }, { "epoch": 2.8764713965098703, "grad_norm": 0.13848414145454696, "learning_rate": 1.851560674240275e-05, "loss": 0.4551, "num_tokens": 1828694296.0, "step": 7852 }, { "epoch": 2.876837814317776, "grad_norm": 0.12628354601515934, "learning_rate": 1.8511416579953234e-05, "loss": 0.4581, "num_tokens": 1829507748.0, "step": 7853 }, { "epoch": 2.877204232125681, "grad_norm": 0.12010126409591014, "learning_rate": 1.850722661391793e-05, "loss": 0.4275, "num_tokens": 1830233008.0, "step": 7854 }, { "epoch": 2.877570649933587, "grad_norm": 0.23286815322318274, "learning_rate": 1.8503036844532727e-05, "loss": 0.4377, "num_tokens": 1831119846.0, "step": 7855 }, { "epoch": 2.877937067741492, "grad_norm": 0.1292600094188538, "learning_rate": 1.8498847272033527e-05, "loss": 0.4613, "num_tokens": 1831970932.0, "step": 7856 }, { "epoch": 2.878303485549398, "grad_norm": 0.1324688083164804, "learning_rate": 1.8494657896656196e-05, "loss": 0.4367, "num_tokens": 1832742836.0, "step": 7857 }, { "epoch": 2.878669903357303, "grad_norm": 0.13024293212394433, "learning_rate": 1.849046871863662e-05, "loss": 0.4406, "num_tokens": 1833483868.0, "step": 7858 }, { "epoch": 2.8790363211652084, "grad_norm": 0.12580098947545537, "learning_rate": 1.848627973821065e-05, "loss": 0.4646, "num_tokens": 1834305172.0, "step": 7859 }, { "epoch": 2.879402738973114, "grad_norm": 0.1304321473718548, "learning_rate": 1.8482090955614138e-05, "loss": 0.4696, "num_tokens": 1835081016.0, "step": 7860 }, { "epoch": 2.8797691567810197, "grad_norm": 0.12833484066951772, "learning_rate": 1.8477902371082915e-05, "loss": 0.4456, "num_tokens": 1835974354.0, "step": 7861 }, { "epoch": 2.880135574588925, "grad_norm": 0.13221182976962362, "learning_rate": 1.8473713984852812e-05, "loss": 0.4953, "num_tokens": 1836700526.0, "step": 7862 }, { "epoch": 2.8805019923968302, "grad_norm": 0.1468333214026622, "learning_rate": 1.8469525797159646e-05, "loss": 0.5076, "num_tokens": 1837388324.0, "step": 7863 }, { "epoch": 2.880868410204736, "grad_norm": 0.129934648330653, "learning_rate": 1.8465337808239213e-05, "loss": 0.4433, "num_tokens": 1838168670.0, "step": 7864 }, { "epoch": 2.8812348280126416, "grad_norm": 0.11711021382259455, "learning_rate": 1.8461150018327298e-05, "loss": 0.4369, "num_tokens": 1838999675.0, "step": 7865 }, { "epoch": 2.881601245820547, "grad_norm": 0.13737045946351953, "learning_rate": 1.8456962427659704e-05, "loss": 0.4658, "num_tokens": 1839804632.0, "step": 7866 }, { "epoch": 2.881967663628452, "grad_norm": 0.13203449269370732, "learning_rate": 1.845277503647219e-05, "loss": 0.4943, "num_tokens": 1840632427.0, "step": 7867 }, { "epoch": 2.882334081436358, "grad_norm": 0.12559955526736485, "learning_rate": 1.8448587845000505e-05, "loss": 0.4761, "num_tokens": 1841516833.0, "step": 7868 }, { "epoch": 2.8827004992442635, "grad_norm": 0.12757488248821225, "learning_rate": 1.844440085348041e-05, "loss": 0.446, "num_tokens": 1842282075.0, "step": 7869 }, { "epoch": 2.8830669170521688, "grad_norm": 0.13605180963808208, "learning_rate": 1.844021406214763e-05, "loss": 0.4519, "num_tokens": 1843014681.0, "step": 7870 }, { "epoch": 2.883433334860074, "grad_norm": 0.13604788928772973, "learning_rate": 1.8436027471237895e-05, "loss": 0.4468, "num_tokens": 1843678676.0, "step": 7871 }, { "epoch": 2.8837997526679797, "grad_norm": 0.1312740016783456, "learning_rate": 1.8431841080986928e-05, "loss": 0.4525, "num_tokens": 1844415780.0, "step": 7872 }, { "epoch": 2.884166170475885, "grad_norm": 0.137196607059986, "learning_rate": 1.842765489163041e-05, "loss": 0.4556, "num_tokens": 1845174858.0, "step": 7873 }, { "epoch": 2.8845325882837907, "grad_norm": 0.13038355374329905, "learning_rate": 1.842346890340405e-05, "loss": 0.43, "num_tokens": 1845914763.0, "step": 7874 }, { "epoch": 2.884899006091696, "grad_norm": 0.15850191798765387, "learning_rate": 1.8419283116543526e-05, "loss": 0.4653, "num_tokens": 1846565453.0, "step": 7875 }, { "epoch": 2.8852654238996016, "grad_norm": 0.13547998759955457, "learning_rate": 1.841509753128449e-05, "loss": 0.4809, "num_tokens": 1847312953.0, "step": 7876 }, { "epoch": 2.885631841707507, "grad_norm": 0.12525527266976402, "learning_rate": 1.8410912147862618e-05, "loss": 0.4373, "num_tokens": 1848174055.0, "step": 7877 }, { "epoch": 2.8859982595154126, "grad_norm": 0.12122213693777073, "learning_rate": 1.840672696651354e-05, "loss": 0.4031, "num_tokens": 1848954510.0, "step": 7878 }, { "epoch": 2.886364677323318, "grad_norm": 0.1439881449734925, "learning_rate": 1.8402541987472905e-05, "loss": 0.4514, "num_tokens": 1849700063.0, "step": 7879 }, { "epoch": 2.8867310951312235, "grad_norm": 0.13841010037171222, "learning_rate": 1.8398357210976333e-05, "loss": 0.4841, "num_tokens": 1850621487.0, "step": 7880 }, { "epoch": 2.8870975129391288, "grad_norm": 0.1447335078684676, "learning_rate": 1.8394172637259415e-05, "loss": 0.4709, "num_tokens": 1851396207.0, "step": 7881 }, { "epoch": 2.8874639307470344, "grad_norm": 0.14484543311505718, "learning_rate": 1.838998826655777e-05, "loss": 0.5256, "num_tokens": 1852212772.0, "step": 7882 }, { "epoch": 2.8878303485549397, "grad_norm": 0.14017152435644792, "learning_rate": 1.838580409910699e-05, "loss": 0.4944, "num_tokens": 1852896770.0, "step": 7883 }, { "epoch": 2.8881967663628454, "grad_norm": 0.14155817163911355, "learning_rate": 1.838162013514264e-05, "loss": 0.4453, "num_tokens": 1853729026.0, "step": 7884 }, { "epoch": 2.8885631841707506, "grad_norm": 0.137717695030953, "learning_rate": 1.837743637490029e-05, "loss": 0.4567, "num_tokens": 1854500546.0, "step": 7885 }, { "epoch": 2.8889296019786563, "grad_norm": 0.1268244830470926, "learning_rate": 1.8373252818615498e-05, "loss": 0.4507, "num_tokens": 1855308487.0, "step": 7886 }, { "epoch": 2.8892960197865616, "grad_norm": 0.1441552759808841, "learning_rate": 1.836906946652379e-05, "loss": 0.4833, "num_tokens": 1856052543.0, "step": 7887 }, { "epoch": 2.889662437594467, "grad_norm": 0.13467534770555462, "learning_rate": 1.836488631886072e-05, "loss": 0.4825, "num_tokens": 1856866850.0, "step": 7888 }, { "epoch": 2.8900288554023725, "grad_norm": 0.1232362056489837, "learning_rate": 1.83607033758618e-05, "loss": 0.4513, "num_tokens": 1857751548.0, "step": 7889 }, { "epoch": 2.8903952732102782, "grad_norm": 0.12060052688886178, "learning_rate": 1.8356520637762535e-05, "loss": 0.4441, "num_tokens": 1858601153.0, "step": 7890 }, { "epoch": 2.8907616910181835, "grad_norm": 0.13335929439450964, "learning_rate": 1.8352338104798414e-05, "loss": 0.468, "num_tokens": 1859377958.0, "step": 7891 }, { "epoch": 2.8911281088260887, "grad_norm": 0.13888855581184278, "learning_rate": 1.834815577720493e-05, "loss": 0.4654, "num_tokens": 1860187059.0, "step": 7892 }, { "epoch": 2.8914945266339944, "grad_norm": 0.13148848952485412, "learning_rate": 1.834397365521756e-05, "loss": 0.4479, "num_tokens": 1860912383.0, "step": 7893 }, { "epoch": 2.8918609444419, "grad_norm": 0.14345562553040375, "learning_rate": 1.8339791739071766e-05, "loss": 0.4387, "num_tokens": 1861703386.0, "step": 7894 }, { "epoch": 2.8922273622498054, "grad_norm": 0.12543828544869046, "learning_rate": 1.833561002900299e-05, "loss": 0.4271, "num_tokens": 1862529150.0, "step": 7895 }, { "epoch": 2.8925937800577106, "grad_norm": 0.1293971236473211, "learning_rate": 1.8331428525246674e-05, "loss": 0.472, "num_tokens": 1863362608.0, "step": 7896 }, { "epoch": 2.8929601978656163, "grad_norm": 0.13215203182251056, "learning_rate": 1.8327247228038252e-05, "loss": 0.4317, "num_tokens": 1864114330.0, "step": 7897 }, { "epoch": 2.893326615673522, "grad_norm": 0.13727757871821641, "learning_rate": 1.832306613761312e-05, "loss": 0.4428, "num_tokens": 1864882810.0, "step": 7898 }, { "epoch": 2.8936930334814273, "grad_norm": 0.13909417789972414, "learning_rate": 1.83188852542067e-05, "loss": 0.4703, "num_tokens": 1865601408.0, "step": 7899 }, { "epoch": 2.8940594512893325, "grad_norm": 0.12917524228247637, "learning_rate": 1.8314704578054386e-05, "loss": 0.4534, "num_tokens": 1866390487.0, "step": 7900 }, { "epoch": 2.894425869097238, "grad_norm": 0.13302550964022733, "learning_rate": 1.8310524109391546e-05, "loss": 0.4354, "num_tokens": 1867158284.0, "step": 7901 }, { "epoch": 2.8947922869051435, "grad_norm": 0.13053751100829863, "learning_rate": 1.8306343848453556e-05, "loss": 0.4562, "num_tokens": 1867948697.0, "step": 7902 }, { "epoch": 2.895158704713049, "grad_norm": 0.13184382673274522, "learning_rate": 1.8302163795475764e-05, "loss": 0.4209, "num_tokens": 1868673698.0, "step": 7903 }, { "epoch": 2.8955251225209544, "grad_norm": 0.13478949694581258, "learning_rate": 1.8297983950693527e-05, "loss": 0.4587, "num_tokens": 1869388525.0, "step": 7904 }, { "epoch": 2.89589154032886, "grad_norm": 0.12032131974420648, "learning_rate": 1.8293804314342175e-05, "loss": 0.4092, "num_tokens": 1870153476.0, "step": 7905 }, { "epoch": 2.8962579581367653, "grad_norm": 0.13203175391979907, "learning_rate": 1.828962488665703e-05, "loss": 0.4538, "num_tokens": 1870958632.0, "step": 7906 }, { "epoch": 2.896624375944671, "grad_norm": 0.13676976711856825, "learning_rate": 1.8285445667873403e-05, "loss": 0.439, "num_tokens": 1871649634.0, "step": 7907 }, { "epoch": 2.8969907937525763, "grad_norm": 0.12497913913470288, "learning_rate": 1.8281266658226587e-05, "loss": 0.4458, "num_tokens": 1872426888.0, "step": 7908 }, { "epoch": 2.897357211560482, "grad_norm": 0.12924406975254552, "learning_rate": 1.8277087857951862e-05, "loss": 0.4556, "num_tokens": 1873212038.0, "step": 7909 }, { "epoch": 2.8977236293683872, "grad_norm": 0.15084392621855863, "learning_rate": 1.827290926728453e-05, "loss": 0.4766, "num_tokens": 1873954799.0, "step": 7910 }, { "epoch": 2.898090047176293, "grad_norm": 0.12291886618999945, "learning_rate": 1.8268730886459826e-05, "loss": 0.4115, "num_tokens": 1874677748.0, "step": 7911 }, { "epoch": 2.898456464984198, "grad_norm": 0.14201147294237645, "learning_rate": 1.8264552715713015e-05, "loss": 0.4969, "num_tokens": 1875409248.0, "step": 7912 }, { "epoch": 2.8988228827921034, "grad_norm": 0.1306571053697752, "learning_rate": 1.8260374755279334e-05, "loss": 0.4776, "num_tokens": 1876198417.0, "step": 7913 }, { "epoch": 2.899189300600009, "grad_norm": 0.12445505927936308, "learning_rate": 1.8256197005394006e-05, "loss": 0.4436, "num_tokens": 1877143314.0, "step": 7914 }, { "epoch": 2.899555718407915, "grad_norm": 0.14905750309064741, "learning_rate": 1.8252019466292257e-05, "loss": 0.4657, "num_tokens": 1877891033.0, "step": 7915 }, { "epoch": 2.89992213621582, "grad_norm": 0.1424074680826994, "learning_rate": 1.8247842138209275e-05, "loss": 0.4428, "num_tokens": 1878656393.0, "step": 7916 }, { "epoch": 2.9002885540237253, "grad_norm": 0.12385858759558663, "learning_rate": 1.8243665021380267e-05, "loss": 0.4507, "num_tokens": 1879460309.0, "step": 7917 }, { "epoch": 2.900654971831631, "grad_norm": 0.1508494022456864, "learning_rate": 1.823948811604041e-05, "loss": 0.4692, "num_tokens": 1880219176.0, "step": 7918 }, { "epoch": 2.9010213896395367, "grad_norm": 0.14004003724482358, "learning_rate": 1.8235311422424863e-05, "loss": 0.4422, "num_tokens": 1881052816.0, "step": 7919 }, { "epoch": 2.901387807447442, "grad_norm": 0.14435303926451318, "learning_rate": 1.8231134940768783e-05, "loss": 0.4636, "num_tokens": 1881857207.0, "step": 7920 }, { "epoch": 2.901754225255347, "grad_norm": 0.13268579470541783, "learning_rate": 1.822695867130733e-05, "loss": 0.4131, "num_tokens": 1882511166.0, "step": 7921 }, { "epoch": 2.902120643063253, "grad_norm": 0.14367700709776487, "learning_rate": 1.8222782614275627e-05, "loss": 0.4308, "num_tokens": 1883195141.0, "step": 7922 }, { "epoch": 2.9024870608711586, "grad_norm": 0.14148367523388183, "learning_rate": 1.8218606769908787e-05, "loss": 0.4671, "num_tokens": 1883973334.0, "step": 7923 }, { "epoch": 2.902853478679064, "grad_norm": 0.13684156317044616, "learning_rate": 1.821443113844193e-05, "loss": 0.495, "num_tokens": 1884759587.0, "step": 7924 }, { "epoch": 2.903219896486969, "grad_norm": 0.1403942011131741, "learning_rate": 1.821025572011013e-05, "loss": 0.4762, "num_tokens": 1885531859.0, "step": 7925 }, { "epoch": 2.903586314294875, "grad_norm": 0.1350208137157977, "learning_rate": 1.82060805151485e-05, "loss": 0.4338, "num_tokens": 1886328887.0, "step": 7926 }, { "epoch": 2.90395273210278, "grad_norm": 0.1628366509012367, "learning_rate": 1.8201905523792106e-05, "loss": 0.4645, "num_tokens": 1887173623.0, "step": 7927 }, { "epoch": 2.9043191499106857, "grad_norm": 0.12758044796128917, "learning_rate": 1.8197730746276002e-05, "loss": 0.4509, "num_tokens": 1888016903.0, "step": 7928 }, { "epoch": 2.904685567718591, "grad_norm": 0.15557147337152882, "learning_rate": 1.819355618283523e-05, "loss": 0.5003, "num_tokens": 1888669543.0, "step": 7929 }, { "epoch": 2.9050519855264967, "grad_norm": 0.1409107023659395, "learning_rate": 1.818938183370484e-05, "loss": 0.46, "num_tokens": 1889540090.0, "step": 7930 }, { "epoch": 2.905418403334402, "grad_norm": 0.1371017891951305, "learning_rate": 1.818520769911984e-05, "loss": 0.4355, "num_tokens": 1890367939.0, "step": 7931 }, { "epoch": 2.9057848211423076, "grad_norm": 0.12467733201681713, "learning_rate": 1.8181033779315263e-05, "loss": 0.4358, "num_tokens": 1891124554.0, "step": 7932 }, { "epoch": 2.906151238950213, "grad_norm": 0.1416765454849527, "learning_rate": 1.817686007452609e-05, "loss": 0.4322, "num_tokens": 1891920991.0, "step": 7933 }, { "epoch": 2.9065176567581186, "grad_norm": 0.13961571633087064, "learning_rate": 1.8172686584987325e-05, "loss": 0.4734, "num_tokens": 1892680564.0, "step": 7934 }, { "epoch": 2.906884074566024, "grad_norm": 0.1398315672631423, "learning_rate": 1.816851331093393e-05, "loss": 0.483, "num_tokens": 1893436677.0, "step": 7935 }, { "epoch": 2.9072504923739295, "grad_norm": 0.12736776196744792, "learning_rate": 1.8164340252600872e-05, "loss": 0.4267, "num_tokens": 1894210532.0, "step": 7936 }, { "epoch": 2.9076169101818348, "grad_norm": 0.13344308420584397, "learning_rate": 1.81601674102231e-05, "loss": 0.4787, "num_tokens": 1894945189.0, "step": 7937 }, { "epoch": 2.9079833279897405, "grad_norm": 0.14464691896160242, "learning_rate": 1.815599478403557e-05, "loss": 0.4981, "num_tokens": 1895755899.0, "step": 7938 }, { "epoch": 2.9083497457976457, "grad_norm": 0.12517538104460513, "learning_rate": 1.8151822374273195e-05, "loss": 0.4569, "num_tokens": 1896662990.0, "step": 7939 }, { "epoch": 2.9087161636055514, "grad_norm": 0.13152899480439117, "learning_rate": 1.8147650181170887e-05, "loss": 0.4623, "num_tokens": 1897477096.0, "step": 7940 }, { "epoch": 2.9090825814134567, "grad_norm": 0.13678706741418964, "learning_rate": 1.8143478204963562e-05, "loss": 0.4622, "num_tokens": 1898232956.0, "step": 7941 }, { "epoch": 2.909448999221362, "grad_norm": 0.14200796447960481, "learning_rate": 1.813930644588609e-05, "loss": 0.4827, "num_tokens": 1898938622.0, "step": 7942 }, { "epoch": 2.9098154170292676, "grad_norm": 0.1235650979852715, "learning_rate": 1.8135134904173365e-05, "loss": 0.4643, "num_tokens": 1899783900.0, "step": 7943 }, { "epoch": 2.9101818348371733, "grad_norm": 0.13838465598120195, "learning_rate": 1.8130963580060256e-05, "loss": 0.4675, "num_tokens": 1900526057.0, "step": 7944 }, { "epoch": 2.9105482526450785, "grad_norm": 0.13002124532846798, "learning_rate": 1.812679247378161e-05, "loss": 0.4617, "num_tokens": 1901371410.0, "step": 7945 }, { "epoch": 2.910914670452984, "grad_norm": 0.13790971499656907, "learning_rate": 1.8122621585572266e-05, "loss": 0.4773, "num_tokens": 1902099624.0, "step": 7946 }, { "epoch": 2.9112810882608895, "grad_norm": 0.13217398384765158, "learning_rate": 1.8118450915667057e-05, "loss": 0.4754, "num_tokens": 1902839381.0, "step": 7947 }, { "epoch": 2.911647506068795, "grad_norm": 0.1482114803598617, "learning_rate": 1.8114280464300797e-05, "loss": 0.4618, "num_tokens": 1903525998.0, "step": 7948 }, { "epoch": 2.9120139238767004, "grad_norm": 0.13471006158369955, "learning_rate": 1.81101102317083e-05, "loss": 0.4469, "num_tokens": 1904378516.0, "step": 7949 }, { "epoch": 2.9123803416846057, "grad_norm": 0.13172250014451134, "learning_rate": 1.8105940218124352e-05, "loss": 0.4838, "num_tokens": 1905197666.0, "step": 7950 }, { "epoch": 2.9127467594925114, "grad_norm": 0.1365220019051733, "learning_rate": 1.8101770423783727e-05, "loss": 0.4656, "num_tokens": 1905928607.0, "step": 7951 }, { "epoch": 2.9131131773004166, "grad_norm": 0.13814000684472091, "learning_rate": 1.8097600848921205e-05, "loss": 0.4962, "num_tokens": 1906726257.0, "step": 7952 }, { "epoch": 2.9134795951083223, "grad_norm": 0.13184278081939357, "learning_rate": 1.8093431493771528e-05, "loss": 0.4563, "num_tokens": 1907443355.0, "step": 7953 }, { "epoch": 2.9138460129162276, "grad_norm": 0.1397730856693541, "learning_rate": 1.808926235856945e-05, "loss": 0.4597, "num_tokens": 1908114715.0, "step": 7954 }, { "epoch": 2.9142124307241333, "grad_norm": 0.13493215220877963, "learning_rate": 1.80850934435497e-05, "loss": 0.4519, "num_tokens": 1908873381.0, "step": 7955 }, { "epoch": 2.9145788485320385, "grad_norm": 0.14729688160720059, "learning_rate": 1.808092474894699e-05, "loss": 0.4874, "num_tokens": 1909627014.0, "step": 7956 }, { "epoch": 2.914945266339944, "grad_norm": 0.13187811237029723, "learning_rate": 1.8076756274996032e-05, "loss": 0.4321, "num_tokens": 1910414819.0, "step": 7957 }, { "epoch": 2.9153116841478495, "grad_norm": 0.15540043349753954, "learning_rate": 1.807258802193152e-05, "loss": 0.4398, "num_tokens": 1911001068.0, "step": 7958 }, { "epoch": 2.915678101955755, "grad_norm": 0.13648997880133956, "learning_rate": 1.806841998998812e-05, "loss": 0.4561, "num_tokens": 1911703700.0, "step": 7959 }, { "epoch": 2.9160445197636604, "grad_norm": 0.13047048530014044, "learning_rate": 1.806425217940053e-05, "loss": 0.4281, "num_tokens": 1912483730.0, "step": 7960 }, { "epoch": 2.916410937571566, "grad_norm": 0.15038471345882345, "learning_rate": 1.8060084590403374e-05, "loss": 0.4603, "num_tokens": 1913246402.0, "step": 7961 }, { "epoch": 2.9167773553794714, "grad_norm": 0.13644318274680867, "learning_rate": 1.8055917223231313e-05, "loss": 0.4654, "num_tokens": 1914079424.0, "step": 7962 }, { "epoch": 2.917143773187377, "grad_norm": 0.13156664353866632, "learning_rate": 1.805175007811898e-05, "loss": 0.4383, "num_tokens": 1914777345.0, "step": 7963 }, { "epoch": 2.9175101909952823, "grad_norm": 0.12955488729910067, "learning_rate": 1.8047583155300984e-05, "loss": 0.4571, "num_tokens": 1915604794.0, "step": 7964 }, { "epoch": 2.917876608803188, "grad_norm": 0.14048409904056766, "learning_rate": 1.8043416455011938e-05, "loss": 0.4409, "num_tokens": 1916333166.0, "step": 7965 }, { "epoch": 2.9182430266110932, "grad_norm": 0.13248010897247603, "learning_rate": 1.803924997748644e-05, "loss": 0.4335, "num_tokens": 1917110405.0, "step": 7966 }, { "epoch": 2.9186094444189985, "grad_norm": 0.14254406321107796, "learning_rate": 1.8035083722959056e-05, "loss": 0.4441, "num_tokens": 1917848612.0, "step": 7967 }, { "epoch": 2.918975862226904, "grad_norm": 0.12985253979767175, "learning_rate": 1.8030917691664367e-05, "loss": 0.4416, "num_tokens": 1918601478.0, "step": 7968 }, { "epoch": 2.91934228003481, "grad_norm": 0.1364801555727388, "learning_rate": 1.8026751883836926e-05, "loss": 0.4319, "num_tokens": 1919303023.0, "step": 7969 }, { "epoch": 2.919708697842715, "grad_norm": 0.13657652321814182, "learning_rate": 1.8022586299711266e-05, "loss": 0.4776, "num_tokens": 1920031266.0, "step": 7970 }, { "epoch": 2.9200751156506204, "grad_norm": 0.14653412067985722, "learning_rate": 1.8018420939521933e-05, "loss": 0.4458, "num_tokens": 1920777851.0, "step": 7971 }, { "epoch": 2.920441533458526, "grad_norm": 0.13406345560754168, "learning_rate": 1.801425580350344e-05, "loss": 0.4353, "num_tokens": 1921482085.0, "step": 7972 }, { "epoch": 2.920807951266432, "grad_norm": 0.13190656043338808, "learning_rate": 1.8010090891890292e-05, "loss": 0.4693, "num_tokens": 1922203958.0, "step": 7973 }, { "epoch": 2.921174369074337, "grad_norm": 0.13003875666820333, "learning_rate": 1.8005926204916983e-05, "loss": 0.4617, "num_tokens": 1922983487.0, "step": 7974 }, { "epoch": 2.9215407868822423, "grad_norm": 0.1345776010565572, "learning_rate": 1.8001761742817982e-05, "loss": 0.4492, "num_tokens": 1923779498.0, "step": 7975 }, { "epoch": 2.921907204690148, "grad_norm": 0.14161629270426912, "learning_rate": 1.799759750582777e-05, "loss": 0.4683, "num_tokens": 1924451972.0, "step": 7976 }, { "epoch": 2.9222736224980537, "grad_norm": 0.13855103283472878, "learning_rate": 1.7993433494180804e-05, "loss": 0.4523, "num_tokens": 1925140208.0, "step": 7977 }, { "epoch": 2.922640040305959, "grad_norm": 0.14149642700361253, "learning_rate": 1.7989269708111513e-05, "loss": 0.4577, "num_tokens": 1925874923.0, "step": 7978 }, { "epoch": 2.923006458113864, "grad_norm": 0.1355870623634165, "learning_rate": 1.7985106147854336e-05, "loss": 0.4468, "num_tokens": 1926580024.0, "step": 7979 }, { "epoch": 2.92337287592177, "grad_norm": 0.12979740788341357, "learning_rate": 1.798094281364369e-05, "loss": 0.425, "num_tokens": 1927358154.0, "step": 7980 }, { "epoch": 2.923739293729675, "grad_norm": 0.1385062151534895, "learning_rate": 1.797677970571396e-05, "loss": 0.4659, "num_tokens": 1928075520.0, "step": 7981 }, { "epoch": 2.924105711537581, "grad_norm": 0.13905067517248965, "learning_rate": 1.797261682429957e-05, "loss": 0.4576, "num_tokens": 1928859368.0, "step": 7982 }, { "epoch": 2.924472129345486, "grad_norm": 0.11852947017462613, "learning_rate": 1.7968454169634882e-05, "loss": 0.4358, "num_tokens": 1929760784.0, "step": 7983 }, { "epoch": 2.9248385471533918, "grad_norm": 0.1289227909855525, "learning_rate": 1.7964291741954257e-05, "loss": 0.4387, "num_tokens": 1930482381.0, "step": 7984 }, { "epoch": 2.925204964961297, "grad_norm": 0.1362365786497064, "learning_rate": 1.7960129541492053e-05, "loss": 0.4307, "num_tokens": 1931139082.0, "step": 7985 }, { "epoch": 2.9255713827692027, "grad_norm": 0.13806066680112883, "learning_rate": 1.7955967568482604e-05, "loss": 0.4225, "num_tokens": 1931885069.0, "step": 7986 }, { "epoch": 2.925937800577108, "grad_norm": 0.16122735863879045, "learning_rate": 1.7951805823160252e-05, "loss": 0.4646, "num_tokens": 1932556081.0, "step": 7987 }, { "epoch": 2.9263042183850136, "grad_norm": 0.12323952661108713, "learning_rate": 1.7947644305759295e-05, "loss": 0.4446, "num_tokens": 1933316696.0, "step": 7988 }, { "epoch": 2.926670636192919, "grad_norm": 0.13314969606215693, "learning_rate": 1.7943483016514048e-05, "loss": 0.4566, "num_tokens": 1934052812.0, "step": 7989 }, { "epoch": 2.9270370540008246, "grad_norm": 0.139710774740663, "learning_rate": 1.79393219556588e-05, "loss": 0.4691, "num_tokens": 1934701702.0, "step": 7990 }, { "epoch": 2.92740347180873, "grad_norm": 0.13280364710659404, "learning_rate": 1.793516112342781e-05, "loss": 0.4658, "num_tokens": 1935525333.0, "step": 7991 }, { "epoch": 2.9277698896166355, "grad_norm": 0.1457809869975033, "learning_rate": 1.793100052005535e-05, "loss": 0.4732, "num_tokens": 1936187391.0, "step": 7992 }, { "epoch": 2.928136307424541, "grad_norm": 0.13643287821435615, "learning_rate": 1.792684014577568e-05, "loss": 0.4389, "num_tokens": 1936857435.0, "step": 7993 }, { "epoch": 2.9285027252324465, "grad_norm": 0.1357816315958163, "learning_rate": 1.792268000082303e-05, "loss": 0.4464, "num_tokens": 1937716838.0, "step": 7994 }, { "epoch": 2.9288691430403517, "grad_norm": 0.13149410155766317, "learning_rate": 1.7918520085431627e-05, "loss": 0.4468, "num_tokens": 1938406395.0, "step": 7995 }, { "epoch": 2.929235560848257, "grad_norm": 0.12984715302639208, "learning_rate": 1.791436039983568e-05, "loss": 0.4713, "num_tokens": 1939159340.0, "step": 7996 }, { "epoch": 2.9296019786561627, "grad_norm": 0.135454576510474, "learning_rate": 1.7910200944269375e-05, "loss": 0.4482, "num_tokens": 1939823907.0, "step": 7997 }, { "epoch": 2.9299683964640684, "grad_norm": 0.12939762916229075, "learning_rate": 1.7906041718966922e-05, "loss": 0.4423, "num_tokens": 1940583977.0, "step": 7998 }, { "epoch": 2.9303348142719736, "grad_norm": 0.14053121800262078, "learning_rate": 1.790188272416248e-05, "loss": 0.4803, "num_tokens": 1941374246.0, "step": 7999 }, { "epoch": 2.930701232079879, "grad_norm": 0.1294387411090539, "learning_rate": 1.7897723960090215e-05, "loss": 0.4582, "num_tokens": 1942201877.0, "step": 8000 }, { "epoch": 2.9310676498877846, "grad_norm": 0.133486635052163, "learning_rate": 1.7893565426984262e-05, "loss": 0.4743, "num_tokens": 1942912358.0, "step": 8001 }, { "epoch": 2.9314340676956903, "grad_norm": 0.13695278001844674, "learning_rate": 1.7889407125078764e-05, "loss": 0.4902, "num_tokens": 1943653521.0, "step": 8002 }, { "epoch": 2.9318004855035955, "grad_norm": 0.12106856444434036, "learning_rate": 1.7885249054607835e-05, "loss": 0.4383, "num_tokens": 1944480867.0, "step": 8003 }, { "epoch": 2.9321669033115008, "grad_norm": 0.12059777946851265, "learning_rate": 1.7881091215805593e-05, "loss": 0.4269, "num_tokens": 1945297483.0, "step": 8004 }, { "epoch": 2.9325333211194065, "grad_norm": 0.13283354114210402, "learning_rate": 1.787693360890613e-05, "loss": 0.4559, "num_tokens": 1946009400.0, "step": 8005 }, { "epoch": 2.9328997389273117, "grad_norm": 0.13473067719330528, "learning_rate": 1.7872776234143526e-05, "loss": 0.4808, "num_tokens": 1946733900.0, "step": 8006 }, { "epoch": 2.9332661567352174, "grad_norm": 0.14730024520553517, "learning_rate": 1.7868619091751848e-05, "loss": 0.5062, "num_tokens": 1947440369.0, "step": 8007 }, { "epoch": 2.9336325745431227, "grad_norm": 0.1324532497401966, "learning_rate": 1.7864462181965138e-05, "loss": 0.4207, "num_tokens": 1948153885.0, "step": 8008 }, { "epoch": 2.9339989923510283, "grad_norm": 0.12182620600676818, "learning_rate": 1.7860305505017458e-05, "loss": 0.4552, "num_tokens": 1949019086.0, "step": 8009 }, { "epoch": 2.9343654101589336, "grad_norm": 0.12297630439848051, "learning_rate": 1.785614906114284e-05, "loss": 0.4488, "num_tokens": 1949830087.0, "step": 8010 }, { "epoch": 2.9347318279668393, "grad_norm": 0.12668098703655356, "learning_rate": 1.7851992850575282e-05, "loss": 0.4794, "num_tokens": 1950705055.0, "step": 8011 }, { "epoch": 2.9350982457747445, "grad_norm": 0.13302950419760043, "learning_rate": 1.7847836873548803e-05, "loss": 0.4665, "num_tokens": 1951429855.0, "step": 8012 }, { "epoch": 2.9354646635826502, "grad_norm": 0.12689097216506048, "learning_rate": 1.7843681130297382e-05, "loss": 0.4525, "num_tokens": 1952331168.0, "step": 8013 }, { "epoch": 2.9358310813905555, "grad_norm": 0.1401244839482645, "learning_rate": 1.7839525621054995e-05, "loss": 0.4759, "num_tokens": 1953000861.0, "step": 8014 }, { "epoch": 2.936197499198461, "grad_norm": 0.13574654653924362, "learning_rate": 1.7835370346055615e-05, "loss": 0.4606, "num_tokens": 1953736423.0, "step": 8015 }, { "epoch": 2.9365639170063664, "grad_norm": 0.12971453961184734, "learning_rate": 1.7831215305533187e-05, "loss": 0.4627, "num_tokens": 1954527439.0, "step": 8016 }, { "epoch": 2.936930334814272, "grad_norm": 0.14069685042914212, "learning_rate": 1.7827060499721647e-05, "loss": 0.4586, "num_tokens": 1955276102.0, "step": 8017 }, { "epoch": 2.9372967526221774, "grad_norm": 0.13737260220754263, "learning_rate": 1.7822905928854923e-05, "loss": 0.4338, "num_tokens": 1956050505.0, "step": 8018 }, { "epoch": 2.937663170430083, "grad_norm": 0.12686983472590024, "learning_rate": 1.7818751593166923e-05, "loss": 0.47, "num_tokens": 1956929065.0, "step": 8019 }, { "epoch": 2.9380295882379883, "grad_norm": 0.13731668301453265, "learning_rate": 1.7814597492891534e-05, "loss": 0.48, "num_tokens": 1957698386.0, "step": 8020 }, { "epoch": 2.9383960060458936, "grad_norm": 0.14199789778724192, "learning_rate": 1.7810443628262662e-05, "loss": 0.4249, "num_tokens": 1958412833.0, "step": 8021 }, { "epoch": 2.9387624238537993, "grad_norm": 0.14730127628829962, "learning_rate": 1.780628999951416e-05, "loss": 0.4547, "num_tokens": 1959039447.0, "step": 8022 }, { "epoch": 2.939128841661705, "grad_norm": 0.12844622124906113, "learning_rate": 1.7802136606879895e-05, "loss": 0.4837, "num_tokens": 1959858374.0, "step": 8023 }, { "epoch": 2.93949525946961, "grad_norm": 0.11922832967145428, "learning_rate": 1.779798345059371e-05, "loss": 0.4078, "num_tokens": 1960737981.0, "step": 8024 }, { "epoch": 2.9398616772775155, "grad_norm": 0.1536989542809556, "learning_rate": 1.7793830530889424e-05, "loss": 0.4781, "num_tokens": 1961526007.0, "step": 8025 }, { "epoch": 2.940228095085421, "grad_norm": 0.1430621564149489, "learning_rate": 1.778967784800087e-05, "loss": 0.4832, "num_tokens": 1962259242.0, "step": 8026 }, { "epoch": 2.940594512893327, "grad_norm": 0.12414688386291255, "learning_rate": 1.778552540216185e-05, "loss": 0.4509, "num_tokens": 1963118909.0, "step": 8027 }, { "epoch": 2.940960930701232, "grad_norm": 0.12883018264310733, "learning_rate": 1.778137319360615e-05, "loss": 0.4456, "num_tokens": 1963925443.0, "step": 8028 }, { "epoch": 2.9413273485091374, "grad_norm": 0.1417950848814394, "learning_rate": 1.777722122256755e-05, "loss": 0.4958, "num_tokens": 1964773340.0, "step": 8029 }, { "epoch": 2.941693766317043, "grad_norm": 0.14546243698507563, "learning_rate": 1.7773069489279816e-05, "loss": 0.4461, "num_tokens": 1965449385.0, "step": 8030 }, { "epoch": 2.9420601841249487, "grad_norm": 0.1432105223883278, "learning_rate": 1.7768917993976686e-05, "loss": 0.4385, "num_tokens": 1966106776.0, "step": 8031 }, { "epoch": 2.942426601932854, "grad_norm": 0.13033889908002178, "learning_rate": 1.7764766736891918e-05, "loss": 0.4487, "num_tokens": 1966863379.0, "step": 8032 }, { "epoch": 2.9427930197407592, "grad_norm": 0.13899375565876126, "learning_rate": 1.776061571825922e-05, "loss": 0.4551, "num_tokens": 1967534071.0, "step": 8033 }, { "epoch": 2.943159437548665, "grad_norm": 0.13608145375797706, "learning_rate": 1.7756464938312318e-05, "loss": 0.4966, "num_tokens": 1968277986.0, "step": 8034 }, { "epoch": 2.94352585535657, "grad_norm": 0.1206116104711106, "learning_rate": 1.775231439728489e-05, "loss": 0.4409, "num_tokens": 1969173552.0, "step": 8035 }, { "epoch": 2.943892273164476, "grad_norm": 0.13170549043770957, "learning_rate": 1.7748164095410634e-05, "loss": 0.4977, "num_tokens": 1970040764.0, "step": 8036 }, { "epoch": 2.944258690972381, "grad_norm": 0.13002609897333683, "learning_rate": 1.7744014032923215e-05, "loss": 0.4145, "num_tokens": 1970755315.0, "step": 8037 }, { "epoch": 2.944625108780287, "grad_norm": 0.13340987533657103, "learning_rate": 1.7739864210056294e-05, "loss": 0.4434, "num_tokens": 1971511941.0, "step": 8038 }, { "epoch": 2.944991526588192, "grad_norm": 0.12932804912128576, "learning_rate": 1.773571462704351e-05, "loss": 0.4582, "num_tokens": 1972268762.0, "step": 8039 }, { "epoch": 2.9453579443960978, "grad_norm": 0.12687191531112474, "learning_rate": 1.7731565284118492e-05, "loss": 0.4489, "num_tokens": 1973104686.0, "step": 8040 }, { "epoch": 2.945724362204003, "grad_norm": 0.12429620590581722, "learning_rate": 1.772741618151486e-05, "loss": 0.449, "num_tokens": 1973983830.0, "step": 8041 }, { "epoch": 2.9460907800119087, "grad_norm": 0.13529420639454304, "learning_rate": 1.7723267319466208e-05, "loss": 0.4403, "num_tokens": 1974768475.0, "step": 8042 }, { "epoch": 2.946457197819814, "grad_norm": 0.13503276880740525, "learning_rate": 1.771911869820614e-05, "loss": 0.4555, "num_tokens": 1975506190.0, "step": 8043 }, { "epoch": 2.9468236156277197, "grad_norm": 0.14071873031765325, "learning_rate": 1.771497031796822e-05, "loss": 0.421, "num_tokens": 1976196597.0, "step": 8044 }, { "epoch": 2.947190033435625, "grad_norm": 0.14277337049539662, "learning_rate": 1.7710822178986017e-05, "loss": 0.4766, "num_tokens": 1977042948.0, "step": 8045 }, { "epoch": 2.94755645124353, "grad_norm": 0.14184405669497316, "learning_rate": 1.7706674281493074e-05, "loss": 0.4531, "num_tokens": 1977786932.0, "step": 8046 }, { "epoch": 2.947922869051436, "grad_norm": 0.1381846656256579, "learning_rate": 1.770252662572292e-05, "loss": 0.463, "num_tokens": 1978501552.0, "step": 8047 }, { "epoch": 2.9482892868593416, "grad_norm": 0.1268622666602519, "learning_rate": 1.76983792119091e-05, "loss": 0.4485, "num_tokens": 1979215413.0, "step": 8048 }, { "epoch": 2.948655704667247, "grad_norm": 0.13309960247958746, "learning_rate": 1.76942320402851e-05, "loss": 0.4712, "num_tokens": 1980077179.0, "step": 8049 }, { "epoch": 2.949022122475152, "grad_norm": 0.12842732508040705, "learning_rate": 1.7690085111084415e-05, "loss": 0.4318, "num_tokens": 1980848715.0, "step": 8050 }, { "epoch": 2.9493885402830577, "grad_norm": 0.15546902410944918, "learning_rate": 1.768593842454054e-05, "loss": 0.4736, "num_tokens": 1981604120.0, "step": 8051 }, { "epoch": 2.9497549580909634, "grad_norm": 0.13078185320237934, "learning_rate": 1.768179198088692e-05, "loss": 0.4426, "num_tokens": 1982493679.0, "step": 8052 }, { "epoch": 2.9501213758988687, "grad_norm": 0.1260141817842202, "learning_rate": 1.7677645780357026e-05, "loss": 0.4566, "num_tokens": 1983368979.0, "step": 8053 }, { "epoch": 2.950487793706774, "grad_norm": 0.1333894172195427, "learning_rate": 1.767349982318429e-05, "loss": 0.4278, "num_tokens": 1984137763.0, "step": 8054 }, { "epoch": 2.9508542115146796, "grad_norm": 0.14654784330801746, "learning_rate": 1.7669354109602145e-05, "loss": 0.4473, "num_tokens": 1984822600.0, "step": 8055 }, { "epoch": 2.9512206293225853, "grad_norm": 0.13759540387867364, "learning_rate": 1.7665208639843996e-05, "loss": 0.4293, "num_tokens": 1985557598.0, "step": 8056 }, { "epoch": 2.9515870471304906, "grad_norm": 0.14057015105388443, "learning_rate": 1.7661063414143242e-05, "loss": 0.4748, "num_tokens": 1986243078.0, "step": 8057 }, { "epoch": 2.951953464938396, "grad_norm": 0.12957137510194539, "learning_rate": 1.7656918432733262e-05, "loss": 0.46, "num_tokens": 1987079387.0, "step": 8058 }, { "epoch": 2.9523198827463015, "grad_norm": 0.13321351753828428, "learning_rate": 1.765277369584744e-05, "loss": 0.4429, "num_tokens": 1987931373.0, "step": 8059 }, { "epoch": 2.9526863005542068, "grad_norm": 0.13780187657586107, "learning_rate": 1.764862920371912e-05, "loss": 0.4508, "num_tokens": 1988637639.0, "step": 8060 }, { "epoch": 2.9530527183621125, "grad_norm": 0.13761781352412933, "learning_rate": 1.764448495658166e-05, "loss": 0.4671, "num_tokens": 1989410711.0, "step": 8061 }, { "epoch": 2.9534191361700177, "grad_norm": 0.13140703466516324, "learning_rate": 1.7640340954668372e-05, "loss": 0.4491, "num_tokens": 1990167898.0, "step": 8062 }, { "epoch": 2.9537855539779234, "grad_norm": 0.12507611855159226, "learning_rate": 1.763619719821258e-05, "loss": 0.4376, "num_tokens": 1991010815.0, "step": 8063 }, { "epoch": 2.9541519717858287, "grad_norm": 0.12763984838297857, "learning_rate": 1.763205368744758e-05, "loss": 0.4428, "num_tokens": 1991845125.0, "step": 8064 }, { "epoch": 2.9545183895937344, "grad_norm": 0.1511309585921293, "learning_rate": 1.762791042260668e-05, "loss": 0.479, "num_tokens": 1992572731.0, "step": 8065 }, { "epoch": 2.9548848074016396, "grad_norm": 0.1305846939465111, "learning_rate": 1.762376740392313e-05, "loss": 0.4584, "num_tokens": 1993348754.0, "step": 8066 }, { "epoch": 2.9552512252095453, "grad_norm": 0.13393137551750728, "learning_rate": 1.76196246316302e-05, "loss": 0.4705, "num_tokens": 1994040456.0, "step": 8067 }, { "epoch": 2.9556176430174506, "grad_norm": 0.1417183899074299, "learning_rate": 1.761548210596114e-05, "loss": 0.4742, "num_tokens": 1994827000.0, "step": 8068 }, { "epoch": 2.9559840608253563, "grad_norm": 0.13382650792847992, "learning_rate": 1.7611339827149173e-05, "loss": 0.4459, "num_tokens": 1995556454.0, "step": 8069 }, { "epoch": 2.9563504786332615, "grad_norm": 0.1345840606420328, "learning_rate": 1.7607197795427527e-05, "loss": 0.4733, "num_tokens": 1996245780.0, "step": 8070 }, { "epoch": 2.956716896441167, "grad_norm": 0.13148488435013944, "learning_rate": 1.76030560110294e-05, "loss": 0.4605, "num_tokens": 1997015127.0, "step": 8071 }, { "epoch": 2.9570833142490724, "grad_norm": 0.15078549084878157, "learning_rate": 1.7598914474187987e-05, "loss": 0.4891, "num_tokens": 1997648079.0, "step": 8072 }, { "epoch": 2.957449732056978, "grad_norm": 0.13392779155702975, "learning_rate": 1.759477318513646e-05, "loss": 0.4083, "num_tokens": 1998304839.0, "step": 8073 }, { "epoch": 2.9578161498648834, "grad_norm": 0.1341207289261348, "learning_rate": 1.7590632144107994e-05, "loss": 0.4811, "num_tokens": 1999052094.0, "step": 8074 }, { "epoch": 2.9581825676727886, "grad_norm": 0.13181425248034373, "learning_rate": 1.7586491351335718e-05, "loss": 0.4571, "num_tokens": 1999817295.0, "step": 8075 }, { "epoch": 2.9585489854806943, "grad_norm": 0.12728727445737203, "learning_rate": 1.7582350807052785e-05, "loss": 0.4715, "num_tokens": 2000652513.0, "step": 8076 }, { "epoch": 2.9589154032886, "grad_norm": 0.13338375801221392, "learning_rate": 1.7578210511492302e-05, "loss": 0.4557, "num_tokens": 2001356550.0, "step": 8077 }, { "epoch": 2.9592818210965053, "grad_norm": 0.1266182538862241, "learning_rate": 1.757407046488739e-05, "loss": 0.4435, "num_tokens": 2002146258.0, "step": 8078 }, { "epoch": 2.9596482389044105, "grad_norm": 0.12537511060973888, "learning_rate": 1.7569930667471132e-05, "loss": 0.4372, "num_tokens": 2002969538.0, "step": 8079 }, { "epoch": 2.9600146567123162, "grad_norm": 0.12847658224871653, "learning_rate": 1.7565791119476605e-05, "loss": 0.4634, "num_tokens": 2003721519.0, "step": 8080 }, { "epoch": 2.960381074520222, "grad_norm": 0.13040107647670224, "learning_rate": 1.756165182113688e-05, "loss": 0.4428, "num_tokens": 2004537448.0, "step": 8081 }, { "epoch": 2.960747492328127, "grad_norm": 0.13104307742887839, "learning_rate": 1.755751277268501e-05, "loss": 0.4735, "num_tokens": 2005368284.0, "step": 8082 }, { "epoch": 2.9611139101360324, "grad_norm": 0.11645306840826475, "learning_rate": 1.7553373974354024e-05, "loss": 0.4273, "num_tokens": 2006264601.0, "step": 8083 }, { "epoch": 2.961480327943938, "grad_norm": 0.12193802644149657, "learning_rate": 1.754923542637695e-05, "loss": 0.4485, "num_tokens": 2007061418.0, "step": 8084 }, { "epoch": 2.961846745751844, "grad_norm": 0.13265257911964115, "learning_rate": 1.7545097128986795e-05, "loss": 0.4818, "num_tokens": 2007848810.0, "step": 8085 }, { "epoch": 2.962213163559749, "grad_norm": 0.13119220515318566, "learning_rate": 1.7540959082416544e-05, "loss": 0.4759, "num_tokens": 2008629755.0, "step": 8086 }, { "epoch": 2.9625795813676543, "grad_norm": 0.13960663332783135, "learning_rate": 1.7536821286899196e-05, "loss": 0.4456, "num_tokens": 2009271380.0, "step": 8087 }, { "epoch": 2.96294599917556, "grad_norm": 0.12997415330024223, "learning_rate": 1.7532683742667703e-05, "loss": 0.4068, "num_tokens": 2009966203.0, "step": 8088 }, { "epoch": 2.9633124169834653, "grad_norm": 0.14260291556831228, "learning_rate": 1.7528546449955027e-05, "loss": 0.4477, "num_tokens": 2010609988.0, "step": 8089 }, { "epoch": 2.963678834791371, "grad_norm": 0.13063114623734295, "learning_rate": 1.7524409408994096e-05, "loss": 0.4622, "num_tokens": 2011390978.0, "step": 8090 }, { "epoch": 2.964045252599276, "grad_norm": 0.12705939290011303, "learning_rate": 1.7520272620017837e-05, "loss": 0.4635, "num_tokens": 2012137822.0, "step": 8091 }, { "epoch": 2.964411670407182, "grad_norm": 0.14072274563568007, "learning_rate": 1.751613608325916e-05, "loss": 0.4403, "num_tokens": 2012897330.0, "step": 8092 }, { "epoch": 2.964778088215087, "grad_norm": 0.13752990262103237, "learning_rate": 1.7511999798950964e-05, "loss": 0.4431, "num_tokens": 2013647357.0, "step": 8093 }, { "epoch": 2.965144506022993, "grad_norm": 0.15250019580026997, "learning_rate": 1.7507863767326125e-05, "loss": 0.4779, "num_tokens": 2014383645.0, "step": 8094 }, { "epoch": 2.965510923830898, "grad_norm": 0.12831939238135548, "learning_rate": 1.750372798861751e-05, "loss": 0.4474, "num_tokens": 2015137547.0, "step": 8095 }, { "epoch": 2.965877341638804, "grad_norm": 0.13013889789730324, "learning_rate": 1.7499592463057976e-05, "loss": 0.4327, "num_tokens": 2015929405.0, "step": 8096 }, { "epoch": 2.966243759446709, "grad_norm": 0.12709813035704107, "learning_rate": 1.7495457190880353e-05, "loss": 0.4537, "num_tokens": 2016755613.0, "step": 8097 }, { "epoch": 2.9666101772546147, "grad_norm": 0.1291169288122127, "learning_rate": 1.7491322172317472e-05, "loss": 0.4233, "num_tokens": 2017496361.0, "step": 8098 }, { "epoch": 2.96697659506252, "grad_norm": 0.1448009096367391, "learning_rate": 1.748718740760215e-05, "loss": 0.4853, "num_tokens": 2018121099.0, "step": 8099 }, { "epoch": 2.9673430128704252, "grad_norm": 0.13359555740945456, "learning_rate": 1.7483052896967167e-05, "loss": 0.468, "num_tokens": 2018852114.0, "step": 8100 }, { "epoch": 2.967709430678331, "grad_norm": 0.14497074066159304, "learning_rate": 1.747891864064531e-05, "loss": 0.4737, "num_tokens": 2019508690.0, "step": 8101 }, { "epoch": 2.9680758484862366, "grad_norm": 0.13198703294641653, "learning_rate": 1.747478463886935e-05, "loss": 0.4409, "num_tokens": 2020248092.0, "step": 8102 }, { "epoch": 2.968442266294142, "grad_norm": 0.1346206717294231, "learning_rate": 1.747065089187203e-05, "loss": 0.4746, "num_tokens": 2021054459.0, "step": 8103 }, { "epoch": 2.968808684102047, "grad_norm": 0.12686368765776634, "learning_rate": 1.74665173998861e-05, "loss": 0.4773, "num_tokens": 2021887122.0, "step": 8104 }, { "epoch": 2.969175101909953, "grad_norm": 0.1254863885187153, "learning_rate": 1.7462384163144275e-05, "loss": 0.4379, "num_tokens": 2022765084.0, "step": 8105 }, { "epoch": 2.9695415197178585, "grad_norm": 0.13831191447862226, "learning_rate": 1.745825118187927e-05, "loss": 0.4506, "num_tokens": 2023583465.0, "step": 8106 }, { "epoch": 2.9699079375257638, "grad_norm": 0.12560520906606917, "learning_rate": 1.745411845632378e-05, "loss": 0.4748, "num_tokens": 2024401051.0, "step": 8107 }, { "epoch": 2.970274355333669, "grad_norm": 0.12555886022854104, "learning_rate": 1.744998598671048e-05, "loss": 0.474, "num_tokens": 2025170200.0, "step": 8108 }, { "epoch": 2.9706407731415747, "grad_norm": 0.13162962205065318, "learning_rate": 1.7445853773272037e-05, "loss": 0.4344, "num_tokens": 2025986272.0, "step": 8109 }, { "epoch": 2.9710071909494804, "grad_norm": 0.12905172141836377, "learning_rate": 1.7441721816241114e-05, "loss": 0.4422, "num_tokens": 2026758037.0, "step": 8110 }, { "epoch": 2.9713736087573857, "grad_norm": 0.13906035268932632, "learning_rate": 1.7437590115850333e-05, "loss": 0.4707, "num_tokens": 2027514807.0, "step": 8111 }, { "epoch": 2.971740026565291, "grad_norm": 0.14435623571173906, "learning_rate": 1.743345867233233e-05, "loss": 0.4629, "num_tokens": 2028222272.0, "step": 8112 }, { "epoch": 2.9721064443731966, "grad_norm": 0.13952718024872393, "learning_rate": 1.7429327485919708e-05, "loss": 0.421, "num_tokens": 2028947294.0, "step": 8113 }, { "epoch": 2.972472862181102, "grad_norm": 0.14591195050017366, "learning_rate": 1.7425196556845056e-05, "loss": 0.4584, "num_tokens": 2029793779.0, "step": 8114 }, { "epoch": 2.9728392799890075, "grad_norm": 0.15307469726200276, "learning_rate": 1.7421065885340957e-05, "loss": 0.4444, "num_tokens": 2030476558.0, "step": 8115 }, { "epoch": 2.973205697796913, "grad_norm": 0.14183812586308994, "learning_rate": 1.7416935471639985e-05, "loss": 0.4611, "num_tokens": 2031242786.0, "step": 8116 }, { "epoch": 2.9735721156048185, "grad_norm": 0.13043335008685, "learning_rate": 1.7412805315974678e-05, "loss": 0.4632, "num_tokens": 2032091334.0, "step": 8117 }, { "epoch": 2.9739385334127237, "grad_norm": 0.12960273431512137, "learning_rate": 1.7408675418577573e-05, "loss": 0.4241, "num_tokens": 2032930977.0, "step": 8118 }, { "epoch": 2.9743049512206294, "grad_norm": 0.1268457921368534, "learning_rate": 1.7404545779681196e-05, "loss": 0.4374, "num_tokens": 2033812261.0, "step": 8119 }, { "epoch": 2.9746713690285347, "grad_norm": 0.1416325277157875, "learning_rate": 1.7400416399518056e-05, "loss": 0.4555, "num_tokens": 2034688416.0, "step": 8120 }, { "epoch": 2.9750377868364404, "grad_norm": 0.14332878073778338, "learning_rate": 1.7396287278320646e-05, "loss": 0.4712, "num_tokens": 2035428912.0, "step": 8121 }, { "epoch": 2.9754042046443456, "grad_norm": 0.1278908200169876, "learning_rate": 1.7392158416321438e-05, "loss": 0.4656, "num_tokens": 2036307788.0, "step": 8122 }, { "epoch": 2.9757706224522513, "grad_norm": 0.13733276145060866, "learning_rate": 1.7388029813752897e-05, "loss": 0.4658, "num_tokens": 2037067991.0, "step": 8123 }, { "epoch": 2.9761370402601566, "grad_norm": 0.14513684715559286, "learning_rate": 1.738390147084747e-05, "loss": 0.445, "num_tokens": 2037909612.0, "step": 8124 }, { "epoch": 2.9765034580680623, "grad_norm": 0.1329897614199892, "learning_rate": 1.7379773387837595e-05, "loss": 0.4225, "num_tokens": 2038643585.0, "step": 8125 }, { "epoch": 2.9768698758759675, "grad_norm": 0.12993734088091366, "learning_rate": 1.737564556495568e-05, "loss": 0.4516, "num_tokens": 2039369300.0, "step": 8126 }, { "epoch": 2.977236293683873, "grad_norm": 0.1326813771120368, "learning_rate": 1.7371518002434156e-05, "loss": 0.4441, "num_tokens": 2040229828.0, "step": 8127 }, { "epoch": 2.9776027114917785, "grad_norm": 0.1606638487886762, "learning_rate": 1.7367390700505384e-05, "loss": 0.4894, "num_tokens": 2040888901.0, "step": 8128 }, { "epoch": 2.9779691292996837, "grad_norm": 0.13712471444191046, "learning_rate": 1.7363263659401755e-05, "loss": 0.4152, "num_tokens": 2041581936.0, "step": 8129 }, { "epoch": 2.9783355471075894, "grad_norm": 0.1180649644337708, "learning_rate": 1.735913687935562e-05, "loss": 0.4421, "num_tokens": 2042463808.0, "step": 8130 }, { "epoch": 2.978701964915495, "grad_norm": 0.15892363404449888, "learning_rate": 1.7355010360599337e-05, "loss": 0.5072, "num_tokens": 2043148490.0, "step": 8131 }, { "epoch": 2.9790683827234004, "grad_norm": 0.1332873658244756, "learning_rate": 1.735088410336523e-05, "loss": 0.4368, "num_tokens": 2044029542.0, "step": 8132 }, { "epoch": 2.9794348005313056, "grad_norm": 0.11672064957338495, "learning_rate": 1.734675810788562e-05, "loss": 0.433, "num_tokens": 2044884675.0, "step": 8133 }, { "epoch": 2.9798012183392113, "grad_norm": 0.12378142385824767, "learning_rate": 1.73426323743928e-05, "loss": 0.4295, "num_tokens": 2045705824.0, "step": 8134 }, { "epoch": 2.980167636147117, "grad_norm": 0.14596256520071277, "learning_rate": 1.7338506903119064e-05, "loss": 0.492, "num_tokens": 2046399397.0, "step": 8135 }, { "epoch": 2.9805340539550222, "grad_norm": 0.14184722054097396, "learning_rate": 1.7334381694296677e-05, "loss": 0.4448, "num_tokens": 2047146273.0, "step": 8136 }, { "epoch": 2.9809004717629275, "grad_norm": 0.1304578552549747, "learning_rate": 1.733025674815791e-05, "loss": 0.4375, "num_tokens": 2047828381.0, "step": 8137 }, { "epoch": 2.981266889570833, "grad_norm": 0.1525648712313166, "learning_rate": 1.7326132064934997e-05, "loss": 0.4609, "num_tokens": 2048534821.0, "step": 8138 }, { "epoch": 2.981633307378739, "grad_norm": 0.15988652483206614, "learning_rate": 1.7322007644860166e-05, "loss": 0.4577, "num_tokens": 2049227937.0, "step": 8139 }, { "epoch": 2.981999725186644, "grad_norm": 0.13323733336764054, "learning_rate": 1.731788348816563e-05, "loss": 0.4586, "num_tokens": 2049959333.0, "step": 8140 }, { "epoch": 2.9823661429945494, "grad_norm": 0.1258040345830452, "learning_rate": 1.731375959508358e-05, "loss": 0.4532, "num_tokens": 2050802589.0, "step": 8141 }, { "epoch": 2.982732560802455, "grad_norm": 0.14063704174199645, "learning_rate": 1.7309635965846218e-05, "loss": 0.4417, "num_tokens": 2051639737.0, "step": 8142 }, { "epoch": 2.9830989786103603, "grad_norm": 0.14442938483014478, "learning_rate": 1.7305512600685698e-05, "loss": 0.4783, "num_tokens": 2052438626.0, "step": 8143 }, { "epoch": 2.983465396418266, "grad_norm": 0.1236255102138666, "learning_rate": 1.730138949983418e-05, "loss": 0.4545, "num_tokens": 2053244550.0, "step": 8144 }, { "epoch": 2.9838318142261713, "grad_norm": 0.12611785786426186, "learning_rate": 1.72972666635238e-05, "loss": 0.4113, "num_tokens": 2054139717.0, "step": 8145 }, { "epoch": 2.984198232034077, "grad_norm": 0.16618559256029058, "learning_rate": 1.729314409198668e-05, "loss": 0.4782, "num_tokens": 2054963704.0, "step": 8146 }, { "epoch": 2.984564649841982, "grad_norm": 0.13114137345354435, "learning_rate": 1.7289021785454923e-05, "loss": 0.4742, "num_tokens": 2055847490.0, "step": 8147 }, { "epoch": 2.984931067649888, "grad_norm": 0.12141417477252592, "learning_rate": 1.7284899744160645e-05, "loss": 0.4657, "num_tokens": 2056748467.0, "step": 8148 }, { "epoch": 2.985297485457793, "grad_norm": 0.16127608181624312, "learning_rate": 1.7280777968335908e-05, "loss": 0.4342, "num_tokens": 2057561731.0, "step": 8149 }, { "epoch": 2.985663903265699, "grad_norm": 0.15429208582940132, "learning_rate": 1.727665645821278e-05, "loss": 0.4666, "num_tokens": 2058189224.0, "step": 8150 }, { "epoch": 2.986030321073604, "grad_norm": 0.14147158725844639, "learning_rate": 1.7272535214023308e-05, "loss": 0.4634, "num_tokens": 2058913446.0, "step": 8151 }, { "epoch": 2.98639673888151, "grad_norm": 0.1416247593637114, "learning_rate": 1.7268414235999524e-05, "loss": 0.4469, "num_tokens": 2059632650.0, "step": 8152 }, { "epoch": 2.986763156689415, "grad_norm": 0.15044292500598772, "learning_rate": 1.7264293524373453e-05, "loss": 0.4662, "num_tokens": 2060435156.0, "step": 8153 }, { "epoch": 2.9871295744973203, "grad_norm": 0.134267078796754, "learning_rate": 1.72601730793771e-05, "loss": 0.466, "num_tokens": 2061195065.0, "step": 8154 }, { "epoch": 2.987495992305226, "grad_norm": 0.12048167151044553, "learning_rate": 1.725605290124246e-05, "loss": 0.4505, "num_tokens": 2062034673.0, "step": 8155 }, { "epoch": 2.9878624101131317, "grad_norm": 0.13618665680994102, "learning_rate": 1.7251932990201493e-05, "loss": 0.4467, "num_tokens": 2062905621.0, "step": 8156 }, { "epoch": 2.988228827921037, "grad_norm": 0.1534818967761841, "learning_rate": 1.7247813346486162e-05, "loss": 0.458, "num_tokens": 2063575732.0, "step": 8157 }, { "epoch": 2.988595245728942, "grad_norm": 0.12821136310400402, "learning_rate": 1.7243693970328408e-05, "loss": 0.4285, "num_tokens": 2064293833.0, "step": 8158 }, { "epoch": 2.988961663536848, "grad_norm": 0.14809538176468537, "learning_rate": 1.7239574861960177e-05, "loss": 0.4762, "num_tokens": 2065179249.0, "step": 8159 }, { "epoch": 2.9893280813447536, "grad_norm": 0.14436090414765582, "learning_rate": 1.7235456021613365e-05, "loss": 0.4923, "num_tokens": 2065935213.0, "step": 8160 }, { "epoch": 2.989694499152659, "grad_norm": 0.13216762550840802, "learning_rate": 1.723133744951988e-05, "loss": 0.4167, "num_tokens": 2066749554.0, "step": 8161 }, { "epoch": 2.990060916960564, "grad_norm": 0.1387082672340596, "learning_rate": 1.7227219145911602e-05, "loss": 0.488, "num_tokens": 2067421189.0, "step": 8162 }, { "epoch": 2.99042733476847, "grad_norm": 0.14564177574139148, "learning_rate": 1.7223101111020402e-05, "loss": 0.4409, "num_tokens": 2068080425.0, "step": 8163 }, { "epoch": 2.9907937525763755, "grad_norm": 0.13559858709469746, "learning_rate": 1.7218983345078132e-05, "loss": 0.4569, "num_tokens": 2068853729.0, "step": 8164 }, { "epoch": 2.9911601703842807, "grad_norm": 0.12400396122055668, "learning_rate": 1.7214865848316637e-05, "loss": 0.4483, "num_tokens": 2069757084.0, "step": 8165 }, { "epoch": 2.991526588192186, "grad_norm": 0.12529083102275745, "learning_rate": 1.7210748620967727e-05, "loss": 0.4629, "num_tokens": 2070509541.0, "step": 8166 }, { "epoch": 2.9918930060000917, "grad_norm": 0.13612700253702542, "learning_rate": 1.7206631663263224e-05, "loss": 0.471, "num_tokens": 2071219737.0, "step": 8167 }, { "epoch": 2.992259423807997, "grad_norm": 0.13382959361463415, "learning_rate": 1.720251497543491e-05, "loss": 0.4268, "num_tokens": 2071951339.0, "step": 8168 }, { "epoch": 2.9926258416159026, "grad_norm": 0.13054936900170697, "learning_rate": 1.7198398557714566e-05, "loss": 0.4445, "num_tokens": 2072776993.0, "step": 8169 }, { "epoch": 2.992992259423808, "grad_norm": 0.13773069952442746, "learning_rate": 1.7194282410333954e-05, "loss": 0.4853, "num_tokens": 2073528782.0, "step": 8170 }, { "epoch": 2.9933586772317136, "grad_norm": 0.12195267494482721, "learning_rate": 1.7190166533524827e-05, "loss": 0.4476, "num_tokens": 2074290776.0, "step": 8171 }, { "epoch": 2.993725095039619, "grad_norm": 0.12770945455439836, "learning_rate": 1.7186050927518917e-05, "loss": 0.4878, "num_tokens": 2075123019.0, "step": 8172 }, { "epoch": 2.9940915128475245, "grad_norm": 0.1323507427283949, "learning_rate": 1.718193559254793e-05, "loss": 0.471, "num_tokens": 2075876527.0, "step": 8173 }, { "epoch": 2.9944579306554298, "grad_norm": 0.13154993596375453, "learning_rate": 1.7177820528843582e-05, "loss": 0.4471, "num_tokens": 2076575910.0, "step": 8174 }, { "epoch": 2.9948243484633355, "grad_norm": 0.13169116759607008, "learning_rate": 1.717370573663754e-05, "loss": 0.4489, "num_tokens": 2077294671.0, "step": 8175 }, { "epoch": 2.9951907662712407, "grad_norm": 0.13086685254720992, "learning_rate": 1.7169591216161495e-05, "loss": 0.452, "num_tokens": 2078069304.0, "step": 8176 }, { "epoch": 2.9955571840791464, "grad_norm": 0.15182744457225145, "learning_rate": 1.716547696764709e-05, "loss": 0.4831, "num_tokens": 2078859578.0, "step": 8177 }, { "epoch": 2.9959236018870516, "grad_norm": 0.1305066386174831, "learning_rate": 1.7161362991325976e-05, "loss": 0.4815, "num_tokens": 2079639972.0, "step": 8178 }, { "epoch": 2.9962900196949573, "grad_norm": 0.1300038229527975, "learning_rate": 1.7157249287429768e-05, "loss": 0.4522, "num_tokens": 2080424439.0, "step": 8179 }, { "epoch": 2.9966564375028626, "grad_norm": 0.11958435928781638, "learning_rate": 1.715313585619008e-05, "loss": 0.4341, "num_tokens": 2081253599.0, "step": 8180 }, { "epoch": 2.9970228553107683, "grad_norm": 0.14065928358900592, "learning_rate": 1.7149022697838503e-05, "loss": 0.4622, "num_tokens": 2081946497.0, "step": 8181 }, { "epoch": 2.9973892731186735, "grad_norm": 0.12493849074261648, "learning_rate": 1.714490981260663e-05, "loss": 0.4125, "num_tokens": 2082752254.0, "step": 8182 }, { "epoch": 2.997755690926579, "grad_norm": 0.13215689216191823, "learning_rate": 1.7140797200726004e-05, "loss": 0.4362, "num_tokens": 2083423101.0, "step": 8183 }, { "epoch": 2.9981221087344845, "grad_norm": 0.12340530614619102, "learning_rate": 1.7136684862428182e-05, "loss": 0.4551, "num_tokens": 2084242282.0, "step": 8184 }, { "epoch": 2.99848852654239, "grad_norm": 0.14798929219711024, "learning_rate": 1.7132572797944707e-05, "loss": 0.4768, "num_tokens": 2084903707.0, "step": 8185 }, { "epoch": 2.9988549443502954, "grad_norm": 0.1591600909236383, "learning_rate": 1.7128461007507077e-05, "loss": 0.4904, "num_tokens": 2085548042.0, "step": 8186 }, { "epoch": 2.9992213621582007, "grad_norm": 0.12264513584373325, "learning_rate": 1.7124349491346803e-05, "loss": 0.4554, "num_tokens": 2086355723.0, "step": 8187 }, { "epoch": 2.9995877799661064, "grad_norm": 0.13001588306974668, "learning_rate": 1.7120238249695385e-05, "loss": 0.4571, "num_tokens": 2087096535.0, "step": 8188 }, { "epoch": 2.999954197774012, "grad_norm": 0.14436606998824816, "learning_rate": 1.7116127282784275e-05, "loss": 0.4835, "num_tokens": 2087776378.0, "step": 8189 }, { "epoch": 3.0, "grad_norm": 0.14436606998824816, "learning_rate": 1.7112016590844937e-05, "loss": 0.29, "num_tokens": 2087810693.0, "step": 8190 }, { "epoch": 3.0003664178079053, "grad_norm": 0.5482149343156838, "learning_rate": 1.71079061741088e-05, "loss": 0.4244, "num_tokens": 2088546729.0, "step": 8191 }, { "epoch": 3.000732835615811, "grad_norm": 0.1747887899097011, "learning_rate": 1.710379603280731e-05, "loss": 0.4028, "num_tokens": 2089351888.0, "step": 8192 }, { "epoch": 3.001099253423716, "grad_norm": 0.13607123016630704, "learning_rate": 1.7099686167171862e-05, "loss": 0.4008, "num_tokens": 2090106043.0, "step": 8193 }, { "epoch": 3.001465671231622, "grad_norm": 0.2103813743394157, "learning_rate": 1.709557657743385e-05, "loss": 0.4269, "num_tokens": 2090951858.0, "step": 8194 }, { "epoch": 3.001832089039527, "grad_norm": 0.2183438363065536, "learning_rate": 1.7091467263824658e-05, "loss": 0.3966, "num_tokens": 2091783438.0, "step": 8195 }, { "epoch": 3.002198506847433, "grad_norm": 0.1421180184072548, "learning_rate": 1.7087358226575642e-05, "loss": 0.3986, "num_tokens": 2092495279.0, "step": 8196 }, { "epoch": 3.002564924655338, "grad_norm": 0.16850181974244213, "learning_rate": 1.708324946591815e-05, "loss": 0.3648, "num_tokens": 2093181635.0, "step": 8197 }, { "epoch": 3.002931342463244, "grad_norm": 0.19043930793995145, "learning_rate": 1.7079140982083514e-05, "loss": 0.4082, "num_tokens": 2093866006.0, "step": 8198 }, { "epoch": 3.003297760271149, "grad_norm": 0.16923057669228903, "learning_rate": 1.707503277530306e-05, "loss": 0.409, "num_tokens": 2094677414.0, "step": 8199 }, { "epoch": 3.0036641780790547, "grad_norm": 0.15856109667980012, "learning_rate": 1.707092484580807e-05, "loss": 0.4265, "num_tokens": 2095392568.0, "step": 8200 }, { "epoch": 3.00403059588696, "grad_norm": 0.1790880661809774, "learning_rate": 1.706681719382984e-05, "loss": 0.3953, "num_tokens": 2096224994.0, "step": 8201 }, { "epoch": 3.0043970136948657, "grad_norm": 0.18549666088397893, "learning_rate": 1.7062709819599637e-05, "loss": 0.3979, "num_tokens": 2096925861.0, "step": 8202 }, { "epoch": 3.004763431502771, "grad_norm": 0.1880835938809184, "learning_rate": 1.7058602723348717e-05, "loss": 0.426, "num_tokens": 2097569790.0, "step": 8203 }, { "epoch": 3.0051298493106766, "grad_norm": 0.1508832034076166, "learning_rate": 1.7054495905308313e-05, "loss": 0.399, "num_tokens": 2098334985.0, "step": 8204 }, { "epoch": 3.005496267118582, "grad_norm": 0.14715669907658538, "learning_rate": 1.7050389365709648e-05, "loss": 0.406, "num_tokens": 2099238620.0, "step": 8205 }, { "epoch": 3.0058626849264876, "grad_norm": 0.17772017509158997, "learning_rate": 1.7046283104783935e-05, "loss": 0.4319, "num_tokens": 2099992088.0, "step": 8206 }, { "epoch": 3.006229102734393, "grad_norm": 0.18453584431791822, "learning_rate": 1.704217712276235e-05, "loss": 0.4289, "num_tokens": 2100639047.0, "step": 8207 }, { "epoch": 3.0065955205422985, "grad_norm": 0.15880263977594042, "learning_rate": 1.7038071419876074e-05, "loss": 0.4332, "num_tokens": 2101373415.0, "step": 8208 }, { "epoch": 3.0069619383502038, "grad_norm": 0.1605577293512814, "learning_rate": 1.703396599635628e-05, "loss": 0.4093, "num_tokens": 2102019959.0, "step": 8209 }, { "epoch": 3.0073283561581095, "grad_norm": 0.15639071601464283, "learning_rate": 1.70298608524341e-05, "loss": 0.3863, "num_tokens": 2102826831.0, "step": 8210 }, { "epoch": 3.0076947739660147, "grad_norm": 0.13964730977552048, "learning_rate": 1.702575598834066e-05, "loss": 0.4092, "num_tokens": 2103615446.0, "step": 8211 }, { "epoch": 3.0080611917739204, "grad_norm": 0.1553363961242302, "learning_rate": 1.7021651404307077e-05, "loss": 0.3881, "num_tokens": 2104377456.0, "step": 8212 }, { "epoch": 3.0084276095818256, "grad_norm": 0.14235257675950153, "learning_rate": 1.7017547100564433e-05, "loss": 0.3975, "num_tokens": 2105133672.0, "step": 8213 }, { "epoch": 3.0087940273897313, "grad_norm": 0.15607506966014137, "learning_rate": 1.7013443077343832e-05, "loss": 0.4075, "num_tokens": 2105839008.0, "step": 8214 }, { "epoch": 3.0091604451976366, "grad_norm": 0.15869058263583116, "learning_rate": 1.7009339334876326e-05, "loss": 0.4177, "num_tokens": 2106590015.0, "step": 8215 }, { "epoch": 3.0095268630055423, "grad_norm": 0.14460196800476086, "learning_rate": 1.7005235873392962e-05, "loss": 0.4023, "num_tokens": 2107341207.0, "step": 8216 }, { "epoch": 3.0098932808134475, "grad_norm": 0.14345274006644254, "learning_rate": 1.7001132693124777e-05, "loss": 0.4109, "num_tokens": 2108111960.0, "step": 8217 }, { "epoch": 3.010259698621353, "grad_norm": 0.16020787516086857, "learning_rate": 1.699702979430279e-05, "loss": 0.4094, "num_tokens": 2108914149.0, "step": 8218 }, { "epoch": 3.0106261164292585, "grad_norm": 0.1523427630201236, "learning_rate": 1.6992927177157986e-05, "loss": 0.3773, "num_tokens": 2109688369.0, "step": 8219 }, { "epoch": 3.0109925342371637, "grad_norm": 0.1718534797158294, "learning_rate": 1.6988824841921383e-05, "loss": 0.4038, "num_tokens": 2110398422.0, "step": 8220 }, { "epoch": 3.0113589520450694, "grad_norm": 0.14971723287695138, "learning_rate": 1.698472278882392e-05, "loss": 0.4219, "num_tokens": 2111128832.0, "step": 8221 }, { "epoch": 3.0117253698529747, "grad_norm": 0.1617850167902569, "learning_rate": 1.6980621018096566e-05, "loss": 0.3996, "num_tokens": 2111742251.0, "step": 8222 }, { "epoch": 3.0120917876608804, "grad_norm": 0.14930244049307478, "learning_rate": 1.6976519529970258e-05, "loss": 0.3994, "num_tokens": 2112468661.0, "step": 8223 }, { "epoch": 3.0124582054687856, "grad_norm": 0.15872017978644287, "learning_rate": 1.697241832467591e-05, "loss": 0.4104, "num_tokens": 2113178786.0, "step": 8224 }, { "epoch": 3.0128246232766913, "grad_norm": 0.15483037440182032, "learning_rate": 1.6968317402444433e-05, "loss": 0.4157, "num_tokens": 2113984395.0, "step": 8225 }, { "epoch": 3.0131910410845966, "grad_norm": 0.16790171559326786, "learning_rate": 1.6964216763506723e-05, "loss": 0.4323, "num_tokens": 2114683481.0, "step": 8226 }, { "epoch": 3.0135574588925023, "grad_norm": 0.16080973084687367, "learning_rate": 1.696011640809365e-05, "loss": 0.4083, "num_tokens": 2115458591.0, "step": 8227 }, { "epoch": 3.0139238767004075, "grad_norm": 0.14650593227743247, "learning_rate": 1.6956016336436064e-05, "loss": 0.3785, "num_tokens": 2116192111.0, "step": 8228 }, { "epoch": 3.014290294508313, "grad_norm": 0.15192530800738865, "learning_rate": 1.695191654876482e-05, "loss": 0.4315, "num_tokens": 2116858231.0, "step": 8229 }, { "epoch": 3.0146567123162185, "grad_norm": 0.15600783410603433, "learning_rate": 1.694781704531073e-05, "loss": 0.3915, "num_tokens": 2117634757.0, "step": 8230 }, { "epoch": 3.015023130124124, "grad_norm": 0.13524927725146302, "learning_rate": 1.6943717826304616e-05, "loss": 0.3835, "num_tokens": 2118428932.0, "step": 8231 }, { "epoch": 3.0153895479320294, "grad_norm": 0.15016810743093187, "learning_rate": 1.693961889197727e-05, "loss": 0.381, "num_tokens": 2119128666.0, "step": 8232 }, { "epoch": 3.015755965739935, "grad_norm": 0.17500110064443222, "learning_rate": 1.6935520242559474e-05, "loss": 0.4195, "num_tokens": 2119860576.0, "step": 8233 }, { "epoch": 3.0161223835478403, "grad_norm": 0.2526228637250217, "learning_rate": 1.6931421878281974e-05, "loss": 0.3582, "num_tokens": 2120616438.0, "step": 8234 }, { "epoch": 3.016488801355746, "grad_norm": 0.15170725188944945, "learning_rate": 1.6927323799375536e-05, "loss": 0.4023, "num_tokens": 2121268120.0, "step": 8235 }, { "epoch": 3.0168552191636513, "grad_norm": 0.15951123816823015, "learning_rate": 1.6923226006070877e-05, "loss": 0.366, "num_tokens": 2121949202.0, "step": 8236 }, { "epoch": 3.017221636971557, "grad_norm": 0.17133472353378412, "learning_rate": 1.691912849859872e-05, "loss": 0.4284, "num_tokens": 2122651879.0, "step": 8237 }, { "epoch": 3.0175880547794622, "grad_norm": 0.14050021241367386, "learning_rate": 1.691503127718975e-05, "loss": 0.3876, "num_tokens": 2123455099.0, "step": 8238 }, { "epoch": 3.017954472587368, "grad_norm": 0.15079637915558314, "learning_rate": 1.691093434207466e-05, "loss": 0.43, "num_tokens": 2124207835.0, "step": 8239 }, { "epoch": 3.018320890395273, "grad_norm": 0.15508681167454685, "learning_rate": 1.6906837693484112e-05, "loss": 0.4009, "num_tokens": 2124921383.0, "step": 8240 }, { "epoch": 3.018687308203179, "grad_norm": 0.15746519461200853, "learning_rate": 1.6902741331648753e-05, "loss": 0.4121, "num_tokens": 2125702983.0, "step": 8241 }, { "epoch": 3.019053726011084, "grad_norm": 0.15131574227071512, "learning_rate": 1.689864525679922e-05, "loss": 0.4139, "num_tokens": 2126454021.0, "step": 8242 }, { "epoch": 3.01942014381899, "grad_norm": 0.13172471648479564, "learning_rate": 1.6894549469166138e-05, "loss": 0.3729, "num_tokens": 2127247652.0, "step": 8243 }, { "epoch": 3.019786561626895, "grad_norm": 0.15460204212786938, "learning_rate": 1.689045396898009e-05, "loss": 0.4124, "num_tokens": 2128079166.0, "step": 8244 }, { "epoch": 3.0201529794348003, "grad_norm": 0.14838022367445464, "learning_rate": 1.688635875647167e-05, "loss": 0.4406, "num_tokens": 2128826026.0, "step": 8245 }, { "epoch": 3.020519397242706, "grad_norm": 0.1346657997929315, "learning_rate": 1.6882263831871445e-05, "loss": 0.3873, "num_tokens": 2129709266.0, "step": 8246 }, { "epoch": 3.0208858150506113, "grad_norm": 0.13545182738982664, "learning_rate": 1.6878169195409975e-05, "loss": 0.4103, "num_tokens": 2130534259.0, "step": 8247 }, { "epoch": 3.021252232858517, "grad_norm": 0.13144249328787913, "learning_rate": 1.6874074847317788e-05, "loss": 0.4176, "num_tokens": 2131378166.0, "step": 8248 }, { "epoch": 3.021618650666422, "grad_norm": 0.14608764436603275, "learning_rate": 1.6869980787825407e-05, "loss": 0.4131, "num_tokens": 2132127844.0, "step": 8249 }, { "epoch": 3.021985068474328, "grad_norm": 0.1252344507807133, "learning_rate": 1.6865887017163335e-05, "loss": 0.3996, "num_tokens": 2133052864.0, "step": 8250 }, { "epoch": 3.022351486282233, "grad_norm": 0.14799600327989218, "learning_rate": 1.686179353556206e-05, "loss": 0.3683, "num_tokens": 2133658291.0, "step": 8251 }, { "epoch": 3.022717904090139, "grad_norm": 0.14533661968330147, "learning_rate": 1.6857700343252048e-05, "loss": 0.3843, "num_tokens": 2134408233.0, "step": 8252 }, { "epoch": 3.023084321898044, "grad_norm": 0.1362447863241795, "learning_rate": 1.685360744046376e-05, "loss": 0.4073, "num_tokens": 2135236615.0, "step": 8253 }, { "epoch": 3.02345073970595, "grad_norm": 0.16223178175375272, "learning_rate": 1.6849514827427635e-05, "loss": 0.4602, "num_tokens": 2135871856.0, "step": 8254 }, { "epoch": 3.023817157513855, "grad_norm": 0.139950164584155, "learning_rate": 1.6845422504374093e-05, "loss": 0.3754, "num_tokens": 2136659286.0, "step": 8255 }, { "epoch": 3.0241835753217607, "grad_norm": 0.14377454261923542, "learning_rate": 1.6841330471533544e-05, "loss": 0.4188, "num_tokens": 2137327688.0, "step": 8256 }, { "epoch": 3.024549993129666, "grad_norm": 0.16006864203429716, "learning_rate": 1.6837238729136372e-05, "loss": 0.4141, "num_tokens": 2138056191.0, "step": 8257 }, { "epoch": 3.0249164109375717, "grad_norm": 0.14247065447934823, "learning_rate": 1.683314727741294e-05, "loss": 0.3938, "num_tokens": 2138825734.0, "step": 8258 }, { "epoch": 3.025282828745477, "grad_norm": 0.1399072881385752, "learning_rate": 1.682905611659363e-05, "loss": 0.3761, "num_tokens": 2139566974.0, "step": 8259 }, { "epoch": 3.0256492465533826, "grad_norm": 0.14749587968598787, "learning_rate": 1.6824965246908766e-05, "loss": 0.4241, "num_tokens": 2140379182.0, "step": 8260 }, { "epoch": 3.026015664361288, "grad_norm": 0.14413772281634804, "learning_rate": 1.6820874668588678e-05, "loss": 0.4038, "num_tokens": 2141069328.0, "step": 8261 }, { "epoch": 3.0263820821691936, "grad_norm": 0.14229269177855888, "learning_rate": 1.6816784381863667e-05, "loss": 0.4332, "num_tokens": 2141857124.0, "step": 8262 }, { "epoch": 3.026748499977099, "grad_norm": 0.14848997475564235, "learning_rate": 1.6812694386964023e-05, "loss": 0.4157, "num_tokens": 2142628051.0, "step": 8263 }, { "epoch": 3.0271149177850045, "grad_norm": 0.16539909316551615, "learning_rate": 1.680860468412004e-05, "loss": 0.4011, "num_tokens": 2143274098.0, "step": 8264 }, { "epoch": 3.0274813355929098, "grad_norm": 0.14718614774287506, "learning_rate": 1.6804515273561958e-05, "loss": 0.4177, "num_tokens": 2143934476.0, "step": 8265 }, { "epoch": 3.0278477534008155, "grad_norm": 0.13290543127599733, "learning_rate": 1.6800426155520023e-05, "loss": 0.4015, "num_tokens": 2144856385.0, "step": 8266 }, { "epoch": 3.0282141712087207, "grad_norm": 0.14214199700026506, "learning_rate": 1.6796337330224463e-05, "loss": 0.3955, "num_tokens": 2145709245.0, "step": 8267 }, { "epoch": 3.0285805890166264, "grad_norm": 0.13264404017951484, "learning_rate": 1.6792248797905486e-05, "loss": 0.4067, "num_tokens": 2146526499.0, "step": 8268 }, { "epoch": 3.0289470068245317, "grad_norm": 0.14924585252752495, "learning_rate": 1.6788160558793273e-05, "loss": 0.4216, "num_tokens": 2147326510.0, "step": 8269 }, { "epoch": 3.029313424632437, "grad_norm": 0.1392521216228167, "learning_rate": 1.678407261311802e-05, "loss": 0.4354, "num_tokens": 2148086040.0, "step": 8270 }, { "epoch": 3.0296798424403426, "grad_norm": 0.13754496733355662, "learning_rate": 1.6779984961109882e-05, "loss": 0.4068, "num_tokens": 2148875468.0, "step": 8271 }, { "epoch": 3.030046260248248, "grad_norm": 0.13594077381429087, "learning_rate": 1.677589760299899e-05, "loss": 0.4163, "num_tokens": 2149697907.0, "step": 8272 }, { "epoch": 3.0304126780561536, "grad_norm": 0.14592432230111996, "learning_rate": 1.677181053901548e-05, "loss": 0.3944, "num_tokens": 2150386611.0, "step": 8273 }, { "epoch": 3.030779095864059, "grad_norm": 0.154763664805419, "learning_rate": 1.6767723769389454e-05, "loss": 0.4234, "num_tokens": 2151188564.0, "step": 8274 }, { "epoch": 3.0311455136719645, "grad_norm": 0.16106731418402137, "learning_rate": 1.676363729435102e-05, "loss": 0.4367, "num_tokens": 2151878619.0, "step": 8275 }, { "epoch": 3.0315119314798697, "grad_norm": 0.13280064830485713, "learning_rate": 1.675955111413024e-05, "loss": 0.3695, "num_tokens": 2152742461.0, "step": 8276 }, { "epoch": 3.0318783492877754, "grad_norm": 0.14385483547541972, "learning_rate": 1.6755465228957183e-05, "loss": 0.3732, "num_tokens": 2153489321.0, "step": 8277 }, { "epoch": 3.0322447670956807, "grad_norm": 0.12870628311243087, "learning_rate": 1.6751379639061892e-05, "loss": 0.4056, "num_tokens": 2154374214.0, "step": 8278 }, { "epoch": 3.0326111849035864, "grad_norm": 0.14814501050313922, "learning_rate": 1.6747294344674384e-05, "loss": 0.3756, "num_tokens": 2155043049.0, "step": 8279 }, { "epoch": 3.0329776027114916, "grad_norm": 0.1491622601771291, "learning_rate": 1.6743209346024672e-05, "loss": 0.4114, "num_tokens": 2155736181.0, "step": 8280 }, { "epoch": 3.0333440205193973, "grad_norm": 0.14496317698725744, "learning_rate": 1.6739124643342766e-05, "loss": 0.4378, "num_tokens": 2156622291.0, "step": 8281 }, { "epoch": 3.0337104383273026, "grad_norm": 0.13498193068778158, "learning_rate": 1.6735040236858626e-05, "loss": 0.4099, "num_tokens": 2157513683.0, "step": 8282 }, { "epoch": 3.0340768561352083, "grad_norm": 0.14246926315114397, "learning_rate": 1.6730956126802217e-05, "loss": 0.3904, "num_tokens": 2158261675.0, "step": 8283 }, { "epoch": 3.0344432739431135, "grad_norm": 0.16292160360629154, "learning_rate": 1.6726872313403482e-05, "loss": 0.4109, "num_tokens": 2158949962.0, "step": 8284 }, { "epoch": 3.0348096917510192, "grad_norm": 0.14189690142910144, "learning_rate": 1.6722788796892343e-05, "loss": 0.4047, "num_tokens": 2159708018.0, "step": 8285 }, { "epoch": 3.0351761095589245, "grad_norm": 0.13687311946145433, "learning_rate": 1.671870557749873e-05, "loss": 0.3897, "num_tokens": 2160472786.0, "step": 8286 }, { "epoch": 3.03554252736683, "grad_norm": 0.1742635528269111, "learning_rate": 1.6714622655452507e-05, "loss": 0.4036, "num_tokens": 2161091353.0, "step": 8287 }, { "epoch": 3.0359089451747354, "grad_norm": 0.13854368186484933, "learning_rate": 1.6710540030983576e-05, "loss": 0.3902, "num_tokens": 2161932052.0, "step": 8288 }, { "epoch": 3.036275362982641, "grad_norm": 0.1290594190787016, "learning_rate": 1.6706457704321783e-05, "loss": 0.3826, "num_tokens": 2162773274.0, "step": 8289 }, { "epoch": 3.0366417807905464, "grad_norm": 0.13252052365827927, "learning_rate": 1.670237567569697e-05, "loss": 0.3714, "num_tokens": 2163622947.0, "step": 8290 }, { "epoch": 3.037008198598452, "grad_norm": 0.1396890020932392, "learning_rate": 1.6698293945338964e-05, "loss": 0.3745, "num_tokens": 2164346743.0, "step": 8291 }, { "epoch": 3.0373746164063573, "grad_norm": 0.14685247550991334, "learning_rate": 1.669421251347759e-05, "loss": 0.3751, "num_tokens": 2165038410.0, "step": 8292 }, { "epoch": 3.037741034214263, "grad_norm": 0.14894238982893898, "learning_rate": 1.669013138034262e-05, "loss": 0.4329, "num_tokens": 2165880077.0, "step": 8293 }, { "epoch": 3.0381074520221683, "grad_norm": 0.1435680733359994, "learning_rate": 1.668605054616385e-05, "loss": 0.4084, "num_tokens": 2166635939.0, "step": 8294 }, { "epoch": 3.038473869830074, "grad_norm": 0.13969623667689582, "learning_rate": 1.668197001117102e-05, "loss": 0.4291, "num_tokens": 2167356486.0, "step": 8295 }, { "epoch": 3.038840287637979, "grad_norm": 0.151074533065221, "learning_rate": 1.6677889775593875e-05, "loss": 0.4012, "num_tokens": 2168095437.0, "step": 8296 }, { "epoch": 3.039206705445885, "grad_norm": 0.14782246407104435, "learning_rate": 1.6673809839662155e-05, "loss": 0.4362, "num_tokens": 2168818673.0, "step": 8297 }, { "epoch": 3.03957312325379, "grad_norm": 0.15109058402626663, "learning_rate": 1.666973020360556e-05, "loss": 0.4096, "num_tokens": 2169553849.0, "step": 8298 }, { "epoch": 3.0399395410616954, "grad_norm": 0.14147729084910776, "learning_rate": 1.666565086765378e-05, "loss": 0.3935, "num_tokens": 2170312826.0, "step": 8299 }, { "epoch": 3.040305958869601, "grad_norm": 0.13692293302674377, "learning_rate": 1.666157183203649e-05, "loss": 0.3985, "num_tokens": 2171083260.0, "step": 8300 }, { "epoch": 3.0406723766775063, "grad_norm": 0.1302360082975598, "learning_rate": 1.6657493096983347e-05, "loss": 0.4057, "num_tokens": 2171941589.0, "step": 8301 }, { "epoch": 3.041038794485412, "grad_norm": 0.1643222362962166, "learning_rate": 1.665341466272399e-05, "loss": 0.4316, "num_tokens": 2172620150.0, "step": 8302 }, { "epoch": 3.0414052122933173, "grad_norm": 0.1285931882771349, "learning_rate": 1.664933652948805e-05, "loss": 0.4028, "num_tokens": 2173428995.0, "step": 8303 }, { "epoch": 3.041771630101223, "grad_norm": 0.1351245863229739, "learning_rate": 1.664525869750513e-05, "loss": 0.4208, "num_tokens": 2174353643.0, "step": 8304 }, { "epoch": 3.0421380479091282, "grad_norm": 0.1436731197513522, "learning_rate": 1.6641181167004825e-05, "loss": 0.4136, "num_tokens": 2175114438.0, "step": 8305 }, { "epoch": 3.042504465717034, "grad_norm": 0.14283008635920963, "learning_rate": 1.66371039382167e-05, "loss": 0.3989, "num_tokens": 2175928697.0, "step": 8306 }, { "epoch": 3.042870883524939, "grad_norm": 0.14358066460616212, "learning_rate": 1.663302701137031e-05, "loss": 0.4351, "num_tokens": 2176597076.0, "step": 8307 }, { "epoch": 3.043237301332845, "grad_norm": 0.15203905860069455, "learning_rate": 1.6628950386695205e-05, "loss": 0.4009, "num_tokens": 2177324597.0, "step": 8308 }, { "epoch": 3.04360371914075, "grad_norm": 0.15427715129409653, "learning_rate": 1.6624874064420904e-05, "loss": 0.4151, "num_tokens": 2178069514.0, "step": 8309 }, { "epoch": 3.043970136948656, "grad_norm": 0.1360205323879825, "learning_rate": 1.6620798044776902e-05, "loss": 0.3934, "num_tokens": 2178897341.0, "step": 8310 }, { "epoch": 3.044336554756561, "grad_norm": 0.1363165509273579, "learning_rate": 1.66167223279927e-05, "loss": 0.3978, "num_tokens": 2179709914.0, "step": 8311 }, { "epoch": 3.0447029725644668, "grad_norm": 0.1485946416511627, "learning_rate": 1.661264691429776e-05, "loss": 0.3998, "num_tokens": 2180519142.0, "step": 8312 }, { "epoch": 3.045069390372372, "grad_norm": 0.13094446832749715, "learning_rate": 1.6608571803921538e-05, "loss": 0.4008, "num_tokens": 2181425284.0, "step": 8313 }, { "epoch": 3.0454358081802777, "grad_norm": 0.14956610283639168, "learning_rate": 1.6604496997093473e-05, "loss": 0.4435, "num_tokens": 2182189659.0, "step": 8314 }, { "epoch": 3.045802225988183, "grad_norm": 0.1275698466057926, "learning_rate": 1.6600422494042986e-05, "loss": 0.3873, "num_tokens": 2183062438.0, "step": 8315 }, { "epoch": 3.0461686437960886, "grad_norm": 0.1488843778399859, "learning_rate": 1.6596348294999475e-05, "loss": 0.3879, "num_tokens": 2183901488.0, "step": 8316 }, { "epoch": 3.046535061603994, "grad_norm": 0.1449541278366144, "learning_rate": 1.6592274400192323e-05, "loss": 0.41, "num_tokens": 2184661424.0, "step": 8317 }, { "epoch": 3.0469014794118996, "grad_norm": 0.14855201321339823, "learning_rate": 1.6588200809850906e-05, "loss": 0.4104, "num_tokens": 2185388208.0, "step": 8318 }, { "epoch": 3.047267897219805, "grad_norm": 0.1382902068396181, "learning_rate": 1.6584127524204578e-05, "loss": 0.3919, "num_tokens": 2186186465.0, "step": 8319 }, { "epoch": 3.0476343150277105, "grad_norm": 0.14942995860576558, "learning_rate": 1.6580054543482663e-05, "loss": 0.3962, "num_tokens": 2186954718.0, "step": 8320 }, { "epoch": 3.048000732835616, "grad_norm": 0.13067985092913373, "learning_rate": 1.6575981867914484e-05, "loss": 0.3456, "num_tokens": 2187707384.0, "step": 8321 }, { "epoch": 3.0483671506435215, "grad_norm": 0.144297518949975, "learning_rate": 1.657190949772934e-05, "loss": 0.3909, "num_tokens": 2188438185.0, "step": 8322 }, { "epoch": 3.0487335684514267, "grad_norm": 0.15452840323033726, "learning_rate": 1.6567837433156514e-05, "loss": 0.4316, "num_tokens": 2189215283.0, "step": 8323 }, { "epoch": 3.049099986259332, "grad_norm": 0.13716016827313093, "learning_rate": 1.6563765674425264e-05, "loss": 0.3884, "num_tokens": 2189973781.0, "step": 8324 }, { "epoch": 3.0494664040672377, "grad_norm": 0.13454317765725246, "learning_rate": 1.6559694221764844e-05, "loss": 0.3985, "num_tokens": 2190722887.0, "step": 8325 }, { "epoch": 3.049832821875143, "grad_norm": 0.1318591920718038, "learning_rate": 1.655562307540449e-05, "loss": 0.3848, "num_tokens": 2191533511.0, "step": 8326 }, { "epoch": 3.0501992396830486, "grad_norm": 0.14772187766962844, "learning_rate": 1.655155223557341e-05, "loss": 0.4295, "num_tokens": 2192314445.0, "step": 8327 }, { "epoch": 3.050565657490954, "grad_norm": 0.15301893633695196, "learning_rate": 1.65474817025008e-05, "loss": 0.403, "num_tokens": 2192993083.0, "step": 8328 }, { "epoch": 3.0509320752988596, "grad_norm": 0.14377280580178858, "learning_rate": 1.6543411476415838e-05, "loss": 0.4185, "num_tokens": 2193752059.0, "step": 8329 }, { "epoch": 3.051298493106765, "grad_norm": 0.1390461095652771, "learning_rate": 1.6539341557547692e-05, "loss": 0.4178, "num_tokens": 2194572609.0, "step": 8330 }, { "epoch": 3.0516649109146705, "grad_norm": 0.13931740309855092, "learning_rate": 1.65352719461255e-05, "loss": 0.4256, "num_tokens": 2195313654.0, "step": 8331 }, { "epoch": 3.0520313287225758, "grad_norm": 0.14577523731021436, "learning_rate": 1.6531202642378397e-05, "loss": 0.3962, "num_tokens": 2196029639.0, "step": 8332 }, { "epoch": 3.0523977465304815, "grad_norm": 0.14553817035190292, "learning_rate": 1.6527133646535488e-05, "loss": 0.3951, "num_tokens": 2196788402.0, "step": 8333 }, { "epoch": 3.0527641643383867, "grad_norm": 0.12830254219247475, "learning_rate": 1.652306495882586e-05, "loss": 0.374, "num_tokens": 2197579143.0, "step": 8334 }, { "epoch": 3.0531305821462924, "grad_norm": 0.14221800002182558, "learning_rate": 1.651899657947859e-05, "loss": 0.3967, "num_tokens": 2198294237.0, "step": 8335 }, { "epoch": 3.0534969999541977, "grad_norm": 0.13442239150297963, "learning_rate": 1.6514928508722753e-05, "loss": 0.38, "num_tokens": 2199059011.0, "step": 8336 }, { "epoch": 3.0538634177621033, "grad_norm": 0.14211901482475525, "learning_rate": 1.651086074678737e-05, "loss": 0.4455, "num_tokens": 2199822627.0, "step": 8337 }, { "epoch": 3.0542298355700086, "grad_norm": 0.1358755864529504, "learning_rate": 1.6506793293901466e-05, "loss": 0.4005, "num_tokens": 2200598140.0, "step": 8338 }, { "epoch": 3.0545962533779143, "grad_norm": 0.1412384053610325, "learning_rate": 1.6502726150294055e-05, "loss": 0.4376, "num_tokens": 2201409309.0, "step": 8339 }, { "epoch": 3.0549626711858195, "grad_norm": 0.13337753894474888, "learning_rate": 1.6498659316194122e-05, "loss": 0.3832, "num_tokens": 2202217152.0, "step": 8340 }, { "epoch": 3.0553290889937252, "grad_norm": 0.14217872969253287, "learning_rate": 1.649459279183063e-05, "loss": 0.4155, "num_tokens": 2202970750.0, "step": 8341 }, { "epoch": 3.0556955068016305, "grad_norm": 0.14793046632148263, "learning_rate": 1.6490526577432543e-05, "loss": 0.4194, "num_tokens": 2203674927.0, "step": 8342 }, { "epoch": 3.056061924609536, "grad_norm": 0.14655265522107405, "learning_rate": 1.6486460673228796e-05, "loss": 0.4193, "num_tokens": 2204468189.0, "step": 8343 }, { "epoch": 3.0564283424174414, "grad_norm": 0.12187294760545561, "learning_rate": 1.6482395079448305e-05, "loss": 0.4034, "num_tokens": 2205422374.0, "step": 8344 }, { "epoch": 3.056794760225347, "grad_norm": 0.1428848043886958, "learning_rate": 1.6478329796319967e-05, "loss": 0.4107, "num_tokens": 2206229905.0, "step": 8345 }, { "epoch": 3.0571611780332524, "grad_norm": 0.1380578420599923, "learning_rate": 1.647426482407267e-05, "loss": 0.3962, "num_tokens": 2206872925.0, "step": 8346 }, { "epoch": 3.057527595841158, "grad_norm": 0.144876554993457, "learning_rate": 1.6470200162935285e-05, "loss": 0.3925, "num_tokens": 2207617371.0, "step": 8347 }, { "epoch": 3.0578940136490633, "grad_norm": 0.15122960556106776, "learning_rate": 1.6466135813136648e-05, "loss": 0.4055, "num_tokens": 2208388167.0, "step": 8348 }, { "epoch": 3.058260431456969, "grad_norm": 0.13950407655635808, "learning_rate": 1.64620717749056e-05, "loss": 0.3683, "num_tokens": 2209030670.0, "step": 8349 }, { "epoch": 3.0586268492648743, "grad_norm": 0.139908121226235, "learning_rate": 1.6458008048470958e-05, "loss": 0.3966, "num_tokens": 2209930176.0, "step": 8350 }, { "epoch": 3.0589932670727795, "grad_norm": 0.1406298984966347, "learning_rate": 1.64539446340615e-05, "loss": 0.3725, "num_tokens": 2210707159.0, "step": 8351 }, { "epoch": 3.059359684880685, "grad_norm": 0.13811660435308656, "learning_rate": 1.644988153190602e-05, "loss": 0.4012, "num_tokens": 2211524624.0, "step": 8352 }, { "epoch": 3.0597261026885905, "grad_norm": 0.13932110944495096, "learning_rate": 1.644581874223328e-05, "loss": 0.4287, "num_tokens": 2212392149.0, "step": 8353 }, { "epoch": 3.060092520496496, "grad_norm": 0.14178567127976135, "learning_rate": 1.6441756265272012e-05, "loss": 0.4112, "num_tokens": 2213154450.0, "step": 8354 }, { "epoch": 3.0604589383044014, "grad_norm": 0.1396369936895751, "learning_rate": 1.643769410125095e-05, "loss": 0.41, "num_tokens": 2214022759.0, "step": 8355 }, { "epoch": 3.060825356112307, "grad_norm": 0.14196025818446673, "learning_rate": 1.64336322503988e-05, "loss": 0.4122, "num_tokens": 2214732628.0, "step": 8356 }, { "epoch": 3.0611917739202124, "grad_norm": 0.13648392515989755, "learning_rate": 1.6429570712944244e-05, "loss": 0.4115, "num_tokens": 2215561142.0, "step": 8357 }, { "epoch": 3.061558191728118, "grad_norm": 0.14945763551969773, "learning_rate": 1.642550948911597e-05, "loss": 0.412, "num_tokens": 2216297980.0, "step": 8358 }, { "epoch": 3.0619246095360233, "grad_norm": 0.13941344579935835, "learning_rate": 1.6421448579142626e-05, "loss": 0.4342, "num_tokens": 2217085231.0, "step": 8359 }, { "epoch": 3.062291027343929, "grad_norm": 0.14128948704852734, "learning_rate": 1.6417387983252844e-05, "loss": 0.4104, "num_tokens": 2217914127.0, "step": 8360 }, { "epoch": 3.0626574451518342, "grad_norm": 0.1391867320858473, "learning_rate": 1.641332770167525e-05, "loss": 0.402, "num_tokens": 2218750045.0, "step": 8361 }, { "epoch": 3.06302386295974, "grad_norm": 0.14400416772264732, "learning_rate": 1.640926773463844e-05, "loss": 0.402, "num_tokens": 2219503819.0, "step": 8362 }, { "epoch": 3.063390280767645, "grad_norm": 0.14064259689305084, "learning_rate": 1.6405208082371e-05, "loss": 0.4005, "num_tokens": 2220247515.0, "step": 8363 }, { "epoch": 3.063756698575551, "grad_norm": 0.13766120960996892, "learning_rate": 1.6401148745101506e-05, "loss": 0.3834, "num_tokens": 2221048611.0, "step": 8364 }, { "epoch": 3.064123116383456, "grad_norm": 0.12337820805599224, "learning_rate": 1.63970897230585e-05, "loss": 0.3803, "num_tokens": 2221918659.0, "step": 8365 }, { "epoch": 3.064489534191362, "grad_norm": 0.14194503326601632, "learning_rate": 1.6393031016470514e-05, "loss": 0.3885, "num_tokens": 2222659848.0, "step": 8366 }, { "epoch": 3.064855951999267, "grad_norm": 0.14540905108034183, "learning_rate": 1.638897262556606e-05, "loss": 0.3928, "num_tokens": 2223384923.0, "step": 8367 }, { "epoch": 3.0652223698071728, "grad_norm": 0.14231538844283695, "learning_rate": 1.6384914550573623e-05, "loss": 0.4006, "num_tokens": 2224135834.0, "step": 8368 }, { "epoch": 3.065588787615078, "grad_norm": 0.13263249416173015, "learning_rate": 1.6380856791721696e-05, "loss": 0.3815, "num_tokens": 2224974663.0, "step": 8369 }, { "epoch": 3.0659552054229837, "grad_norm": 0.13842027262392362, "learning_rate": 1.6376799349238738e-05, "loss": 0.4243, "num_tokens": 2225750797.0, "step": 8370 }, { "epoch": 3.066321623230889, "grad_norm": 0.13859950484922978, "learning_rate": 1.6372742223353188e-05, "loss": 0.3935, "num_tokens": 2226449444.0, "step": 8371 }, { "epoch": 3.0666880410387947, "grad_norm": 0.13735161603170573, "learning_rate": 1.6368685414293468e-05, "loss": 0.3868, "num_tokens": 2227147859.0, "step": 8372 }, { "epoch": 3.0670544588467, "grad_norm": 0.1493276904087564, "learning_rate": 1.6364628922287985e-05, "loss": 0.3987, "num_tokens": 2227909961.0, "step": 8373 }, { "epoch": 3.0674208766546056, "grad_norm": 0.1328890522327297, "learning_rate": 1.636057274756512e-05, "loss": 0.3968, "num_tokens": 2228695471.0, "step": 8374 }, { "epoch": 3.067787294462511, "grad_norm": 0.16643270494335685, "learning_rate": 1.635651689035326e-05, "loss": 0.4137, "num_tokens": 2229318357.0, "step": 8375 }, { "epoch": 3.0681537122704166, "grad_norm": 0.13336210589753048, "learning_rate": 1.6352461350880752e-05, "loss": 0.3725, "num_tokens": 2230171623.0, "step": 8376 }, { "epoch": 3.068520130078322, "grad_norm": 0.12797946151951703, "learning_rate": 1.6348406129375925e-05, "loss": 0.3746, "num_tokens": 2230992501.0, "step": 8377 }, { "epoch": 3.068886547886227, "grad_norm": 0.12880391662023447, "learning_rate": 1.6344351226067105e-05, "loss": 0.3789, "num_tokens": 2231748092.0, "step": 8378 }, { "epoch": 3.0692529656941328, "grad_norm": 0.16649592043023353, "learning_rate": 1.6340296641182578e-05, "loss": 0.4385, "num_tokens": 2232463886.0, "step": 8379 }, { "epoch": 3.069619383502038, "grad_norm": 0.1477278439970316, "learning_rate": 1.6336242374950636e-05, "loss": 0.3753, "num_tokens": 2233198887.0, "step": 8380 }, { "epoch": 3.0699858013099437, "grad_norm": 0.13480795238626297, "learning_rate": 1.633218842759954e-05, "loss": 0.4455, "num_tokens": 2234017982.0, "step": 8381 }, { "epoch": 3.070352219117849, "grad_norm": 0.14510010854068148, "learning_rate": 1.6328134799357545e-05, "loss": 0.4078, "num_tokens": 2234820989.0, "step": 8382 }, { "epoch": 3.0707186369257546, "grad_norm": 0.15532334174221546, "learning_rate": 1.632408149045286e-05, "loss": 0.399, "num_tokens": 2235545068.0, "step": 8383 }, { "epoch": 3.07108505473366, "grad_norm": 0.13628586696815098, "learning_rate": 1.6320028501113704e-05, "loss": 0.4056, "num_tokens": 2236265351.0, "step": 8384 }, { "epoch": 3.0714514725415656, "grad_norm": 0.1387181974155892, "learning_rate": 1.631597583156826e-05, "loss": 0.3875, "num_tokens": 2236931392.0, "step": 8385 }, { "epoch": 3.071817890349471, "grad_norm": 0.1607062731454762, "learning_rate": 1.631192348204471e-05, "loss": 0.4063, "num_tokens": 2237611613.0, "step": 8386 }, { "epoch": 3.0721843081573765, "grad_norm": 0.1452426826699673, "learning_rate": 1.6307871452771214e-05, "loss": 0.4004, "num_tokens": 2238313117.0, "step": 8387 }, { "epoch": 3.072550725965282, "grad_norm": 0.1437789343111056, "learning_rate": 1.6303819743975902e-05, "loss": 0.3951, "num_tokens": 2239052402.0, "step": 8388 }, { "epoch": 3.0729171437731875, "grad_norm": 0.1462173949527859, "learning_rate": 1.629976835588689e-05, "loss": 0.421, "num_tokens": 2239876271.0, "step": 8389 }, { "epoch": 3.0732835615810927, "grad_norm": 0.14304947334153992, "learning_rate": 1.6295717288732285e-05, "loss": 0.3886, "num_tokens": 2240760401.0, "step": 8390 }, { "epoch": 3.0736499793889984, "grad_norm": 0.14128159494805428, "learning_rate": 1.6291666542740175e-05, "loss": 0.4275, "num_tokens": 2241531712.0, "step": 8391 }, { "epoch": 3.0740163971969037, "grad_norm": 0.15475265862206916, "learning_rate": 1.6287616118138613e-05, "loss": 0.4299, "num_tokens": 2242225808.0, "step": 8392 }, { "epoch": 3.0743828150048094, "grad_norm": 0.1458313880969449, "learning_rate": 1.628356601515565e-05, "loss": 0.402, "num_tokens": 2242975917.0, "step": 8393 }, { "epoch": 3.0747492328127146, "grad_norm": 0.1360698973935371, "learning_rate": 1.6279516234019322e-05, "loss": 0.3946, "num_tokens": 2243775003.0, "step": 8394 }, { "epoch": 3.0751156506206203, "grad_norm": 0.1466971726228807, "learning_rate": 1.6275466774957637e-05, "loss": 0.4334, "num_tokens": 2244466794.0, "step": 8395 }, { "epoch": 3.0754820684285256, "grad_norm": 0.13961122998808181, "learning_rate": 1.6271417638198578e-05, "loss": 0.4044, "num_tokens": 2245246882.0, "step": 8396 }, { "epoch": 3.0758484862364313, "grad_norm": 0.1349385596463149, "learning_rate": 1.6267368823970126e-05, "loss": 0.3931, "num_tokens": 2246091952.0, "step": 8397 }, { "epoch": 3.0762149040443365, "grad_norm": 0.14682700147440675, "learning_rate": 1.6263320332500244e-05, "loss": 0.3825, "num_tokens": 2246849388.0, "step": 8398 }, { "epoch": 3.076581321852242, "grad_norm": 0.1375311632830878, "learning_rate": 1.625927216401686e-05, "loss": 0.3781, "num_tokens": 2247591551.0, "step": 8399 }, { "epoch": 3.0769477396601475, "grad_norm": 0.14109218262695641, "learning_rate": 1.62552243187479e-05, "loss": 0.3923, "num_tokens": 2248362718.0, "step": 8400 }, { "epoch": 3.077314157468053, "grad_norm": 0.14577028151019156, "learning_rate": 1.6251176796921257e-05, "loss": 0.3792, "num_tokens": 2249108692.0, "step": 8401 }, { "epoch": 3.0776805752759584, "grad_norm": 0.1518742608726735, "learning_rate": 1.6247129598764827e-05, "loss": 0.4205, "num_tokens": 2249769841.0, "step": 8402 }, { "epoch": 3.0780469930838636, "grad_norm": 0.1497734761140867, "learning_rate": 1.6243082724506468e-05, "loss": 0.4282, "num_tokens": 2250516690.0, "step": 8403 }, { "epoch": 3.0784134108917693, "grad_norm": 0.15286528075162367, "learning_rate": 1.623903617437403e-05, "loss": 0.4055, "num_tokens": 2251298148.0, "step": 8404 }, { "epoch": 3.0787798286996746, "grad_norm": 0.13711489164119134, "learning_rate": 1.6234989948595345e-05, "loss": 0.411, "num_tokens": 2252093741.0, "step": 8405 }, { "epoch": 3.0791462465075803, "grad_norm": 0.13979084932861147, "learning_rate": 1.623094404739821e-05, "loss": 0.3866, "num_tokens": 2252811685.0, "step": 8406 }, { "epoch": 3.0795126643154855, "grad_norm": 0.1563670847010891, "learning_rate": 1.6226898471010427e-05, "loss": 0.4196, "num_tokens": 2253535563.0, "step": 8407 }, { "epoch": 3.0798790821233912, "grad_norm": 0.14223233385719355, "learning_rate": 1.6222853219659772e-05, "loss": 0.4017, "num_tokens": 2254337866.0, "step": 8408 }, { "epoch": 3.0802454999312965, "grad_norm": 0.14652098741646585, "learning_rate": 1.6218808293574003e-05, "loss": 0.4183, "num_tokens": 2255057395.0, "step": 8409 }, { "epoch": 3.080611917739202, "grad_norm": 0.1531586479801084, "learning_rate": 1.621476369298085e-05, "loss": 0.4196, "num_tokens": 2255733281.0, "step": 8410 }, { "epoch": 3.0809783355471074, "grad_norm": 0.13460458009975382, "learning_rate": 1.6210719418108035e-05, "loss": 0.3974, "num_tokens": 2256577003.0, "step": 8411 }, { "epoch": 3.081344753355013, "grad_norm": 0.14482268479290053, "learning_rate": 1.6206675469183256e-05, "loss": 0.4008, "num_tokens": 2257342657.0, "step": 8412 }, { "epoch": 3.0817111711629184, "grad_norm": 0.13571501217848256, "learning_rate": 1.6202631846434194e-05, "loss": 0.4123, "num_tokens": 2258118735.0, "step": 8413 }, { "epoch": 3.082077588970824, "grad_norm": 0.13895682766849596, "learning_rate": 1.6198588550088517e-05, "loss": 0.402, "num_tokens": 2258920039.0, "step": 8414 }, { "epoch": 3.0824440067787293, "grad_norm": 0.13663819220678952, "learning_rate": 1.6194545580373876e-05, "loss": 0.3863, "num_tokens": 2259792753.0, "step": 8415 }, { "epoch": 3.082810424586635, "grad_norm": 0.13398205779070657, "learning_rate": 1.6190502937517898e-05, "loss": 0.3943, "num_tokens": 2260605982.0, "step": 8416 }, { "epoch": 3.0831768423945403, "grad_norm": 0.13285764730012273, "learning_rate": 1.6186460621748176e-05, "loss": 0.4009, "num_tokens": 2261499451.0, "step": 8417 }, { "epoch": 3.083543260202446, "grad_norm": 0.139244449076812, "learning_rate": 1.6182418633292306e-05, "loss": 0.444, "num_tokens": 2262277030.0, "step": 8418 }, { "epoch": 3.083909678010351, "grad_norm": 0.1398964321165249, "learning_rate": 1.6178376972377877e-05, "loss": 0.3716, "num_tokens": 2262993739.0, "step": 8419 }, { "epoch": 3.084276095818257, "grad_norm": 0.13230280192471486, "learning_rate": 1.6174335639232426e-05, "loss": 0.3993, "num_tokens": 2263837384.0, "step": 8420 }, { "epoch": 3.084642513626162, "grad_norm": 0.13882236283835617, "learning_rate": 1.6170294634083498e-05, "loss": 0.445, "num_tokens": 2264551286.0, "step": 8421 }, { "epoch": 3.085008931434068, "grad_norm": 0.14863472955080576, "learning_rate": 1.61662539571586e-05, "loss": 0.3751, "num_tokens": 2265293799.0, "step": 8422 }, { "epoch": 3.085375349241973, "grad_norm": 0.12860523718639133, "learning_rate": 1.6162213608685236e-05, "loss": 0.4093, "num_tokens": 2266056762.0, "step": 8423 }, { "epoch": 3.085741767049879, "grad_norm": 0.14449415270619803, "learning_rate": 1.6158173588890875e-05, "loss": 0.3883, "num_tokens": 2266729567.0, "step": 8424 }, { "epoch": 3.086108184857784, "grad_norm": 0.13963100502727155, "learning_rate": 1.6154133898002994e-05, "loss": 0.434, "num_tokens": 2267649119.0, "step": 8425 }, { "epoch": 3.0864746026656897, "grad_norm": 0.13536332176907803, "learning_rate": 1.6150094536249034e-05, "loss": 0.3994, "num_tokens": 2268431065.0, "step": 8426 }, { "epoch": 3.086841020473595, "grad_norm": 0.12853116050509064, "learning_rate": 1.6146055503856408e-05, "loss": 0.4076, "num_tokens": 2269232272.0, "step": 8427 }, { "epoch": 3.0872074382815007, "grad_norm": 0.13669533609589818, "learning_rate": 1.6142016801052532e-05, "loss": 0.4034, "num_tokens": 2270092056.0, "step": 8428 }, { "epoch": 3.087573856089406, "grad_norm": 0.1392639250184953, "learning_rate": 1.613797842806478e-05, "loss": 0.3765, "num_tokens": 2270818300.0, "step": 8429 }, { "epoch": 3.0879402738973116, "grad_norm": 0.13759755150347047, "learning_rate": 1.6133940385120535e-05, "loss": 0.4022, "num_tokens": 2271633607.0, "step": 8430 }, { "epoch": 3.088306691705217, "grad_norm": 0.13869531312074215, "learning_rate": 1.612990267244714e-05, "loss": 0.4021, "num_tokens": 2272428999.0, "step": 8431 }, { "epoch": 3.088673109513122, "grad_norm": 0.14736392410767937, "learning_rate": 1.6125865290271928e-05, "loss": 0.3896, "num_tokens": 2273107562.0, "step": 8432 }, { "epoch": 3.089039527321028, "grad_norm": 0.14635097530997757, "learning_rate": 1.6121828238822215e-05, "loss": 0.4263, "num_tokens": 2273809984.0, "step": 8433 }, { "epoch": 3.089405945128933, "grad_norm": 0.14841441464574034, "learning_rate": 1.6117791518325284e-05, "loss": 0.4223, "num_tokens": 2274581872.0, "step": 8434 }, { "epoch": 3.0897723629368388, "grad_norm": 0.1564319067290726, "learning_rate": 1.611375512900842e-05, "loss": 0.4153, "num_tokens": 2275230565.0, "step": 8435 }, { "epoch": 3.090138780744744, "grad_norm": 0.13310073888852295, "learning_rate": 1.6109719071098876e-05, "loss": 0.3913, "num_tokens": 2276025231.0, "step": 8436 }, { "epoch": 3.0905051985526497, "grad_norm": 0.14567905129512654, "learning_rate": 1.610568334482389e-05, "loss": 0.4045, "num_tokens": 2276791081.0, "step": 8437 }, { "epoch": 3.090871616360555, "grad_norm": 0.14100060089909106, "learning_rate": 1.610164795041069e-05, "loss": 0.3958, "num_tokens": 2277546151.0, "step": 8438 }, { "epoch": 3.0912380341684607, "grad_norm": 0.12424525876107445, "learning_rate": 1.6097612888086462e-05, "loss": 0.4029, "num_tokens": 2278610335.0, "step": 8439 }, { "epoch": 3.091604451976366, "grad_norm": 0.1488002606432183, "learning_rate": 1.6093578158078392e-05, "loss": 0.4061, "num_tokens": 2279280123.0, "step": 8440 }, { "epoch": 3.0919708697842716, "grad_norm": 0.14329605957605032, "learning_rate": 1.6089543760613652e-05, "loss": 0.4015, "num_tokens": 2279996557.0, "step": 8441 }, { "epoch": 3.092337287592177, "grad_norm": 0.1567488190266164, "learning_rate": 1.608550969591938e-05, "loss": 0.4138, "num_tokens": 2280714611.0, "step": 8442 }, { "epoch": 3.0927037054000825, "grad_norm": 0.1379901262257793, "learning_rate": 1.6081475964222713e-05, "loss": 0.4045, "num_tokens": 2281396383.0, "step": 8443 }, { "epoch": 3.093070123207988, "grad_norm": 0.1367541590824109, "learning_rate": 1.6077442565750742e-05, "loss": 0.4117, "num_tokens": 2282188973.0, "step": 8444 }, { "epoch": 3.0934365410158935, "grad_norm": 0.14145170528256132, "learning_rate": 1.6073409500730556e-05, "loss": 0.3883, "num_tokens": 2282978222.0, "step": 8445 }, { "epoch": 3.0938029588237987, "grad_norm": 0.14445396839042243, "learning_rate": 1.6069376769389232e-05, "loss": 0.3706, "num_tokens": 2283729291.0, "step": 8446 }, { "epoch": 3.0941693766317044, "grad_norm": 0.1304254202291871, "learning_rate": 1.6065344371953825e-05, "loss": 0.4022, "num_tokens": 2284532766.0, "step": 8447 }, { "epoch": 3.0945357944396097, "grad_norm": 0.13418061668005796, "learning_rate": 1.6061312308651357e-05, "loss": 0.3752, "num_tokens": 2285287576.0, "step": 8448 }, { "epoch": 3.0949022122475154, "grad_norm": 0.1308619706318634, "learning_rate": 1.6057280579708846e-05, "loss": 0.4136, "num_tokens": 2286189823.0, "step": 8449 }, { "epoch": 3.0952686300554206, "grad_norm": 0.13876232496925478, "learning_rate": 1.6053249185353294e-05, "loss": 0.3628, "num_tokens": 2286887075.0, "step": 8450 }, { "epoch": 3.0956350478633263, "grad_norm": 0.1380409203914392, "learning_rate": 1.604921812581165e-05, "loss": 0.4098, "num_tokens": 2287599591.0, "step": 8451 }, { "epoch": 3.0960014656712316, "grad_norm": 0.14042641381230045, "learning_rate": 1.60451874013109e-05, "loss": 0.3824, "num_tokens": 2288379544.0, "step": 8452 }, { "epoch": 3.0963678834791373, "grad_norm": 0.14753687090545164, "learning_rate": 1.6041157012077975e-05, "loss": 0.3948, "num_tokens": 2289151163.0, "step": 8453 }, { "epoch": 3.0967343012870425, "grad_norm": 0.13471081654682818, "learning_rate": 1.603712695833979e-05, "loss": 0.3737, "num_tokens": 2289944835.0, "step": 8454 }, { "epoch": 3.097100719094948, "grad_norm": 0.13998513143476765, "learning_rate": 1.603309724032324e-05, "loss": 0.4184, "num_tokens": 2290757067.0, "step": 8455 }, { "epoch": 3.0974671369028535, "grad_norm": 0.13898420978933348, "learning_rate": 1.602906785825521e-05, "loss": 0.422, "num_tokens": 2291534081.0, "step": 8456 }, { "epoch": 3.0978335547107587, "grad_norm": 0.13602104662182427, "learning_rate": 1.602503881236256e-05, "loss": 0.4302, "num_tokens": 2292332393.0, "step": 8457 }, { "epoch": 3.0981999725186644, "grad_norm": 0.1453626285316447, "learning_rate": 1.602101010287214e-05, "loss": 0.3923, "num_tokens": 2293078608.0, "step": 8458 }, { "epoch": 3.0985663903265697, "grad_norm": 0.1476684418502829, "learning_rate": 1.6016981730010776e-05, "loss": 0.4176, "num_tokens": 2293782958.0, "step": 8459 }, { "epoch": 3.0989328081344754, "grad_norm": 0.14416428381152974, "learning_rate": 1.6012953694005267e-05, "loss": 0.3956, "num_tokens": 2294493288.0, "step": 8460 }, { "epoch": 3.0992992259423806, "grad_norm": 0.13328121214942498, "learning_rate": 1.6008925995082397e-05, "loss": 0.3857, "num_tokens": 2295381010.0, "step": 8461 }, { "epoch": 3.0996656437502863, "grad_norm": 0.1440673694519419, "learning_rate": 1.6004898633468936e-05, "loss": 0.3941, "num_tokens": 2296040463.0, "step": 8462 }, { "epoch": 3.1000320615581916, "grad_norm": 0.14727476261257996, "learning_rate": 1.600087160939164e-05, "loss": 0.3654, "num_tokens": 2296748132.0, "step": 8463 }, { "epoch": 3.1003984793660972, "grad_norm": 0.1407270054219836, "learning_rate": 1.599684492307723e-05, "loss": 0.4302, "num_tokens": 2297513090.0, "step": 8464 }, { "epoch": 3.1007648971740025, "grad_norm": 0.13368495040575626, "learning_rate": 1.5992818574752417e-05, "loss": 0.3981, "num_tokens": 2298335056.0, "step": 8465 }, { "epoch": 3.101131314981908, "grad_norm": 0.13689434820601387, "learning_rate": 1.5988792564643896e-05, "loss": 0.3758, "num_tokens": 2299095048.0, "step": 8466 }, { "epoch": 3.1014977327898134, "grad_norm": 0.14589587764366424, "learning_rate": 1.598476689297834e-05, "loss": 0.3922, "num_tokens": 2299791560.0, "step": 8467 }, { "epoch": 3.101864150597719, "grad_norm": 0.16033047840705958, "learning_rate": 1.598074155998239e-05, "loss": 0.4147, "num_tokens": 2300550304.0, "step": 8468 }, { "epoch": 3.1022305684056244, "grad_norm": 0.13750930286806987, "learning_rate": 1.5976716565882698e-05, "loss": 0.4104, "num_tokens": 2301420181.0, "step": 8469 }, { "epoch": 3.10259698621353, "grad_norm": 0.14092568304499267, "learning_rate": 1.5972691910905868e-05, "loss": 0.3953, "num_tokens": 2302154936.0, "step": 8470 }, { "epoch": 3.1029634040214353, "grad_norm": 0.14044723620643784, "learning_rate": 1.5968667595278504e-05, "loss": 0.3922, "num_tokens": 2302955123.0, "step": 8471 }, { "epoch": 3.103329821829341, "grad_norm": 0.13688087069669333, "learning_rate": 1.5964643619227177e-05, "loss": 0.4296, "num_tokens": 2303758696.0, "step": 8472 }, { "epoch": 3.1036962396372463, "grad_norm": 0.13868680257866073, "learning_rate": 1.596061998297844e-05, "loss": 0.3864, "num_tokens": 2304544352.0, "step": 8473 }, { "epoch": 3.104062657445152, "grad_norm": 0.12616548691438145, "learning_rate": 1.5956596686758845e-05, "loss": 0.4046, "num_tokens": 2305417063.0, "step": 8474 }, { "epoch": 3.1044290752530572, "grad_norm": 0.13252526987021754, "learning_rate": 1.5952573730794907e-05, "loss": 0.3955, "num_tokens": 2306189668.0, "step": 8475 }, { "epoch": 3.104795493060963, "grad_norm": 0.14874943153055808, "learning_rate": 1.594855111531312e-05, "loss": 0.3898, "num_tokens": 2306962078.0, "step": 8476 }, { "epoch": 3.105161910868868, "grad_norm": 0.13384373925892, "learning_rate": 1.594452884053997e-05, "loss": 0.377, "num_tokens": 2307715557.0, "step": 8477 }, { "epoch": 3.105528328676774, "grad_norm": 0.14692881896651355, "learning_rate": 1.594050690670192e-05, "loss": 0.4225, "num_tokens": 2308469663.0, "step": 8478 }, { "epoch": 3.105894746484679, "grad_norm": 0.14346349181738627, "learning_rate": 1.59364853140254e-05, "loss": 0.3948, "num_tokens": 2309232938.0, "step": 8479 }, { "epoch": 3.106261164292585, "grad_norm": 0.1442780993224529, "learning_rate": 1.5932464062736863e-05, "loss": 0.413, "num_tokens": 2309963467.0, "step": 8480 }, { "epoch": 3.10662758210049, "grad_norm": 0.13835101615709042, "learning_rate": 1.5928443153062688e-05, "loss": 0.3946, "num_tokens": 2310790526.0, "step": 8481 }, { "epoch": 3.1069939999083958, "grad_norm": 0.13829014143206106, "learning_rate": 1.5924422585229266e-05, "loss": 0.3915, "num_tokens": 2311598937.0, "step": 8482 }, { "epoch": 3.107360417716301, "grad_norm": 0.13925497786486707, "learning_rate": 1.592040235946297e-05, "loss": 0.4096, "num_tokens": 2312415580.0, "step": 8483 }, { "epoch": 3.1077268355242067, "grad_norm": 0.13321681317946837, "learning_rate": 1.591638247599013e-05, "loss": 0.4176, "num_tokens": 2313167130.0, "step": 8484 }, { "epoch": 3.108093253332112, "grad_norm": 0.13268836827420982, "learning_rate": 1.59123629350371e-05, "loss": 0.4057, "num_tokens": 2314113457.0, "step": 8485 }, { "epoch": 3.108459671140017, "grad_norm": 0.1495905529528359, "learning_rate": 1.590834373683016e-05, "loss": 0.389, "num_tokens": 2314766525.0, "step": 8486 }, { "epoch": 3.108826088947923, "grad_norm": 0.1476842337079134, "learning_rate": 1.590432488159562e-05, "loss": 0.4044, "num_tokens": 2315541239.0, "step": 8487 }, { "epoch": 3.109192506755828, "grad_norm": 0.1392969594657334, "learning_rate": 1.590030636955974e-05, "loss": 0.4003, "num_tokens": 2316432403.0, "step": 8488 }, { "epoch": 3.109558924563734, "grad_norm": 0.12867986220129563, "learning_rate": 1.5896288200948774e-05, "loss": 0.4275, "num_tokens": 2317239426.0, "step": 8489 }, { "epoch": 3.109925342371639, "grad_norm": 0.15412947748360384, "learning_rate": 1.589227037598894e-05, "loss": 0.3942, "num_tokens": 2317955249.0, "step": 8490 }, { "epoch": 3.110291760179545, "grad_norm": 0.15528438393690855, "learning_rate": 1.588825289490647e-05, "loss": 0.4052, "num_tokens": 2318686303.0, "step": 8491 }, { "epoch": 3.11065817798745, "grad_norm": 0.1465715034383516, "learning_rate": 1.5884235757927546e-05, "loss": 0.4444, "num_tokens": 2319384856.0, "step": 8492 }, { "epoch": 3.1110245957953557, "grad_norm": 0.1427157455501277, "learning_rate": 1.5880218965278344e-05, "loss": 0.4053, "num_tokens": 2320223271.0, "step": 8493 }, { "epoch": 3.111391013603261, "grad_norm": 0.12799342942015818, "learning_rate": 1.587620251718501e-05, "loss": 0.3874, "num_tokens": 2321002276.0, "step": 8494 }, { "epoch": 3.1117574314111667, "grad_norm": 0.14320928806052982, "learning_rate": 1.5872186413873683e-05, "loss": 0.4204, "num_tokens": 2321737922.0, "step": 8495 }, { "epoch": 3.112123849219072, "grad_norm": 0.1351939636969234, "learning_rate": 1.586817065557047e-05, "loss": 0.4082, "num_tokens": 2322577570.0, "step": 8496 }, { "epoch": 3.1124902670269776, "grad_norm": 0.13645048915173208, "learning_rate": 1.5864155242501484e-05, "loss": 0.3867, "num_tokens": 2323238364.0, "step": 8497 }, { "epoch": 3.112856684834883, "grad_norm": 0.14892233608885316, "learning_rate": 1.586014017489279e-05, "loss": 0.4223, "num_tokens": 2324078123.0, "step": 8498 }, { "epoch": 3.1132231026427886, "grad_norm": 0.13583544918368493, "learning_rate": 1.585612545297044e-05, "loss": 0.4082, "num_tokens": 2324871719.0, "step": 8499 }, { "epoch": 3.113589520450694, "grad_norm": 0.12551740770515082, "learning_rate": 1.585211107696048e-05, "loss": 0.3883, "num_tokens": 2325631396.0, "step": 8500 }, { "epoch": 3.1139559382585995, "grad_norm": 0.15368681814836913, "learning_rate": 1.5848097047088916e-05, "loss": 0.3887, "num_tokens": 2326318165.0, "step": 8501 }, { "epoch": 3.1143223560665048, "grad_norm": 0.13895742767329042, "learning_rate": 1.584408336358176e-05, "loss": 0.4338, "num_tokens": 2327079708.0, "step": 8502 }, { "epoch": 3.1146887738744105, "grad_norm": 0.1386569119437822, "learning_rate": 1.584007002666498e-05, "loss": 0.4026, "num_tokens": 2327945136.0, "step": 8503 }, { "epoch": 3.1150551916823157, "grad_norm": 0.14284775761700572, "learning_rate": 1.583605703656454e-05, "loss": 0.4392, "num_tokens": 2328718002.0, "step": 8504 }, { "epoch": 3.1154216094902214, "grad_norm": 0.14806605767093028, "learning_rate": 1.5832044393506375e-05, "loss": 0.4015, "num_tokens": 2329458589.0, "step": 8505 }, { "epoch": 3.1157880272981267, "grad_norm": 0.1354733559584673, "learning_rate": 1.5828032097716407e-05, "loss": 0.3745, "num_tokens": 2330301291.0, "step": 8506 }, { "epoch": 3.1161544451060323, "grad_norm": 0.13148047531744766, "learning_rate": 1.5824020149420533e-05, "loss": 0.3964, "num_tokens": 2331112323.0, "step": 8507 }, { "epoch": 3.1165208629139376, "grad_norm": 0.14085370512864628, "learning_rate": 1.5820008548844644e-05, "loss": 0.3922, "num_tokens": 2331985249.0, "step": 8508 }, { "epoch": 3.1168872807218433, "grad_norm": 0.13788798301171992, "learning_rate": 1.5815997296214592e-05, "loss": 0.434, "num_tokens": 2332735709.0, "step": 8509 }, { "epoch": 3.1172536985297485, "grad_norm": 0.1499803527179656, "learning_rate": 1.581198639175622e-05, "loss": 0.4427, "num_tokens": 2333597424.0, "step": 8510 }, { "epoch": 3.117620116337654, "grad_norm": 0.1459981265697148, "learning_rate": 1.5807975835695354e-05, "loss": 0.417, "num_tokens": 2334402335.0, "step": 8511 }, { "epoch": 3.1179865341455595, "grad_norm": 0.1441257579907886, "learning_rate": 1.5803965628257785e-05, "loss": 0.4221, "num_tokens": 2335102072.0, "step": 8512 }, { "epoch": 3.1183529519534647, "grad_norm": 0.13886948155288148, "learning_rate": 1.5799955769669306e-05, "loss": 0.3813, "num_tokens": 2335873077.0, "step": 8513 }, { "epoch": 3.1187193697613704, "grad_norm": 0.22725147954645236, "learning_rate": 1.579594626015568e-05, "loss": 0.3938, "num_tokens": 2336609676.0, "step": 8514 }, { "epoch": 3.1190857875692757, "grad_norm": 0.12568795766087382, "learning_rate": 1.5791937099942652e-05, "loss": 0.359, "num_tokens": 2337475230.0, "step": 8515 }, { "epoch": 3.1194522053771814, "grad_norm": 0.1556516674038231, "learning_rate": 1.5787928289255936e-05, "loss": 0.3962, "num_tokens": 2338189063.0, "step": 8516 }, { "epoch": 3.1198186231850866, "grad_norm": 0.14654737262346523, "learning_rate": 1.5783919828321246e-05, "loss": 0.412, "num_tokens": 2338929908.0, "step": 8517 }, { "epoch": 3.1201850409929923, "grad_norm": 0.1560860607619838, "learning_rate": 1.5779911717364256e-05, "loss": 0.4496, "num_tokens": 2339686792.0, "step": 8518 }, { "epoch": 3.1205514588008976, "grad_norm": 0.14797883063796544, "learning_rate": 1.5775903956610643e-05, "loss": 0.4, "num_tokens": 2340438920.0, "step": 8519 }, { "epoch": 3.1209178766088033, "grad_norm": 0.14426967254822107, "learning_rate": 1.5771896546286042e-05, "loss": 0.4001, "num_tokens": 2341061619.0, "step": 8520 }, { "epoch": 3.1212842944167085, "grad_norm": 0.14102800604016555, "learning_rate": 1.5767889486616083e-05, "loss": 0.3883, "num_tokens": 2341840606.0, "step": 8521 }, { "epoch": 3.121650712224614, "grad_norm": 0.13499878857355038, "learning_rate": 1.5763882777826372e-05, "loss": 0.4063, "num_tokens": 2342644043.0, "step": 8522 }, { "epoch": 3.1220171300325195, "grad_norm": 0.12770257988947548, "learning_rate": 1.5759876420142486e-05, "loss": 0.3787, "num_tokens": 2343460423.0, "step": 8523 }, { "epoch": 3.122383547840425, "grad_norm": 0.13745763616235165, "learning_rate": 1.575587041379e-05, "loss": 0.4336, "num_tokens": 2344267621.0, "step": 8524 }, { "epoch": 3.1227499656483304, "grad_norm": 0.13686508499006578, "learning_rate": 1.5751864758994463e-05, "loss": 0.394, "num_tokens": 2345012418.0, "step": 8525 }, { "epoch": 3.123116383456236, "grad_norm": 0.1393799024381488, "learning_rate": 1.5747859455981397e-05, "loss": 0.3866, "num_tokens": 2345829631.0, "step": 8526 }, { "epoch": 3.1234828012641414, "grad_norm": 0.14091847741125074, "learning_rate": 1.5743854504976304e-05, "loss": 0.407, "num_tokens": 2346557188.0, "step": 8527 }, { "epoch": 3.123849219072047, "grad_norm": 0.12816977039080793, "learning_rate": 1.573984990620468e-05, "loss": 0.3924, "num_tokens": 2347462464.0, "step": 8528 }, { "epoch": 3.1242156368799523, "grad_norm": 0.1474723002555252, "learning_rate": 1.5735845659891974e-05, "loss": 0.3844, "num_tokens": 2348304808.0, "step": 8529 }, { "epoch": 3.124582054687858, "grad_norm": 0.132882040996307, "learning_rate": 1.573184176626366e-05, "loss": 0.4014, "num_tokens": 2349087707.0, "step": 8530 }, { "epoch": 3.1249484724957632, "grad_norm": 0.14225252984631193, "learning_rate": 1.572783822554514e-05, "loss": 0.4114, "num_tokens": 2349819162.0, "step": 8531 }, { "epoch": 3.125314890303669, "grad_norm": 0.14150932023547882, "learning_rate": 1.5723835037961837e-05, "loss": 0.392, "num_tokens": 2350632009.0, "step": 8532 }, { "epoch": 3.125681308111574, "grad_norm": 0.13330005559229108, "learning_rate": 1.5719832203739138e-05, "loss": 0.3861, "num_tokens": 2351440800.0, "step": 8533 }, { "epoch": 3.12604772591948, "grad_norm": 0.14951874661995626, "learning_rate": 1.5715829723102393e-05, "loss": 0.4326, "num_tokens": 2352128350.0, "step": 8534 }, { "epoch": 3.126414143727385, "grad_norm": 0.13691552850881564, "learning_rate": 1.5711827596276976e-05, "loss": 0.3851, "num_tokens": 2352964277.0, "step": 8535 }, { "epoch": 3.1267805615352904, "grad_norm": 0.13678376593223826, "learning_rate": 1.57078258234882e-05, "loss": 0.3991, "num_tokens": 2353731827.0, "step": 8536 }, { "epoch": 3.127146979343196, "grad_norm": 0.1494293261007223, "learning_rate": 1.570382440496137e-05, "loss": 0.3921, "num_tokens": 2354449890.0, "step": 8537 }, { "epoch": 3.1275133971511018, "grad_norm": 0.14038496759173708, "learning_rate": 1.569982334092178e-05, "loss": 0.3941, "num_tokens": 2355237852.0, "step": 8538 }, { "epoch": 3.127879814959007, "grad_norm": 0.15285635405908904, "learning_rate": 1.5695822631594698e-05, "loss": 0.3819, "num_tokens": 2355903124.0, "step": 8539 }, { "epoch": 3.1282462327669123, "grad_norm": 0.13230023929909554, "learning_rate": 1.5691822277205364e-05, "loss": 0.3831, "num_tokens": 2356730067.0, "step": 8540 }, { "epoch": 3.128612650574818, "grad_norm": 0.1366476261620165, "learning_rate": 1.568782227797901e-05, "loss": 0.3863, "num_tokens": 2357517280.0, "step": 8541 }, { "epoch": 3.128979068382723, "grad_norm": 0.1355960858488488, "learning_rate": 1.5683822634140856e-05, "loss": 0.403, "num_tokens": 2358404733.0, "step": 8542 }, { "epoch": 3.129345486190629, "grad_norm": 0.14949277262798197, "learning_rate": 1.5679823345916077e-05, "loss": 0.4099, "num_tokens": 2359103695.0, "step": 8543 }, { "epoch": 3.129711903998534, "grad_norm": 0.12360779117691315, "learning_rate": 1.567582441352984e-05, "loss": 0.3891, "num_tokens": 2360105570.0, "step": 8544 }, { "epoch": 3.13007832180644, "grad_norm": 0.1391156297303208, "learning_rate": 1.5671825837207294e-05, "loss": 0.386, "num_tokens": 2360900355.0, "step": 8545 }, { "epoch": 3.130444739614345, "grad_norm": 0.13507407305188596, "learning_rate": 1.5667827617173572e-05, "loss": 0.3597, "num_tokens": 2361662827.0, "step": 8546 }, { "epoch": 3.130811157422251, "grad_norm": 0.13893411296867994, "learning_rate": 1.5663829753653782e-05, "loss": 0.4075, "num_tokens": 2362412427.0, "step": 8547 }, { "epoch": 3.131177575230156, "grad_norm": 0.13710058299899083, "learning_rate": 1.5659832246873007e-05, "loss": 0.4052, "num_tokens": 2363233551.0, "step": 8548 }, { "epoch": 3.1315439930380617, "grad_norm": 0.13337158666539206, "learning_rate": 1.5655835097056317e-05, "loss": 0.3977, "num_tokens": 2364065943.0, "step": 8549 }, { "epoch": 3.131910410845967, "grad_norm": 0.13776513026820475, "learning_rate": 1.5651838304428756e-05, "loss": 0.4124, "num_tokens": 2364863600.0, "step": 8550 }, { "epoch": 3.1322768286538727, "grad_norm": 0.15026446071289687, "learning_rate": 1.5647841869215353e-05, "loss": 0.4049, "num_tokens": 2365525995.0, "step": 8551 }, { "epoch": 3.132643246461778, "grad_norm": 0.1442252668335455, "learning_rate": 1.564384579164112e-05, "loss": 0.4173, "num_tokens": 2366255634.0, "step": 8552 }, { "epoch": 3.1330096642696836, "grad_norm": 0.1483533943257873, "learning_rate": 1.5639850071931043e-05, "loss": 0.3995, "num_tokens": 2366941199.0, "step": 8553 }, { "epoch": 3.133376082077589, "grad_norm": 0.13171170274783392, "learning_rate": 1.5635854710310084e-05, "loss": 0.3904, "num_tokens": 2367811724.0, "step": 8554 }, { "epoch": 3.1337424998854946, "grad_norm": 0.1449958966799834, "learning_rate": 1.563185970700319e-05, "loss": 0.3891, "num_tokens": 2368463731.0, "step": 8555 }, { "epoch": 3.1341089176934, "grad_norm": 0.13067233478638637, "learning_rate": 1.562786506223529e-05, "loss": 0.4089, "num_tokens": 2369308491.0, "step": 8556 }, { "epoch": 3.1344753355013055, "grad_norm": 0.146896207586776, "learning_rate": 1.5623870776231295e-05, "loss": 0.4266, "num_tokens": 2370081349.0, "step": 8557 }, { "epoch": 3.1348417533092108, "grad_norm": 0.14220229879643323, "learning_rate": 1.5619876849216084e-05, "loss": 0.4024, "num_tokens": 2370827118.0, "step": 8558 }, { "epoch": 3.1352081711171165, "grad_norm": 0.13891588457488505, "learning_rate": 1.5615883281414527e-05, "loss": 0.3731, "num_tokens": 2371644086.0, "step": 8559 }, { "epoch": 3.1355745889250217, "grad_norm": 0.149399496201374, "learning_rate": 1.561189007305147e-05, "loss": 0.4166, "num_tokens": 2372420703.0, "step": 8560 }, { "epoch": 3.1359410067329274, "grad_norm": 0.1503804086184554, "learning_rate": 1.5607897224351735e-05, "loss": 0.431, "num_tokens": 2373124908.0, "step": 8561 }, { "epoch": 3.1363074245408327, "grad_norm": 0.13656471873200404, "learning_rate": 1.5603904735540124e-05, "loss": 0.3979, "num_tokens": 2373881417.0, "step": 8562 }, { "epoch": 3.1366738423487384, "grad_norm": 0.14480033402105763, "learning_rate": 1.5599912606841433e-05, "loss": 0.3861, "num_tokens": 2374619920.0, "step": 8563 }, { "epoch": 3.1370402601566436, "grad_norm": 0.14980470348534158, "learning_rate": 1.5595920838480427e-05, "loss": 0.436, "num_tokens": 2375383851.0, "step": 8564 }, { "epoch": 3.137406677964549, "grad_norm": 0.1299302057731794, "learning_rate": 1.559192943068184e-05, "loss": 0.4096, "num_tokens": 2376300388.0, "step": 8565 }, { "epoch": 3.1377730957724546, "grad_norm": 0.12624073773984018, "learning_rate": 1.5587938383670405e-05, "loss": 0.3847, "num_tokens": 2377079740.0, "step": 8566 }, { "epoch": 3.13813951358036, "grad_norm": 0.1438677803017984, "learning_rate": 1.558394769767081e-05, "loss": 0.4037, "num_tokens": 2377837954.0, "step": 8567 }, { "epoch": 3.1385059313882655, "grad_norm": 0.1501259250390869, "learning_rate": 1.557995737290776e-05, "loss": 0.4088, "num_tokens": 2378589914.0, "step": 8568 }, { "epoch": 3.1388723491961708, "grad_norm": 0.14002896422429448, "learning_rate": 1.557596740960591e-05, "loss": 0.386, "num_tokens": 2379298467.0, "step": 8569 }, { "epoch": 3.1392387670040764, "grad_norm": 0.15324997662778292, "learning_rate": 1.5571977807989905e-05, "loss": 0.3891, "num_tokens": 2380009623.0, "step": 8570 }, { "epoch": 3.1396051848119817, "grad_norm": 0.147599713006148, "learning_rate": 1.5567988568284356e-05, "loss": 0.4314, "num_tokens": 2380656870.0, "step": 8571 }, { "epoch": 3.1399716026198874, "grad_norm": 0.16351963049943724, "learning_rate": 1.5563999690713877e-05, "loss": 0.425, "num_tokens": 2381208292.0, "step": 8572 }, { "epoch": 3.1403380204277926, "grad_norm": 0.14554476370980696, "learning_rate": 1.556001117550304e-05, "loss": 0.394, "num_tokens": 2381996533.0, "step": 8573 }, { "epoch": 3.1407044382356983, "grad_norm": 0.14377270897603583, "learning_rate": 1.5556023022876426e-05, "loss": 0.4118, "num_tokens": 2382687285.0, "step": 8574 }, { "epoch": 3.1410708560436036, "grad_norm": 0.13670760933411139, "learning_rate": 1.5552035233058553e-05, "loss": 0.4084, "num_tokens": 2383516374.0, "step": 8575 }, { "epoch": 3.1414372738515093, "grad_norm": 0.13199113199232396, "learning_rate": 1.5548047806273955e-05, "loss": 0.3798, "num_tokens": 2384279890.0, "step": 8576 }, { "epoch": 3.1418036916594145, "grad_norm": 0.1410239443340636, "learning_rate": 1.5544060742747128e-05, "loss": 0.4164, "num_tokens": 2385078423.0, "step": 8577 }, { "epoch": 3.1421701094673202, "grad_norm": 0.13007563046972553, "learning_rate": 1.5540074042702545e-05, "loss": 0.3782, "num_tokens": 2385883521.0, "step": 8578 }, { "epoch": 3.1425365272752255, "grad_norm": 0.13910436078762084, "learning_rate": 1.5536087706364678e-05, "loss": 0.4066, "num_tokens": 2386682987.0, "step": 8579 }, { "epoch": 3.142902945083131, "grad_norm": 0.1525493529799785, "learning_rate": 1.553210173395796e-05, "loss": 0.4157, "num_tokens": 2387316750.0, "step": 8580 }, { "epoch": 3.1432693628910364, "grad_norm": 0.13783168520925893, "learning_rate": 1.5528116125706813e-05, "loss": 0.4226, "num_tokens": 2388129499.0, "step": 8581 }, { "epoch": 3.143635780698942, "grad_norm": 0.1431587446142963, "learning_rate": 1.5524130881835618e-05, "loss": 0.3926, "num_tokens": 2388959619.0, "step": 8582 }, { "epoch": 3.1440021985068474, "grad_norm": 0.12888161812990925, "learning_rate": 1.552014600256878e-05, "loss": 0.4029, "num_tokens": 2389828856.0, "step": 8583 }, { "epoch": 3.144368616314753, "grad_norm": 0.14734006202566247, "learning_rate": 1.5516161488130626e-05, "loss": 0.4089, "num_tokens": 2390499069.0, "step": 8584 }, { "epoch": 3.1447350341226583, "grad_norm": 0.1448490704353248, "learning_rate": 1.5512177338745512e-05, "loss": 0.3853, "num_tokens": 2391264883.0, "step": 8585 }, { "epoch": 3.145101451930564, "grad_norm": 0.13562158989909967, "learning_rate": 1.5508193554637752e-05, "loss": 0.4365, "num_tokens": 2392041031.0, "step": 8586 }, { "epoch": 3.1454678697384693, "grad_norm": 0.13457443996124868, "learning_rate": 1.5504210136031635e-05, "loss": 0.4236, "num_tokens": 2392864739.0, "step": 8587 }, { "epoch": 3.145834287546375, "grad_norm": 0.1513802754008709, "learning_rate": 1.5500227083151437e-05, "loss": 0.4204, "num_tokens": 2393712759.0, "step": 8588 }, { "epoch": 3.14620070535428, "grad_norm": 0.13797730569329386, "learning_rate": 1.5496244396221412e-05, "loss": 0.3981, "num_tokens": 2394398670.0, "step": 8589 }, { "epoch": 3.1465671231621855, "grad_norm": 0.13618065166147467, "learning_rate": 1.5492262075465787e-05, "loss": 0.395, "num_tokens": 2395148781.0, "step": 8590 }, { "epoch": 3.146933540970091, "grad_norm": 0.14717300453024582, "learning_rate": 1.548828012110879e-05, "loss": 0.4276, "num_tokens": 2395896843.0, "step": 8591 }, { "epoch": 3.147299958777997, "grad_norm": 0.14225856098102202, "learning_rate": 1.54842985333746e-05, "loss": 0.3912, "num_tokens": 2396680959.0, "step": 8592 }, { "epoch": 3.147666376585902, "grad_norm": 0.1368122465361683, "learning_rate": 1.5480317312487393e-05, "loss": 0.3953, "num_tokens": 2397550250.0, "step": 8593 }, { "epoch": 3.1480327943938073, "grad_norm": 0.1344320769040459, "learning_rate": 1.547633645867132e-05, "loss": 0.4067, "num_tokens": 2398395013.0, "step": 8594 }, { "epoch": 3.148399212201713, "grad_norm": 0.14524785650998465, "learning_rate": 1.5472355972150506e-05, "loss": 0.4138, "num_tokens": 2399134873.0, "step": 8595 }, { "epoch": 3.1487656300096183, "grad_norm": 0.13696992387535464, "learning_rate": 1.5468375853149068e-05, "loss": 0.4142, "num_tokens": 2399985624.0, "step": 8596 }, { "epoch": 3.149132047817524, "grad_norm": 0.14486421202458444, "learning_rate": 1.546439610189109e-05, "loss": 0.4093, "num_tokens": 2400802114.0, "step": 8597 }, { "epoch": 3.1494984656254292, "grad_norm": 0.14387416479063597, "learning_rate": 1.5460416718600647e-05, "loss": 0.4306, "num_tokens": 2401470275.0, "step": 8598 }, { "epoch": 3.149864883433335, "grad_norm": 0.16146289448533466, "learning_rate": 1.5456437703501772e-05, "loss": 0.4173, "num_tokens": 2402248250.0, "step": 8599 }, { "epoch": 3.15023130124124, "grad_norm": 0.1495883043339418, "learning_rate": 1.5452459056818505e-05, "loss": 0.4022, "num_tokens": 2403098035.0, "step": 8600 }, { "epoch": 3.150597719049146, "grad_norm": 0.14352820765748117, "learning_rate": 1.5448480778774843e-05, "loss": 0.4128, "num_tokens": 2403808611.0, "step": 8601 }, { "epoch": 3.150964136857051, "grad_norm": 0.17118044846876795, "learning_rate": 1.5444502869594774e-05, "loss": 0.3882, "num_tokens": 2404643483.0, "step": 8602 }, { "epoch": 3.151330554664957, "grad_norm": 0.14595168613710188, "learning_rate": 1.544052532950226e-05, "loss": 0.4007, "num_tokens": 2405471625.0, "step": 8603 }, { "epoch": 3.151696972472862, "grad_norm": 0.14103550541709065, "learning_rate": 1.5436548158721257e-05, "loss": 0.4278, "num_tokens": 2406285101.0, "step": 8604 }, { "epoch": 3.1520633902807678, "grad_norm": 0.14235668219720785, "learning_rate": 1.543257135747567e-05, "loss": 0.3945, "num_tokens": 2407029361.0, "step": 8605 }, { "epoch": 3.152429808088673, "grad_norm": 0.15875530166736368, "learning_rate": 1.5428594925989398e-05, "loss": 0.3997, "num_tokens": 2407677054.0, "step": 8606 }, { "epoch": 3.1527962258965787, "grad_norm": 0.13747931044016068, "learning_rate": 1.542461886448635e-05, "loss": 0.3754, "num_tokens": 2408437972.0, "step": 8607 }, { "epoch": 3.153162643704484, "grad_norm": 0.13326526026010943, "learning_rate": 1.5420643173190364e-05, "loss": 0.41, "num_tokens": 2409262430.0, "step": 8608 }, { "epoch": 3.1535290615123897, "grad_norm": 0.13974529612818665, "learning_rate": 1.541666785232528e-05, "loss": 0.4049, "num_tokens": 2410058187.0, "step": 8609 }, { "epoch": 3.153895479320295, "grad_norm": 0.23633924451050145, "learning_rate": 1.541269290211493e-05, "loss": 0.3897, "num_tokens": 2410770527.0, "step": 8610 }, { "epoch": 3.1542618971282006, "grad_norm": 0.15708797254471693, "learning_rate": 1.5408718322783097e-05, "loss": 0.384, "num_tokens": 2411448911.0, "step": 8611 }, { "epoch": 3.154628314936106, "grad_norm": 0.1433213630390766, "learning_rate": 1.5404744114553554e-05, "loss": 0.3897, "num_tokens": 2412126620.0, "step": 8612 }, { "epoch": 3.1549947327440115, "grad_norm": 0.1460944670486191, "learning_rate": 1.540077027765007e-05, "loss": 0.3937, "num_tokens": 2412813769.0, "step": 8613 }, { "epoch": 3.155361150551917, "grad_norm": 0.13462479353815474, "learning_rate": 1.539679681229638e-05, "loss": 0.3875, "num_tokens": 2413641912.0, "step": 8614 }, { "epoch": 3.1557275683598225, "grad_norm": 0.1452842611278584, "learning_rate": 1.53928237187162e-05, "loss": 0.4304, "num_tokens": 2414348017.0, "step": 8615 }, { "epoch": 3.1560939861677277, "grad_norm": 0.1371861477169824, "learning_rate": 1.5388850997133204e-05, "loss": 0.4233, "num_tokens": 2415286225.0, "step": 8616 }, { "epoch": 3.1564604039756334, "grad_norm": 0.13576100303399588, "learning_rate": 1.5384878647771078e-05, "loss": 0.4032, "num_tokens": 2416094315.0, "step": 8617 }, { "epoch": 3.1568268217835387, "grad_norm": 0.13427572508225238, "learning_rate": 1.538090667085348e-05, "loss": 0.3673, "num_tokens": 2416885183.0, "step": 8618 }, { "epoch": 3.157193239591444, "grad_norm": 0.14189857418765758, "learning_rate": 1.5376935066604028e-05, "loss": 0.4181, "num_tokens": 2417712924.0, "step": 8619 }, { "epoch": 3.1575596573993496, "grad_norm": 0.13883884019118634, "learning_rate": 1.5372963835246334e-05, "loss": 0.4109, "num_tokens": 2418493780.0, "step": 8620 }, { "epoch": 3.157926075207255, "grad_norm": 0.1459108839200788, "learning_rate": 1.5368992977003993e-05, "loss": 0.4312, "num_tokens": 2419204533.0, "step": 8621 }, { "epoch": 3.1582924930151606, "grad_norm": 0.143795127562497, "learning_rate": 1.536502249210056e-05, "loss": 0.4145, "num_tokens": 2420033452.0, "step": 8622 }, { "epoch": 3.158658910823066, "grad_norm": 0.14276307655190223, "learning_rate": 1.5361052380759592e-05, "loss": 0.4127, "num_tokens": 2420810712.0, "step": 8623 }, { "epoch": 3.1590253286309715, "grad_norm": 0.14393811840446588, "learning_rate": 1.535708264320461e-05, "loss": 0.3796, "num_tokens": 2421505368.0, "step": 8624 }, { "epoch": 3.1593917464388768, "grad_norm": 0.1414000094634378, "learning_rate": 1.535311327965912e-05, "loss": 0.3993, "num_tokens": 2422240852.0, "step": 8625 }, { "epoch": 3.1597581642467825, "grad_norm": 0.14291570590969657, "learning_rate": 1.53491442903466e-05, "loss": 0.4203, "num_tokens": 2423091110.0, "step": 8626 }, { "epoch": 3.1601245820546877, "grad_norm": 0.14169751467470565, "learning_rate": 1.534517567549052e-05, "loss": 0.4242, "num_tokens": 2423899457.0, "step": 8627 }, { "epoch": 3.1604909998625934, "grad_norm": 0.13341667929985332, "learning_rate": 1.5341207435314315e-05, "loss": 0.4059, "num_tokens": 2424752391.0, "step": 8628 }, { "epoch": 3.1608574176704987, "grad_norm": 0.14197338656901295, "learning_rate": 1.5337239570041406e-05, "loss": 0.4233, "num_tokens": 2425595395.0, "step": 8629 }, { "epoch": 3.1612238354784044, "grad_norm": 0.14699037052741243, "learning_rate": 1.5333272079895188e-05, "loss": 0.4085, "num_tokens": 2426318656.0, "step": 8630 }, { "epoch": 3.1615902532863096, "grad_norm": 0.1620495151076569, "learning_rate": 1.5329304965099047e-05, "loss": 0.4174, "num_tokens": 2426979542.0, "step": 8631 }, { "epoch": 3.1619566710942153, "grad_norm": 0.13528982630056421, "learning_rate": 1.532533822587634e-05, "loss": 0.3943, "num_tokens": 2427772330.0, "step": 8632 }, { "epoch": 3.1623230889021205, "grad_norm": 0.13493662813006774, "learning_rate": 1.5321371862450393e-05, "loss": 0.402, "num_tokens": 2428553591.0, "step": 8633 }, { "epoch": 3.1626895067100262, "grad_norm": 0.14908082045791426, "learning_rate": 1.531740587504452e-05, "loss": 0.4154, "num_tokens": 2429352431.0, "step": 8634 }, { "epoch": 3.1630559245179315, "grad_norm": 0.1538287531800044, "learning_rate": 1.5313440263882026e-05, "loss": 0.3889, "num_tokens": 2430052216.0, "step": 8635 }, { "epoch": 3.163422342325837, "grad_norm": 0.13171198990888633, "learning_rate": 1.5309475029186175e-05, "loss": 0.4214, "num_tokens": 2430891404.0, "step": 8636 }, { "epoch": 3.1637887601337424, "grad_norm": 0.14968811301511964, "learning_rate": 1.5305510171180214e-05, "loss": 0.417, "num_tokens": 2431546890.0, "step": 8637 }, { "epoch": 3.164155177941648, "grad_norm": 0.1526018629769454, "learning_rate": 1.530154569008738e-05, "loss": 0.4459, "num_tokens": 2432314237.0, "step": 8638 }, { "epoch": 3.1645215957495534, "grad_norm": 0.14381717547124462, "learning_rate": 1.5297581586130874e-05, "loss": 0.434, "num_tokens": 2433160367.0, "step": 8639 }, { "epoch": 3.164888013557459, "grad_norm": 0.12925902181159293, "learning_rate": 1.529361785953389e-05, "loss": 0.3962, "num_tokens": 2434011668.0, "step": 8640 }, { "epoch": 3.1652544313653643, "grad_norm": 0.1421447186579698, "learning_rate": 1.528965451051959e-05, "loss": 0.3915, "num_tokens": 2434836484.0, "step": 8641 }, { "epoch": 3.16562084917327, "grad_norm": 0.14943575383867602, "learning_rate": 1.528569153931112e-05, "loss": 0.3952, "num_tokens": 2435545819.0, "step": 8642 }, { "epoch": 3.1659872669811753, "grad_norm": 0.125281133496658, "learning_rate": 1.5281728946131594e-05, "loss": 0.3852, "num_tokens": 2436332284.0, "step": 8643 }, { "epoch": 3.1663536847890805, "grad_norm": 0.13901934726693638, "learning_rate": 1.527776673120413e-05, "loss": 0.4012, "num_tokens": 2437174589.0, "step": 8644 }, { "epoch": 3.166720102596986, "grad_norm": 0.14588904899792385, "learning_rate": 1.5273804894751795e-05, "loss": 0.4028, "num_tokens": 2437866107.0, "step": 8645 }, { "epoch": 3.1670865204048915, "grad_norm": 0.1456756343316248, "learning_rate": 1.5269843436997662e-05, "loss": 0.4089, "num_tokens": 2438619985.0, "step": 8646 }, { "epoch": 3.167452938212797, "grad_norm": 0.12921110952320192, "learning_rate": 1.526588235816475e-05, "loss": 0.4169, "num_tokens": 2439535008.0, "step": 8647 }, { "epoch": 3.1678193560207024, "grad_norm": 0.1423169943885719, "learning_rate": 1.52619216584761e-05, "loss": 0.412, "num_tokens": 2440336104.0, "step": 8648 }, { "epoch": 3.168185773828608, "grad_norm": 0.15010941971645564, "learning_rate": 1.5257961338154687e-05, "loss": 0.4455, "num_tokens": 2441041132.0, "step": 8649 }, { "epoch": 3.1685521916365134, "grad_norm": 0.1462225023884169, "learning_rate": 1.5254001397423488e-05, "loss": 0.4007, "num_tokens": 2441806498.0, "step": 8650 }, { "epoch": 3.168918609444419, "grad_norm": 0.1295106516725792, "learning_rate": 1.5250041836505456e-05, "loss": 0.4197, "num_tokens": 2442682347.0, "step": 8651 }, { "epoch": 3.1692850272523243, "grad_norm": 0.14915467427623205, "learning_rate": 1.5246082655623537e-05, "loss": 0.4256, "num_tokens": 2443450836.0, "step": 8652 }, { "epoch": 3.16965144506023, "grad_norm": 0.1403041943924986, "learning_rate": 1.5242123855000626e-05, "loss": 0.3883, "num_tokens": 2444232994.0, "step": 8653 }, { "epoch": 3.1700178628681352, "grad_norm": 0.1402299840214233, "learning_rate": 1.5238165434859609e-05, "loss": 0.409, "num_tokens": 2445013606.0, "step": 8654 }, { "epoch": 3.170384280676041, "grad_norm": 0.1453921041336433, "learning_rate": 1.5234207395423365e-05, "loss": 0.398, "num_tokens": 2445793876.0, "step": 8655 }, { "epoch": 3.170750698483946, "grad_norm": 0.1476038028188478, "learning_rate": 1.5230249736914728e-05, "loss": 0.3612, "num_tokens": 2446514806.0, "step": 8656 }, { "epoch": 3.171117116291852, "grad_norm": 0.14131667568736575, "learning_rate": 1.5226292459556531e-05, "loss": 0.4136, "num_tokens": 2447241231.0, "step": 8657 }, { "epoch": 3.171483534099757, "grad_norm": 0.1398590983592592, "learning_rate": 1.5222335563571568e-05, "loss": 0.4012, "num_tokens": 2447989241.0, "step": 8658 }, { "epoch": 3.171849951907663, "grad_norm": 0.1419832927361045, "learning_rate": 1.5218379049182628e-05, "loss": 0.4184, "num_tokens": 2448772264.0, "step": 8659 }, { "epoch": 3.172216369715568, "grad_norm": 0.1432026379673984, "learning_rate": 1.5214422916612468e-05, "loss": 0.395, "num_tokens": 2449473305.0, "step": 8660 }, { "epoch": 3.172582787523474, "grad_norm": 0.13402466977424618, "learning_rate": 1.5210467166083826e-05, "loss": 0.4225, "num_tokens": 2450302836.0, "step": 8661 }, { "epoch": 3.172949205331379, "grad_norm": 0.13844035480944497, "learning_rate": 1.520651179781941e-05, "loss": 0.4119, "num_tokens": 2451004820.0, "step": 8662 }, { "epoch": 3.1733156231392847, "grad_norm": 0.13901417407889688, "learning_rate": 1.5202556812041934e-05, "loss": 0.4118, "num_tokens": 2451834439.0, "step": 8663 }, { "epoch": 3.17368204094719, "grad_norm": 0.13897129736266203, "learning_rate": 1.5198602208974054e-05, "loss": 0.4248, "num_tokens": 2452618514.0, "step": 8664 }, { "epoch": 3.1740484587550957, "grad_norm": 0.14714846316296412, "learning_rate": 1.5194647988838433e-05, "loss": 0.404, "num_tokens": 2453372144.0, "step": 8665 }, { "epoch": 3.174414876563001, "grad_norm": 0.13812482976484483, "learning_rate": 1.5190694151857699e-05, "loss": 0.393, "num_tokens": 2454123917.0, "step": 8666 }, { "epoch": 3.1747812943709066, "grad_norm": 0.1354410798230022, "learning_rate": 1.518674069825445e-05, "loss": 0.3779, "num_tokens": 2454946096.0, "step": 8667 }, { "epoch": 3.175147712178812, "grad_norm": 0.14188231151117953, "learning_rate": 1.518278762825129e-05, "loss": 0.4352, "num_tokens": 2455779163.0, "step": 8668 }, { "epoch": 3.1755141299867176, "grad_norm": 0.14404319827277548, "learning_rate": 1.517883494207078e-05, "loss": 0.3849, "num_tokens": 2456515298.0, "step": 8669 }, { "epoch": 3.175880547794623, "grad_norm": 0.14951185849254967, "learning_rate": 1.517488263993546e-05, "loss": 0.3919, "num_tokens": 2457305211.0, "step": 8670 }, { "epoch": 3.1762469656025285, "grad_norm": 0.146010712514575, "learning_rate": 1.5170930722067847e-05, "loss": 0.4047, "num_tokens": 2458132868.0, "step": 8671 }, { "epoch": 3.1766133834104338, "grad_norm": 0.14628505158195473, "learning_rate": 1.5166979188690456e-05, "loss": 0.3673, "num_tokens": 2458785479.0, "step": 8672 }, { "epoch": 3.176979801218339, "grad_norm": 0.13411272726083828, "learning_rate": 1.5163028040025753e-05, "loss": 0.4016, "num_tokens": 2459502426.0, "step": 8673 }, { "epoch": 3.1773462190262447, "grad_norm": 0.15398440341987202, "learning_rate": 1.515907727629621e-05, "loss": 0.3985, "num_tokens": 2460288291.0, "step": 8674 }, { "epoch": 3.17771263683415, "grad_norm": 0.1450916366423785, "learning_rate": 1.5155126897724248e-05, "loss": 0.3954, "num_tokens": 2461113060.0, "step": 8675 }, { "epoch": 3.1780790546420556, "grad_norm": 0.14664766077154284, "learning_rate": 1.5151176904532289e-05, "loss": 0.4345, "num_tokens": 2461939533.0, "step": 8676 }, { "epoch": 3.178445472449961, "grad_norm": 0.14725493930868483, "learning_rate": 1.5147227296942726e-05, "loss": 0.3974, "num_tokens": 2462649490.0, "step": 8677 }, { "epoch": 3.1788118902578666, "grad_norm": 0.1296880224866904, "learning_rate": 1.5143278075177926e-05, "loss": 0.4049, "num_tokens": 2463499759.0, "step": 8678 }, { "epoch": 3.179178308065772, "grad_norm": 0.1434515791350615, "learning_rate": 1.513932923946024e-05, "loss": 0.4031, "num_tokens": 2464268822.0, "step": 8679 }, { "epoch": 3.1795447258736775, "grad_norm": 0.14419351744404907, "learning_rate": 1.5135380790011995e-05, "loss": 0.4003, "num_tokens": 2465106518.0, "step": 8680 }, { "epoch": 3.179911143681583, "grad_norm": 0.14787480321450097, "learning_rate": 1.5131432727055497e-05, "loss": 0.4134, "num_tokens": 2465813569.0, "step": 8681 }, { "epoch": 3.1802775614894885, "grad_norm": 0.14490021895199348, "learning_rate": 1.5127485050813033e-05, "loss": 0.4255, "num_tokens": 2466523492.0, "step": 8682 }, { "epoch": 3.1806439792973937, "grad_norm": 0.15104791092424957, "learning_rate": 1.5123537761506861e-05, "loss": 0.3862, "num_tokens": 2467338762.0, "step": 8683 }, { "epoch": 3.1810103971052994, "grad_norm": 0.14269553190238962, "learning_rate": 1.5119590859359217e-05, "loss": 0.4083, "num_tokens": 2468117788.0, "step": 8684 }, { "epoch": 3.1813768149132047, "grad_norm": 0.13496580003383013, "learning_rate": 1.5115644344592326e-05, "loss": 0.4157, "num_tokens": 2468913233.0, "step": 8685 }, { "epoch": 3.1817432327211104, "grad_norm": 0.14237980916755127, "learning_rate": 1.5111698217428385e-05, "loss": 0.3891, "num_tokens": 2469719386.0, "step": 8686 }, { "epoch": 3.1821096505290156, "grad_norm": 0.15449291423653325, "learning_rate": 1.5107752478089573e-05, "loss": 0.4334, "num_tokens": 2470457166.0, "step": 8687 }, { "epoch": 3.1824760683369213, "grad_norm": 0.1447917497392947, "learning_rate": 1.5103807126798029e-05, "loss": 0.4202, "num_tokens": 2471219703.0, "step": 8688 }, { "epoch": 3.1828424861448266, "grad_norm": 0.1462253554736254, "learning_rate": 1.5099862163775889e-05, "loss": 0.4102, "num_tokens": 2471903766.0, "step": 8689 }, { "epoch": 3.1832089039527323, "grad_norm": 0.14726071524722792, "learning_rate": 1.5095917589245272e-05, "loss": 0.4127, "num_tokens": 2472702186.0, "step": 8690 }, { "epoch": 3.1835753217606375, "grad_norm": 0.15414174622686078, "learning_rate": 1.5091973403428255e-05, "loss": 0.4055, "num_tokens": 2473369577.0, "step": 8691 }, { "epoch": 3.183941739568543, "grad_norm": 0.14837946033291666, "learning_rate": 1.5088029606546908e-05, "loss": 0.4435, "num_tokens": 2474129160.0, "step": 8692 }, { "epoch": 3.1843081573764485, "grad_norm": 0.13902371224323373, "learning_rate": 1.508408619882328e-05, "loss": 0.4242, "num_tokens": 2474963599.0, "step": 8693 }, { "epoch": 3.184674575184354, "grad_norm": 0.1421346644349941, "learning_rate": 1.5080143180479381e-05, "loss": 0.4048, "num_tokens": 2475719368.0, "step": 8694 }, { "epoch": 3.1850409929922594, "grad_norm": 0.13325992988934682, "learning_rate": 1.5076200551737214e-05, "loss": 0.4053, "num_tokens": 2476517139.0, "step": 8695 }, { "epoch": 3.185407410800165, "grad_norm": 0.15372002854053843, "learning_rate": 1.5072258312818764e-05, "loss": 0.4099, "num_tokens": 2477204141.0, "step": 8696 }, { "epoch": 3.1857738286080703, "grad_norm": 0.15112444314694878, "learning_rate": 1.506831646394598e-05, "loss": 0.4491, "num_tokens": 2477906631.0, "step": 8697 }, { "epoch": 3.1861402464159756, "grad_norm": 0.14438601985581528, "learning_rate": 1.5064375005340796e-05, "loss": 0.3917, "num_tokens": 2478616878.0, "step": 8698 }, { "epoch": 3.1865066642238813, "grad_norm": 0.13508428545448414, "learning_rate": 1.5060433937225133e-05, "loss": 0.4174, "num_tokens": 2479465919.0, "step": 8699 }, { "epoch": 3.1868730820317865, "grad_norm": 0.145860395198018, "learning_rate": 1.5056493259820868e-05, "loss": 0.4358, "num_tokens": 2480236898.0, "step": 8700 }, { "epoch": 3.1872394998396922, "grad_norm": 0.16045413897853983, "learning_rate": 1.5052552973349879e-05, "loss": 0.4147, "num_tokens": 2480875821.0, "step": 8701 }, { "epoch": 3.1876059176475975, "grad_norm": 0.14546992637493183, "learning_rate": 1.5048613078034002e-05, "loss": 0.4, "num_tokens": 2481597722.0, "step": 8702 }, { "epoch": 3.187972335455503, "grad_norm": 0.13374259783181236, "learning_rate": 1.5044673574095074e-05, "loss": 0.3977, "num_tokens": 2482443382.0, "step": 8703 }, { "epoch": 3.1883387532634084, "grad_norm": 0.14789714174572732, "learning_rate": 1.5040734461754891e-05, "loss": 0.4011, "num_tokens": 2483203999.0, "step": 8704 }, { "epoch": 3.188705171071314, "grad_norm": 0.14394617519575248, "learning_rate": 1.5036795741235226e-05, "loss": 0.4023, "num_tokens": 2484016518.0, "step": 8705 }, { "epoch": 3.1890715888792194, "grad_norm": 0.1473315833566474, "learning_rate": 1.503285741275784e-05, "loss": 0.4195, "num_tokens": 2484718446.0, "step": 8706 }, { "epoch": 3.189438006687125, "grad_norm": 0.1324103504353287, "learning_rate": 1.502891947654448e-05, "loss": 0.4093, "num_tokens": 2485531294.0, "step": 8707 }, { "epoch": 3.1898044244950303, "grad_norm": 0.1457103907958806, "learning_rate": 1.5024981932816852e-05, "loss": 0.4051, "num_tokens": 2486352540.0, "step": 8708 }, { "epoch": 3.190170842302936, "grad_norm": 0.1553734755999064, "learning_rate": 1.5021044781796636e-05, "loss": 0.4168, "num_tokens": 2487111792.0, "step": 8709 }, { "epoch": 3.1905372601108413, "grad_norm": 0.15399877545291688, "learning_rate": 1.501710802370552e-05, "loss": 0.414, "num_tokens": 2487876592.0, "step": 8710 }, { "epoch": 3.190903677918747, "grad_norm": 0.1433872520685645, "learning_rate": 1.5013171658765138e-05, "loss": 0.4207, "num_tokens": 2488764043.0, "step": 8711 }, { "epoch": 3.191270095726652, "grad_norm": 0.15403900352378022, "learning_rate": 1.5009235687197126e-05, "loss": 0.4322, "num_tokens": 2489438000.0, "step": 8712 }, { "epoch": 3.191636513534558, "grad_norm": 0.1561825136526477, "learning_rate": 1.500530010922308e-05, "loss": 0.4075, "num_tokens": 2490106202.0, "step": 8713 }, { "epoch": 3.192002931342463, "grad_norm": 0.15211510727693406, "learning_rate": 1.5001364925064583e-05, "loss": 0.409, "num_tokens": 2490887642.0, "step": 8714 }, { "epoch": 3.192369349150369, "grad_norm": 0.15559435224809678, "learning_rate": 1.4997430134943192e-05, "loss": 0.462, "num_tokens": 2491580131.0, "step": 8715 }, { "epoch": 3.192735766958274, "grad_norm": 0.14616730607336734, "learning_rate": 1.4993495739080447e-05, "loss": 0.3928, "num_tokens": 2492373389.0, "step": 8716 }, { "epoch": 3.19310218476618, "grad_norm": 0.14707225067513205, "learning_rate": 1.4989561737697851e-05, "loss": 0.4311, "num_tokens": 2493161429.0, "step": 8717 }, { "epoch": 3.193468602574085, "grad_norm": 0.15740507573107457, "learning_rate": 1.4985628131016916e-05, "loss": 0.3996, "num_tokens": 2494064794.0, "step": 8718 }, { "epoch": 3.1938350203819907, "grad_norm": 0.13454172130426617, "learning_rate": 1.4981694919259097e-05, "loss": 0.3835, "num_tokens": 2494816883.0, "step": 8719 }, { "epoch": 3.194201438189896, "grad_norm": 0.14388235941589606, "learning_rate": 1.497776210264585e-05, "loss": 0.4105, "num_tokens": 2495566933.0, "step": 8720 }, { "epoch": 3.1945678559978017, "grad_norm": 0.1510386426884579, "learning_rate": 1.4973829681398595e-05, "loss": 0.4058, "num_tokens": 2496291210.0, "step": 8721 }, { "epoch": 3.194934273805707, "grad_norm": 0.14687926287359498, "learning_rate": 1.496989765573873e-05, "loss": 0.4233, "num_tokens": 2497112933.0, "step": 8722 }, { "epoch": 3.195300691613612, "grad_norm": 0.14329904861326545, "learning_rate": 1.4965966025887642e-05, "loss": 0.4138, "num_tokens": 2497892720.0, "step": 8723 }, { "epoch": 3.195667109421518, "grad_norm": 0.14595905725049071, "learning_rate": 1.4962034792066695e-05, "loss": 0.4081, "num_tokens": 2498714965.0, "step": 8724 }, { "epoch": 3.1960335272294236, "grad_norm": 0.1533136180037896, "learning_rate": 1.4958103954497225e-05, "loss": 0.4419, "num_tokens": 2499438540.0, "step": 8725 }, { "epoch": 3.196399945037329, "grad_norm": 0.14244019210555478, "learning_rate": 1.4954173513400535e-05, "loss": 0.4311, "num_tokens": 2500187660.0, "step": 8726 }, { "epoch": 3.196766362845234, "grad_norm": 0.1496377284297993, "learning_rate": 1.4950243468997925e-05, "loss": 0.4023, "num_tokens": 2500875702.0, "step": 8727 }, { "epoch": 3.1971327806531398, "grad_norm": 0.14496183009572097, "learning_rate": 1.4946313821510655e-05, "loss": 0.4154, "num_tokens": 2501561626.0, "step": 8728 }, { "epoch": 3.197499198461045, "grad_norm": 0.14617903513590855, "learning_rate": 1.4942384571159987e-05, "loss": 0.4481, "num_tokens": 2502365245.0, "step": 8729 }, { "epoch": 3.1978656162689507, "grad_norm": 0.14231345230827067, "learning_rate": 1.4938455718167131e-05, "loss": 0.4104, "num_tokens": 2503156544.0, "step": 8730 }, { "epoch": 3.198232034076856, "grad_norm": 0.14039017717605845, "learning_rate": 1.4934527262753303e-05, "loss": 0.3913, "num_tokens": 2503887628.0, "step": 8731 }, { "epoch": 3.1985984518847617, "grad_norm": 0.1386268240068354, "learning_rate": 1.4930599205139675e-05, "loss": 0.4034, "num_tokens": 2504734579.0, "step": 8732 }, { "epoch": 3.198964869692667, "grad_norm": 0.1432337633280196, "learning_rate": 1.4926671545547402e-05, "loss": 0.4173, "num_tokens": 2505447541.0, "step": 8733 }, { "epoch": 3.1993312875005726, "grad_norm": 0.1321570260541699, "learning_rate": 1.4922744284197622e-05, "loss": 0.3837, "num_tokens": 2506242117.0, "step": 8734 }, { "epoch": 3.199697705308478, "grad_norm": 0.13264957003652816, "learning_rate": 1.4918817421311454e-05, "loss": 0.4069, "num_tokens": 2507055487.0, "step": 8735 }, { "epoch": 3.2000641231163836, "grad_norm": 0.14108888511150505, "learning_rate": 1.4914890957109976e-05, "loss": 0.3999, "num_tokens": 2507855299.0, "step": 8736 }, { "epoch": 3.200430540924289, "grad_norm": 0.13383710556964434, "learning_rate": 1.4910964891814268e-05, "loss": 0.4042, "num_tokens": 2508573626.0, "step": 8737 }, { "epoch": 3.2007969587321945, "grad_norm": 0.13039933658894715, "learning_rate": 1.4907039225645367e-05, "loss": 0.4361, "num_tokens": 2509455684.0, "step": 8738 }, { "epoch": 3.2011633765400997, "grad_norm": 0.15382808649584054, "learning_rate": 1.4903113958824293e-05, "loss": 0.419, "num_tokens": 2510091332.0, "step": 8739 }, { "epoch": 3.2015297943480054, "grad_norm": 0.14870314716500097, "learning_rate": 1.4899189091572055e-05, "loss": 0.4102, "num_tokens": 2510792581.0, "step": 8740 }, { "epoch": 3.2018962121559107, "grad_norm": 0.1232616955442393, "learning_rate": 1.4895264624109626e-05, "loss": 0.3824, "num_tokens": 2511718418.0, "step": 8741 }, { "epoch": 3.2022626299638164, "grad_norm": 0.13935339214006076, "learning_rate": 1.489134055665797e-05, "loss": 0.4361, "num_tokens": 2512461002.0, "step": 8742 }, { "epoch": 3.2026290477717216, "grad_norm": 0.15076156461655638, "learning_rate": 1.4887416889438005e-05, "loss": 0.4343, "num_tokens": 2513264663.0, "step": 8743 }, { "epoch": 3.2029954655796273, "grad_norm": 0.1324438156394471, "learning_rate": 1.4883493622670653e-05, "loss": 0.4078, "num_tokens": 2514100589.0, "step": 8744 }, { "epoch": 3.2033618833875326, "grad_norm": 0.1347530146645402, "learning_rate": 1.4879570756576791e-05, "loss": 0.3932, "num_tokens": 2514868718.0, "step": 8745 }, { "epoch": 3.2037283011954383, "grad_norm": 0.1458495933316685, "learning_rate": 1.4875648291377296e-05, "loss": 0.4422, "num_tokens": 2515594808.0, "step": 8746 }, { "epoch": 3.2040947190033435, "grad_norm": 0.13392661298443895, "learning_rate": 1.4871726227293004e-05, "loss": 0.435, "num_tokens": 2516477240.0, "step": 8747 }, { "epoch": 3.2044611368112492, "grad_norm": 0.14103061566643138, "learning_rate": 1.4867804564544738e-05, "loss": 0.4031, "num_tokens": 2517251505.0, "step": 8748 }, { "epoch": 3.2048275546191545, "grad_norm": 0.1416910324516276, "learning_rate": 1.4863883303353292e-05, "loss": 0.4037, "num_tokens": 2518092429.0, "step": 8749 }, { "epoch": 3.20519397242706, "grad_norm": 0.14970457098376846, "learning_rate": 1.4859962443939444e-05, "loss": 0.3997, "num_tokens": 2518812248.0, "step": 8750 }, { "epoch": 3.2055603902349654, "grad_norm": 0.1491432804012586, "learning_rate": 1.4856041986523946e-05, "loss": 0.4116, "num_tokens": 2519502462.0, "step": 8751 }, { "epoch": 3.2059268080428707, "grad_norm": 0.14116077893536147, "learning_rate": 1.4852121931327524e-05, "loss": 0.417, "num_tokens": 2520320941.0, "step": 8752 }, { "epoch": 3.2062932258507764, "grad_norm": 0.1429654216711921, "learning_rate": 1.484820227857089e-05, "loss": 0.3757, "num_tokens": 2521126532.0, "step": 8753 }, { "epoch": 3.2066596436586816, "grad_norm": 0.1423763754486087, "learning_rate": 1.4844283028474725e-05, "loss": 0.4193, "num_tokens": 2521878378.0, "step": 8754 }, { "epoch": 3.2070260614665873, "grad_norm": 0.14765500034379603, "learning_rate": 1.4840364181259692e-05, "loss": 0.4072, "num_tokens": 2522581348.0, "step": 8755 }, { "epoch": 3.2073924792744926, "grad_norm": 0.15336361584340855, "learning_rate": 1.4836445737146422e-05, "loss": 0.4177, "num_tokens": 2523268653.0, "step": 8756 }, { "epoch": 3.2077588970823983, "grad_norm": 0.1610788661347038, "learning_rate": 1.4832527696355543e-05, "loss": 0.3847, "num_tokens": 2524006465.0, "step": 8757 }, { "epoch": 3.2081253148903035, "grad_norm": 0.12987358577211083, "learning_rate": 1.4828610059107646e-05, "loss": 0.3914, "num_tokens": 2524843479.0, "step": 8758 }, { "epoch": 3.208491732698209, "grad_norm": 0.14569327277272, "learning_rate": 1.4824692825623297e-05, "loss": 0.3956, "num_tokens": 2525560619.0, "step": 8759 }, { "epoch": 3.2088581505061144, "grad_norm": 0.14296702010577658, "learning_rate": 1.4820775996123047e-05, "loss": 0.4232, "num_tokens": 2526317574.0, "step": 8760 }, { "epoch": 3.20922456831402, "grad_norm": 0.142284818339155, "learning_rate": 1.4816859570827417e-05, "loss": 0.4025, "num_tokens": 2527098581.0, "step": 8761 }, { "epoch": 3.2095909861219254, "grad_norm": 0.15805459166847588, "learning_rate": 1.4812943549956921e-05, "loss": 0.3896, "num_tokens": 2527781160.0, "step": 8762 }, { "epoch": 3.209957403929831, "grad_norm": 0.1364368187582133, "learning_rate": 1.4809027933732032e-05, "loss": 0.403, "num_tokens": 2528553825.0, "step": 8763 }, { "epoch": 3.2103238217377363, "grad_norm": 0.14495692090459933, "learning_rate": 1.4805112722373199e-05, "loss": 0.4262, "num_tokens": 2529346970.0, "step": 8764 }, { "epoch": 3.210690239545642, "grad_norm": 0.1375944097449793, "learning_rate": 1.4801197916100871e-05, "loss": 0.3994, "num_tokens": 2530102340.0, "step": 8765 }, { "epoch": 3.2110566573535473, "grad_norm": 0.13717409826381827, "learning_rate": 1.4797283515135448e-05, "loss": 0.3882, "num_tokens": 2530875159.0, "step": 8766 }, { "epoch": 3.211423075161453, "grad_norm": 0.16093183981156417, "learning_rate": 1.479336951969732e-05, "loss": 0.3986, "num_tokens": 2531485375.0, "step": 8767 }, { "epoch": 3.2117894929693582, "grad_norm": 0.14019679778217561, "learning_rate": 1.4789455930006855e-05, "loss": 0.3999, "num_tokens": 2532313242.0, "step": 8768 }, { "epoch": 3.212155910777264, "grad_norm": 0.1343616212518773, "learning_rate": 1.47855427462844e-05, "loss": 0.4065, "num_tokens": 2533163095.0, "step": 8769 }, { "epoch": 3.212522328585169, "grad_norm": 0.14552204886702633, "learning_rate": 1.4781629968750272e-05, "loss": 0.3905, "num_tokens": 2533850545.0, "step": 8770 }, { "epoch": 3.212888746393075, "grad_norm": 0.12953239268471642, "learning_rate": 1.4777717597624767e-05, "loss": 0.4024, "num_tokens": 2534659055.0, "step": 8771 }, { "epoch": 3.21325516420098, "grad_norm": 0.1488970726069808, "learning_rate": 1.4773805633128154e-05, "loss": 0.4392, "num_tokens": 2535436311.0, "step": 8772 }, { "epoch": 3.213621582008886, "grad_norm": 0.14394674057966803, "learning_rate": 1.4769894075480693e-05, "loss": 0.3942, "num_tokens": 2536130569.0, "step": 8773 }, { "epoch": 3.213987999816791, "grad_norm": 0.15678561950607758, "learning_rate": 1.476598292490261e-05, "loss": 0.4188, "num_tokens": 2536866764.0, "step": 8774 }, { "epoch": 3.2143544176246968, "grad_norm": 0.13852828065530062, "learning_rate": 1.4762072181614117e-05, "loss": 0.4013, "num_tokens": 2537622866.0, "step": 8775 }, { "epoch": 3.214720835432602, "grad_norm": 0.14778508866897538, "learning_rate": 1.4758161845835384e-05, "loss": 0.4291, "num_tokens": 2538384591.0, "step": 8776 }, { "epoch": 3.2150872532405073, "grad_norm": 0.14951462826215617, "learning_rate": 1.4754251917786575e-05, "loss": 0.4115, "num_tokens": 2539089911.0, "step": 8777 }, { "epoch": 3.215453671048413, "grad_norm": 0.13584737860296164, "learning_rate": 1.4750342397687822e-05, "loss": 0.4101, "num_tokens": 2539929813.0, "step": 8778 }, { "epoch": 3.2158200888563186, "grad_norm": 0.14572984921043108, "learning_rate": 1.4746433285759257e-05, "loss": 0.3874, "num_tokens": 2540637051.0, "step": 8779 }, { "epoch": 3.216186506664224, "grad_norm": 0.1413368120755774, "learning_rate": 1.4742524582220958e-05, "loss": 0.3881, "num_tokens": 2541370687.0, "step": 8780 }, { "epoch": 3.216552924472129, "grad_norm": 0.15606989431448695, "learning_rate": 1.4738616287292988e-05, "loss": 0.4264, "num_tokens": 2542098162.0, "step": 8781 }, { "epoch": 3.216919342280035, "grad_norm": 0.1338197799994512, "learning_rate": 1.47347084011954e-05, "loss": 0.3869, "num_tokens": 2542898024.0, "step": 8782 }, { "epoch": 3.21728576008794, "grad_norm": 0.13484085184683078, "learning_rate": 1.4730800924148207e-05, "loss": 0.3703, "num_tokens": 2543668185.0, "step": 8783 }, { "epoch": 3.217652177895846, "grad_norm": 0.15137196773920703, "learning_rate": 1.4726893856371423e-05, "loss": 0.4062, "num_tokens": 2544287237.0, "step": 8784 }, { "epoch": 3.218018595703751, "grad_norm": 0.1639113128363252, "learning_rate": 1.4722987198085008e-05, "loss": 0.4276, "num_tokens": 2545118938.0, "step": 8785 }, { "epoch": 3.2183850135116567, "grad_norm": 0.14737672006670066, "learning_rate": 1.4719080949508931e-05, "loss": 0.3937, "num_tokens": 2545812883.0, "step": 8786 }, { "epoch": 3.218751431319562, "grad_norm": 0.14551090900216404, "learning_rate": 1.4715175110863098e-05, "loss": 0.4178, "num_tokens": 2546623245.0, "step": 8787 }, { "epoch": 3.2191178491274677, "grad_norm": 0.13056836036872463, "learning_rate": 1.4711269682367441e-05, "loss": 0.3945, "num_tokens": 2547519258.0, "step": 8788 }, { "epoch": 3.219484266935373, "grad_norm": 0.15072262493421018, "learning_rate": 1.4707364664241822e-05, "loss": 0.4347, "num_tokens": 2548272099.0, "step": 8789 }, { "epoch": 3.2198506847432786, "grad_norm": 0.14130423360545025, "learning_rate": 1.4703460056706118e-05, "loss": 0.3899, "num_tokens": 2549050346.0, "step": 8790 }, { "epoch": 3.220217102551184, "grad_norm": 0.14802486281829555, "learning_rate": 1.4699555859980157e-05, "loss": 0.401, "num_tokens": 2549753353.0, "step": 8791 }, { "epoch": 3.2205835203590896, "grad_norm": 0.13912243060486018, "learning_rate": 1.4695652074283757e-05, "loss": 0.396, "num_tokens": 2550445938.0, "step": 8792 }, { "epoch": 3.220949938166995, "grad_norm": 0.1554598223371933, "learning_rate": 1.4691748699836707e-05, "loss": 0.4086, "num_tokens": 2551279754.0, "step": 8793 }, { "epoch": 3.2213163559749005, "grad_norm": 0.1509284064250975, "learning_rate": 1.468784573685877e-05, "loss": 0.4348, "num_tokens": 2552014573.0, "step": 8794 }, { "epoch": 3.2216827737828058, "grad_norm": 0.13224858068993328, "learning_rate": 1.4683943185569699e-05, "loss": 0.389, "num_tokens": 2552812347.0, "step": 8795 }, { "epoch": 3.2220491915907115, "grad_norm": 0.14022446511783612, "learning_rate": 1.4680041046189214e-05, "loss": 0.412, "num_tokens": 2553607473.0, "step": 8796 }, { "epoch": 3.2224156093986167, "grad_norm": 0.150033013770025, "learning_rate": 1.467613931893701e-05, "loss": 0.4408, "num_tokens": 2554252120.0, "step": 8797 }, { "epoch": 3.2227820272065224, "grad_norm": 0.14909284387111155, "learning_rate": 1.4672238004032763e-05, "loss": 0.4208, "num_tokens": 2555027355.0, "step": 8798 }, { "epoch": 3.2231484450144277, "grad_norm": 0.14437947720739341, "learning_rate": 1.4668337101696129e-05, "loss": 0.3985, "num_tokens": 2555754860.0, "step": 8799 }, { "epoch": 3.2235148628223333, "grad_norm": 0.12875278753729286, "learning_rate": 1.466443661214672e-05, "loss": 0.4301, "num_tokens": 2556621656.0, "step": 8800 }, { "epoch": 3.2238812806302386, "grad_norm": 0.14374572934989777, "learning_rate": 1.4660536535604165e-05, "loss": 0.383, "num_tokens": 2557307101.0, "step": 8801 }, { "epoch": 3.2242476984381443, "grad_norm": 0.14454929589233315, "learning_rate": 1.4656636872288036e-05, "loss": 0.4091, "num_tokens": 2558071332.0, "step": 8802 }, { "epoch": 3.2246141162460495, "grad_norm": 0.14802182901330893, "learning_rate": 1.4652737622417888e-05, "loss": 0.4027, "num_tokens": 2558807780.0, "step": 8803 }, { "epoch": 3.2249805340539552, "grad_norm": 0.13910095955576635, "learning_rate": 1.4648838786213256e-05, "loss": 0.4179, "num_tokens": 2559595204.0, "step": 8804 }, { "epoch": 3.2253469518618605, "grad_norm": 0.13359367619410228, "learning_rate": 1.464494036389366e-05, "loss": 0.3937, "num_tokens": 2560469400.0, "step": 8805 }, { "epoch": 3.2257133696697657, "grad_norm": 0.13133761644784703, "learning_rate": 1.4641042355678581e-05, "loss": 0.3945, "num_tokens": 2561260001.0, "step": 8806 }, { "epoch": 3.2260797874776714, "grad_norm": 0.14864785564581723, "learning_rate": 1.4637144761787493e-05, "loss": 0.4186, "num_tokens": 2561957877.0, "step": 8807 }, { "epoch": 3.2264462052855767, "grad_norm": 0.1632824894853858, "learning_rate": 1.4633247582439829e-05, "loss": 0.4382, "num_tokens": 2562567705.0, "step": 8808 }, { "epoch": 3.2268126230934824, "grad_norm": 0.1358983816781044, "learning_rate": 1.4629350817855014e-05, "loss": 0.3536, "num_tokens": 2563365202.0, "step": 8809 }, { "epoch": 3.2271790409013876, "grad_norm": 0.1418368455256302, "learning_rate": 1.462545446825245e-05, "loss": 0.4044, "num_tokens": 2564132413.0, "step": 8810 }, { "epoch": 3.2275454587092933, "grad_norm": 0.131194092053369, "learning_rate": 1.4621558533851486e-05, "loss": 0.3971, "num_tokens": 2564984918.0, "step": 8811 }, { "epoch": 3.2279118765171986, "grad_norm": 0.14203220447645845, "learning_rate": 1.461766301487149e-05, "loss": 0.4026, "num_tokens": 2565676980.0, "step": 8812 }, { "epoch": 3.2282782943251043, "grad_norm": 0.13945883950737048, "learning_rate": 1.4613767911531796e-05, "loss": 0.4056, "num_tokens": 2566391451.0, "step": 8813 }, { "epoch": 3.2286447121330095, "grad_norm": 0.1483515654030651, "learning_rate": 1.4609873224051687e-05, "loss": 0.4216, "num_tokens": 2567128920.0, "step": 8814 }, { "epoch": 3.229011129940915, "grad_norm": 0.14887363584454627, "learning_rate": 1.4605978952650443e-05, "loss": 0.4273, "num_tokens": 2567841596.0, "step": 8815 }, { "epoch": 3.2293775477488205, "grad_norm": 0.1547099894280726, "learning_rate": 1.4602085097547334e-05, "loss": 0.4219, "num_tokens": 2568556386.0, "step": 8816 }, { "epoch": 3.229743965556726, "grad_norm": 0.14694870977714333, "learning_rate": 1.4598191658961574e-05, "loss": 0.3991, "num_tokens": 2569327164.0, "step": 8817 }, { "epoch": 3.2301103833646314, "grad_norm": 0.14219362456617787, "learning_rate": 1.4594298637112384e-05, "loss": 0.3776, "num_tokens": 2570039364.0, "step": 8818 }, { "epoch": 3.230476801172537, "grad_norm": 0.14147116520273503, "learning_rate": 1.4590406032218946e-05, "loss": 0.3804, "num_tokens": 2570848510.0, "step": 8819 }, { "epoch": 3.2308432189804424, "grad_norm": 0.14371369729694564, "learning_rate": 1.4586513844500418e-05, "loss": 0.4175, "num_tokens": 2571636990.0, "step": 8820 }, { "epoch": 3.231209636788348, "grad_norm": 0.14625667378148505, "learning_rate": 1.4582622074175948e-05, "loss": 0.4065, "num_tokens": 2572413366.0, "step": 8821 }, { "epoch": 3.2315760545962533, "grad_norm": 0.14186848133779342, "learning_rate": 1.4578730721464624e-05, "loss": 0.3871, "num_tokens": 2573130145.0, "step": 8822 }, { "epoch": 3.231942472404159, "grad_norm": 0.13453508172175396, "learning_rate": 1.4574839786585569e-05, "loss": 0.396, "num_tokens": 2573820089.0, "step": 8823 }, { "epoch": 3.2323088902120642, "grad_norm": 0.16166494120378402, "learning_rate": 1.4570949269757834e-05, "loss": 0.4124, "num_tokens": 2574537410.0, "step": 8824 }, { "epoch": 3.23267530801997, "grad_norm": 0.14661160013663738, "learning_rate": 1.4567059171200466e-05, "loss": 0.4267, "num_tokens": 2575316368.0, "step": 8825 }, { "epoch": 3.233041725827875, "grad_norm": 0.1441985027042073, "learning_rate": 1.4563169491132491e-05, "loss": 0.3978, "num_tokens": 2576099276.0, "step": 8826 }, { "epoch": 3.233408143635781, "grad_norm": 0.1402507182963235, "learning_rate": 1.4559280229772896e-05, "loss": 0.4386, "num_tokens": 2576902497.0, "step": 8827 }, { "epoch": 3.233774561443686, "grad_norm": 0.140135762840013, "learning_rate": 1.4555391387340652e-05, "loss": 0.3972, "num_tokens": 2577570407.0, "step": 8828 }, { "epoch": 3.234140979251592, "grad_norm": 0.16904539683306793, "learning_rate": 1.4551502964054716e-05, "loss": 0.4164, "num_tokens": 2578179919.0, "step": 8829 }, { "epoch": 3.234507397059497, "grad_norm": 0.1637212734507906, "learning_rate": 1.454761496013402e-05, "loss": 0.4392, "num_tokens": 2578831298.0, "step": 8830 }, { "epoch": 3.2348738148674023, "grad_norm": 0.15223626672201185, "learning_rate": 1.4543727375797456e-05, "loss": 0.4127, "num_tokens": 2579423764.0, "step": 8831 }, { "epoch": 3.235240232675308, "grad_norm": 0.13573982262918782, "learning_rate": 1.4539840211263899e-05, "loss": 0.433, "num_tokens": 2580262896.0, "step": 8832 }, { "epoch": 3.2356066504832137, "grad_norm": 0.148479184095408, "learning_rate": 1.4535953466752216e-05, "loss": 0.3954, "num_tokens": 2580931477.0, "step": 8833 }, { "epoch": 3.235973068291119, "grad_norm": 0.13567104899797586, "learning_rate": 1.4532067142481232e-05, "loss": 0.4155, "num_tokens": 2581760499.0, "step": 8834 }, { "epoch": 3.236339486099024, "grad_norm": 0.13989128977663623, "learning_rate": 1.452818123866976e-05, "loss": 0.4003, "num_tokens": 2582617310.0, "step": 8835 }, { "epoch": 3.23670590390693, "grad_norm": 0.14753458153332677, "learning_rate": 1.4524295755536578e-05, "loss": 0.4055, "num_tokens": 2583343189.0, "step": 8836 }, { "epoch": 3.237072321714835, "grad_norm": 0.14294962147634996, "learning_rate": 1.4520410693300448e-05, "loss": 0.4032, "num_tokens": 2584101443.0, "step": 8837 }, { "epoch": 3.237438739522741, "grad_norm": 0.13724061492700024, "learning_rate": 1.4516526052180106e-05, "loss": 0.4067, "num_tokens": 2584831968.0, "step": 8838 }, { "epoch": 3.237805157330646, "grad_norm": 0.15582524968410225, "learning_rate": 1.4512641832394266e-05, "loss": 0.4084, "num_tokens": 2585498781.0, "step": 8839 }, { "epoch": 3.238171575138552, "grad_norm": 0.14992179564359692, "learning_rate": 1.4508758034161616e-05, "loss": 0.4047, "num_tokens": 2586246701.0, "step": 8840 }, { "epoch": 3.238537992946457, "grad_norm": 0.14706611585165, "learning_rate": 1.4504874657700825e-05, "loss": 0.3657, "num_tokens": 2586957386.0, "step": 8841 }, { "epoch": 3.2389044107543628, "grad_norm": 0.1386119952358398, "learning_rate": 1.4500991703230533e-05, "loss": 0.3909, "num_tokens": 2587692130.0, "step": 8842 }, { "epoch": 3.239270828562268, "grad_norm": 0.16420885421910814, "learning_rate": 1.4497109170969364e-05, "loss": 0.4063, "num_tokens": 2588329481.0, "step": 8843 }, { "epoch": 3.2396372463701737, "grad_norm": 0.13264933576071194, "learning_rate": 1.44932270611359e-05, "loss": 0.3996, "num_tokens": 2589221381.0, "step": 8844 }, { "epoch": 3.240003664178079, "grad_norm": 0.1507244228319892, "learning_rate": 1.4489345373948722e-05, "loss": 0.4139, "num_tokens": 2590009084.0, "step": 8845 }, { "epoch": 3.2403700819859846, "grad_norm": 0.13463022770296426, "learning_rate": 1.4485464109626372e-05, "loss": 0.3934, "num_tokens": 2590881374.0, "step": 8846 }, { "epoch": 3.24073649979389, "grad_norm": 0.1475488405060933, "learning_rate": 1.4481583268387374e-05, "loss": 0.4035, "num_tokens": 2591626975.0, "step": 8847 }, { "epoch": 3.2411029176017956, "grad_norm": 0.1492320794055139, "learning_rate": 1.4477702850450221e-05, "loss": 0.3958, "num_tokens": 2592453611.0, "step": 8848 }, { "epoch": 3.241469335409701, "grad_norm": 0.14430518232482012, "learning_rate": 1.44738228560334e-05, "loss": 0.3889, "num_tokens": 2593206609.0, "step": 8849 }, { "epoch": 3.2418357532176065, "grad_norm": 0.13858267450144077, "learning_rate": 1.446994328535535e-05, "loss": 0.4418, "num_tokens": 2594100343.0, "step": 8850 }, { "epoch": 3.242202171025512, "grad_norm": 0.1408234209714933, "learning_rate": 1.4466064138634513e-05, "loss": 0.428, "num_tokens": 2594886385.0, "step": 8851 }, { "epoch": 3.2425685888334175, "grad_norm": 0.15191564273520117, "learning_rate": 1.4462185416089285e-05, "loss": 0.4233, "num_tokens": 2595608829.0, "step": 8852 }, { "epoch": 3.2429350066413227, "grad_norm": 0.13827161185236722, "learning_rate": 1.4458307117938045e-05, "loss": 0.398, "num_tokens": 2596379087.0, "step": 8853 }, { "epoch": 3.2433014244492284, "grad_norm": 0.138950511040848, "learning_rate": 1.445442924439914e-05, "loss": 0.4001, "num_tokens": 2597175043.0, "step": 8854 }, { "epoch": 3.2436678422571337, "grad_norm": 0.13831741949012546, "learning_rate": 1.4450551795690912e-05, "loss": 0.3754, "num_tokens": 2597899511.0, "step": 8855 }, { "epoch": 3.2440342600650394, "grad_norm": 0.14181368331233887, "learning_rate": 1.4446674772031677e-05, "loss": 0.4055, "num_tokens": 2598708227.0, "step": 8856 }, { "epoch": 3.2444006778729446, "grad_norm": 0.13267840131700584, "learning_rate": 1.4442798173639712e-05, "loss": 0.4158, "num_tokens": 2599632416.0, "step": 8857 }, { "epoch": 3.2447670956808503, "grad_norm": 0.14279097843012908, "learning_rate": 1.4438922000733273e-05, "loss": 0.3766, "num_tokens": 2600332735.0, "step": 8858 }, { "epoch": 3.2451335134887556, "grad_norm": 0.13076736490448776, "learning_rate": 1.44350462535306e-05, "loss": 0.3853, "num_tokens": 2601131848.0, "step": 8859 }, { "epoch": 3.245499931296661, "grad_norm": 0.15224105051348377, "learning_rate": 1.4431170932249904e-05, "loss": 0.4332, "num_tokens": 2601787276.0, "step": 8860 }, { "epoch": 3.2458663491045665, "grad_norm": 0.1439371580948393, "learning_rate": 1.4427296037109368e-05, "loss": 0.3833, "num_tokens": 2602509222.0, "step": 8861 }, { "epoch": 3.2462327669124718, "grad_norm": 0.13069518021263954, "learning_rate": 1.4423421568327163e-05, "loss": 0.389, "num_tokens": 2603341351.0, "step": 8862 }, { "epoch": 3.2465991847203775, "grad_norm": 0.14571263526507075, "learning_rate": 1.4419547526121434e-05, "loss": 0.4072, "num_tokens": 2604137795.0, "step": 8863 }, { "epoch": 3.2469656025282827, "grad_norm": 0.14278367714134388, "learning_rate": 1.441567391071029e-05, "loss": 0.4009, "num_tokens": 2604952122.0, "step": 8864 }, { "epoch": 3.2473320203361884, "grad_norm": 0.13809020159735724, "learning_rate": 1.441180072231183e-05, "loss": 0.4177, "num_tokens": 2605703833.0, "step": 8865 }, { "epoch": 3.2476984381440936, "grad_norm": 0.14304609410642743, "learning_rate": 1.4407927961144106e-05, "loss": 0.3919, "num_tokens": 2606408293.0, "step": 8866 }, { "epoch": 3.2480648559519993, "grad_norm": 0.14014481253220468, "learning_rate": 1.4404055627425182e-05, "loss": 0.3881, "num_tokens": 2607195690.0, "step": 8867 }, { "epoch": 3.2484312737599046, "grad_norm": 0.138143797823876, "learning_rate": 1.4400183721373072e-05, "loss": 0.4416, "num_tokens": 2607985362.0, "step": 8868 }, { "epoch": 3.2487976915678103, "grad_norm": 0.1263211550065716, "learning_rate": 1.4396312243205759e-05, "loss": 0.3881, "num_tokens": 2608777057.0, "step": 8869 }, { "epoch": 3.2491641093757155, "grad_norm": 0.15023485334896425, "learning_rate": 1.439244119314123e-05, "loss": 0.4175, "num_tokens": 2609560284.0, "step": 8870 }, { "epoch": 3.2495305271836212, "grad_norm": 0.1599666235348635, "learning_rate": 1.4388570571397433e-05, "loss": 0.4337, "num_tokens": 2610235262.0, "step": 8871 }, { "epoch": 3.2498969449915265, "grad_norm": 0.1468313882756596, "learning_rate": 1.438470037819228e-05, "loss": 0.4056, "num_tokens": 2610996475.0, "step": 8872 }, { "epoch": 3.250263362799432, "grad_norm": 0.12910442906352587, "learning_rate": 1.4380830613743682e-05, "loss": 0.3937, "num_tokens": 2611805333.0, "step": 8873 }, { "epoch": 3.2506297806073374, "grad_norm": 0.15199078293269846, "learning_rate": 1.4376961278269512e-05, "loss": 0.397, "num_tokens": 2612458033.0, "step": 8874 }, { "epoch": 3.250996198415243, "grad_norm": 0.14681404316427704, "learning_rate": 1.4373092371987612e-05, "loss": 0.3863, "num_tokens": 2613114863.0, "step": 8875 }, { "epoch": 3.2513626162231484, "grad_norm": 0.16051830940940445, "learning_rate": 1.4369223895115822e-05, "loss": 0.4107, "num_tokens": 2613754666.0, "step": 8876 }, { "epoch": 3.251729034031054, "grad_norm": 0.14421266380472472, "learning_rate": 1.4365355847871935e-05, "loss": 0.384, "num_tokens": 2614511612.0, "step": 8877 }, { "epoch": 3.2520954518389593, "grad_norm": 0.15460740250604688, "learning_rate": 1.4361488230473737e-05, "loss": 0.3941, "num_tokens": 2615190390.0, "step": 8878 }, { "epoch": 3.252461869646865, "grad_norm": 0.15749209115766627, "learning_rate": 1.4357621043138985e-05, "loss": 0.4126, "num_tokens": 2615859911.0, "step": 8879 }, { "epoch": 3.2528282874547703, "grad_norm": 0.12986796960886765, "learning_rate": 1.4353754286085404e-05, "loss": 0.3955, "num_tokens": 2616693588.0, "step": 8880 }, { "epoch": 3.253194705262676, "grad_norm": 0.1440879555775772, "learning_rate": 1.4349887959530695e-05, "loss": 0.3959, "num_tokens": 2617480454.0, "step": 8881 }, { "epoch": 3.253561123070581, "grad_norm": 0.14975854489414353, "learning_rate": 1.4346022063692542e-05, "loss": 0.4111, "num_tokens": 2618266709.0, "step": 8882 }, { "epoch": 3.253927540878487, "grad_norm": 0.13686529279490034, "learning_rate": 1.4342156598788606e-05, "loss": 0.4056, "num_tokens": 2619107341.0, "step": 8883 }, { "epoch": 3.254293958686392, "grad_norm": 0.1446051920085295, "learning_rate": 1.4338291565036526e-05, "loss": 0.4109, "num_tokens": 2619951022.0, "step": 8884 }, { "epoch": 3.2546603764942974, "grad_norm": 0.13672642143245067, "learning_rate": 1.4334426962653907e-05, "loss": 0.3922, "num_tokens": 2620741602.0, "step": 8885 }, { "epoch": 3.255026794302203, "grad_norm": 0.15459265598654479, "learning_rate": 1.433056279185833e-05, "loss": 0.4352, "num_tokens": 2621482678.0, "step": 8886 }, { "epoch": 3.255393212110109, "grad_norm": 0.13523024772378606, "learning_rate": 1.4326699052867362e-05, "loss": 0.4154, "num_tokens": 2622332534.0, "step": 8887 }, { "epoch": 3.255759629918014, "grad_norm": 0.1375251288686935, "learning_rate": 1.4322835745898523e-05, "loss": 0.4117, "num_tokens": 2623189997.0, "step": 8888 }, { "epoch": 3.2561260477259193, "grad_norm": 0.1287611806999202, "learning_rate": 1.4318972871169346e-05, "loss": 0.407, "num_tokens": 2623954241.0, "step": 8889 }, { "epoch": 3.256492465533825, "grad_norm": 0.14502396515084867, "learning_rate": 1.4315110428897301e-05, "loss": 0.4082, "num_tokens": 2624688990.0, "step": 8890 }, { "epoch": 3.2568588833417302, "grad_norm": 0.139275150045397, "learning_rate": 1.4311248419299868e-05, "loss": 0.394, "num_tokens": 2625558359.0, "step": 8891 }, { "epoch": 3.257225301149636, "grad_norm": 0.13005854358315214, "learning_rate": 1.4307386842594477e-05, "loss": 0.41, "num_tokens": 2626468091.0, "step": 8892 }, { "epoch": 3.257591718957541, "grad_norm": 0.13264328286227137, "learning_rate": 1.4303525698998542e-05, "loss": 0.4025, "num_tokens": 2627131678.0, "step": 8893 }, { "epoch": 3.257958136765447, "grad_norm": 0.15209576205172498, "learning_rate": 1.4299664988729446e-05, "loss": 0.4309, "num_tokens": 2627992202.0, "step": 8894 }, { "epoch": 3.258324554573352, "grad_norm": 0.14919838692698376, "learning_rate": 1.4295804712004571e-05, "loss": 0.4002, "num_tokens": 2628619358.0, "step": 8895 }, { "epoch": 3.258690972381258, "grad_norm": 0.15832748199835764, "learning_rate": 1.4291944869041242e-05, "loss": 0.4136, "num_tokens": 2629354524.0, "step": 8896 }, { "epoch": 3.259057390189163, "grad_norm": 0.12708582625341797, "learning_rate": 1.4288085460056796e-05, "loss": 0.4217, "num_tokens": 2630164828.0, "step": 8897 }, { "epoch": 3.2594238079970688, "grad_norm": 0.13830354428655875, "learning_rate": 1.4284226485268505e-05, "loss": 0.3817, "num_tokens": 2630910052.0, "step": 8898 }, { "epoch": 3.259790225804974, "grad_norm": 0.14088650250264145, "learning_rate": 1.4280367944893648e-05, "loss": 0.3994, "num_tokens": 2631702511.0, "step": 8899 }, { "epoch": 3.2601566436128797, "grad_norm": 0.1508428642533494, "learning_rate": 1.427650983914946e-05, "loss": 0.4182, "num_tokens": 2632451700.0, "step": 8900 }, { "epoch": 3.260523061420785, "grad_norm": 0.14021563678549387, "learning_rate": 1.4272652168253171e-05, "loss": 0.3654, "num_tokens": 2633138921.0, "step": 8901 }, { "epoch": 3.2608894792286907, "grad_norm": 0.13451752875152057, "learning_rate": 1.426879493242197e-05, "loss": 0.4117, "num_tokens": 2633939671.0, "step": 8902 }, { "epoch": 3.261255897036596, "grad_norm": 0.15400240350509298, "learning_rate": 1.4264938131873023e-05, "loss": 0.4154, "num_tokens": 2634671558.0, "step": 8903 }, { "epoch": 3.2616223148445016, "grad_norm": 0.13888800337213367, "learning_rate": 1.4261081766823485e-05, "loss": 0.3942, "num_tokens": 2635487937.0, "step": 8904 }, { "epoch": 3.261988732652407, "grad_norm": 0.1417088403687406, "learning_rate": 1.4257225837490469e-05, "loss": 0.4173, "num_tokens": 2636246411.0, "step": 8905 }, { "epoch": 3.2623551504603125, "grad_norm": 0.13737049372907376, "learning_rate": 1.4253370344091076e-05, "loss": 0.3939, "num_tokens": 2637043866.0, "step": 8906 }, { "epoch": 3.262721568268218, "grad_norm": 0.1327618524767415, "learning_rate": 1.4249515286842379e-05, "loss": 0.4199, "num_tokens": 2637754428.0, "step": 8907 }, { "epoch": 3.2630879860761235, "grad_norm": 0.1433846026358493, "learning_rate": 1.424566066596142e-05, "loss": 0.4019, "num_tokens": 2638495968.0, "step": 8908 }, { "epoch": 3.2634544038840287, "grad_norm": 0.1426614087793755, "learning_rate": 1.424180648166522e-05, "loss": 0.4056, "num_tokens": 2639203171.0, "step": 8909 }, { "epoch": 3.263820821691934, "grad_norm": 0.14716635303217648, "learning_rate": 1.423795273417079e-05, "loss": 0.3981, "num_tokens": 2639848736.0, "step": 8910 }, { "epoch": 3.2641872394998397, "grad_norm": 0.13076030369896607, "learning_rate": 1.4234099423695086e-05, "loss": 0.3961, "num_tokens": 2640583652.0, "step": 8911 }, { "epoch": 3.2645536573077454, "grad_norm": 0.14144370257150538, "learning_rate": 1.4230246550455076e-05, "loss": 0.4012, "num_tokens": 2641511818.0, "step": 8912 }, { "epoch": 3.2649200751156506, "grad_norm": 0.14654777287933365, "learning_rate": 1.4226394114667671e-05, "loss": 0.4667, "num_tokens": 2642255771.0, "step": 8913 }, { "epoch": 3.265286492923556, "grad_norm": 0.14030312092626285, "learning_rate": 1.4222542116549778e-05, "loss": 0.4188, "num_tokens": 2643074196.0, "step": 8914 }, { "epoch": 3.2656529107314616, "grad_norm": 0.1335365806759458, "learning_rate": 1.421869055631827e-05, "loss": 0.4033, "num_tokens": 2643932549.0, "step": 8915 }, { "epoch": 3.266019328539367, "grad_norm": 0.1446829703319962, "learning_rate": 1.4214839434189988e-05, "loss": 0.3968, "num_tokens": 2644629796.0, "step": 8916 }, { "epoch": 3.2663857463472725, "grad_norm": 0.14231638721629344, "learning_rate": 1.4210988750381764e-05, "loss": 0.4216, "num_tokens": 2645335383.0, "step": 8917 }, { "epoch": 3.2667521641551778, "grad_norm": 0.15174052486914186, "learning_rate": 1.4207138505110414e-05, "loss": 0.4264, "num_tokens": 2646017502.0, "step": 8918 }, { "epoch": 3.2671185819630835, "grad_norm": 0.14723174812515846, "learning_rate": 1.4203288698592697e-05, "loss": 0.3921, "num_tokens": 2646750914.0, "step": 8919 }, { "epoch": 3.2674849997709887, "grad_norm": 0.14209263476310868, "learning_rate": 1.4199439331045373e-05, "loss": 0.412, "num_tokens": 2647575529.0, "step": 8920 }, { "epoch": 3.2678514175788944, "grad_norm": 0.1366637219264227, "learning_rate": 1.4195590402685166e-05, "loss": 0.3922, "num_tokens": 2648331533.0, "step": 8921 }, { "epoch": 3.2682178353867997, "grad_norm": 0.14672078088089702, "learning_rate": 1.4191741913728768e-05, "loss": 0.4337, "num_tokens": 2649000374.0, "step": 8922 }, { "epoch": 3.2685842531947054, "grad_norm": 0.1292185090971889, "learning_rate": 1.4187893864392877e-05, "loss": 0.4065, "num_tokens": 2649872141.0, "step": 8923 }, { "epoch": 3.2689506710026106, "grad_norm": 0.13423132110047406, "learning_rate": 1.4184046254894126e-05, "loss": 0.3885, "num_tokens": 2650679631.0, "step": 8924 }, { "epoch": 3.2693170888105163, "grad_norm": 0.1346633435428643, "learning_rate": 1.4180199085449164e-05, "loss": 0.4251, "num_tokens": 2651576023.0, "step": 8925 }, { "epoch": 3.2696835066184216, "grad_norm": 0.16214243055164115, "learning_rate": 1.417635235627458e-05, "loss": 0.4373, "num_tokens": 2652183963.0, "step": 8926 }, { "epoch": 3.2700499244263272, "grad_norm": 0.14344959357115453, "learning_rate": 1.4172506067586951e-05, "loss": 0.4132, "num_tokens": 2652959325.0, "step": 8927 }, { "epoch": 3.2704163422342325, "grad_norm": 0.14334809802112466, "learning_rate": 1.416866021960284e-05, "loss": 0.4047, "num_tokens": 2653633809.0, "step": 8928 }, { "epoch": 3.270782760042138, "grad_norm": 0.13563312616792564, "learning_rate": 1.4164814812538769e-05, "loss": 0.4306, "num_tokens": 2654537843.0, "step": 8929 }, { "epoch": 3.2711491778500434, "grad_norm": 0.12734685257628034, "learning_rate": 1.4160969846611244e-05, "loss": 0.3697, "num_tokens": 2655329329.0, "step": 8930 }, { "epoch": 3.271515595657949, "grad_norm": 0.1416631339066319, "learning_rate": 1.4157125322036747e-05, "loss": 0.4418, "num_tokens": 2656015438.0, "step": 8931 }, { "epoch": 3.2718820134658544, "grad_norm": 0.1337921012403212, "learning_rate": 1.4153281239031729e-05, "loss": 0.4196, "num_tokens": 2656780331.0, "step": 8932 }, { "epoch": 3.27224843127376, "grad_norm": 0.13586390935927797, "learning_rate": 1.4149437597812617e-05, "loss": 0.396, "num_tokens": 2657602277.0, "step": 8933 }, { "epoch": 3.2726148490816653, "grad_norm": 0.1425971137475909, "learning_rate": 1.4145594398595823e-05, "loss": 0.3957, "num_tokens": 2658392700.0, "step": 8934 }, { "epoch": 3.2729812668895706, "grad_norm": 0.14242646341159912, "learning_rate": 1.4141751641597727e-05, "loss": 0.3965, "num_tokens": 2659227778.0, "step": 8935 }, { "epoch": 3.2733476846974763, "grad_norm": 0.13675744417804853, "learning_rate": 1.4137909327034676e-05, "loss": 0.4037, "num_tokens": 2660030010.0, "step": 8936 }, { "epoch": 3.273714102505382, "grad_norm": 0.14485439142219134, "learning_rate": 1.4134067455123002e-05, "loss": 0.4117, "num_tokens": 2660739923.0, "step": 8937 }, { "epoch": 3.2740805203132872, "grad_norm": 0.13539635782840173, "learning_rate": 1.4130226026079004e-05, "loss": 0.3997, "num_tokens": 2661445504.0, "step": 8938 }, { "epoch": 3.2744469381211925, "grad_norm": 0.14208547204092803, "learning_rate": 1.412638504011898e-05, "loss": 0.4019, "num_tokens": 2662202667.0, "step": 8939 }, { "epoch": 3.274813355929098, "grad_norm": 0.14005791997094658, "learning_rate": 1.4122544497459178e-05, "loss": 0.3926, "num_tokens": 2662913667.0, "step": 8940 }, { "epoch": 3.275179773737004, "grad_norm": 0.14000175579273536, "learning_rate": 1.4118704398315824e-05, "loss": 0.4286, "num_tokens": 2663654557.0, "step": 8941 }, { "epoch": 3.275546191544909, "grad_norm": 0.13819368053172076, "learning_rate": 1.4114864742905123e-05, "loss": 0.4158, "num_tokens": 2664524076.0, "step": 8942 }, { "epoch": 3.2759126093528144, "grad_norm": 0.1323631773698024, "learning_rate": 1.4111025531443253e-05, "loss": 0.4088, "num_tokens": 2665396781.0, "step": 8943 }, { "epoch": 3.27627902716072, "grad_norm": 0.13745664886704573, "learning_rate": 1.4107186764146378e-05, "loss": 0.437, "num_tokens": 2666201454.0, "step": 8944 }, { "epoch": 3.2766454449686253, "grad_norm": 0.13842525480397191, "learning_rate": 1.4103348441230615e-05, "loss": 0.3981, "num_tokens": 2666960112.0, "step": 8945 }, { "epoch": 3.277011862776531, "grad_norm": 0.13863503859371692, "learning_rate": 1.4099510562912086e-05, "loss": 0.4067, "num_tokens": 2667681128.0, "step": 8946 }, { "epoch": 3.2773782805844363, "grad_norm": 0.15077822536152435, "learning_rate": 1.4095673129406864e-05, "loss": 0.4197, "num_tokens": 2668406466.0, "step": 8947 }, { "epoch": 3.277744698392342, "grad_norm": 0.13216284761854533, "learning_rate": 1.4091836140931002e-05, "loss": 0.3996, "num_tokens": 2669232355.0, "step": 8948 }, { "epoch": 3.278111116200247, "grad_norm": 0.1525478447918902, "learning_rate": 1.408799959770052e-05, "loss": 0.3962, "num_tokens": 2669906335.0, "step": 8949 }, { "epoch": 3.278477534008153, "grad_norm": 0.13539239855306054, "learning_rate": 1.4084163499931446e-05, "loss": 0.3969, "num_tokens": 2670711422.0, "step": 8950 }, { "epoch": 3.278843951816058, "grad_norm": 0.1526870299589814, "learning_rate": 1.4080327847839743e-05, "loss": 0.4442, "num_tokens": 2671412383.0, "step": 8951 }, { "epoch": 3.279210369623964, "grad_norm": 0.13266716743186527, "learning_rate": 1.4076492641641376e-05, "loss": 0.4332, "num_tokens": 2672288960.0, "step": 8952 }, { "epoch": 3.279576787431869, "grad_norm": 0.1397058793967497, "learning_rate": 1.4072657881552268e-05, "loss": 0.4033, "num_tokens": 2673010663.0, "step": 8953 }, { "epoch": 3.279943205239775, "grad_norm": 0.13847042201555232, "learning_rate": 1.4068823567788327e-05, "loss": 0.4312, "num_tokens": 2673870502.0, "step": 8954 }, { "epoch": 3.28030962304768, "grad_norm": 0.14255637053448048, "learning_rate": 1.406498970056542e-05, "loss": 0.3981, "num_tokens": 2674626230.0, "step": 8955 }, { "epoch": 3.2806760408555857, "grad_norm": 0.13728525401839844, "learning_rate": 1.4061156280099421e-05, "loss": 0.4572, "num_tokens": 2675439884.0, "step": 8956 }, { "epoch": 3.281042458663491, "grad_norm": 0.1478127424151345, "learning_rate": 1.4057323306606155e-05, "loss": 0.414, "num_tokens": 2676221180.0, "step": 8957 }, { "epoch": 3.2814088764713967, "grad_norm": 0.13390271362273137, "learning_rate": 1.405349078030141e-05, "loss": 0.3974, "num_tokens": 2676982424.0, "step": 8958 }, { "epoch": 3.281775294279302, "grad_norm": 0.13719963420538822, "learning_rate": 1.4049658701400986e-05, "loss": 0.4204, "num_tokens": 2677769475.0, "step": 8959 }, { "epoch": 3.2821417120872076, "grad_norm": 0.12221638548108175, "learning_rate": 1.4045827070120615e-05, "loss": 0.3944, "num_tokens": 2678630945.0, "step": 8960 }, { "epoch": 3.282508129895113, "grad_norm": 0.13832563043051238, "learning_rate": 1.4041995886676048e-05, "loss": 0.3826, "num_tokens": 2679391250.0, "step": 8961 }, { "epoch": 3.2828745477030186, "grad_norm": 0.12784523595388222, "learning_rate": 1.403816515128298e-05, "loss": 0.4126, "num_tokens": 2680266698.0, "step": 8962 }, { "epoch": 3.283240965510924, "grad_norm": 0.14464770587271722, "learning_rate": 1.403433486415708e-05, "loss": 0.4008, "num_tokens": 2680940520.0, "step": 8963 }, { "epoch": 3.283607383318829, "grad_norm": 0.14518302445586734, "learning_rate": 1.4030505025514006e-05, "loss": 0.4052, "num_tokens": 2681693741.0, "step": 8964 }, { "epoch": 3.2839738011267348, "grad_norm": 0.128449816334592, "learning_rate": 1.402667563556939e-05, "loss": 0.3742, "num_tokens": 2682387646.0, "step": 8965 }, { "epoch": 3.2843402189346405, "grad_norm": 0.13404390600879082, "learning_rate": 1.4022846694538826e-05, "loss": 0.3645, "num_tokens": 2683185443.0, "step": 8966 }, { "epoch": 3.2847066367425457, "grad_norm": 0.14891458873018854, "learning_rate": 1.4019018202637904e-05, "loss": 0.4014, "num_tokens": 2683917745.0, "step": 8967 }, { "epoch": 3.285073054550451, "grad_norm": 0.13370982880469132, "learning_rate": 1.401519016008217e-05, "loss": 0.4449, "num_tokens": 2684811161.0, "step": 8968 }, { "epoch": 3.2854394723583566, "grad_norm": 0.1463194371374595, "learning_rate": 1.4011362567087146e-05, "loss": 0.3946, "num_tokens": 2685437484.0, "step": 8969 }, { "epoch": 3.285805890166262, "grad_norm": 0.14225791818525066, "learning_rate": 1.4007535423868336e-05, "loss": 0.4259, "num_tokens": 2686236209.0, "step": 8970 }, { "epoch": 3.2861723079741676, "grad_norm": 0.14106548966878465, "learning_rate": 1.4003708730641213e-05, "loss": 0.4229, "num_tokens": 2687053391.0, "step": 8971 }, { "epoch": 3.286538725782073, "grad_norm": 0.14671662425822982, "learning_rate": 1.3999882487621222e-05, "loss": 0.4262, "num_tokens": 2687777147.0, "step": 8972 }, { "epoch": 3.2869051435899785, "grad_norm": 0.12936687071974245, "learning_rate": 1.3996056695023807e-05, "loss": 0.3739, "num_tokens": 2688559165.0, "step": 8973 }, { "epoch": 3.287271561397884, "grad_norm": 0.13934429533813092, "learning_rate": 1.399223135306436e-05, "loss": 0.3797, "num_tokens": 2689304648.0, "step": 8974 }, { "epoch": 3.2876379792057895, "grad_norm": 0.13302329308733854, "learning_rate": 1.3988406461958249e-05, "loss": 0.3764, "num_tokens": 2690030169.0, "step": 8975 }, { "epoch": 3.2880043970136947, "grad_norm": 0.1350627924535935, "learning_rate": 1.3984582021920825e-05, "loss": 0.3991, "num_tokens": 2690856034.0, "step": 8976 }, { "epoch": 3.2883708148216004, "grad_norm": 0.1387157378232073, "learning_rate": 1.3980758033167408e-05, "loss": 0.4259, "num_tokens": 2691582369.0, "step": 8977 }, { "epoch": 3.2887372326295057, "grad_norm": 0.12532673727672794, "learning_rate": 1.3976934495913308e-05, "loss": 0.392, "num_tokens": 2692554362.0, "step": 8978 }, { "epoch": 3.2891036504374114, "grad_norm": 0.14536208967308548, "learning_rate": 1.397311141037378e-05, "loss": 0.4177, "num_tokens": 2693256920.0, "step": 8979 }, { "epoch": 3.2894700682453166, "grad_norm": 0.14236845774505863, "learning_rate": 1.3969288776764093e-05, "loss": 0.4099, "num_tokens": 2694013693.0, "step": 8980 }, { "epoch": 3.2898364860532223, "grad_norm": 0.1415505555714574, "learning_rate": 1.3965466595299457e-05, "loss": 0.3793, "num_tokens": 2694719527.0, "step": 8981 }, { "epoch": 3.2902029038611276, "grad_norm": 0.14681311429049745, "learning_rate": 1.396164486619507e-05, "loss": 0.4155, "num_tokens": 2695483589.0, "step": 8982 }, { "epoch": 3.2905693216690333, "grad_norm": 0.13621729764515472, "learning_rate": 1.3957823589666094e-05, "loss": 0.4405, "num_tokens": 2696254969.0, "step": 8983 }, { "epoch": 3.2909357394769385, "grad_norm": 0.142209315740592, "learning_rate": 1.3954002765927692e-05, "loss": 0.422, "num_tokens": 2696919521.0, "step": 8984 }, { "epoch": 3.291302157284844, "grad_norm": 0.13976798887639455, "learning_rate": 1.3950182395194966e-05, "loss": 0.4203, "num_tokens": 2697746463.0, "step": 8985 }, { "epoch": 3.2916685750927495, "grad_norm": 0.15639522265345457, "learning_rate": 1.3946362477683026e-05, "loss": 0.4255, "num_tokens": 2698476602.0, "step": 8986 }, { "epoch": 3.292034992900655, "grad_norm": 0.12586179764670732, "learning_rate": 1.3942543013606932e-05, "loss": 0.3939, "num_tokens": 2699386081.0, "step": 8987 }, { "epoch": 3.2924014107085604, "grad_norm": 0.14966780108367236, "learning_rate": 1.393872400318173e-05, "loss": 0.4242, "num_tokens": 2700135514.0, "step": 8988 }, { "epoch": 3.2927678285164657, "grad_norm": 0.14661723030822013, "learning_rate": 1.3934905446622438e-05, "loss": 0.3968, "num_tokens": 2700814830.0, "step": 8989 }, { "epoch": 3.2931342463243714, "grad_norm": 0.15897376892728568, "learning_rate": 1.3931087344144053e-05, "loss": 0.4206, "num_tokens": 2701518245.0, "step": 8990 }, { "epoch": 3.293500664132277, "grad_norm": 0.14108221094496065, "learning_rate": 1.392726969596153e-05, "loss": 0.4369, "num_tokens": 2702283629.0, "step": 8991 }, { "epoch": 3.2938670819401823, "grad_norm": 0.13915242871017128, "learning_rate": 1.3923452502289816e-05, "loss": 0.4074, "num_tokens": 2703073158.0, "step": 8992 }, { "epoch": 3.2942334997480875, "grad_norm": 0.14480548138305988, "learning_rate": 1.3919635763343833e-05, "loss": 0.4327, "num_tokens": 2703836618.0, "step": 8993 }, { "epoch": 3.2945999175559932, "grad_norm": 0.14249969714157468, "learning_rate": 1.3915819479338457e-05, "loss": 0.4102, "num_tokens": 2704539770.0, "step": 8994 }, { "epoch": 3.294966335363899, "grad_norm": 0.14736428966959378, "learning_rate": 1.391200365048857e-05, "loss": 0.3995, "num_tokens": 2705221469.0, "step": 8995 }, { "epoch": 3.295332753171804, "grad_norm": 0.13616972190006288, "learning_rate": 1.3908188277009002e-05, "loss": 0.378, "num_tokens": 2706078386.0, "step": 8996 }, { "epoch": 3.2956991709797094, "grad_norm": 0.13730426548386845, "learning_rate": 1.3904373359114564e-05, "loss": 0.4237, "num_tokens": 2706765339.0, "step": 8997 }, { "epoch": 3.296065588787615, "grad_norm": 0.14058965218846387, "learning_rate": 1.390055889702004e-05, "loss": 0.3808, "num_tokens": 2707547410.0, "step": 8998 }, { "epoch": 3.2964320065955204, "grad_norm": 0.13939397825155456, "learning_rate": 1.3896744890940202e-05, "loss": 0.3845, "num_tokens": 2708319009.0, "step": 8999 }, { "epoch": 3.296798424403426, "grad_norm": 0.12897383137802665, "learning_rate": 1.3892931341089777e-05, "loss": 0.4138, "num_tokens": 2709186808.0, "step": 9000 }, { "epoch": 3.2971648422113313, "grad_norm": 0.14057703839438565, "learning_rate": 1.3889118247683486e-05, "loss": 0.3833, "num_tokens": 2709892764.0, "step": 9001 }, { "epoch": 3.297531260019237, "grad_norm": 0.14164512684507613, "learning_rate": 1.388530561093601e-05, "loss": 0.4114, "num_tokens": 2710630305.0, "step": 9002 }, { "epoch": 3.2978976778271423, "grad_norm": 0.15104336864948326, "learning_rate": 1.3881493431061999e-05, "loss": 0.4042, "num_tokens": 2711377860.0, "step": 9003 }, { "epoch": 3.298264095635048, "grad_norm": 0.13814306390098893, "learning_rate": 1.3877681708276098e-05, "loss": 0.4168, "num_tokens": 2712090718.0, "step": 9004 }, { "epoch": 3.298630513442953, "grad_norm": 0.13346875493029456, "learning_rate": 1.3873870442792901e-05, "loss": 0.399, "num_tokens": 2712904299.0, "step": 9005 }, { "epoch": 3.298996931250859, "grad_norm": 0.1433836606431644, "learning_rate": 1.3870059634826998e-05, "loss": 0.4241, "num_tokens": 2713672374.0, "step": 9006 }, { "epoch": 3.299363349058764, "grad_norm": 0.1392781717724656, "learning_rate": 1.3866249284592957e-05, "loss": 0.4281, "num_tokens": 2714406305.0, "step": 9007 }, { "epoch": 3.29972976686667, "grad_norm": 0.13564456571038608, "learning_rate": 1.3862439392305291e-05, "loss": 0.3963, "num_tokens": 2715242284.0, "step": 9008 }, { "epoch": 3.300096184674575, "grad_norm": 0.13664647793954826, "learning_rate": 1.3858629958178514e-05, "loss": 0.3739, "num_tokens": 2715937337.0, "step": 9009 }, { "epoch": 3.300462602482481, "grad_norm": 0.14689534052339917, "learning_rate": 1.385482098242709e-05, "loss": 0.3897, "num_tokens": 2716686498.0, "step": 9010 }, { "epoch": 3.300829020290386, "grad_norm": 0.1504061457201697, "learning_rate": 1.3851012465265492e-05, "loss": 0.4003, "num_tokens": 2717458572.0, "step": 9011 }, { "epoch": 3.3011954380982917, "grad_norm": 0.12427226395934916, "learning_rate": 1.3847204406908139e-05, "loss": 0.4069, "num_tokens": 2718347427.0, "step": 9012 }, { "epoch": 3.301561855906197, "grad_norm": 0.1609053484766566, "learning_rate": 1.3843396807569427e-05, "loss": 0.4299, "num_tokens": 2719006687.0, "step": 9013 }, { "epoch": 3.3019282737141027, "grad_norm": 0.14361173619473191, "learning_rate": 1.3839589667463738e-05, "loss": 0.4028, "num_tokens": 2719800089.0, "step": 9014 }, { "epoch": 3.302294691522008, "grad_norm": 0.13987517367070634, "learning_rate": 1.3835782986805421e-05, "loss": 0.3988, "num_tokens": 2720520947.0, "step": 9015 }, { "epoch": 3.3026611093299136, "grad_norm": 0.1416511522892015, "learning_rate": 1.383197676580879e-05, "loss": 0.4451, "num_tokens": 2721240938.0, "step": 9016 }, { "epoch": 3.303027527137819, "grad_norm": 0.14723744380643441, "learning_rate": 1.382817100468816e-05, "loss": 0.389, "num_tokens": 2722083723.0, "step": 9017 }, { "epoch": 3.303393944945724, "grad_norm": 0.15384352499290335, "learning_rate": 1.3824365703657793e-05, "loss": 0.4133, "num_tokens": 2722799286.0, "step": 9018 }, { "epoch": 3.30376036275363, "grad_norm": 0.1624205936536824, "learning_rate": 1.382056086293193e-05, "loss": 0.411, "num_tokens": 2723408586.0, "step": 9019 }, { "epoch": 3.3041267805615355, "grad_norm": 0.14355387012276055, "learning_rate": 1.3816756482724803e-05, "loss": 0.4226, "num_tokens": 2724136515.0, "step": 9020 }, { "epoch": 3.3044931983694408, "grad_norm": 0.15727489916108872, "learning_rate": 1.3812952563250593e-05, "loss": 0.4264, "num_tokens": 2724864874.0, "step": 9021 }, { "epoch": 3.304859616177346, "grad_norm": 0.15874229427071346, "learning_rate": 1.3809149104723481e-05, "loss": 0.3981, "num_tokens": 2725550790.0, "step": 9022 }, { "epoch": 3.3052260339852517, "grad_norm": 0.13885423229759486, "learning_rate": 1.3805346107357607e-05, "loss": 0.392, "num_tokens": 2726258671.0, "step": 9023 }, { "epoch": 3.305592451793157, "grad_norm": 0.14227302937795436, "learning_rate": 1.3801543571367083e-05, "loss": 0.4106, "num_tokens": 2727005391.0, "step": 9024 }, { "epoch": 3.3059588696010627, "grad_norm": 0.15386517589574827, "learning_rate": 1.3797741496966e-05, "loss": 0.3847, "num_tokens": 2727783941.0, "step": 9025 }, { "epoch": 3.306325287408968, "grad_norm": 0.13840624813201224, "learning_rate": 1.3793939884368415e-05, "loss": 0.4168, "num_tokens": 2728696800.0, "step": 9026 }, { "epoch": 3.3066917052168736, "grad_norm": 0.1348715779359537, "learning_rate": 1.3790138733788373e-05, "loss": 0.3998, "num_tokens": 2729443409.0, "step": 9027 }, { "epoch": 3.307058123024779, "grad_norm": 0.12974400105661957, "learning_rate": 1.37863380454399e-05, "loss": 0.3469, "num_tokens": 2730294869.0, "step": 9028 }, { "epoch": 3.3074245408326846, "grad_norm": 0.15752448074493247, "learning_rate": 1.3782537819536965e-05, "loss": 0.3903, "num_tokens": 2730940526.0, "step": 9029 }, { "epoch": 3.30779095864059, "grad_norm": 0.14432219145687825, "learning_rate": 1.3778738056293533e-05, "loss": 0.4341, "num_tokens": 2731708447.0, "step": 9030 }, { "epoch": 3.3081573764484955, "grad_norm": 0.1570573068286094, "learning_rate": 1.3774938755923542e-05, "loss": 0.4567, "num_tokens": 2732364357.0, "step": 9031 }, { "epoch": 3.3085237942564008, "grad_norm": 0.1468605833396249, "learning_rate": 1.3771139918640885e-05, "loss": 0.4111, "num_tokens": 2733101640.0, "step": 9032 }, { "epoch": 3.3088902120643064, "grad_norm": 0.1344141780840804, "learning_rate": 1.3767341544659468e-05, "loss": 0.4083, "num_tokens": 2733874427.0, "step": 9033 }, { "epoch": 3.3092566298722117, "grad_norm": 0.1515035079708701, "learning_rate": 1.3763543634193121e-05, "loss": 0.4026, "num_tokens": 2734511970.0, "step": 9034 }, { "epoch": 3.3096230476801174, "grad_norm": 0.12803190275314388, "learning_rate": 1.37597461874557e-05, "loss": 0.4213, "num_tokens": 2735357514.0, "step": 9035 }, { "epoch": 3.3099894654880226, "grad_norm": 0.13008058919862306, "learning_rate": 1.3755949204660994e-05, "loss": 0.3596, "num_tokens": 2736219777.0, "step": 9036 }, { "epoch": 3.3103558832959283, "grad_norm": 0.14890195781250126, "learning_rate": 1.3752152686022786e-05, "loss": 0.4611, "num_tokens": 2736959827.0, "step": 9037 }, { "epoch": 3.3107223011038336, "grad_norm": 0.15780208325689318, "learning_rate": 1.3748356631754818e-05, "loss": 0.4133, "num_tokens": 2737580697.0, "step": 9038 }, { "epoch": 3.3110887189117393, "grad_norm": 0.1568835729067771, "learning_rate": 1.3744561042070826e-05, "loss": 0.4564, "num_tokens": 2738228786.0, "step": 9039 }, { "epoch": 3.3114551367196445, "grad_norm": 0.1509593098823844, "learning_rate": 1.37407659171845e-05, "loss": 0.4072, "num_tokens": 2738881456.0, "step": 9040 }, { "epoch": 3.3118215545275502, "grad_norm": 0.14728463503206068, "learning_rate": 1.3736971257309524e-05, "loss": 0.3899, "num_tokens": 2739617279.0, "step": 9041 }, { "epoch": 3.3121879723354555, "grad_norm": 0.14835871809906678, "learning_rate": 1.3733177062659541e-05, "loss": 0.419, "num_tokens": 2740380993.0, "step": 9042 }, { "epoch": 3.3125543901433607, "grad_norm": 0.14811789837435096, "learning_rate": 1.372938333344817e-05, "loss": 0.4444, "num_tokens": 2741091158.0, "step": 9043 }, { "epoch": 3.3129208079512664, "grad_norm": 0.14790410943910134, "learning_rate": 1.3725590069888997e-05, "loss": 0.4149, "num_tokens": 2741869155.0, "step": 9044 }, { "epoch": 3.313287225759172, "grad_norm": 0.13846607212641548, "learning_rate": 1.3721797272195608e-05, "loss": 0.3892, "num_tokens": 2742669642.0, "step": 9045 }, { "epoch": 3.3136536435670774, "grad_norm": 0.1349459062024363, "learning_rate": 1.3718004940581537e-05, "loss": 0.3907, "num_tokens": 2743435106.0, "step": 9046 }, { "epoch": 3.3140200613749826, "grad_norm": 0.13992412162367507, "learning_rate": 1.3714213075260289e-05, "loss": 0.3765, "num_tokens": 2744233814.0, "step": 9047 }, { "epoch": 3.3143864791828883, "grad_norm": 0.13324346996072717, "learning_rate": 1.3710421676445373e-05, "loss": 0.3953, "num_tokens": 2745036970.0, "step": 9048 }, { "epoch": 3.3147528969907936, "grad_norm": 0.14019133900982358, "learning_rate": 1.3706630744350235e-05, "loss": 0.4198, "num_tokens": 2745823004.0, "step": 9049 }, { "epoch": 3.3151193147986993, "grad_norm": 0.13346249824861603, "learning_rate": 1.3702840279188326e-05, "loss": 0.4107, "num_tokens": 2746676754.0, "step": 9050 }, { "epoch": 3.3154857326066045, "grad_norm": 0.1543988722550259, "learning_rate": 1.3699050281173052e-05, "loss": 0.4507, "num_tokens": 2747309366.0, "step": 9051 }, { "epoch": 3.31585215041451, "grad_norm": 0.13815377838774986, "learning_rate": 1.3695260750517796e-05, "loss": 0.412, "num_tokens": 2748174272.0, "step": 9052 }, { "epoch": 3.3162185682224155, "grad_norm": 0.13939789990176676, "learning_rate": 1.3691471687435907e-05, "loss": 0.3896, "num_tokens": 2748866465.0, "step": 9053 }, { "epoch": 3.316584986030321, "grad_norm": 0.14071717177918328, "learning_rate": 1.3687683092140733e-05, "loss": 0.4148, "num_tokens": 2749613356.0, "step": 9054 }, { "epoch": 3.3169514038382264, "grad_norm": 0.13647745528161462, "learning_rate": 1.3683894964845567e-05, "loss": 0.3931, "num_tokens": 2750469428.0, "step": 9055 }, { "epoch": 3.317317821646132, "grad_norm": 0.13927500345015262, "learning_rate": 1.3680107305763702e-05, "loss": 0.3983, "num_tokens": 2751220680.0, "step": 9056 }, { "epoch": 3.3176842394540373, "grad_norm": 0.1582216613073279, "learning_rate": 1.3676320115108379e-05, "loss": 0.4079, "num_tokens": 2751899889.0, "step": 9057 }, { "epoch": 3.318050657261943, "grad_norm": 0.13503704706550804, "learning_rate": 1.367253339309283e-05, "loss": 0.4039, "num_tokens": 2752614736.0, "step": 9058 }, { "epoch": 3.3184170750698483, "grad_norm": 0.14135898422271606, "learning_rate": 1.366874713993025e-05, "loss": 0.3781, "num_tokens": 2753352657.0, "step": 9059 }, { "epoch": 3.318783492877754, "grad_norm": 0.16263660683694736, "learning_rate": 1.366496135583381e-05, "loss": 0.4287, "num_tokens": 2754011824.0, "step": 9060 }, { "epoch": 3.3191499106856592, "grad_norm": 0.13160474959327395, "learning_rate": 1.3661176041016664e-05, "loss": 0.3915, "num_tokens": 2754774850.0, "step": 9061 }, { "epoch": 3.319516328493565, "grad_norm": 0.1541317885680304, "learning_rate": 1.3657391195691937e-05, "loss": 0.4284, "num_tokens": 2755474976.0, "step": 9062 }, { "epoch": 3.31988274630147, "grad_norm": 0.14904711740746412, "learning_rate": 1.3653606820072718e-05, "loss": 0.4447, "num_tokens": 2756214356.0, "step": 9063 }, { "epoch": 3.320249164109376, "grad_norm": 0.13094728580114184, "learning_rate": 1.3649822914372073e-05, "loss": 0.4079, "num_tokens": 2757009541.0, "step": 9064 }, { "epoch": 3.320615581917281, "grad_norm": 0.1404177249390472, "learning_rate": 1.3646039478803049e-05, "loss": 0.4186, "num_tokens": 2757771649.0, "step": 9065 }, { "epoch": 3.320981999725187, "grad_norm": 0.1364702560362247, "learning_rate": 1.3642256513578644e-05, "loss": 0.4095, "num_tokens": 2758496452.0, "step": 9066 }, { "epoch": 3.321348417533092, "grad_norm": 0.14123932357063468, "learning_rate": 1.363847401891187e-05, "loss": 0.4089, "num_tokens": 2759218554.0, "step": 9067 }, { "epoch": 3.3217148353409978, "grad_norm": 0.13356590935083223, "learning_rate": 1.363469199501567e-05, "loss": 0.4037, "num_tokens": 2760021244.0, "step": 9068 }, { "epoch": 3.322081253148903, "grad_norm": 0.1458827324098886, "learning_rate": 1.3630910442102993e-05, "loss": 0.4005, "num_tokens": 2760738593.0, "step": 9069 }, { "epoch": 3.3224476709568087, "grad_norm": 0.14396464760005812, "learning_rate": 1.3627129360386749e-05, "loss": 0.4111, "num_tokens": 2761445747.0, "step": 9070 }, { "epoch": 3.322814088764714, "grad_norm": 0.13882847124756376, "learning_rate": 1.3623348750079801e-05, "loss": 0.4073, "num_tokens": 2762286175.0, "step": 9071 }, { "epoch": 3.323180506572619, "grad_norm": 0.13608519498687222, "learning_rate": 1.3619568611395027e-05, "loss": 0.4251, "num_tokens": 2763056239.0, "step": 9072 }, { "epoch": 3.323546924380525, "grad_norm": 0.13434403465096867, "learning_rate": 1.3615788944545247e-05, "loss": 0.3961, "num_tokens": 2763847136.0, "step": 9073 }, { "epoch": 3.3239133421884306, "grad_norm": 0.14293381932554763, "learning_rate": 1.361200974974326e-05, "loss": 0.4103, "num_tokens": 2764633796.0, "step": 9074 }, { "epoch": 3.324279759996336, "grad_norm": 0.13626251429361805, "learning_rate": 1.3608231027201853e-05, "loss": 0.4008, "num_tokens": 2765468993.0, "step": 9075 }, { "epoch": 3.324646177804241, "grad_norm": 0.13441243766737082, "learning_rate": 1.3604452777133766e-05, "loss": 0.3968, "num_tokens": 2766215107.0, "step": 9076 }, { "epoch": 3.325012595612147, "grad_norm": 0.1399436035184767, "learning_rate": 1.360067499975172e-05, "loss": 0.4019, "num_tokens": 2766933868.0, "step": 9077 }, { "epoch": 3.325379013420052, "grad_norm": 0.14218782057132615, "learning_rate": 1.3596897695268423e-05, "loss": 0.4009, "num_tokens": 2767589934.0, "step": 9078 }, { "epoch": 3.3257454312279577, "grad_norm": 0.15389802435240452, "learning_rate": 1.3593120863896536e-05, "loss": 0.4331, "num_tokens": 2768349211.0, "step": 9079 }, { "epoch": 3.326111849035863, "grad_norm": 0.14933569891411796, "learning_rate": 1.3589344505848707e-05, "loss": 0.4151, "num_tokens": 2769032329.0, "step": 9080 }, { "epoch": 3.3264782668437687, "grad_norm": 0.13465708191088563, "learning_rate": 1.3585568621337544e-05, "loss": 0.3945, "num_tokens": 2769871678.0, "step": 9081 }, { "epoch": 3.326844684651674, "grad_norm": 0.13860167396271117, "learning_rate": 1.358179321057564e-05, "loss": 0.3959, "num_tokens": 2770651869.0, "step": 9082 }, { "epoch": 3.3272111024595796, "grad_norm": 0.1422976352560815, "learning_rate": 1.357801827377557e-05, "loss": 0.4151, "num_tokens": 2771427938.0, "step": 9083 }, { "epoch": 3.327577520267485, "grad_norm": 0.14679913492504731, "learning_rate": 1.3574243811149862e-05, "loss": 0.4289, "num_tokens": 2772151274.0, "step": 9084 }, { "epoch": 3.3279439380753906, "grad_norm": 0.12859702973146248, "learning_rate": 1.3570469822911023e-05, "loss": 0.4371, "num_tokens": 2773085357.0, "step": 9085 }, { "epoch": 3.328310355883296, "grad_norm": 0.13837285514125308, "learning_rate": 1.3566696309271539e-05, "loss": 0.4188, "num_tokens": 2773871202.0, "step": 9086 }, { "epoch": 3.3286767736912015, "grad_norm": 0.1342658826528085, "learning_rate": 1.3562923270443864e-05, "loss": 0.3901, "num_tokens": 2774712002.0, "step": 9087 }, { "epoch": 3.3290431914991068, "grad_norm": 0.14380378706987193, "learning_rate": 1.3559150706640427e-05, "loss": 0.4229, "num_tokens": 2775460511.0, "step": 9088 }, { "epoch": 3.3294096093070125, "grad_norm": 0.14944447194875735, "learning_rate": 1.3555378618073625e-05, "loss": 0.3715, "num_tokens": 2776086275.0, "step": 9089 }, { "epoch": 3.3297760271149177, "grad_norm": 0.13529195381283965, "learning_rate": 1.3551607004955856e-05, "loss": 0.4089, "num_tokens": 2776938517.0, "step": 9090 }, { "epoch": 3.3301424449228234, "grad_norm": 0.1386526148348701, "learning_rate": 1.354783586749945e-05, "loss": 0.3995, "num_tokens": 2777803384.0, "step": 9091 }, { "epoch": 3.3305088627307287, "grad_norm": 0.13936751720256116, "learning_rate": 1.3544065205916739e-05, "loss": 0.4123, "num_tokens": 2778619650.0, "step": 9092 }, { "epoch": 3.3308752805386344, "grad_norm": 0.15014008819272967, "learning_rate": 1.3540295020420007e-05, "loss": 0.4366, "num_tokens": 2779322029.0, "step": 9093 }, { "epoch": 3.3312416983465396, "grad_norm": 0.1516115164123932, "learning_rate": 1.3536525311221538e-05, "loss": 0.4178, "num_tokens": 2780120365.0, "step": 9094 }, { "epoch": 3.3316081161544453, "grad_norm": 0.13977052706859916, "learning_rate": 1.3532756078533557e-05, "loss": 0.4035, "num_tokens": 2781002220.0, "step": 9095 }, { "epoch": 3.3319745339623505, "grad_norm": 0.1413223937113139, "learning_rate": 1.3528987322568301e-05, "loss": 0.3991, "num_tokens": 2781811912.0, "step": 9096 }, { "epoch": 3.332340951770256, "grad_norm": 0.15372746892919759, "learning_rate": 1.3525219043537945e-05, "loss": 0.4305, "num_tokens": 2782598230.0, "step": 9097 }, { "epoch": 3.3327073695781615, "grad_norm": 0.14278593859314234, "learning_rate": 1.3521451241654648e-05, "loss": 0.3727, "num_tokens": 2783301890.0, "step": 9098 }, { "epoch": 3.333073787386067, "grad_norm": 0.14113905666862897, "learning_rate": 1.3517683917130549e-05, "loss": 0.3998, "num_tokens": 2784013913.0, "step": 9099 }, { "epoch": 3.3334402051939724, "grad_norm": 0.14866364113528147, "learning_rate": 1.3513917070177755e-05, "loss": 0.3824, "num_tokens": 2784766209.0, "step": 9100 }, { "epoch": 3.3338066230018777, "grad_norm": 0.1291676093239674, "learning_rate": 1.3510150701008353e-05, "loss": 0.3767, "num_tokens": 2785549682.0, "step": 9101 }, { "epoch": 3.3341730408097834, "grad_norm": 0.15479172005501515, "learning_rate": 1.3506384809834385e-05, "loss": 0.407, "num_tokens": 2786297750.0, "step": 9102 }, { "epoch": 3.3345394586176886, "grad_norm": 0.1332763859784182, "learning_rate": 1.350261939686789e-05, "loss": 0.4138, "num_tokens": 2787063293.0, "step": 9103 }, { "epoch": 3.3349058764255943, "grad_norm": 0.14675644632359955, "learning_rate": 1.3498854462320857e-05, "loss": 0.3929, "num_tokens": 2787751281.0, "step": 9104 }, { "epoch": 3.3352722942334996, "grad_norm": 0.13958361283579976, "learning_rate": 1.3495090006405272e-05, "loss": 0.4014, "num_tokens": 2788620255.0, "step": 9105 }, { "epoch": 3.3356387120414053, "grad_norm": 0.13892650538538315, "learning_rate": 1.3491326029333078e-05, "loss": 0.4129, "num_tokens": 2789376633.0, "step": 9106 }, { "epoch": 3.3360051298493105, "grad_norm": 0.14164554046449204, "learning_rate": 1.3487562531316187e-05, "loss": 0.3832, "num_tokens": 2790110607.0, "step": 9107 }, { "epoch": 3.336371547657216, "grad_norm": 0.13360339707323507, "learning_rate": 1.3483799512566496e-05, "loss": 0.3818, "num_tokens": 2790938559.0, "step": 9108 }, { "epoch": 3.3367379654651215, "grad_norm": 0.15369514617590868, "learning_rate": 1.348003697329587e-05, "loss": 0.4179, "num_tokens": 2791585969.0, "step": 9109 }, { "epoch": 3.337104383273027, "grad_norm": 0.14583439441332222, "learning_rate": 1.3476274913716147e-05, "loss": 0.3892, "num_tokens": 2792486188.0, "step": 9110 }, { "epoch": 3.3374708010809324, "grad_norm": 0.13807823742785677, "learning_rate": 1.347251333403914e-05, "loss": 0.407, "num_tokens": 2793228611.0, "step": 9111 }, { "epoch": 3.337837218888838, "grad_norm": 0.13901043878017286, "learning_rate": 1.3468752234476639e-05, "loss": 0.381, "num_tokens": 2793975722.0, "step": 9112 }, { "epoch": 3.3382036366967434, "grad_norm": 0.13193086363251252, "learning_rate": 1.3464991615240392e-05, "loss": 0.4188, "num_tokens": 2794796447.0, "step": 9113 }, { "epoch": 3.338570054504649, "grad_norm": 0.1454971246037273, "learning_rate": 1.346123147654213e-05, "loss": 0.4065, "num_tokens": 2795565675.0, "step": 9114 }, { "epoch": 3.3389364723125543, "grad_norm": 0.14631312956964526, "learning_rate": 1.3457471818593557e-05, "loss": 0.4264, "num_tokens": 2796353108.0, "step": 9115 }, { "epoch": 3.33930289012046, "grad_norm": 0.13883473869928944, "learning_rate": 1.3453712641606349e-05, "loss": 0.4246, "num_tokens": 2797096287.0, "step": 9116 }, { "epoch": 3.3396693079283652, "grad_norm": 0.15136279360572957, "learning_rate": 1.3449953945792162e-05, "loss": 0.4276, "num_tokens": 2797811842.0, "step": 9117 }, { "epoch": 3.340035725736271, "grad_norm": 0.19023119392936763, "learning_rate": 1.3446195731362614e-05, "loss": 0.4097, "num_tokens": 2798405239.0, "step": 9118 }, { "epoch": 3.340402143544176, "grad_norm": 0.1492145303323352, "learning_rate": 1.3442437998529301e-05, "loss": 0.4327, "num_tokens": 2799190030.0, "step": 9119 }, { "epoch": 3.340768561352082, "grad_norm": 0.12182897952663521, "learning_rate": 1.343868074750379e-05, "loss": 0.3814, "num_tokens": 2800109874.0, "step": 9120 }, { "epoch": 3.341134979159987, "grad_norm": 0.13538876142319234, "learning_rate": 1.3434923978497611e-05, "loss": 0.3892, "num_tokens": 2800909317.0, "step": 9121 }, { "epoch": 3.3415013969678924, "grad_norm": 0.15208444893115577, "learning_rate": 1.3431167691722295e-05, "loss": 0.3879, "num_tokens": 2801630967.0, "step": 9122 }, { "epoch": 3.341867814775798, "grad_norm": 0.13784877431974413, "learning_rate": 1.3427411887389317e-05, "loss": 0.4081, "num_tokens": 2802444718.0, "step": 9123 }, { "epoch": 3.342234232583704, "grad_norm": 0.13342907385262504, "learning_rate": 1.3423656565710142e-05, "loss": 0.3971, "num_tokens": 2803253274.0, "step": 9124 }, { "epoch": 3.342600650391609, "grad_norm": 0.14364078554348417, "learning_rate": 1.3419901726896205e-05, "loss": 0.3744, "num_tokens": 2803942986.0, "step": 9125 }, { "epoch": 3.3429670681995143, "grad_norm": 0.14986683308970702, "learning_rate": 1.3416147371158904e-05, "loss": 0.4166, "num_tokens": 2804571915.0, "step": 9126 }, { "epoch": 3.34333348600742, "grad_norm": 0.15227283896144286, "learning_rate": 1.341239349870961e-05, "loss": 0.4353, "num_tokens": 2805263104.0, "step": 9127 }, { "epoch": 3.3436999038153257, "grad_norm": 0.14388102533409441, "learning_rate": 1.3408640109759695e-05, "loss": 0.4069, "num_tokens": 2806011879.0, "step": 9128 }, { "epoch": 3.344066321623231, "grad_norm": 0.1377383736227562, "learning_rate": 1.3404887204520456e-05, "loss": 0.4201, "num_tokens": 2806858995.0, "step": 9129 }, { "epoch": 3.344432739431136, "grad_norm": 0.1265237241786068, "learning_rate": 1.3401134783203213e-05, "loss": 0.3965, "num_tokens": 2807656004.0, "step": 9130 }, { "epoch": 3.344799157239042, "grad_norm": 0.14392250994435882, "learning_rate": 1.3397382846019227e-05, "loss": 0.4334, "num_tokens": 2808595462.0, "step": 9131 }, { "epoch": 3.345165575046947, "grad_norm": 0.1390521861643117, "learning_rate": 1.3393631393179724e-05, "loss": 0.3994, "num_tokens": 2809392814.0, "step": 9132 }, { "epoch": 3.345531992854853, "grad_norm": 0.13845000514861813, "learning_rate": 1.3389880424895945e-05, "loss": 0.3963, "num_tokens": 2810184160.0, "step": 9133 }, { "epoch": 3.345898410662758, "grad_norm": 0.13274588865178935, "learning_rate": 1.3386129941379058e-05, "loss": 0.394, "num_tokens": 2810967431.0, "step": 9134 }, { "epoch": 3.3462648284706638, "grad_norm": 0.14493201512763867, "learning_rate": 1.338237994284023e-05, "loss": 0.3935, "num_tokens": 2811675706.0, "step": 9135 }, { "epoch": 3.346631246278569, "grad_norm": 0.14186326439799365, "learning_rate": 1.3378630429490581e-05, "loss": 0.3925, "num_tokens": 2812362266.0, "step": 9136 }, { "epoch": 3.3469976640864747, "grad_norm": 0.13818206758838977, "learning_rate": 1.337488140154124e-05, "loss": 0.4368, "num_tokens": 2813212238.0, "step": 9137 }, { "epoch": 3.34736408189438, "grad_norm": 0.13820969791719281, "learning_rate": 1.3371132859203261e-05, "loss": 0.3887, "num_tokens": 2813948425.0, "step": 9138 }, { "epoch": 3.3477304997022856, "grad_norm": 0.14380965707837992, "learning_rate": 1.3367384802687711e-05, "loss": 0.405, "num_tokens": 2814678513.0, "step": 9139 }, { "epoch": 3.348096917510191, "grad_norm": 0.14868238271822043, "learning_rate": 1.3363637232205607e-05, "loss": 0.4099, "num_tokens": 2815431325.0, "step": 9140 }, { "epoch": 3.3484633353180966, "grad_norm": 0.14636838239579808, "learning_rate": 1.3359890147967942e-05, "loss": 0.4162, "num_tokens": 2816099099.0, "step": 9141 }, { "epoch": 3.348829753126002, "grad_norm": 0.1281186921901977, "learning_rate": 1.3356143550185689e-05, "loss": 0.3978, "num_tokens": 2816905917.0, "step": 9142 }, { "epoch": 3.3491961709339075, "grad_norm": 0.13695760595359957, "learning_rate": 1.335239743906978e-05, "loss": 0.3756, "num_tokens": 2817692913.0, "step": 9143 }, { "epoch": 3.349562588741813, "grad_norm": 0.13637779816494117, "learning_rate": 1.3348651814831137e-05, "loss": 0.4069, "num_tokens": 2818427569.0, "step": 9144 }, { "epoch": 3.3499290065497185, "grad_norm": 0.1487075209401598, "learning_rate": 1.3344906677680647e-05, "loss": 0.4059, "num_tokens": 2819212628.0, "step": 9145 }, { "epoch": 3.3502954243576237, "grad_norm": 0.14368861650624104, "learning_rate": 1.3341162027829169e-05, "loss": 0.425, "num_tokens": 2819965018.0, "step": 9146 }, { "epoch": 3.3506618421655294, "grad_norm": 0.13986648066743748, "learning_rate": 1.3337417865487527e-05, "loss": 0.4021, "num_tokens": 2820788220.0, "step": 9147 }, { "epoch": 3.3510282599734347, "grad_norm": 0.13400830972068836, "learning_rate": 1.3333674190866535e-05, "loss": 0.3888, "num_tokens": 2821608601.0, "step": 9148 }, { "epoch": 3.3513946777813404, "grad_norm": 0.13221549132975166, "learning_rate": 1.3329931004176952e-05, "loss": 0.4017, "num_tokens": 2822445348.0, "step": 9149 }, { "epoch": 3.3517610955892456, "grad_norm": 0.1372939785172096, "learning_rate": 1.3326188305629542e-05, "loss": 0.3943, "num_tokens": 2823174899.0, "step": 9150 }, { "epoch": 3.352127513397151, "grad_norm": 0.13509978840786863, "learning_rate": 1.3322446095435025e-05, "loss": 0.4204, "num_tokens": 2823957824.0, "step": 9151 }, { "epoch": 3.3524939312050566, "grad_norm": 0.13211153054870697, "learning_rate": 1.3318704373804094e-05, "loss": 0.3945, "num_tokens": 2824873225.0, "step": 9152 }, { "epoch": 3.3528603490129623, "grad_norm": 0.1412706984737587, "learning_rate": 1.3314963140947417e-05, "loss": 0.3908, "num_tokens": 2825580859.0, "step": 9153 }, { "epoch": 3.3532267668208675, "grad_norm": 0.13933976402585696, "learning_rate": 1.3311222397075618e-05, "loss": 0.4306, "num_tokens": 2826382628.0, "step": 9154 }, { "epoch": 3.3535931846287728, "grad_norm": 0.12540987559947803, "learning_rate": 1.3307482142399328e-05, "loss": 0.4226, "num_tokens": 2827279610.0, "step": 9155 }, { "epoch": 3.3539596024366785, "grad_norm": 0.15244299498515654, "learning_rate": 1.3303742377129125e-05, "loss": 0.3986, "num_tokens": 2827926905.0, "step": 9156 }, { "epoch": 3.3543260202445837, "grad_norm": 0.13604555474578456, "learning_rate": 1.3300003101475558e-05, "loss": 0.402, "num_tokens": 2828649259.0, "step": 9157 }, { "epoch": 3.3546924380524894, "grad_norm": 0.14063633832904063, "learning_rate": 1.329626431564916e-05, "loss": 0.3776, "num_tokens": 2829391293.0, "step": 9158 }, { "epoch": 3.3550588558603947, "grad_norm": 0.1383513050636318, "learning_rate": 1.329252601986044e-05, "loss": 0.414, "num_tokens": 2830332060.0, "step": 9159 }, { "epoch": 3.3554252736683003, "grad_norm": 0.1257302321615356, "learning_rate": 1.3288788214319857e-05, "loss": 0.3742, "num_tokens": 2831182795.0, "step": 9160 }, { "epoch": 3.3557916914762056, "grad_norm": 0.13280584418401215, "learning_rate": 1.328505089923787e-05, "loss": 0.4054, "num_tokens": 2831950901.0, "step": 9161 }, { "epoch": 3.3561581092841113, "grad_norm": 0.1405392132144762, "learning_rate": 1.328131407482489e-05, "loss": 0.4107, "num_tokens": 2832773066.0, "step": 9162 }, { "epoch": 3.3565245270920165, "grad_norm": 0.14014822631796758, "learning_rate": 1.327757774129131e-05, "loss": 0.4372, "num_tokens": 2833535648.0, "step": 9163 }, { "epoch": 3.3568909448999222, "grad_norm": 0.15768345752739318, "learning_rate": 1.3273841898847484e-05, "loss": 0.4739, "num_tokens": 2834222286.0, "step": 9164 }, { "epoch": 3.3572573627078275, "grad_norm": 0.13855047242073218, "learning_rate": 1.3270106547703759e-05, "loss": 0.3949, "num_tokens": 2835037465.0, "step": 9165 }, { "epoch": 3.357623780515733, "grad_norm": 0.1325293728203938, "learning_rate": 1.3266371688070443e-05, "loss": 0.3902, "num_tokens": 2835850004.0, "step": 9166 }, { "epoch": 3.3579901983236384, "grad_norm": 0.15053790997457575, "learning_rate": 1.3262637320157814e-05, "loss": 0.4124, "num_tokens": 2836540464.0, "step": 9167 }, { "epoch": 3.358356616131544, "grad_norm": 0.15818427831244705, "learning_rate": 1.3258903444176123e-05, "loss": 0.4135, "num_tokens": 2837230293.0, "step": 9168 }, { "epoch": 3.3587230339394494, "grad_norm": 0.14254649960220703, "learning_rate": 1.3255170060335596e-05, "loss": 0.4581, "num_tokens": 2837936955.0, "step": 9169 }, { "epoch": 3.359089451747355, "grad_norm": 0.14659176340477004, "learning_rate": 1.3251437168846417e-05, "loss": 0.3935, "num_tokens": 2838627570.0, "step": 9170 }, { "epoch": 3.3594558695552603, "grad_norm": 0.14172934879289104, "learning_rate": 1.324770476991877e-05, "loss": 0.424, "num_tokens": 2839347580.0, "step": 9171 }, { "epoch": 3.359822287363166, "grad_norm": 0.15882929800745207, "learning_rate": 1.3243972863762799e-05, "loss": 0.4124, "num_tokens": 2840074148.0, "step": 9172 }, { "epoch": 3.3601887051710713, "grad_norm": 0.14011430530153882, "learning_rate": 1.3240241450588612e-05, "loss": 0.4026, "num_tokens": 2840843207.0, "step": 9173 }, { "epoch": 3.360555122978977, "grad_norm": 0.14358085789792713, "learning_rate": 1.3236510530606295e-05, "loss": 0.3981, "num_tokens": 2841544953.0, "step": 9174 }, { "epoch": 3.360921540786882, "grad_norm": 0.1529835489736132, "learning_rate": 1.3232780104025907e-05, "loss": 0.4178, "num_tokens": 2842274593.0, "step": 9175 }, { "epoch": 3.3612879585947875, "grad_norm": 0.14445758097035744, "learning_rate": 1.3229050171057467e-05, "loss": 0.4311, "num_tokens": 2842999176.0, "step": 9176 }, { "epoch": 3.361654376402693, "grad_norm": 0.14063397546004658, "learning_rate": 1.3225320731910999e-05, "loss": 0.4083, "num_tokens": 2843712717.0, "step": 9177 }, { "epoch": 3.362020794210599, "grad_norm": 0.13357564843868652, "learning_rate": 1.322159178679646e-05, "loss": 0.4291, "num_tokens": 2844481194.0, "step": 9178 }, { "epoch": 3.362387212018504, "grad_norm": 0.14981729443217168, "learning_rate": 1.3217863335923809e-05, "loss": 0.3989, "num_tokens": 2845153328.0, "step": 9179 }, { "epoch": 3.3627536298264094, "grad_norm": 0.14583324811705534, "learning_rate": 1.3214135379502961e-05, "loss": 0.4067, "num_tokens": 2845894927.0, "step": 9180 }, { "epoch": 3.363120047634315, "grad_norm": 0.13824110711253593, "learning_rate": 1.3210407917743809e-05, "loss": 0.3885, "num_tokens": 2846653049.0, "step": 9181 }, { "epoch": 3.3634864654422207, "grad_norm": 0.1333230410060625, "learning_rate": 1.3206680950856204e-05, "loss": 0.3933, "num_tokens": 2847423908.0, "step": 9182 }, { "epoch": 3.363852883250126, "grad_norm": 0.13270149676053947, "learning_rate": 1.3202954479049998e-05, "loss": 0.4162, "num_tokens": 2848296331.0, "step": 9183 }, { "epoch": 3.3642193010580312, "grad_norm": 0.15069166868930467, "learning_rate": 1.3199228502534989e-05, "loss": 0.4061, "num_tokens": 2848919869.0, "step": 9184 }, { "epoch": 3.364585718865937, "grad_norm": 0.14011701615713557, "learning_rate": 1.3195503021520968e-05, "loss": 0.3873, "num_tokens": 2849659322.0, "step": 9185 }, { "epoch": 3.364952136673842, "grad_norm": 0.1473564094396318, "learning_rate": 1.3191778036217676e-05, "loss": 0.4429, "num_tokens": 2850437055.0, "step": 9186 }, { "epoch": 3.365318554481748, "grad_norm": 0.13922508522859775, "learning_rate": 1.3188053546834836e-05, "loss": 0.3907, "num_tokens": 2851249785.0, "step": 9187 }, { "epoch": 3.365684972289653, "grad_norm": 0.13639137475238286, "learning_rate": 1.3184329553582157e-05, "loss": 0.424, "num_tokens": 2851955644.0, "step": 9188 }, { "epoch": 3.366051390097559, "grad_norm": 0.13663709953332676, "learning_rate": 1.3180606056669296e-05, "loss": 0.3748, "num_tokens": 2852719009.0, "step": 9189 }, { "epoch": 3.366417807905464, "grad_norm": 0.13549453393137115, "learning_rate": 1.3176883056305898e-05, "loss": 0.4256, "num_tokens": 2853499542.0, "step": 9190 }, { "epoch": 3.3667842257133698, "grad_norm": 0.15604738275448704, "learning_rate": 1.3173160552701566e-05, "loss": 0.4156, "num_tokens": 2854113440.0, "step": 9191 }, { "epoch": 3.367150643521275, "grad_norm": 0.13463173131957848, "learning_rate": 1.3169438546065902e-05, "loss": 0.4006, "num_tokens": 2854922704.0, "step": 9192 }, { "epoch": 3.3675170613291807, "grad_norm": 0.14856664873926798, "learning_rate": 1.3165717036608445e-05, "loss": 0.429, "num_tokens": 2855608393.0, "step": 9193 }, { "epoch": 3.367883479137086, "grad_norm": 0.14332248688198276, "learning_rate": 1.3161996024538737e-05, "loss": 0.3878, "num_tokens": 2856285734.0, "step": 9194 }, { "epoch": 3.3682498969449917, "grad_norm": 0.14348882523933898, "learning_rate": 1.3158275510066272e-05, "loss": 0.3953, "num_tokens": 2857082744.0, "step": 9195 }, { "epoch": 3.368616314752897, "grad_norm": 0.14571322859156988, "learning_rate": 1.3154555493400523e-05, "loss": 0.4181, "num_tokens": 2857768968.0, "step": 9196 }, { "epoch": 3.3689827325608026, "grad_norm": 0.1359529658077318, "learning_rate": 1.3150835974750933e-05, "loss": 0.391, "num_tokens": 2858557666.0, "step": 9197 }, { "epoch": 3.369349150368708, "grad_norm": 0.1290784203980552, "learning_rate": 1.3147116954326913e-05, "loss": 0.4173, "num_tokens": 2859523238.0, "step": 9198 }, { "epoch": 3.3697155681766136, "grad_norm": 0.14791413846448573, "learning_rate": 1.3143398432337856e-05, "loss": 0.4011, "num_tokens": 2860280079.0, "step": 9199 }, { "epoch": 3.370081985984519, "grad_norm": 0.15696297483574628, "learning_rate": 1.3139680408993134e-05, "loss": 0.4382, "num_tokens": 2860950482.0, "step": 9200 }, { "epoch": 3.3704484037924245, "grad_norm": 0.15093092028551983, "learning_rate": 1.3135962884502065e-05, "loss": 0.4095, "num_tokens": 2861625724.0, "step": 9201 }, { "epoch": 3.3708148216003297, "grad_norm": 0.13572854651477878, "learning_rate": 1.313224585907396e-05, "loss": 0.3872, "num_tokens": 2862350627.0, "step": 9202 }, { "epoch": 3.3711812394082354, "grad_norm": 0.14189804497704456, "learning_rate": 1.3128529332918091e-05, "loss": 0.3899, "num_tokens": 2863101848.0, "step": 9203 }, { "epoch": 3.3715476572161407, "grad_norm": 0.1575457986670298, "learning_rate": 1.3124813306243702e-05, "loss": 0.4135, "num_tokens": 2863855115.0, "step": 9204 }, { "epoch": 3.371914075024046, "grad_norm": 0.13897588262577096, "learning_rate": 1.3121097779260016e-05, "loss": 0.4078, "num_tokens": 2864620082.0, "step": 9205 }, { "epoch": 3.3722804928319516, "grad_norm": 0.14020601289252416, "learning_rate": 1.3117382752176231e-05, "loss": 0.4138, "num_tokens": 2865406899.0, "step": 9206 }, { "epoch": 3.3726469106398573, "grad_norm": 0.1562875547219679, "learning_rate": 1.3113668225201507e-05, "loss": 0.4237, "num_tokens": 2866063552.0, "step": 9207 }, { "epoch": 3.3730133284477626, "grad_norm": 0.13830604570879015, "learning_rate": 1.3109954198544981e-05, "loss": 0.4056, "num_tokens": 2866845690.0, "step": 9208 }, { "epoch": 3.373379746255668, "grad_norm": 0.15159036505378104, "learning_rate": 1.3106240672415753e-05, "loss": 0.3769, "num_tokens": 2867609451.0, "step": 9209 }, { "epoch": 3.3737461640635735, "grad_norm": 0.13142324044265716, "learning_rate": 1.31025276470229e-05, "loss": 0.3792, "num_tokens": 2868394096.0, "step": 9210 }, { "epoch": 3.3741125818714788, "grad_norm": 0.1380979094303508, "learning_rate": 1.3098815122575486e-05, "loss": 0.4276, "num_tokens": 2869205560.0, "step": 9211 }, { "epoch": 3.3744789996793845, "grad_norm": 0.13880031210549526, "learning_rate": 1.3095103099282518e-05, "loss": 0.417, "num_tokens": 2870072993.0, "step": 9212 }, { "epoch": 3.3748454174872897, "grad_norm": 0.14758068539340266, "learning_rate": 1.3091391577353005e-05, "loss": 0.4208, "num_tokens": 2870903462.0, "step": 9213 }, { "epoch": 3.3752118352951954, "grad_norm": 0.13406757570980393, "learning_rate": 1.308768055699591e-05, "loss": 0.3991, "num_tokens": 2871723761.0, "step": 9214 }, { "epoch": 3.3755782531031007, "grad_norm": 0.1301988344913481, "learning_rate": 1.3083970038420156e-05, "loss": 0.3782, "num_tokens": 2872497729.0, "step": 9215 }, { "epoch": 3.3759446709110064, "grad_norm": 0.14610018514687056, "learning_rate": 1.3080260021834674e-05, "loss": 0.38, "num_tokens": 2873182213.0, "step": 9216 }, { "epoch": 3.3763110887189116, "grad_norm": 0.14349070361725663, "learning_rate": 1.3076550507448334e-05, "loss": 0.4084, "num_tokens": 2873941326.0, "step": 9217 }, { "epoch": 3.3766775065268173, "grad_norm": 0.13870100610265645, "learning_rate": 1.307284149546999e-05, "loss": 0.4082, "num_tokens": 2874811187.0, "step": 9218 }, { "epoch": 3.3770439243347226, "grad_norm": 0.13492762044613504, "learning_rate": 1.306913298610846e-05, "loss": 0.3754, "num_tokens": 2875584537.0, "step": 9219 }, { "epoch": 3.3774103421426283, "grad_norm": 0.13920767810456422, "learning_rate": 1.3065424979572554e-05, "loss": 0.3632, "num_tokens": 2876259353.0, "step": 9220 }, { "epoch": 3.3777767599505335, "grad_norm": 0.14736413055526895, "learning_rate": 1.306171747607103e-05, "loss": 0.4092, "num_tokens": 2877037886.0, "step": 9221 }, { "epoch": 3.378143177758439, "grad_norm": 0.13327758277014098, "learning_rate": 1.3058010475812632e-05, "loss": 0.3856, "num_tokens": 2877803167.0, "step": 9222 }, { "epoch": 3.3785095955663444, "grad_norm": 0.13167363458373973, "learning_rate": 1.3054303979006076e-05, "loss": 0.3644, "num_tokens": 2878608011.0, "step": 9223 }, { "epoch": 3.37887601337425, "grad_norm": 0.13731630205843354, "learning_rate": 1.3050597985860038e-05, "loss": 0.4174, "num_tokens": 2879333965.0, "step": 9224 }, { "epoch": 3.3792424311821554, "grad_norm": 0.14240056384898736, "learning_rate": 1.304689249658317e-05, "loss": 0.4063, "num_tokens": 2880017016.0, "step": 9225 }, { "epoch": 3.379608848990061, "grad_norm": 0.14175467977303283, "learning_rate": 1.3043187511384104e-05, "loss": 0.4268, "num_tokens": 2880802136.0, "step": 9226 }, { "epoch": 3.3799752667979663, "grad_norm": 0.13361626239432875, "learning_rate": 1.3039483030471442e-05, "loss": 0.3868, "num_tokens": 2881507906.0, "step": 9227 }, { "epoch": 3.380341684605872, "grad_norm": 0.13211844150768312, "learning_rate": 1.303577905405375e-05, "loss": 0.3953, "num_tokens": 2882315051.0, "step": 9228 }, { "epoch": 3.3807081024137773, "grad_norm": 0.12967629422640653, "learning_rate": 1.303207558233957e-05, "loss": 0.3889, "num_tokens": 2883091364.0, "step": 9229 }, { "epoch": 3.3810745202216825, "grad_norm": 0.1478674355662108, "learning_rate": 1.3028372615537412e-05, "loss": 0.4413, "num_tokens": 2883867471.0, "step": 9230 }, { "epoch": 3.3814409380295882, "grad_norm": 0.12884440558028487, "learning_rate": 1.3024670153855761e-05, "loss": 0.3876, "num_tokens": 2884678249.0, "step": 9231 }, { "epoch": 3.381807355837494, "grad_norm": 0.1350139557079917, "learning_rate": 1.3020968197503067e-05, "loss": 0.4047, "num_tokens": 2885391544.0, "step": 9232 }, { "epoch": 3.382173773645399, "grad_norm": 0.1456455121220899, "learning_rate": 1.3017266746687765e-05, "loss": 0.4232, "num_tokens": 2886150549.0, "step": 9233 }, { "epoch": 3.3825401914533044, "grad_norm": 0.13154856286970582, "learning_rate": 1.301356580161826e-05, "loss": 0.4206, "num_tokens": 2886926768.0, "step": 9234 }, { "epoch": 3.38290660926121, "grad_norm": 0.1387186360687809, "learning_rate": 1.3009865362502917e-05, "loss": 0.3918, "num_tokens": 2887691051.0, "step": 9235 }, { "epoch": 3.3832730270691154, "grad_norm": 0.15481249491596158, "learning_rate": 1.3006165429550076e-05, "loss": 0.4433, "num_tokens": 2888398730.0, "step": 9236 }, { "epoch": 3.383639444877021, "grad_norm": 0.14508216888813555, "learning_rate": 1.3002466002968039e-05, "loss": 0.4121, "num_tokens": 2889108079.0, "step": 9237 }, { "epoch": 3.3840058626849263, "grad_norm": 0.1483734037778076, "learning_rate": 1.2998767082965118e-05, "loss": 0.406, "num_tokens": 2889862213.0, "step": 9238 }, { "epoch": 3.384372280492832, "grad_norm": 0.13905871358497118, "learning_rate": 1.2995068669749547e-05, "loss": 0.4078, "num_tokens": 2890611866.0, "step": 9239 }, { "epoch": 3.3847386983007373, "grad_norm": 0.13843928985523093, "learning_rate": 1.2991370763529565e-05, "loss": 0.381, "num_tokens": 2891347104.0, "step": 9240 }, { "epoch": 3.385105116108643, "grad_norm": 0.12915304828465785, "learning_rate": 1.2987673364513374e-05, "loss": 0.4148, "num_tokens": 2892200985.0, "step": 9241 }, { "epoch": 3.385471533916548, "grad_norm": 0.14302503496649185, "learning_rate": 1.2983976472909134e-05, "loss": 0.3816, "num_tokens": 2892912780.0, "step": 9242 }, { "epoch": 3.385837951724454, "grad_norm": 0.14235816324659517, "learning_rate": 1.2980280088924992e-05, "loss": 0.3745, "num_tokens": 2893612963.0, "step": 9243 }, { "epoch": 3.386204369532359, "grad_norm": 0.15504240759051438, "learning_rate": 1.2976584212769066e-05, "loss": 0.4479, "num_tokens": 2894357312.0, "step": 9244 }, { "epoch": 3.386570787340265, "grad_norm": 0.13118810423334587, "learning_rate": 1.2972888844649438e-05, "loss": 0.4078, "num_tokens": 2895156940.0, "step": 9245 }, { "epoch": 3.38693720514817, "grad_norm": 0.14312450745074295, "learning_rate": 1.2969193984774157e-05, "loss": 0.4226, "num_tokens": 2895965564.0, "step": 9246 }, { "epoch": 3.387303622956076, "grad_norm": 0.13697700466387902, "learning_rate": 1.2965499633351268e-05, "loss": 0.4092, "num_tokens": 2896742006.0, "step": 9247 }, { "epoch": 3.387670040763981, "grad_norm": 0.13535263226026542, "learning_rate": 1.2961805790588752e-05, "loss": 0.3611, "num_tokens": 2897505945.0, "step": 9248 }, { "epoch": 3.3880364585718867, "grad_norm": 0.13271992649657968, "learning_rate": 1.2958112456694598e-05, "loss": 0.4041, "num_tokens": 2898239090.0, "step": 9249 }, { "epoch": 3.388402876379792, "grad_norm": 0.13996588427981527, "learning_rate": 1.2954419631876736e-05, "loss": 0.4176, "num_tokens": 2899010360.0, "step": 9250 }, { "epoch": 3.3887692941876977, "grad_norm": 0.14022032362574766, "learning_rate": 1.2950727316343084e-05, "loss": 0.4144, "num_tokens": 2899774939.0, "step": 9251 }, { "epoch": 3.389135711995603, "grad_norm": 0.14455673661994514, "learning_rate": 1.2947035510301524e-05, "loss": 0.3931, "num_tokens": 2900414408.0, "step": 9252 }, { "epoch": 3.3895021298035086, "grad_norm": 0.14524461791292165, "learning_rate": 1.2943344213959908e-05, "loss": 0.383, "num_tokens": 2901049931.0, "step": 9253 }, { "epoch": 3.389868547611414, "grad_norm": 0.1318353010736065, "learning_rate": 1.2939653427526064e-05, "loss": 0.4083, "num_tokens": 2901885244.0, "step": 9254 }, { "epoch": 3.3902349654193196, "grad_norm": 0.14054316048780943, "learning_rate": 1.2935963151207804e-05, "loss": 0.4032, "num_tokens": 2902584462.0, "step": 9255 }, { "epoch": 3.390601383227225, "grad_norm": 0.1437870557863879, "learning_rate": 1.2932273385212887e-05, "loss": 0.4046, "num_tokens": 2903332791.0, "step": 9256 }, { "epoch": 3.3909678010351305, "grad_norm": 0.139828772432918, "learning_rate": 1.292858412974906e-05, "loss": 0.4165, "num_tokens": 2904065908.0, "step": 9257 }, { "epoch": 3.3913342188430358, "grad_norm": 0.12937655850375224, "learning_rate": 1.2924895385024027e-05, "loss": 0.4237, "num_tokens": 2904954403.0, "step": 9258 }, { "epoch": 3.391700636650941, "grad_norm": 0.13625874580862282, "learning_rate": 1.2921207151245475e-05, "loss": 0.4074, "num_tokens": 2905784650.0, "step": 9259 }, { "epoch": 3.3920670544588467, "grad_norm": 0.1480042031625476, "learning_rate": 1.2917519428621053e-05, "loss": 0.4165, "num_tokens": 2906539720.0, "step": 9260 }, { "epoch": 3.3924334722667524, "grad_norm": 0.13915512417841228, "learning_rate": 1.2913832217358401e-05, "loss": 0.4015, "num_tokens": 2907287409.0, "step": 9261 }, { "epoch": 3.3927998900746577, "grad_norm": 0.14349452463785636, "learning_rate": 1.2910145517665114e-05, "loss": 0.404, "num_tokens": 2908034630.0, "step": 9262 }, { "epoch": 3.393166307882563, "grad_norm": 0.14245319497813433, "learning_rate": 1.2906459329748758e-05, "loss": 0.3732, "num_tokens": 2908773695.0, "step": 9263 }, { "epoch": 3.3935327256904686, "grad_norm": 0.13457256079290322, "learning_rate": 1.2902773653816865e-05, "loss": 0.3996, "num_tokens": 2909552857.0, "step": 9264 }, { "epoch": 3.393899143498374, "grad_norm": 0.14203185254059902, "learning_rate": 1.2899088490076945e-05, "loss": 0.4115, "num_tokens": 2910301080.0, "step": 9265 }, { "epoch": 3.3942655613062795, "grad_norm": 0.14246936045898062, "learning_rate": 1.2895403838736495e-05, "loss": 0.4148, "num_tokens": 2911062730.0, "step": 9266 }, { "epoch": 3.394631979114185, "grad_norm": 0.13501571268878282, "learning_rate": 1.2891719700002953e-05, "loss": 0.408, "num_tokens": 2911840225.0, "step": 9267 }, { "epoch": 3.3949983969220905, "grad_norm": 0.1318379960669021, "learning_rate": 1.288803607408376e-05, "loss": 0.3861, "num_tokens": 2912677809.0, "step": 9268 }, { "epoch": 3.3953648147299957, "grad_norm": 0.13326015898236893, "learning_rate": 1.2884352961186297e-05, "loss": 0.4102, "num_tokens": 2913487042.0, "step": 9269 }, { "epoch": 3.3957312325379014, "grad_norm": 0.12783203172155727, "learning_rate": 1.288067036151793e-05, "loss": 0.3935, "num_tokens": 2914376066.0, "step": 9270 }, { "epoch": 3.3960976503458067, "grad_norm": 0.1413427759894273, "learning_rate": 1.287698827528601e-05, "loss": 0.3913, "num_tokens": 2915010549.0, "step": 9271 }, { "epoch": 3.3964640681537124, "grad_norm": 0.1405261025159424, "learning_rate": 1.2873306702697836e-05, "loss": 0.3786, "num_tokens": 2915819600.0, "step": 9272 }, { "epoch": 3.3968304859616176, "grad_norm": 0.1411833853355541, "learning_rate": 1.2869625643960694e-05, "loss": 0.3843, "num_tokens": 2916507497.0, "step": 9273 }, { "epoch": 3.3971969037695233, "grad_norm": 0.14626515654841268, "learning_rate": 1.286594509928182e-05, "loss": 0.4202, "num_tokens": 2917129516.0, "step": 9274 }, { "epoch": 3.3975633215774286, "grad_norm": 0.1450324437172936, "learning_rate": 1.2862265068868454e-05, "loss": 0.4073, "num_tokens": 2917852974.0, "step": 9275 }, { "epoch": 3.3979297393853343, "grad_norm": 0.14007493233218082, "learning_rate": 1.2858585552927773e-05, "loss": 0.3939, "num_tokens": 2918609498.0, "step": 9276 }, { "epoch": 3.3982961571932395, "grad_norm": 0.15620490817923743, "learning_rate": 1.285490655166696e-05, "loss": 0.434, "num_tokens": 2919250886.0, "step": 9277 }, { "epoch": 3.398662575001145, "grad_norm": 0.13870897085791675, "learning_rate": 1.285122806529314e-05, "loss": 0.397, "num_tokens": 2919954583.0, "step": 9278 }, { "epoch": 3.3990289928090505, "grad_norm": 0.14210540638575603, "learning_rate": 1.2847550094013417e-05, "loss": 0.3985, "num_tokens": 2920713483.0, "step": 9279 }, { "epoch": 3.399395410616956, "grad_norm": 0.13293633680283076, "learning_rate": 1.2843872638034861e-05, "loss": 0.4034, "num_tokens": 2921544598.0, "step": 9280 }, { "epoch": 3.3997618284248614, "grad_norm": 0.14093000728248037, "learning_rate": 1.2840195697564538e-05, "loss": 0.4122, "num_tokens": 2922316634.0, "step": 9281 }, { "epoch": 3.400128246232767, "grad_norm": 0.13230749796704708, "learning_rate": 1.2836519272809453e-05, "loss": 0.3899, "num_tokens": 2923092624.0, "step": 9282 }, { "epoch": 3.4004946640406724, "grad_norm": 0.1387555715904353, "learning_rate": 1.283284336397661e-05, "loss": 0.4045, "num_tokens": 2923877795.0, "step": 9283 }, { "epoch": 3.4008610818485776, "grad_norm": 0.1323943195079817, "learning_rate": 1.2829167971272955e-05, "loss": 0.3687, "num_tokens": 2924653399.0, "step": 9284 }, { "epoch": 3.4012274996564833, "grad_norm": 0.13203436693785947, "learning_rate": 1.2825493094905434e-05, "loss": 0.3973, "num_tokens": 2925455800.0, "step": 9285 }, { "epoch": 3.401593917464389, "grad_norm": 0.1381234945034892, "learning_rate": 1.2821818735080937e-05, "loss": 0.4079, "num_tokens": 2926162285.0, "step": 9286 }, { "epoch": 3.4019603352722942, "grad_norm": 0.15060922630006038, "learning_rate": 1.2818144892006337e-05, "loss": 0.4378, "num_tokens": 2927025870.0, "step": 9287 }, { "epoch": 3.4023267530801995, "grad_norm": 0.1385278430489592, "learning_rate": 1.2814471565888487e-05, "loss": 0.4111, "num_tokens": 2927764212.0, "step": 9288 }, { "epoch": 3.402693170888105, "grad_norm": 0.12779010059033336, "learning_rate": 1.2810798756934202e-05, "loss": 0.4139, "num_tokens": 2928687474.0, "step": 9289 }, { "epoch": 3.4030595886960104, "grad_norm": 0.46242654271093225, "learning_rate": 1.280712646535027e-05, "loss": 0.4205, "num_tokens": 2929370531.0, "step": 9290 }, { "epoch": 3.403426006503916, "grad_norm": 0.15364770678476428, "learning_rate": 1.2803454691343444e-05, "loss": 0.4037, "num_tokens": 2930123563.0, "step": 9291 }, { "epoch": 3.4037924243118214, "grad_norm": 0.1414816687820924, "learning_rate": 1.2799783435120455e-05, "loss": 0.4153, "num_tokens": 2930940277.0, "step": 9292 }, { "epoch": 3.404158842119727, "grad_norm": 0.14043516464387884, "learning_rate": 1.2796112696887991e-05, "loss": 0.4036, "num_tokens": 2931613484.0, "step": 9293 }, { "epoch": 3.4045252599276323, "grad_norm": 0.14157978803326737, "learning_rate": 1.2792442476852734e-05, "loss": 0.4271, "num_tokens": 2932308026.0, "step": 9294 }, { "epoch": 3.404891677735538, "grad_norm": 0.14427459913457552, "learning_rate": 1.2788772775221325e-05, "loss": 0.3898, "num_tokens": 2933145410.0, "step": 9295 }, { "epoch": 3.4052580955434433, "grad_norm": 0.14704222682980586, "learning_rate": 1.2785103592200377e-05, "loss": 0.4154, "num_tokens": 2933921123.0, "step": 9296 }, { "epoch": 3.405624513351349, "grad_norm": 0.13786062240458843, "learning_rate": 1.2781434927996463e-05, "loss": 0.409, "num_tokens": 2934700660.0, "step": 9297 }, { "epoch": 3.405990931159254, "grad_norm": 0.140582435750587, "learning_rate": 1.2777766782816134e-05, "loss": 0.4023, "num_tokens": 2935526555.0, "step": 9298 }, { "epoch": 3.40635734896716, "grad_norm": 0.14289259822158606, "learning_rate": 1.2774099156865934e-05, "loss": 0.379, "num_tokens": 2936250459.0, "step": 9299 }, { "epoch": 3.406723766775065, "grad_norm": 0.13620872936415787, "learning_rate": 1.2770432050352337e-05, "loss": 0.3959, "num_tokens": 2937074165.0, "step": 9300 }, { "epoch": 3.407090184582971, "grad_norm": 0.13951579780914827, "learning_rate": 1.2766765463481811e-05, "loss": 0.4002, "num_tokens": 2937845455.0, "step": 9301 }, { "epoch": 3.407456602390876, "grad_norm": 0.13189085631608927, "learning_rate": 1.2763099396460805e-05, "loss": 0.424, "num_tokens": 2938633443.0, "step": 9302 }, { "epoch": 3.407823020198782, "grad_norm": 0.15632990922016807, "learning_rate": 1.2759433849495716e-05, "loss": 0.4052, "num_tokens": 2939313891.0, "step": 9303 }, { "epoch": 3.408189438006687, "grad_norm": 0.15045240794821216, "learning_rate": 1.2755768822792916e-05, "loss": 0.3845, "num_tokens": 2939971658.0, "step": 9304 }, { "epoch": 3.4085558558145927, "grad_norm": 0.14886167163436173, "learning_rate": 1.2752104316558771e-05, "loss": 0.4479, "num_tokens": 2940596477.0, "step": 9305 }, { "epoch": 3.408922273622498, "grad_norm": 0.1420408913350782, "learning_rate": 1.2748440330999587e-05, "loss": 0.4004, "num_tokens": 2941334609.0, "step": 9306 }, { "epoch": 3.4092886914304037, "grad_norm": 0.13980345853537066, "learning_rate": 1.2744776866321657e-05, "loss": 0.3777, "num_tokens": 2941999093.0, "step": 9307 }, { "epoch": 3.409655109238309, "grad_norm": 0.13330829403480884, "learning_rate": 1.2741113922731234e-05, "loss": 0.3948, "num_tokens": 2942829380.0, "step": 9308 }, { "epoch": 3.4100215270462146, "grad_norm": 0.1241857621412425, "learning_rate": 1.2737451500434555e-05, "loss": 0.3996, "num_tokens": 2943704044.0, "step": 9309 }, { "epoch": 3.41038794485412, "grad_norm": 0.13956947998079267, "learning_rate": 1.2733789599637829e-05, "loss": 0.4068, "num_tokens": 2944430279.0, "step": 9310 }, { "epoch": 3.4107543626620256, "grad_norm": 0.14848798721403192, "learning_rate": 1.2730128220547222e-05, "loss": 0.3891, "num_tokens": 2945184491.0, "step": 9311 }, { "epoch": 3.411120780469931, "grad_norm": 0.14559902198379468, "learning_rate": 1.272646736336888e-05, "loss": 0.4033, "num_tokens": 2945882345.0, "step": 9312 }, { "epoch": 3.411487198277836, "grad_norm": 0.1323134358929189, "learning_rate": 1.2722807028308907e-05, "loss": 0.4358, "num_tokens": 2946743041.0, "step": 9313 }, { "epoch": 3.411853616085742, "grad_norm": 0.14761066858108704, "learning_rate": 1.271914721557339e-05, "loss": 0.4242, "num_tokens": 2947412718.0, "step": 9314 }, { "epoch": 3.4122200338936475, "grad_norm": 0.14139975203394559, "learning_rate": 1.271548792536839e-05, "loss": 0.3874, "num_tokens": 2948145009.0, "step": 9315 }, { "epoch": 3.4125864517015527, "grad_norm": 0.1400806450717313, "learning_rate": 1.2711829157899933e-05, "loss": 0.407, "num_tokens": 2948936089.0, "step": 9316 }, { "epoch": 3.412952869509458, "grad_norm": 0.15118123086423363, "learning_rate": 1.2708170913374009e-05, "loss": 0.4143, "num_tokens": 2949661038.0, "step": 9317 }, { "epoch": 3.4133192873173637, "grad_norm": 0.13693587634767243, "learning_rate": 1.2704513191996593e-05, "loss": 0.3838, "num_tokens": 2950524090.0, "step": 9318 }, { "epoch": 3.413685705125269, "grad_norm": 0.13943475662472318, "learning_rate": 1.2700855993973612e-05, "loss": 0.416, "num_tokens": 2951415991.0, "step": 9319 }, { "epoch": 3.4140521229331746, "grad_norm": 0.13431431800886387, "learning_rate": 1.2697199319510972e-05, "loss": 0.4088, "num_tokens": 2952192557.0, "step": 9320 }, { "epoch": 3.41441854074108, "grad_norm": 0.13438883662591186, "learning_rate": 1.2693543168814562e-05, "loss": 0.3753, "num_tokens": 2952991142.0, "step": 9321 }, { "epoch": 3.4147849585489856, "grad_norm": 0.1495524595563624, "learning_rate": 1.2689887542090218e-05, "loss": 0.4022, "num_tokens": 2953723661.0, "step": 9322 }, { "epoch": 3.415151376356891, "grad_norm": 0.13381944064671478, "learning_rate": 1.2686232439543773e-05, "loss": 0.3757, "num_tokens": 2954536721.0, "step": 9323 }, { "epoch": 3.4155177941647965, "grad_norm": 0.13775362861031532, "learning_rate": 1.2682577861381011e-05, "loss": 0.3834, "num_tokens": 2955341471.0, "step": 9324 }, { "epoch": 3.4158842119727018, "grad_norm": 0.12435449117938654, "learning_rate": 1.2678923807807691e-05, "loss": 0.4248, "num_tokens": 2956219359.0, "step": 9325 }, { "epoch": 3.4162506297806075, "grad_norm": 0.1450921554937528, "learning_rate": 1.2675270279029533e-05, "loss": 0.3776, "num_tokens": 2957009538.0, "step": 9326 }, { "epoch": 3.4166170475885127, "grad_norm": 0.1426544597589497, "learning_rate": 1.2671617275252257e-05, "loss": 0.4056, "num_tokens": 2957750211.0, "step": 9327 }, { "epoch": 3.4169834653964184, "grad_norm": 0.15037955550809523, "learning_rate": 1.2667964796681527e-05, "loss": 0.4139, "num_tokens": 2958432656.0, "step": 9328 }, { "epoch": 3.4173498832043236, "grad_norm": 0.14955039259113756, "learning_rate": 1.2664312843522973e-05, "loss": 0.4332, "num_tokens": 2959114895.0, "step": 9329 }, { "epoch": 3.4177163010122293, "grad_norm": 0.13670271029834566, "learning_rate": 1.266066141598223e-05, "loss": 0.4067, "num_tokens": 2959866769.0, "step": 9330 }, { "epoch": 3.4180827188201346, "grad_norm": 0.14316952204073846, "learning_rate": 1.2657010514264856e-05, "loss": 0.3894, "num_tokens": 2960528010.0, "step": 9331 }, { "epoch": 3.4184491366280403, "grad_norm": 0.1473112800730434, "learning_rate": 1.2653360138576425e-05, "loss": 0.3958, "num_tokens": 2961296858.0, "step": 9332 }, { "epoch": 3.4188155544359455, "grad_norm": 0.14470416309378167, "learning_rate": 1.2649710289122453e-05, "loss": 0.4044, "num_tokens": 2962094321.0, "step": 9333 }, { "epoch": 3.4191819722438512, "grad_norm": 0.11482971820600069, "learning_rate": 1.264606096610843e-05, "loss": 0.3683, "num_tokens": 2963030399.0, "step": 9334 }, { "epoch": 3.4195483900517565, "grad_norm": 0.1382639523986964, "learning_rate": 1.2642412169739818e-05, "loss": 0.4117, "num_tokens": 2963803328.0, "step": 9335 }, { "epoch": 3.419914807859662, "grad_norm": 0.12662173756970785, "learning_rate": 1.2638763900222061e-05, "loss": 0.3735, "num_tokens": 2964606406.0, "step": 9336 }, { "epoch": 3.4202812256675674, "grad_norm": 0.14664556129210998, "learning_rate": 1.2635116157760556e-05, "loss": 0.414, "num_tokens": 2965353074.0, "step": 9337 }, { "epoch": 3.4206476434754727, "grad_norm": 0.14307303256519105, "learning_rate": 1.2631468942560684e-05, "loss": 0.4169, "num_tokens": 2966071965.0, "step": 9338 }, { "epoch": 3.4210140612833784, "grad_norm": 0.15419122791050233, "learning_rate": 1.2627822254827794e-05, "loss": 0.4128, "num_tokens": 2966778190.0, "step": 9339 }, { "epoch": 3.421380479091284, "grad_norm": 0.13922859505964247, "learning_rate": 1.262417609476719e-05, "loss": 0.4054, "num_tokens": 2967565191.0, "step": 9340 }, { "epoch": 3.4217468968991893, "grad_norm": 0.13445936710570022, "learning_rate": 1.2620530462584162e-05, "loss": 0.4406, "num_tokens": 2968463172.0, "step": 9341 }, { "epoch": 3.4221133147070946, "grad_norm": 0.15978507240254095, "learning_rate": 1.2616885358483964e-05, "loss": 0.4388, "num_tokens": 2969051486.0, "step": 9342 }, { "epoch": 3.4224797325150003, "grad_norm": 0.14591488944161982, "learning_rate": 1.2613240782671827e-05, "loss": 0.3967, "num_tokens": 2969869519.0, "step": 9343 }, { "epoch": 3.4228461503229055, "grad_norm": 0.13667169644769006, "learning_rate": 1.2609596735352954e-05, "loss": 0.4024, "num_tokens": 2970619279.0, "step": 9344 }, { "epoch": 3.423212568130811, "grad_norm": 0.1408093678630223, "learning_rate": 1.2605953216732503e-05, "loss": 0.3778, "num_tokens": 2971310995.0, "step": 9345 }, { "epoch": 3.4235789859387165, "grad_norm": 0.15709896007295687, "learning_rate": 1.2602310227015618e-05, "loss": 0.4199, "num_tokens": 2972013208.0, "step": 9346 }, { "epoch": 3.423945403746622, "grad_norm": 0.13327416512108367, "learning_rate": 1.25986677664074e-05, "loss": 0.4128, "num_tokens": 2972805896.0, "step": 9347 }, { "epoch": 3.4243118215545274, "grad_norm": 0.15672118726534726, "learning_rate": 1.2595025835112923e-05, "loss": 0.407, "num_tokens": 2973523243.0, "step": 9348 }, { "epoch": 3.424678239362433, "grad_norm": 0.14069836932177368, "learning_rate": 1.2591384433337249e-05, "loss": 0.4109, "num_tokens": 2974299774.0, "step": 9349 }, { "epoch": 3.4250446571703383, "grad_norm": 0.1326012710041845, "learning_rate": 1.2587743561285382e-05, "loss": 0.4011, "num_tokens": 2974990837.0, "step": 9350 }, { "epoch": 3.425411074978244, "grad_norm": 0.12442540939076079, "learning_rate": 1.258410321916232e-05, "loss": 0.3891, "num_tokens": 2976040065.0, "step": 9351 }, { "epoch": 3.4257774927861493, "grad_norm": 0.13484283042660264, "learning_rate": 1.2580463407173022e-05, "loss": 0.4054, "num_tokens": 2976834221.0, "step": 9352 }, { "epoch": 3.426143910594055, "grad_norm": 0.1401365713662935, "learning_rate": 1.2576824125522404e-05, "loss": 0.4047, "num_tokens": 2977607600.0, "step": 9353 }, { "epoch": 3.4265103284019602, "grad_norm": 0.1264927702152471, "learning_rate": 1.2573185374415384e-05, "loss": 0.3902, "num_tokens": 2978484644.0, "step": 9354 }, { "epoch": 3.426876746209866, "grad_norm": 0.1488603911686405, "learning_rate": 1.2569547154056818e-05, "loss": 0.4353, "num_tokens": 2979200411.0, "step": 9355 }, { "epoch": 3.427243164017771, "grad_norm": 0.14098070623903466, "learning_rate": 1.2565909464651542e-05, "loss": 0.4217, "num_tokens": 2980012242.0, "step": 9356 }, { "epoch": 3.427609581825677, "grad_norm": 0.13983771893219005, "learning_rate": 1.2562272306404377e-05, "loss": 0.422, "num_tokens": 2980759110.0, "step": 9357 }, { "epoch": 3.427975999633582, "grad_norm": 0.13969772837669095, "learning_rate": 1.2558635679520098e-05, "loss": 0.3986, "num_tokens": 2981434964.0, "step": 9358 }, { "epoch": 3.428342417441488, "grad_norm": 0.13626695767857278, "learning_rate": 1.2554999584203445e-05, "loss": 0.3919, "num_tokens": 2982219943.0, "step": 9359 }, { "epoch": 3.428708835249393, "grad_norm": 0.15556916042313293, "learning_rate": 1.2551364020659148e-05, "loss": 0.3998, "num_tokens": 2982899829.0, "step": 9360 }, { "epoch": 3.4290752530572988, "grad_norm": 0.1404864253224632, "learning_rate": 1.2547728989091899e-05, "loss": 0.4057, "num_tokens": 2983643055.0, "step": 9361 }, { "epoch": 3.429441670865204, "grad_norm": 0.1352378649404131, "learning_rate": 1.2544094489706351e-05, "loss": 0.3957, "num_tokens": 2984423563.0, "step": 9362 }, { "epoch": 3.4298080886731093, "grad_norm": 0.14276240315737063, "learning_rate": 1.2540460522707125e-05, "loss": 0.4488, "num_tokens": 2985215384.0, "step": 9363 }, { "epoch": 3.430174506481015, "grad_norm": 0.13409805946380554, "learning_rate": 1.2536827088298842e-05, "loss": 0.4177, "num_tokens": 2986051163.0, "step": 9364 }, { "epoch": 3.4305409242889207, "grad_norm": 0.14225270255876796, "learning_rate": 1.2533194186686049e-05, "loss": 0.4435, "num_tokens": 2986816581.0, "step": 9365 }, { "epoch": 3.430907342096826, "grad_norm": 0.12737294179742448, "learning_rate": 1.2529561818073304e-05, "loss": 0.3912, "num_tokens": 2987585631.0, "step": 9366 }, { "epoch": 3.431273759904731, "grad_norm": 0.12039323068514715, "learning_rate": 1.2525929982665108e-05, "loss": 0.3787, "num_tokens": 2988501531.0, "step": 9367 }, { "epoch": 3.431640177712637, "grad_norm": 0.13642175307657395, "learning_rate": 1.2522298680665942e-05, "loss": 0.3986, "num_tokens": 2989254464.0, "step": 9368 }, { "epoch": 3.4320065955205425, "grad_norm": 0.12244910585439722, "learning_rate": 1.2518667912280252e-05, "loss": 0.3871, "num_tokens": 2990148741.0, "step": 9369 }, { "epoch": 3.432373013328448, "grad_norm": 0.1417299483025817, "learning_rate": 1.2515037677712456e-05, "loss": 0.4088, "num_tokens": 2990942950.0, "step": 9370 }, { "epoch": 3.432739431136353, "grad_norm": 0.13102384701564748, "learning_rate": 1.2511407977166953e-05, "loss": 0.4035, "num_tokens": 2991797829.0, "step": 9371 }, { "epoch": 3.4331058489442587, "grad_norm": 0.13677985890296745, "learning_rate": 1.2507778810848104e-05, "loss": 0.4256, "num_tokens": 2992710392.0, "step": 9372 }, { "epoch": 3.433472266752164, "grad_norm": 0.1278252980645464, "learning_rate": 1.2504150178960228e-05, "loss": 0.411, "num_tokens": 2993560142.0, "step": 9373 }, { "epoch": 3.4338386845600697, "grad_norm": 0.13845680876280234, "learning_rate": 1.2500522081707629e-05, "loss": 0.379, "num_tokens": 2994211727.0, "step": 9374 }, { "epoch": 3.434205102367975, "grad_norm": 0.15640725329910324, "learning_rate": 1.2496894519294574e-05, "loss": 0.4353, "num_tokens": 2994894826.0, "step": 9375 }, { "epoch": 3.4345715201758806, "grad_norm": 0.12739797675976466, "learning_rate": 1.2493267491925298e-05, "loss": 0.4009, "num_tokens": 2995734149.0, "step": 9376 }, { "epoch": 3.434937937983786, "grad_norm": 0.14144637032310964, "learning_rate": 1.2489640999804014e-05, "loss": 0.4227, "num_tokens": 2996585326.0, "step": 9377 }, { "epoch": 3.4353043557916916, "grad_norm": 0.13200786853258917, "learning_rate": 1.2486015043134907e-05, "loss": 0.4096, "num_tokens": 2997343149.0, "step": 9378 }, { "epoch": 3.435670773599597, "grad_norm": 0.14946474106276825, "learning_rate": 1.248238962212212e-05, "loss": 0.4376, "num_tokens": 2998149987.0, "step": 9379 }, { "epoch": 3.4360371914075025, "grad_norm": 0.12859019513644052, "learning_rate": 1.2478764736969775e-05, "loss": 0.3831, "num_tokens": 2998958042.0, "step": 9380 }, { "epoch": 3.4364036092154078, "grad_norm": 0.1369151502821235, "learning_rate": 1.2475140387881948e-05, "loss": 0.4237, "num_tokens": 2999671453.0, "step": 9381 }, { "epoch": 3.4367700270233135, "grad_norm": 0.13203302553513327, "learning_rate": 1.2471516575062714e-05, "loss": 0.4152, "num_tokens": 3000489993.0, "step": 9382 }, { "epoch": 3.4371364448312187, "grad_norm": 0.15436458637274217, "learning_rate": 1.246789329871609e-05, "loss": 0.4372, "num_tokens": 3001291110.0, "step": 9383 }, { "epoch": 3.4375028626391244, "grad_norm": 0.1348229066913913, "learning_rate": 1.246427055904607e-05, "loss": 0.4075, "num_tokens": 3002198212.0, "step": 9384 }, { "epoch": 3.4378692804470297, "grad_norm": 0.13660812812174594, "learning_rate": 1.2460648356256639e-05, "loss": 0.4105, "num_tokens": 3003020743.0, "step": 9385 }, { "epoch": 3.4382356982549354, "grad_norm": 0.12559406266409792, "learning_rate": 1.2457026690551719e-05, "loss": 0.4045, "num_tokens": 3003846843.0, "step": 9386 }, { "epoch": 3.4386021160628406, "grad_norm": 0.13659609227042763, "learning_rate": 1.2453405562135217e-05, "loss": 0.3966, "num_tokens": 3004732601.0, "step": 9387 }, { "epoch": 3.4389685338707463, "grad_norm": 0.1502736307855099, "learning_rate": 1.2449784971211018e-05, "loss": 0.4367, "num_tokens": 3005516354.0, "step": 9388 }, { "epoch": 3.4393349516786516, "grad_norm": 0.16335605572496673, "learning_rate": 1.2446164917982966e-05, "loss": 0.4102, "num_tokens": 3006230145.0, "step": 9389 }, { "epoch": 3.4397013694865572, "grad_norm": 0.1446057680306763, "learning_rate": 1.2442545402654873e-05, "loss": 0.3963, "num_tokens": 3006942793.0, "step": 9390 }, { "epoch": 3.4400677872944625, "grad_norm": 0.14237657541890872, "learning_rate": 1.2438926425430529e-05, "loss": 0.3983, "num_tokens": 3007638196.0, "step": 9391 }, { "epoch": 3.4404342051023677, "grad_norm": 0.14122465107233284, "learning_rate": 1.2435307986513685e-05, "loss": 0.3968, "num_tokens": 3008466203.0, "step": 9392 }, { "epoch": 3.4408006229102734, "grad_norm": 0.14454059790204057, "learning_rate": 1.2431690086108073e-05, "loss": 0.4135, "num_tokens": 3009407605.0, "step": 9393 }, { "epoch": 3.441167040718179, "grad_norm": 0.1430718109763116, "learning_rate": 1.2428072724417387e-05, "loss": 0.4133, "num_tokens": 3010206490.0, "step": 9394 }, { "epoch": 3.4415334585260844, "grad_norm": 0.13746348723874952, "learning_rate": 1.242445590164529e-05, "loss": 0.3769, "num_tokens": 3011048585.0, "step": 9395 }, { "epoch": 3.4418998763339896, "grad_norm": 0.1406982035540725, "learning_rate": 1.2420839617995413e-05, "loss": 0.4076, "num_tokens": 3011736531.0, "step": 9396 }, { "epoch": 3.4422662941418953, "grad_norm": 0.14401418117344816, "learning_rate": 1.2417223873671356e-05, "loss": 0.3886, "num_tokens": 3012552309.0, "step": 9397 }, { "epoch": 3.4426327119498006, "grad_norm": 0.1367780906516091, "learning_rate": 1.24136086688767e-05, "loss": 0.3864, "num_tokens": 3013385473.0, "step": 9398 }, { "epoch": 3.4429991297577063, "grad_norm": 0.1476691670877269, "learning_rate": 1.2409994003814996e-05, "loss": 0.4339, "num_tokens": 3014145351.0, "step": 9399 }, { "epoch": 3.4433655475656115, "grad_norm": 0.13429580134433292, "learning_rate": 1.2406379878689748e-05, "loss": 0.3862, "num_tokens": 3014994779.0, "step": 9400 }, { "epoch": 3.4437319653735172, "grad_norm": 0.15224716482168263, "learning_rate": 1.2402766293704435e-05, "loss": 0.4471, "num_tokens": 3015638686.0, "step": 9401 }, { "epoch": 3.4440983831814225, "grad_norm": 0.14460944773071405, "learning_rate": 1.2399153249062516e-05, "loss": 0.4215, "num_tokens": 3016417429.0, "step": 9402 }, { "epoch": 3.444464800989328, "grad_norm": 0.14624592453298213, "learning_rate": 1.2395540744967403e-05, "loss": 0.4257, "num_tokens": 3017202472.0, "step": 9403 }, { "epoch": 3.4448312187972334, "grad_norm": 0.1408541859457763, "learning_rate": 1.2391928781622497e-05, "loss": 0.3969, "num_tokens": 3017989709.0, "step": 9404 }, { "epoch": 3.445197636605139, "grad_norm": 0.1500601437842822, "learning_rate": 1.2388317359231151e-05, "loss": 0.4074, "num_tokens": 3018678280.0, "step": 9405 }, { "epoch": 3.4455640544130444, "grad_norm": 0.13688746203914512, "learning_rate": 1.2384706477996708e-05, "loss": 0.3953, "num_tokens": 3019424724.0, "step": 9406 }, { "epoch": 3.44593047222095, "grad_norm": 0.14664709270169676, "learning_rate": 1.2381096138122457e-05, "loss": 0.3888, "num_tokens": 3020149678.0, "step": 9407 }, { "epoch": 3.4462968900288553, "grad_norm": 0.13783013117519516, "learning_rate": 1.2377486339811671e-05, "loss": 0.3834, "num_tokens": 3020953399.0, "step": 9408 }, { "epoch": 3.446663307836761, "grad_norm": 0.1468649864933453, "learning_rate": 1.237387708326758e-05, "loss": 0.409, "num_tokens": 3021626177.0, "step": 9409 }, { "epoch": 3.4470297256446663, "grad_norm": 0.14028493207633386, "learning_rate": 1.2370268368693408e-05, "loss": 0.3844, "num_tokens": 3022438756.0, "step": 9410 }, { "epoch": 3.447396143452572, "grad_norm": 0.13848454423011966, "learning_rate": 1.2366660196292317e-05, "loss": 0.397, "num_tokens": 3023308296.0, "step": 9411 }, { "epoch": 3.447762561260477, "grad_norm": 0.12920619000203565, "learning_rate": 1.2363052566267469e-05, "loss": 0.3921, "num_tokens": 3024112236.0, "step": 9412 }, { "epoch": 3.448128979068383, "grad_norm": 0.14615743750505503, "learning_rate": 1.2359445478821974e-05, "loss": 0.3794, "num_tokens": 3024869144.0, "step": 9413 }, { "epoch": 3.448495396876288, "grad_norm": 0.13042627827679865, "learning_rate": 1.235583893415891e-05, "loss": 0.3937, "num_tokens": 3025637190.0, "step": 9414 }, { "epoch": 3.448861814684194, "grad_norm": 0.13140523809473692, "learning_rate": 1.2352232932481347e-05, "loss": 0.3591, "num_tokens": 3026366058.0, "step": 9415 }, { "epoch": 3.449228232492099, "grad_norm": 0.13330423297265542, "learning_rate": 1.2348627473992307e-05, "loss": 0.415, "num_tokens": 3027170628.0, "step": 9416 }, { "epoch": 3.4495946503000043, "grad_norm": 0.14308703009265974, "learning_rate": 1.234502255889478e-05, "loss": 0.429, "num_tokens": 3027900132.0, "step": 9417 }, { "epoch": 3.44996106810791, "grad_norm": 0.14595586885238365, "learning_rate": 1.2341418187391722e-05, "loss": 0.3939, "num_tokens": 3028617415.0, "step": 9418 }, { "epoch": 3.4503274859158157, "grad_norm": 0.1521970495610565, "learning_rate": 1.2337814359686084e-05, "loss": 0.447, "num_tokens": 3029330496.0, "step": 9419 }, { "epoch": 3.450693903723721, "grad_norm": 0.1454400668730296, "learning_rate": 1.2334211075980752e-05, "loss": 0.3962, "num_tokens": 3030055068.0, "step": 9420 }, { "epoch": 3.4510603215316262, "grad_norm": 0.13900954625827588, "learning_rate": 1.2330608336478616e-05, "loss": 0.4161, "num_tokens": 3030838109.0, "step": 9421 }, { "epoch": 3.451426739339532, "grad_norm": 0.14001074661121368, "learning_rate": 1.2327006141382507e-05, "loss": 0.3834, "num_tokens": 3031548509.0, "step": 9422 }, { "epoch": 3.451793157147437, "grad_norm": 0.14483489052219242, "learning_rate": 1.2323404490895236e-05, "loss": 0.4076, "num_tokens": 3032198131.0, "step": 9423 }, { "epoch": 3.452159574955343, "grad_norm": 0.14371100559530153, "learning_rate": 1.2319803385219578e-05, "loss": 0.4186, "num_tokens": 3032968430.0, "step": 9424 }, { "epoch": 3.452525992763248, "grad_norm": 0.14217108605541187, "learning_rate": 1.2316202824558283e-05, "loss": 0.3907, "num_tokens": 3033659636.0, "step": 9425 }, { "epoch": 3.452892410571154, "grad_norm": 0.1461369634238663, "learning_rate": 1.2312602809114088e-05, "loss": 0.3935, "num_tokens": 3034282074.0, "step": 9426 }, { "epoch": 3.453258828379059, "grad_norm": 0.13467972907650236, "learning_rate": 1.230900333908966e-05, "loss": 0.3944, "num_tokens": 3035044700.0, "step": 9427 }, { "epoch": 3.4536252461869648, "grad_norm": 0.14636115959677445, "learning_rate": 1.230540441468767e-05, "loss": 0.3934, "num_tokens": 3035724512.0, "step": 9428 }, { "epoch": 3.45399166399487, "grad_norm": 0.134285157452716, "learning_rate": 1.2301806036110737e-05, "loss": 0.433, "num_tokens": 3036615661.0, "step": 9429 }, { "epoch": 3.4543580818027757, "grad_norm": 0.13612225819099552, "learning_rate": 1.2298208203561459e-05, "loss": 0.4333, "num_tokens": 3037397622.0, "step": 9430 }, { "epoch": 3.454724499610681, "grad_norm": 0.15258108085042674, "learning_rate": 1.2294610917242392e-05, "loss": 0.4117, "num_tokens": 3038062320.0, "step": 9431 }, { "epoch": 3.4550909174185866, "grad_norm": 0.15280788964961137, "learning_rate": 1.2291014177356081e-05, "loss": 0.4303, "num_tokens": 3038793635.0, "step": 9432 }, { "epoch": 3.455457335226492, "grad_norm": 0.14075197145791715, "learning_rate": 1.2287417984105037e-05, "loss": 0.4213, "num_tokens": 3039568488.0, "step": 9433 }, { "epoch": 3.4558237530343976, "grad_norm": 0.1471510081725754, "learning_rate": 1.228382233769172e-05, "loss": 0.4085, "num_tokens": 3040253351.0, "step": 9434 }, { "epoch": 3.456190170842303, "grad_norm": 0.13582240184956576, "learning_rate": 1.2280227238318575e-05, "loss": 0.4257, "num_tokens": 3041081859.0, "step": 9435 }, { "epoch": 3.4565565886502085, "grad_norm": 0.14375784405036002, "learning_rate": 1.2276632686188017e-05, "loss": 0.4217, "num_tokens": 3041928010.0, "step": 9436 }, { "epoch": 3.456923006458114, "grad_norm": 0.13313637997226752, "learning_rate": 1.2273038681502414e-05, "loss": 0.3704, "num_tokens": 3042664109.0, "step": 9437 }, { "epoch": 3.4572894242660195, "grad_norm": 0.13676435134967255, "learning_rate": 1.2269445224464134e-05, "loss": 0.4105, "num_tokens": 3043501880.0, "step": 9438 }, { "epoch": 3.4576558420739247, "grad_norm": 0.12738030450093674, "learning_rate": 1.2265852315275478e-05, "loss": 0.3891, "num_tokens": 3044363316.0, "step": 9439 }, { "epoch": 3.4580222598818304, "grad_norm": 0.1491563858507009, "learning_rate": 1.226225995413875e-05, "loss": 0.4298, "num_tokens": 3045133476.0, "step": 9440 }, { "epoch": 3.4583886776897357, "grad_norm": 0.13218502060566295, "learning_rate": 1.22586681412562e-05, "loss": 0.3886, "num_tokens": 3045895264.0, "step": 9441 }, { "epoch": 3.4587550954976414, "grad_norm": 0.1403288984794223, "learning_rate": 1.2255076876830048e-05, "loss": 0.3797, "num_tokens": 3046622826.0, "step": 9442 }, { "epoch": 3.4591215133055466, "grad_norm": 0.14224609678410632, "learning_rate": 1.22514861610625e-05, "loss": 0.4107, "num_tokens": 3047373487.0, "step": 9443 }, { "epoch": 3.4594879311134523, "grad_norm": 0.14662312617254256, "learning_rate": 1.2247895994155719e-05, "loss": 0.4168, "num_tokens": 3048130145.0, "step": 9444 }, { "epoch": 3.4598543489213576, "grad_norm": 0.12300422901654291, "learning_rate": 1.2244306376311826e-05, "loss": 0.3804, "num_tokens": 3049001571.0, "step": 9445 }, { "epoch": 3.460220766729263, "grad_norm": 0.13717013272898296, "learning_rate": 1.2240717307732939e-05, "loss": 0.4121, "num_tokens": 3049846675.0, "step": 9446 }, { "epoch": 3.4605871845371685, "grad_norm": 0.1366412551400977, "learning_rate": 1.2237128788621127e-05, "loss": 0.411, "num_tokens": 3050562311.0, "step": 9447 }, { "epoch": 3.460953602345074, "grad_norm": 0.13929734859762477, "learning_rate": 1.223354081917842e-05, "loss": 0.4194, "num_tokens": 3051331077.0, "step": 9448 }, { "epoch": 3.4613200201529795, "grad_norm": 0.13697189494636086, "learning_rate": 1.2229953399606842e-05, "loss": 0.4178, "num_tokens": 3052189740.0, "step": 9449 }, { "epoch": 3.4616864379608847, "grad_norm": 0.13924917596924638, "learning_rate": 1.2226366530108362e-05, "loss": 0.4091, "num_tokens": 3053050461.0, "step": 9450 }, { "epoch": 3.4620528557687904, "grad_norm": 0.14231867324956343, "learning_rate": 1.2222780210884935e-05, "loss": 0.3809, "num_tokens": 3053780815.0, "step": 9451 }, { "epoch": 3.4624192735766957, "grad_norm": 0.14780650866259337, "learning_rate": 1.2219194442138466e-05, "loss": 0.3713, "num_tokens": 3054456927.0, "step": 9452 }, { "epoch": 3.4627856913846013, "grad_norm": 0.14823794105126523, "learning_rate": 1.221560922407085e-05, "loss": 0.4143, "num_tokens": 3055135431.0, "step": 9453 }, { "epoch": 3.4631521091925066, "grad_norm": 0.14123087354391375, "learning_rate": 1.2212024556883946e-05, "loss": 0.3878, "num_tokens": 3055946203.0, "step": 9454 }, { "epoch": 3.4635185270004123, "grad_norm": 0.1373214085238402, "learning_rate": 1.2208440440779574e-05, "loss": 0.4168, "num_tokens": 3056727151.0, "step": 9455 }, { "epoch": 3.4638849448083175, "grad_norm": 0.14217524268874038, "learning_rate": 1.2204856875959526e-05, "loss": 0.3928, "num_tokens": 3057513347.0, "step": 9456 }, { "epoch": 3.4642513626162232, "grad_norm": 0.13064566648471804, "learning_rate": 1.2201273862625562e-05, "loss": 0.3962, "num_tokens": 3058287090.0, "step": 9457 }, { "epoch": 3.4646177804241285, "grad_norm": 0.1477282950851168, "learning_rate": 1.2197691400979411e-05, "loss": 0.3722, "num_tokens": 3059045670.0, "step": 9458 }, { "epoch": 3.464984198232034, "grad_norm": 0.12838469359498933, "learning_rate": 1.219410949122278e-05, "loss": 0.4097, "num_tokens": 3059871801.0, "step": 9459 }, { "epoch": 3.4653506160399394, "grad_norm": 0.1461916719930239, "learning_rate": 1.2190528133557327e-05, "loss": 0.3866, "num_tokens": 3060621054.0, "step": 9460 }, { "epoch": 3.465717033847845, "grad_norm": 0.13445955064558865, "learning_rate": 1.2186947328184701e-05, "loss": 0.4048, "num_tokens": 3061450831.0, "step": 9461 }, { "epoch": 3.4660834516557504, "grad_norm": 0.14205746303541605, "learning_rate": 1.2183367075306508e-05, "loss": 0.4187, "num_tokens": 3062135196.0, "step": 9462 }, { "epoch": 3.466449869463656, "grad_norm": 0.15243035877388197, "learning_rate": 1.2179787375124317e-05, "loss": 0.4104, "num_tokens": 3062912146.0, "step": 9463 }, { "epoch": 3.4668162872715613, "grad_norm": 0.13536984572881572, "learning_rate": 1.2176208227839668e-05, "loss": 0.3981, "num_tokens": 3063706881.0, "step": 9464 }, { "epoch": 3.467182705079467, "grad_norm": 0.13820262992930737, "learning_rate": 1.2172629633654088e-05, "loss": 0.3788, "num_tokens": 3064478804.0, "step": 9465 }, { "epoch": 3.4675491228873723, "grad_norm": 0.1285853211234838, "learning_rate": 1.2169051592769043e-05, "loss": 0.4111, "num_tokens": 3065286876.0, "step": 9466 }, { "epoch": 3.467915540695278, "grad_norm": 0.1431700137110648, "learning_rate": 1.2165474105386e-05, "loss": 0.3981, "num_tokens": 3066036722.0, "step": 9467 }, { "epoch": 3.468281958503183, "grad_norm": 0.13975186057386538, "learning_rate": 1.2161897171706372e-05, "loss": 0.4317, "num_tokens": 3066858609.0, "step": 9468 }, { "epoch": 3.468648376311089, "grad_norm": 0.1466298954484553, "learning_rate": 1.2158320791931546e-05, "loss": 0.4468, "num_tokens": 3067645648.0, "step": 9469 }, { "epoch": 3.469014794118994, "grad_norm": 0.137409985919708, "learning_rate": 1.2154744966262875e-05, "loss": 0.3895, "num_tokens": 3068433014.0, "step": 9470 }, { "epoch": 3.4693812119268994, "grad_norm": 0.14405721465508942, "learning_rate": 1.2151169694901693e-05, "loss": 0.4145, "num_tokens": 3069231653.0, "step": 9471 }, { "epoch": 3.469747629734805, "grad_norm": 0.15176567728768084, "learning_rate": 1.2147594978049295e-05, "loss": 0.3992, "num_tokens": 3069842270.0, "step": 9472 }, { "epoch": 3.470114047542711, "grad_norm": 0.13294819037010677, "learning_rate": 1.2144020815906932e-05, "loss": 0.4156, "num_tokens": 3070608662.0, "step": 9473 }, { "epoch": 3.470480465350616, "grad_norm": 0.16046367299877975, "learning_rate": 1.2140447208675857e-05, "loss": 0.3974, "num_tokens": 3071247558.0, "step": 9474 }, { "epoch": 3.4708468831585213, "grad_norm": 0.13301703558798073, "learning_rate": 1.2136874156557252e-05, "loss": 0.3857, "num_tokens": 3072002255.0, "step": 9475 }, { "epoch": 3.471213300966427, "grad_norm": 0.15006942951273747, "learning_rate": 1.21333016597523e-05, "loss": 0.4261, "num_tokens": 3072780565.0, "step": 9476 }, { "epoch": 3.4715797187743322, "grad_norm": 0.1466683085156183, "learning_rate": 1.2129729718462136e-05, "loss": 0.4135, "num_tokens": 3073468156.0, "step": 9477 }, { "epoch": 3.471946136582238, "grad_norm": 0.15078724970086985, "learning_rate": 1.2126158332887866e-05, "loss": 0.3803, "num_tokens": 3074162051.0, "step": 9478 }, { "epoch": 3.472312554390143, "grad_norm": 0.14353820195147068, "learning_rate": 1.2122587503230555e-05, "loss": 0.4248, "num_tokens": 3074985424.0, "step": 9479 }, { "epoch": 3.472678972198049, "grad_norm": 0.14348172860840716, "learning_rate": 1.2119017229691275e-05, "loss": 0.3873, "num_tokens": 3075684956.0, "step": 9480 }, { "epoch": 3.473045390005954, "grad_norm": 0.12967510579886773, "learning_rate": 1.2115447512471011e-05, "loss": 0.3902, "num_tokens": 3076513592.0, "step": 9481 }, { "epoch": 3.47341180781386, "grad_norm": 0.14227633241195878, "learning_rate": 1.2111878351770767e-05, "loss": 0.4012, "num_tokens": 3077244906.0, "step": 9482 }, { "epoch": 3.473778225621765, "grad_norm": 0.15203964894649216, "learning_rate": 1.2108309747791484e-05, "loss": 0.4007, "num_tokens": 3078012005.0, "step": 9483 }, { "epoch": 3.4741446434296708, "grad_norm": 0.12813425505778003, "learning_rate": 1.2104741700734084e-05, "loss": 0.4114, "num_tokens": 3078869944.0, "step": 9484 }, { "epoch": 3.474511061237576, "grad_norm": 0.14132710727978567, "learning_rate": 1.210117421079945e-05, "loss": 0.4146, "num_tokens": 3079662193.0, "step": 9485 }, { "epoch": 3.4748774790454817, "grad_norm": 0.13309344487617442, "learning_rate": 1.2097607278188439e-05, "loss": 0.4063, "num_tokens": 3080439998.0, "step": 9486 }, { "epoch": 3.475243896853387, "grad_norm": 0.14100345446136114, "learning_rate": 1.209404090310188e-05, "loss": 0.3829, "num_tokens": 3081208326.0, "step": 9487 }, { "epoch": 3.4756103146612927, "grad_norm": 0.14822687940872084, "learning_rate": 1.2090475085740572e-05, "loss": 0.4304, "num_tokens": 3082024782.0, "step": 9488 }, { "epoch": 3.475976732469198, "grad_norm": 0.13979539260579782, "learning_rate": 1.208690982630527e-05, "loss": 0.4199, "num_tokens": 3082766748.0, "step": 9489 }, { "epoch": 3.4763431502771036, "grad_norm": 0.131956302064254, "learning_rate": 1.2083345124996711e-05, "loss": 0.4236, "num_tokens": 3083591866.0, "step": 9490 }, { "epoch": 3.476709568085009, "grad_norm": 0.1436314954901647, "learning_rate": 1.2079780982015592e-05, "loss": 0.4025, "num_tokens": 3084337403.0, "step": 9491 }, { "epoch": 3.4770759858929146, "grad_norm": 0.15344650684457228, "learning_rate": 1.207621739756257e-05, "loss": 0.3816, "num_tokens": 3085025543.0, "step": 9492 }, { "epoch": 3.47744240370082, "grad_norm": 0.13127431220245556, "learning_rate": 1.2072654371838302e-05, "loss": 0.424, "num_tokens": 3085836332.0, "step": 9493 }, { "epoch": 3.4778088215087255, "grad_norm": 0.1386527697488406, "learning_rate": 1.2069091905043377e-05, "loss": 0.417, "num_tokens": 3086586453.0, "step": 9494 }, { "epoch": 3.4781752393166308, "grad_norm": 0.1393097557390302, "learning_rate": 1.2065529997378383e-05, "loss": 0.3844, "num_tokens": 3087374632.0, "step": 9495 }, { "epoch": 3.4785416571245364, "grad_norm": 0.13243931140729753, "learning_rate": 1.2061968649043856e-05, "loss": 0.3887, "num_tokens": 3088166561.0, "step": 9496 }, { "epoch": 3.4789080749324417, "grad_norm": 0.15271748450000627, "learning_rate": 1.2058407860240302e-05, "loss": 0.4219, "num_tokens": 3088886830.0, "step": 9497 }, { "epoch": 3.4792744927403474, "grad_norm": 0.1399344124471547, "learning_rate": 1.2054847631168207e-05, "loss": 0.392, "num_tokens": 3089687503.0, "step": 9498 }, { "epoch": 3.4796409105482526, "grad_norm": 0.13830240314265688, "learning_rate": 1.205128796202802e-05, "loss": 0.4045, "num_tokens": 3090461505.0, "step": 9499 }, { "epoch": 3.480007328356158, "grad_norm": 0.1304314649165944, "learning_rate": 1.2047728853020147e-05, "loss": 0.3918, "num_tokens": 3091346951.0, "step": 9500 }, { "epoch": 3.4803737461640636, "grad_norm": 0.14018993922579365, "learning_rate": 1.204417030434499e-05, "loss": 0.4164, "num_tokens": 3092127772.0, "step": 9501 }, { "epoch": 3.4807401639719693, "grad_norm": 0.14918273641803284, "learning_rate": 1.2040612316202887e-05, "loss": 0.4051, "num_tokens": 3092847144.0, "step": 9502 }, { "epoch": 3.4811065817798745, "grad_norm": 0.1326140451431718, "learning_rate": 1.2037054888794165e-05, "loss": 0.3999, "num_tokens": 3093626969.0, "step": 9503 }, { "epoch": 3.48147299958778, "grad_norm": 0.13797010250017114, "learning_rate": 1.2033498022319119e-05, "loss": 0.3784, "num_tokens": 3094402113.0, "step": 9504 }, { "epoch": 3.4818394173956855, "grad_norm": 0.13513800692499486, "learning_rate": 1.2029941716978e-05, "loss": 0.393, "num_tokens": 3095120244.0, "step": 9505 }, { "epoch": 3.4822058352035907, "grad_norm": 0.14549808472067619, "learning_rate": 1.2026385972971044e-05, "loss": 0.4277, "num_tokens": 3095813444.0, "step": 9506 }, { "epoch": 3.4825722530114964, "grad_norm": 0.12704428898279505, "learning_rate": 1.2022830790498433e-05, "loss": 0.3902, "num_tokens": 3096714758.0, "step": 9507 }, { "epoch": 3.4829386708194017, "grad_norm": 0.13488105536920866, "learning_rate": 1.2019276169760339e-05, "loss": 0.392, "num_tokens": 3097549631.0, "step": 9508 }, { "epoch": 3.4833050886273074, "grad_norm": 0.1369799267712195, "learning_rate": 1.20157221109569e-05, "loss": 0.4235, "num_tokens": 3098380547.0, "step": 9509 }, { "epoch": 3.4836715064352126, "grad_norm": 0.1556277367262372, "learning_rate": 1.2012168614288213e-05, "loss": 0.4, "num_tokens": 3099124433.0, "step": 9510 }, { "epoch": 3.4840379242431183, "grad_norm": 0.13989209474522743, "learning_rate": 1.2008615679954345e-05, "loss": 0.4053, "num_tokens": 3099928609.0, "step": 9511 }, { "epoch": 3.4844043420510236, "grad_norm": 0.14010725403142682, "learning_rate": 1.2005063308155331e-05, "loss": 0.4452, "num_tokens": 3100808681.0, "step": 9512 }, { "epoch": 3.4847707598589293, "grad_norm": 0.12664061533263157, "learning_rate": 1.2001511499091173e-05, "loss": 0.3957, "num_tokens": 3101656083.0, "step": 9513 }, { "epoch": 3.4851371776668345, "grad_norm": 0.14404537210469287, "learning_rate": 1.1997960252961858e-05, "loss": 0.4388, "num_tokens": 3102482773.0, "step": 9514 }, { "epoch": 3.48550359547474, "grad_norm": 0.12917537288019051, "learning_rate": 1.1994409569967311e-05, "loss": 0.3749, "num_tokens": 3103262330.0, "step": 9515 }, { "epoch": 3.4858700132826455, "grad_norm": 0.13741128513721482, "learning_rate": 1.1990859450307462e-05, "loss": 0.3941, "num_tokens": 3104089442.0, "step": 9516 }, { "epoch": 3.486236431090551, "grad_norm": 0.13750395741139512, "learning_rate": 1.198730989418218e-05, "loss": 0.3677, "num_tokens": 3104781678.0, "step": 9517 }, { "epoch": 3.4866028488984564, "grad_norm": 0.14707955026761907, "learning_rate": 1.1983760901791312e-05, "loss": 0.435, "num_tokens": 3105565038.0, "step": 9518 }, { "epoch": 3.486969266706362, "grad_norm": 0.1350860638973272, "learning_rate": 1.1980212473334673e-05, "loss": 0.3958, "num_tokens": 3106470144.0, "step": 9519 }, { "epoch": 3.4873356845142673, "grad_norm": 0.14358314758227222, "learning_rate": 1.1976664609012044e-05, "loss": 0.4273, "num_tokens": 3107190837.0, "step": 9520 }, { "epoch": 3.487702102322173, "grad_norm": 0.13321459056950535, "learning_rate": 1.197311730902318e-05, "loss": 0.3879, "num_tokens": 3107874874.0, "step": 9521 }, { "epoch": 3.4880685201300783, "grad_norm": 0.13937937801071967, "learning_rate": 1.1969570573567802e-05, "loss": 0.3868, "num_tokens": 3108636274.0, "step": 9522 }, { "epoch": 3.488434937937984, "grad_norm": 0.1538377405978422, "learning_rate": 1.19660244028456e-05, "loss": 0.4267, "num_tokens": 3109282959.0, "step": 9523 }, { "epoch": 3.4888013557458892, "grad_norm": 0.1505613288125269, "learning_rate": 1.1962478797056232e-05, "loss": 0.4442, "num_tokens": 3109952086.0, "step": 9524 }, { "epoch": 3.4891677735537945, "grad_norm": 0.13133092090718088, "learning_rate": 1.1958933756399307e-05, "loss": 0.39, "num_tokens": 3110706562.0, "step": 9525 }, { "epoch": 3.4895341913617, "grad_norm": 0.14121900639153367, "learning_rate": 1.1955389281074438e-05, "loss": 0.4123, "num_tokens": 3111480763.0, "step": 9526 }, { "epoch": 3.489900609169606, "grad_norm": 0.12610594152765095, "learning_rate": 1.1951845371281176e-05, "loss": 0.3917, "num_tokens": 3112394276.0, "step": 9527 }, { "epoch": 3.490267026977511, "grad_norm": 0.15029921346187083, "learning_rate": 1.1948302027219047e-05, "loss": 0.4449, "num_tokens": 3113066222.0, "step": 9528 }, { "epoch": 3.4906334447854164, "grad_norm": 0.13805861573586406, "learning_rate": 1.1944759249087559e-05, "loss": 0.405, "num_tokens": 3113785347.0, "step": 9529 }, { "epoch": 3.490999862593322, "grad_norm": 0.1348995376186411, "learning_rate": 1.1941217037086172e-05, "loss": 0.407, "num_tokens": 3114578919.0, "step": 9530 }, { "epoch": 3.4913662804012273, "grad_norm": 0.1442825408372625, "learning_rate": 1.1937675391414312e-05, "loss": 0.404, "num_tokens": 3115325830.0, "step": 9531 }, { "epoch": 3.491732698209133, "grad_norm": 0.13124939300269095, "learning_rate": 1.1934134312271394e-05, "loss": 0.3999, "num_tokens": 3116119723.0, "step": 9532 }, { "epoch": 3.4920991160170383, "grad_norm": 0.14453542458501492, "learning_rate": 1.1930593799856785e-05, "loss": 0.408, "num_tokens": 3116813071.0, "step": 9533 }, { "epoch": 3.492465533824944, "grad_norm": 0.1332379202375917, "learning_rate": 1.1927053854369807e-05, "loss": 0.3996, "num_tokens": 3117625846.0, "step": 9534 }, { "epoch": 3.492831951632849, "grad_norm": 0.12996316848764222, "learning_rate": 1.1923514476009789e-05, "loss": 0.375, "num_tokens": 3118468734.0, "step": 9535 }, { "epoch": 3.493198369440755, "grad_norm": 0.1425609825879159, "learning_rate": 1.1919975664975986e-05, "loss": 0.4409, "num_tokens": 3119207755.0, "step": 9536 }, { "epoch": 3.49356478724866, "grad_norm": 0.13608414014388132, "learning_rate": 1.1916437421467656e-05, "loss": 0.3866, "num_tokens": 3120007528.0, "step": 9537 }, { "epoch": 3.493931205056566, "grad_norm": 0.13687528533906185, "learning_rate": 1.1912899745684003e-05, "loss": 0.4078, "num_tokens": 3120810293.0, "step": 9538 }, { "epoch": 3.494297622864471, "grad_norm": 0.1387528632300149, "learning_rate": 1.1909362637824203e-05, "loss": 0.3856, "num_tokens": 3121666815.0, "step": 9539 }, { "epoch": 3.494664040672377, "grad_norm": 0.13967797230809667, "learning_rate": 1.1905826098087405e-05, "loss": 0.3981, "num_tokens": 3122486881.0, "step": 9540 }, { "epoch": 3.495030458480282, "grad_norm": 0.13444968990351752, "learning_rate": 1.1902290126672714e-05, "loss": 0.404, "num_tokens": 3123270731.0, "step": 9541 }, { "epoch": 3.4953968762881877, "grad_norm": 0.15076992911230172, "learning_rate": 1.1898754723779221e-05, "loss": 0.4103, "num_tokens": 3123909682.0, "step": 9542 }, { "epoch": 3.495763294096093, "grad_norm": 0.14220113408095555, "learning_rate": 1.1895219889605976e-05, "loss": 0.4115, "num_tokens": 3124694876.0, "step": 9543 }, { "epoch": 3.4961297119039987, "grad_norm": 0.13414598244326412, "learning_rate": 1.1891685624352001e-05, "loss": 0.4214, "num_tokens": 3125478444.0, "step": 9544 }, { "epoch": 3.496496129711904, "grad_norm": 0.135064357516593, "learning_rate": 1.1888151928216276e-05, "loss": 0.4195, "num_tokens": 3126211029.0, "step": 9545 }, { "epoch": 3.4968625475198096, "grad_norm": 0.1358751717669381, "learning_rate": 1.1884618801397755e-05, "loss": 0.4338, "num_tokens": 3127126830.0, "step": 9546 }, { "epoch": 3.497228965327715, "grad_norm": 0.13746422936527514, "learning_rate": 1.1881086244095358e-05, "loss": 0.4052, "num_tokens": 3127928565.0, "step": 9547 }, { "epoch": 3.4975953831356206, "grad_norm": 0.1460035103346301, "learning_rate": 1.1877554256507984e-05, "loss": 0.4078, "num_tokens": 3128617866.0, "step": 9548 }, { "epoch": 3.497961800943526, "grad_norm": 0.1438435447183426, "learning_rate": 1.187402283883448e-05, "loss": 0.4304, "num_tokens": 3129335889.0, "step": 9549 }, { "epoch": 3.498328218751431, "grad_norm": 0.13810493595627893, "learning_rate": 1.1870491991273684e-05, "loss": 0.4192, "num_tokens": 3130060588.0, "step": 9550 }, { "epoch": 3.4986946365593368, "grad_norm": 0.14797233765689993, "learning_rate": 1.1866961714024382e-05, "loss": 0.4065, "num_tokens": 3130815614.0, "step": 9551 }, { "epoch": 3.4990610543672425, "grad_norm": 0.1275498120979259, "learning_rate": 1.1863432007285335e-05, "loss": 0.4108, "num_tokens": 3131567662.0, "step": 9552 }, { "epoch": 3.4994274721751477, "grad_norm": 0.12448705565805866, "learning_rate": 1.1859902871255269e-05, "loss": 0.3768, "num_tokens": 3132425907.0, "step": 9553 }, { "epoch": 3.499793889983053, "grad_norm": 0.13397352850411967, "learning_rate": 1.1856374306132894e-05, "loss": 0.4048, "num_tokens": 3133203266.0, "step": 9554 }, { "epoch": 3.5001603077909587, "grad_norm": 0.13644659672982137, "learning_rate": 1.1852846312116864e-05, "loss": 0.4, "num_tokens": 3134017606.0, "step": 9555 }, { "epoch": 3.5005267255988644, "grad_norm": 0.1381905867532475, "learning_rate": 1.184931888940582e-05, "loss": 0.4021, "num_tokens": 3134794861.0, "step": 9556 }, { "epoch": 3.5008931434067696, "grad_norm": 0.140596670523238, "learning_rate": 1.1845792038198358e-05, "loss": 0.4086, "num_tokens": 3135556612.0, "step": 9557 }, { "epoch": 3.501259561214675, "grad_norm": 0.1365557399708738, "learning_rate": 1.1842265758693043e-05, "loss": 0.4028, "num_tokens": 3136273360.0, "step": 9558 }, { "epoch": 3.5016259790225805, "grad_norm": 0.14085426586882763, "learning_rate": 1.1838740051088421e-05, "loss": 0.4346, "num_tokens": 3137021581.0, "step": 9559 }, { "epoch": 3.501992396830486, "grad_norm": 0.13780849339562928, "learning_rate": 1.1835214915582993e-05, "loss": 0.4188, "num_tokens": 3137772570.0, "step": 9560 }, { "epoch": 3.5023588146383915, "grad_norm": 0.135527249296766, "learning_rate": 1.1831690352375232e-05, "loss": 0.4124, "num_tokens": 3138585374.0, "step": 9561 }, { "epoch": 3.5027252324462967, "grad_norm": 0.13527170627411422, "learning_rate": 1.1828166361663566e-05, "loss": 0.4148, "num_tokens": 3139424963.0, "step": 9562 }, { "epoch": 3.5030916502542024, "grad_norm": 0.1283672820439037, "learning_rate": 1.182464294364642e-05, "loss": 0.4296, "num_tokens": 3140414552.0, "step": 9563 }, { "epoch": 3.5034580680621077, "grad_norm": 0.12723570335406303, "learning_rate": 1.1821120098522154e-05, "loss": 0.403, "num_tokens": 3141186720.0, "step": 9564 }, { "epoch": 3.5038244858700134, "grad_norm": 0.15244889383927698, "learning_rate": 1.1817597826489129e-05, "loss": 0.4321, "num_tokens": 3141892800.0, "step": 9565 }, { "epoch": 3.5041909036779186, "grad_norm": 0.14233782192815628, "learning_rate": 1.1814076127745642e-05, "loss": 0.3971, "num_tokens": 3142665179.0, "step": 9566 }, { "epoch": 3.5045573214858243, "grad_norm": 0.1403403292723673, "learning_rate": 1.181055500248998e-05, "loss": 0.4292, "num_tokens": 3143438860.0, "step": 9567 }, { "epoch": 3.5049237392937296, "grad_norm": 0.14415798094924445, "learning_rate": 1.1807034450920377e-05, "loss": 0.4082, "num_tokens": 3144139928.0, "step": 9568 }, { "epoch": 3.5052901571016353, "grad_norm": 0.14062353817484807, "learning_rate": 1.1803514473235063e-05, "loss": 0.4122, "num_tokens": 3144886292.0, "step": 9569 }, { "epoch": 3.5056565749095405, "grad_norm": 0.1460345840456592, "learning_rate": 1.1799995069632202e-05, "loss": 0.395, "num_tokens": 3145636969.0, "step": 9570 }, { "epoch": 3.506022992717446, "grad_norm": 0.1522136672292244, "learning_rate": 1.179647624030996e-05, "loss": 0.3847, "num_tokens": 3146244324.0, "step": 9571 }, { "epoch": 3.5063894105253515, "grad_norm": 0.1499612137902174, "learning_rate": 1.1792957985466452e-05, "loss": 0.3813, "num_tokens": 3146910375.0, "step": 9572 }, { "epoch": 3.506755828333257, "grad_norm": 0.1323762564792577, "learning_rate": 1.1789440305299755e-05, "loss": 0.3904, "num_tokens": 3147678888.0, "step": 9573 }, { "epoch": 3.5071222461411624, "grad_norm": 0.13722701469416293, "learning_rate": 1.1785923200007923e-05, "loss": 0.3801, "num_tokens": 3148379378.0, "step": 9574 }, { "epoch": 3.5074886639490677, "grad_norm": 0.15183160646954727, "learning_rate": 1.1782406669788973e-05, "loss": 0.4401, "num_tokens": 3149044267.0, "step": 9575 }, { "epoch": 3.5078550817569734, "grad_norm": 0.13670032127949114, "learning_rate": 1.1778890714840899e-05, "loss": 0.4182, "num_tokens": 3149805341.0, "step": 9576 }, { "epoch": 3.508221499564879, "grad_norm": 0.15259579915586427, "learning_rate": 1.1775375335361658e-05, "loss": 0.4476, "num_tokens": 3150538429.0, "step": 9577 }, { "epoch": 3.5085879173727843, "grad_norm": 0.15017432366796057, "learning_rate": 1.1771860531549174e-05, "loss": 0.3988, "num_tokens": 3151189889.0, "step": 9578 }, { "epoch": 3.5089543351806896, "grad_norm": 0.15441738394205315, "learning_rate": 1.176834630360133e-05, "loss": 0.3935, "num_tokens": 3151856546.0, "step": 9579 }, { "epoch": 3.5093207529885952, "grad_norm": 0.1532034050586164, "learning_rate": 1.1764832651715978e-05, "loss": 0.3922, "num_tokens": 3152487366.0, "step": 9580 }, { "epoch": 3.509687170796501, "grad_norm": 0.1333346675619493, "learning_rate": 1.1761319576090964e-05, "loss": 0.3926, "num_tokens": 3153279101.0, "step": 9581 }, { "epoch": 3.510053588604406, "grad_norm": 0.13485535832095466, "learning_rate": 1.1757807076924068e-05, "loss": 0.3865, "num_tokens": 3154083095.0, "step": 9582 }, { "epoch": 3.5104200064123114, "grad_norm": 0.14354821048183072, "learning_rate": 1.1754295154413047e-05, "loss": 0.4309, "num_tokens": 3154866480.0, "step": 9583 }, { "epoch": 3.510786424220217, "grad_norm": 0.13575548855041586, "learning_rate": 1.1750783808755636e-05, "loss": 0.3999, "num_tokens": 3155669768.0, "step": 9584 }, { "epoch": 3.511152842028123, "grad_norm": 0.13814339884883578, "learning_rate": 1.1747273040149536e-05, "loss": 0.4076, "num_tokens": 3156462665.0, "step": 9585 }, { "epoch": 3.511519259836028, "grad_norm": 0.13032733534951393, "learning_rate": 1.1743762848792394e-05, "loss": 0.4203, "num_tokens": 3157280667.0, "step": 9586 }, { "epoch": 3.5118856776439333, "grad_norm": 0.15647797949812645, "learning_rate": 1.174025323488186e-05, "loss": 0.4207, "num_tokens": 3157895035.0, "step": 9587 }, { "epoch": 3.512252095451839, "grad_norm": 0.12509305362224293, "learning_rate": 1.1736744198615519e-05, "loss": 0.4107, "num_tokens": 3158729867.0, "step": 9588 }, { "epoch": 3.5126185132597443, "grad_norm": 0.14608090004678928, "learning_rate": 1.1733235740190936e-05, "loss": 0.4313, "num_tokens": 3159413092.0, "step": 9589 }, { "epoch": 3.51298493106765, "grad_norm": 0.13504512216996864, "learning_rate": 1.1729727859805655e-05, "loss": 0.4192, "num_tokens": 3160141764.0, "step": 9590 }, { "epoch": 3.5133513488755552, "grad_norm": 0.13928125123889204, "learning_rate": 1.1726220557657162e-05, "loss": 0.4041, "num_tokens": 3160820731.0, "step": 9591 }, { "epoch": 3.513717766683461, "grad_norm": 0.14087057447095638, "learning_rate": 1.1722713833942942e-05, "loss": 0.4262, "num_tokens": 3161564243.0, "step": 9592 }, { "epoch": 3.514084184491366, "grad_norm": 0.14149446309450825, "learning_rate": 1.171920768886042e-05, "loss": 0.3918, "num_tokens": 3162355414.0, "step": 9593 }, { "epoch": 3.514450602299272, "grad_norm": 0.1287358249834815, "learning_rate": 1.1715702122607007e-05, "loss": 0.3843, "num_tokens": 3163105935.0, "step": 9594 }, { "epoch": 3.514817020107177, "grad_norm": 0.1398631790347411, "learning_rate": 1.1712197135380061e-05, "loss": 0.4408, "num_tokens": 3163867712.0, "step": 9595 }, { "epoch": 3.515183437915083, "grad_norm": 0.1418130604219927, "learning_rate": 1.1708692727376923e-05, "loss": 0.4179, "num_tokens": 3164614678.0, "step": 9596 }, { "epoch": 3.515549855722988, "grad_norm": 0.13828136586596468, "learning_rate": 1.1705188898794898e-05, "loss": 0.4009, "num_tokens": 3165341014.0, "step": 9597 }, { "epoch": 3.5159162735308938, "grad_norm": 0.14415629953025066, "learning_rate": 1.1701685649831272e-05, "loss": 0.401, "num_tokens": 3166044608.0, "step": 9598 }, { "epoch": 3.516282691338799, "grad_norm": 0.16164135179841915, "learning_rate": 1.1698182980683272e-05, "loss": 0.4, "num_tokens": 3166702821.0, "step": 9599 }, { "epoch": 3.5166491091467043, "grad_norm": 0.1303398824413033, "learning_rate": 1.1694680891548109e-05, "loss": 0.4023, "num_tokens": 3167515179.0, "step": 9600 }, { "epoch": 3.51701552695461, "grad_norm": 0.1370417877915701, "learning_rate": 1.1691179382622956e-05, "loss": 0.3797, "num_tokens": 3168301567.0, "step": 9601 }, { "epoch": 3.5173819447625156, "grad_norm": 0.15057744751624252, "learning_rate": 1.1687678454104957e-05, "loss": 0.4172, "num_tokens": 3169050800.0, "step": 9602 }, { "epoch": 3.517748362570421, "grad_norm": 0.14566496512775204, "learning_rate": 1.1684178106191213e-05, "loss": 0.4079, "num_tokens": 3169817346.0, "step": 9603 }, { "epoch": 3.518114780378326, "grad_norm": 0.14128301945567795, "learning_rate": 1.1680678339078808e-05, "loss": 0.4094, "num_tokens": 3170499457.0, "step": 9604 }, { "epoch": 3.518481198186232, "grad_norm": 0.1534861764451417, "learning_rate": 1.1677179152964789e-05, "loss": 0.4081, "num_tokens": 3171244174.0, "step": 9605 }, { "epoch": 3.5188476159941375, "grad_norm": 0.14351880457391517, "learning_rate": 1.1673680548046163e-05, "loss": 0.4007, "num_tokens": 3172064875.0, "step": 9606 }, { "epoch": 3.519214033802043, "grad_norm": 0.13971509874750504, "learning_rate": 1.167018252451991e-05, "loss": 0.4203, "num_tokens": 3172850926.0, "step": 9607 }, { "epoch": 3.519580451609948, "grad_norm": 0.13439481697826244, "learning_rate": 1.166668508258297e-05, "loss": 0.3811, "num_tokens": 3173615483.0, "step": 9608 }, { "epoch": 3.5199468694178537, "grad_norm": 0.13836464086676656, "learning_rate": 1.1663188222432264e-05, "loss": 0.4207, "num_tokens": 3174397122.0, "step": 9609 }, { "epoch": 3.5203132872257594, "grad_norm": 0.1261038119595321, "learning_rate": 1.1659691944264666e-05, "loss": 0.4205, "num_tokens": 3175240877.0, "step": 9610 }, { "epoch": 3.5206797050336647, "grad_norm": 0.14101938470246153, "learning_rate": 1.165619624827703e-05, "loss": 0.4146, "num_tokens": 3176027783.0, "step": 9611 }, { "epoch": 3.52104612284157, "grad_norm": 0.13100371310036438, "learning_rate": 1.1652701134666167e-05, "loss": 0.4085, "num_tokens": 3176867814.0, "step": 9612 }, { "epoch": 3.5214125406494756, "grad_norm": 0.1347804276740093, "learning_rate": 1.1649206603628858e-05, "loss": 0.4378, "num_tokens": 3177820392.0, "step": 9613 }, { "epoch": 3.521778958457381, "grad_norm": 0.15294232743219752, "learning_rate": 1.1645712655361847e-05, "loss": 0.418, "num_tokens": 3178472020.0, "step": 9614 }, { "epoch": 3.5221453762652866, "grad_norm": 0.13899727984214497, "learning_rate": 1.1642219290061863e-05, "loss": 0.418, "num_tokens": 3179250739.0, "step": 9615 }, { "epoch": 3.522511794073192, "grad_norm": 0.14376699764463466, "learning_rate": 1.1638726507925582e-05, "loss": 0.4113, "num_tokens": 3180111536.0, "step": 9616 }, { "epoch": 3.5228782118810975, "grad_norm": 0.14849842708410813, "learning_rate": 1.1635234309149648e-05, "loss": 0.3944, "num_tokens": 3180831285.0, "step": 9617 }, { "epoch": 3.5232446296890028, "grad_norm": 0.15101135906156027, "learning_rate": 1.163174269393069e-05, "loss": 0.4205, "num_tokens": 3181586829.0, "step": 9618 }, { "epoch": 3.5236110474969085, "grad_norm": 0.14428705439375775, "learning_rate": 1.1628251662465284e-05, "loss": 0.4345, "num_tokens": 3182315706.0, "step": 9619 }, { "epoch": 3.5239774653048137, "grad_norm": 0.14748076560250212, "learning_rate": 1.1624761214949993e-05, "loss": 0.4043, "num_tokens": 3182982369.0, "step": 9620 }, { "epoch": 3.5243438831127194, "grad_norm": 0.14161554694589676, "learning_rate": 1.162127135158133e-05, "loss": 0.4209, "num_tokens": 3183774342.0, "step": 9621 }, { "epoch": 3.5247103009206247, "grad_norm": 0.14989148425710536, "learning_rate": 1.1617782072555782e-05, "loss": 0.4156, "num_tokens": 3184454360.0, "step": 9622 }, { "epoch": 3.5250767187285303, "grad_norm": 0.13592111299104312, "learning_rate": 1.1614293378069792e-05, "loss": 0.4048, "num_tokens": 3185233269.0, "step": 9623 }, { "epoch": 3.5254431365364356, "grad_norm": 0.14074411028285486, "learning_rate": 1.16108052683198e-05, "loss": 0.4291, "num_tokens": 3185980049.0, "step": 9624 }, { "epoch": 3.5258095543443413, "grad_norm": 0.12697492844644645, "learning_rate": 1.1607317743502173e-05, "loss": 0.3906, "num_tokens": 3186916740.0, "step": 9625 }, { "epoch": 3.5261759721522465, "grad_norm": 0.1470404281691313, "learning_rate": 1.1603830803813285e-05, "loss": 0.3844, "num_tokens": 3187627672.0, "step": 9626 }, { "epoch": 3.5265423899601522, "grad_norm": 0.1330153173555681, "learning_rate": 1.1600344449449448e-05, "loss": 0.4215, "num_tokens": 3188496183.0, "step": 9627 }, { "epoch": 3.5269088077680575, "grad_norm": 0.1454176298431643, "learning_rate": 1.159685868060695e-05, "loss": 0.3892, "num_tokens": 3189247548.0, "step": 9628 }, { "epoch": 3.5272752255759627, "grad_norm": 0.14316570862562514, "learning_rate": 1.159337349748205e-05, "loss": 0.4105, "num_tokens": 3189904296.0, "step": 9629 }, { "epoch": 3.5276416433838684, "grad_norm": 0.14958715808171894, "learning_rate": 1.1589888900270965e-05, "loss": 0.3991, "num_tokens": 3190566349.0, "step": 9630 }, { "epoch": 3.528008061191774, "grad_norm": 0.14082096080756748, "learning_rate": 1.1586404889169884e-05, "loss": 0.4059, "num_tokens": 3191331630.0, "step": 9631 }, { "epoch": 3.5283744789996794, "grad_norm": 0.13877825479959574, "learning_rate": 1.1582921464374976e-05, "loss": 0.4008, "num_tokens": 3192045838.0, "step": 9632 }, { "epoch": 3.5287408968075846, "grad_norm": 0.14223461036899057, "learning_rate": 1.1579438626082358e-05, "loss": 0.4128, "num_tokens": 3192844186.0, "step": 9633 }, { "epoch": 3.5291073146154903, "grad_norm": 0.13554658787256985, "learning_rate": 1.157595637448812e-05, "loss": 0.4079, "num_tokens": 3193635704.0, "step": 9634 }, { "epoch": 3.529473732423396, "grad_norm": 0.13590899372081286, "learning_rate": 1.1572474709788318e-05, "loss": 0.4017, "num_tokens": 3194432237.0, "step": 9635 }, { "epoch": 3.5298401502313013, "grad_norm": 0.11938881938301553, "learning_rate": 1.1568993632178972e-05, "loss": 0.3981, "num_tokens": 3195416498.0, "step": 9636 }, { "epoch": 3.5302065680392065, "grad_norm": 0.1447121600834469, "learning_rate": 1.1565513141856086e-05, "loss": 0.3924, "num_tokens": 3196092258.0, "step": 9637 }, { "epoch": 3.530572985847112, "grad_norm": 0.13279174371933056, "learning_rate": 1.1562033239015609e-05, "loss": 0.3707, "num_tokens": 3196900311.0, "step": 9638 }, { "epoch": 3.530939403655018, "grad_norm": 0.1302178380875612, "learning_rate": 1.1558553923853475e-05, "loss": 0.4044, "num_tokens": 3197701474.0, "step": 9639 }, { "epoch": 3.531305821462923, "grad_norm": 0.12705200008332573, "learning_rate": 1.155507519656557e-05, "loss": 0.4107, "num_tokens": 3198668116.0, "step": 9640 }, { "epoch": 3.5316722392708284, "grad_norm": 0.1437711393356452, "learning_rate": 1.1551597057347747e-05, "loss": 0.4182, "num_tokens": 3199335515.0, "step": 9641 }, { "epoch": 3.532038657078734, "grad_norm": 0.13637738837024463, "learning_rate": 1.154811950639585e-05, "loss": 0.3953, "num_tokens": 3200056388.0, "step": 9642 }, { "epoch": 3.5324050748866394, "grad_norm": 0.13509517115898242, "learning_rate": 1.1544642543905657e-05, "loss": 0.3546, "num_tokens": 3200838370.0, "step": 9643 }, { "epoch": 3.532771492694545, "grad_norm": 0.1383187698686603, "learning_rate": 1.154116617007293e-05, "loss": 0.4296, "num_tokens": 3201563955.0, "step": 9644 }, { "epoch": 3.5331379105024503, "grad_norm": 0.13851296397161464, "learning_rate": 1.1537690385093403e-05, "loss": 0.4038, "num_tokens": 3202372287.0, "step": 9645 }, { "epoch": 3.533504328310356, "grad_norm": 0.15604089574774985, "learning_rate": 1.1534215189162765e-05, "loss": 0.3848, "num_tokens": 3203049298.0, "step": 9646 }, { "epoch": 3.5338707461182612, "grad_norm": 0.12653227266337247, "learning_rate": 1.153074058247667e-05, "loss": 0.3666, "num_tokens": 3203802645.0, "step": 9647 }, { "epoch": 3.534237163926167, "grad_norm": 0.13440338026242257, "learning_rate": 1.1527266565230758e-05, "loss": 0.4129, "num_tokens": 3204621451.0, "step": 9648 }, { "epoch": 3.534603581734072, "grad_norm": 0.13798229083612912, "learning_rate": 1.152379313762062e-05, "loss": 0.4303, "num_tokens": 3205371592.0, "step": 9649 }, { "epoch": 3.534969999541978, "grad_norm": 0.13935026628191272, "learning_rate": 1.1520320299841812e-05, "loss": 0.418, "num_tokens": 3206045838.0, "step": 9650 }, { "epoch": 3.535336417349883, "grad_norm": 0.1300694511852528, "learning_rate": 1.1516848052089857e-05, "loss": 0.3828, "num_tokens": 3206874200.0, "step": 9651 }, { "epoch": 3.535702835157789, "grad_norm": 0.15104574039745688, "learning_rate": 1.1513376394560256e-05, "loss": 0.4361, "num_tokens": 3207564252.0, "step": 9652 }, { "epoch": 3.536069252965694, "grad_norm": 0.1465973486698805, "learning_rate": 1.1509905327448477e-05, "loss": 0.3924, "num_tokens": 3208239219.0, "step": 9653 }, { "epoch": 3.5364356707735993, "grad_norm": 0.13118181759285683, "learning_rate": 1.1506434850949941e-05, "loss": 0.4239, "num_tokens": 3209005376.0, "step": 9654 }, { "epoch": 3.536802088581505, "grad_norm": 0.15014159054038503, "learning_rate": 1.1502964965260042e-05, "loss": 0.3723, "num_tokens": 3209597707.0, "step": 9655 }, { "epoch": 3.5371685063894107, "grad_norm": 0.13462297367431203, "learning_rate": 1.1499495670574143e-05, "loss": 0.4152, "num_tokens": 3210477781.0, "step": 9656 }, { "epoch": 3.537534924197316, "grad_norm": 0.14676340027833906, "learning_rate": 1.1496026967087573e-05, "loss": 0.4173, "num_tokens": 3211268099.0, "step": 9657 }, { "epoch": 3.537901342005221, "grad_norm": 0.13829790848754095, "learning_rate": 1.1492558854995617e-05, "loss": 0.4045, "num_tokens": 3212147719.0, "step": 9658 }, { "epoch": 3.538267759813127, "grad_norm": 0.14207677109412412, "learning_rate": 1.1489091334493545e-05, "loss": 0.4282, "num_tokens": 3212886035.0, "step": 9659 }, { "epoch": 3.5386341776210326, "grad_norm": 0.14491860081787755, "learning_rate": 1.1485624405776591e-05, "loss": 0.3735, "num_tokens": 3213606035.0, "step": 9660 }, { "epoch": 3.539000595428938, "grad_norm": 0.14225373975153613, "learning_rate": 1.1482158069039948e-05, "loss": 0.3767, "num_tokens": 3214342898.0, "step": 9661 }, { "epoch": 3.539367013236843, "grad_norm": 0.14586640658644132, "learning_rate": 1.1478692324478773e-05, "loss": 0.4342, "num_tokens": 3215118275.0, "step": 9662 }, { "epoch": 3.539733431044749, "grad_norm": 0.1448786103595186, "learning_rate": 1.1475227172288187e-05, "loss": 0.4286, "num_tokens": 3215847976.0, "step": 9663 }, { "epoch": 3.5400998488526545, "grad_norm": 0.13268876554612177, "learning_rate": 1.1471762612663303e-05, "loss": 0.3937, "num_tokens": 3216679630.0, "step": 9664 }, { "epoch": 3.5404662666605597, "grad_norm": 0.13961925848955056, "learning_rate": 1.1468298645799165e-05, "loss": 0.4158, "num_tokens": 3217540505.0, "step": 9665 }, { "epoch": 3.540832684468465, "grad_norm": 0.13478207697955671, "learning_rate": 1.1464835271890817e-05, "loss": 0.4065, "num_tokens": 3218328725.0, "step": 9666 }, { "epoch": 3.5411991022763707, "grad_norm": 0.13665458537665887, "learning_rate": 1.1461372491133245e-05, "loss": 0.4226, "num_tokens": 3219109751.0, "step": 9667 }, { "epoch": 3.541565520084276, "grad_norm": 0.13910199288289443, "learning_rate": 1.1457910303721412e-05, "loss": 0.3966, "num_tokens": 3219961387.0, "step": 9668 }, { "epoch": 3.5419319378921816, "grad_norm": 0.1318848113485011, "learning_rate": 1.1454448709850238e-05, "loss": 0.402, "num_tokens": 3220755043.0, "step": 9669 }, { "epoch": 3.542298355700087, "grad_norm": 0.14393326162814074, "learning_rate": 1.1450987709714631e-05, "loss": 0.3996, "num_tokens": 3221516025.0, "step": 9670 }, { "epoch": 3.5426647735079926, "grad_norm": 0.1487140076700661, "learning_rate": 1.1447527303509451e-05, "loss": 0.3846, "num_tokens": 3222103570.0, "step": 9671 }, { "epoch": 3.543031191315898, "grad_norm": 0.13886361517949314, "learning_rate": 1.144406749142951e-05, "loss": 0.4225, "num_tokens": 3222882625.0, "step": 9672 }, { "epoch": 3.5433976091238035, "grad_norm": 0.12932216451158646, "learning_rate": 1.1440608273669624e-05, "loss": 0.3975, "num_tokens": 3223745082.0, "step": 9673 }, { "epoch": 3.5437640269317088, "grad_norm": 0.14000049177788257, "learning_rate": 1.1437149650424542e-05, "loss": 0.3888, "num_tokens": 3224477925.0, "step": 9674 }, { "epoch": 3.5441304447396145, "grad_norm": 0.14316815642321434, "learning_rate": 1.1433691621888985e-05, "loss": 0.4048, "num_tokens": 3225148496.0, "step": 9675 }, { "epoch": 3.5444968625475197, "grad_norm": 0.13135745787623343, "learning_rate": 1.1430234188257664e-05, "loss": 0.3961, "num_tokens": 3225976504.0, "step": 9676 }, { "epoch": 3.5448632803554254, "grad_norm": 0.13431338269310972, "learning_rate": 1.1426777349725227e-05, "loss": 0.3807, "num_tokens": 3226664156.0, "step": 9677 }, { "epoch": 3.5452296981633307, "grad_norm": 0.1611592174991206, "learning_rate": 1.14233211064863e-05, "loss": 0.4495, "num_tokens": 3227383745.0, "step": 9678 }, { "epoch": 3.5455961159712364, "grad_norm": 0.14385125490080558, "learning_rate": 1.1419865458735488e-05, "loss": 0.3763, "num_tokens": 3228047891.0, "step": 9679 }, { "epoch": 3.5459625337791416, "grad_norm": 0.14298735925086425, "learning_rate": 1.141641040666733e-05, "loss": 0.4337, "num_tokens": 3228883382.0, "step": 9680 }, { "epoch": 3.5463289515870473, "grad_norm": 0.13609368599177404, "learning_rate": 1.141295595047638e-05, "loss": 0.4039, "num_tokens": 3229720376.0, "step": 9681 }, { "epoch": 3.5466953693949526, "grad_norm": 0.14116562501355348, "learning_rate": 1.1409502090357116e-05, "loss": 0.4333, "num_tokens": 3230510663.0, "step": 9682 }, { "epoch": 3.547061787202858, "grad_norm": 0.15945392976383832, "learning_rate": 1.1406048826503993e-05, "loss": 0.4035, "num_tokens": 3231196928.0, "step": 9683 }, { "epoch": 3.5474282050107635, "grad_norm": 0.13035308493279524, "learning_rate": 1.1402596159111443e-05, "loss": 0.4046, "num_tokens": 3231998484.0, "step": 9684 }, { "epoch": 3.547794622818669, "grad_norm": 0.15241530609240717, "learning_rate": 1.139914408837385e-05, "loss": 0.43, "num_tokens": 3232759941.0, "step": 9685 }, { "epoch": 3.5481610406265744, "grad_norm": 0.12895334318670443, "learning_rate": 1.1395692614485579e-05, "loss": 0.3971, "num_tokens": 3233587993.0, "step": 9686 }, { "epoch": 3.5485274584344797, "grad_norm": 0.13796309611435986, "learning_rate": 1.1392241737640961e-05, "loss": 0.4066, "num_tokens": 3234339968.0, "step": 9687 }, { "epoch": 3.5488938762423854, "grad_norm": 0.1430618794124386, "learning_rate": 1.1388791458034283e-05, "loss": 0.4459, "num_tokens": 3235039437.0, "step": 9688 }, { "epoch": 3.549260294050291, "grad_norm": 0.1489918392330633, "learning_rate": 1.1385341775859797e-05, "loss": 0.4153, "num_tokens": 3235924534.0, "step": 9689 }, { "epoch": 3.5496267118581963, "grad_norm": 0.12959219054048465, "learning_rate": 1.1381892691311732e-05, "loss": 0.4257, "num_tokens": 3236736217.0, "step": 9690 }, { "epoch": 3.5499931296661016, "grad_norm": 0.14620105166988642, "learning_rate": 1.137844420458427e-05, "loss": 0.38, "num_tokens": 3237461320.0, "step": 9691 }, { "epoch": 3.5503595474740073, "grad_norm": 0.1412386668384294, "learning_rate": 1.1374996315871582e-05, "loss": 0.4067, "num_tokens": 3238217901.0, "step": 9692 }, { "epoch": 3.550725965281913, "grad_norm": 0.14004085034956262, "learning_rate": 1.1371549025367776e-05, "loss": 0.3783, "num_tokens": 3238937804.0, "step": 9693 }, { "epoch": 3.5510923830898182, "grad_norm": 0.1405391124305337, "learning_rate": 1.1368102333266956e-05, "loss": 0.4059, "num_tokens": 3239751484.0, "step": 9694 }, { "epoch": 3.5514588008977235, "grad_norm": 0.1309231888611845, "learning_rate": 1.1364656239763172e-05, "loss": 0.3998, "num_tokens": 3240552804.0, "step": 9695 }, { "epoch": 3.551825218705629, "grad_norm": 0.1434442009453919, "learning_rate": 1.1361210745050441e-05, "loss": 0.4227, "num_tokens": 3241286060.0, "step": 9696 }, { "epoch": 3.5521916365135344, "grad_norm": 0.12612354531051664, "learning_rate": 1.1357765849322751e-05, "loss": 0.4076, "num_tokens": 3242206788.0, "step": 9697 }, { "epoch": 3.55255805432144, "grad_norm": 0.14941599154990057, "learning_rate": 1.1354321552774064e-05, "loss": 0.3692, "num_tokens": 3242896894.0, "step": 9698 }, { "epoch": 3.5529244721293454, "grad_norm": 0.13829247414422813, "learning_rate": 1.1350877855598292e-05, "loss": 0.4037, "num_tokens": 3243676946.0, "step": 9699 }, { "epoch": 3.553290889937251, "grad_norm": 0.13937646183419472, "learning_rate": 1.1347434757989333e-05, "loss": 0.4091, "num_tokens": 3244537264.0, "step": 9700 }, { "epoch": 3.5536573077451563, "grad_norm": 0.1317923351124796, "learning_rate": 1.1343992260141035e-05, "loss": 0.4025, "num_tokens": 3245414101.0, "step": 9701 }, { "epoch": 3.554023725553062, "grad_norm": 0.13464159495393582, "learning_rate": 1.1340550362247209e-05, "loss": 0.4055, "num_tokens": 3246148689.0, "step": 9702 }, { "epoch": 3.5543901433609673, "grad_norm": 0.15115861347677748, "learning_rate": 1.1337109064501655e-05, "loss": 0.4177, "num_tokens": 3246933261.0, "step": 9703 }, { "epoch": 3.554756561168873, "grad_norm": 0.14643418743681477, "learning_rate": 1.1333668367098118e-05, "loss": 0.3965, "num_tokens": 3247586686.0, "step": 9704 }, { "epoch": 3.555122978976778, "grad_norm": 0.139647411191033, "learning_rate": 1.1330228270230319e-05, "loss": 0.4211, "num_tokens": 3248401917.0, "step": 9705 }, { "epoch": 3.555489396784684, "grad_norm": 0.13876783070973656, "learning_rate": 1.132678877409193e-05, "loss": 0.384, "num_tokens": 3249225335.0, "step": 9706 }, { "epoch": 3.555855814592589, "grad_norm": 0.13636534624043462, "learning_rate": 1.132334987887662e-05, "loss": 0.4293, "num_tokens": 3250068031.0, "step": 9707 }, { "epoch": 3.5562222324004944, "grad_norm": 0.14547270091392933, "learning_rate": 1.1319911584777987e-05, "loss": 0.4391, "num_tokens": 3250845491.0, "step": 9708 }, { "epoch": 3.5565886502084, "grad_norm": 0.1523473156462373, "learning_rate": 1.1316473891989635e-05, "loss": 0.4174, "num_tokens": 3251507917.0, "step": 9709 }, { "epoch": 3.556955068016306, "grad_norm": 0.14783457672343805, "learning_rate": 1.1313036800705099e-05, "loss": 0.4007, "num_tokens": 3252214341.0, "step": 9710 }, { "epoch": 3.557321485824211, "grad_norm": 0.14001118147808844, "learning_rate": 1.1309600311117896e-05, "loss": 0.4224, "num_tokens": 3253021168.0, "step": 9711 }, { "epoch": 3.5576879036321163, "grad_norm": 0.1435800752256705, "learning_rate": 1.1306164423421511e-05, "loss": 0.4286, "num_tokens": 3253805762.0, "step": 9712 }, { "epoch": 3.558054321440022, "grad_norm": 0.1261758299781926, "learning_rate": 1.1302729137809378e-05, "loss": 0.3997, "num_tokens": 3254669347.0, "step": 9713 }, { "epoch": 3.5584207392479277, "grad_norm": 0.1417725064303285, "learning_rate": 1.1299294454474925e-05, "loss": 0.43, "num_tokens": 3255405836.0, "step": 9714 }, { "epoch": 3.558787157055833, "grad_norm": 0.13195525868998462, "learning_rate": 1.129586037361153e-05, "loss": 0.414, "num_tokens": 3256234249.0, "step": 9715 }, { "epoch": 3.559153574863738, "grad_norm": 0.13499205399593914, "learning_rate": 1.1292426895412537e-05, "loss": 0.3726, "num_tokens": 3256996031.0, "step": 9716 }, { "epoch": 3.559519992671644, "grad_norm": 0.13467540792302246, "learning_rate": 1.1288994020071256e-05, "loss": 0.4305, "num_tokens": 3257741123.0, "step": 9717 }, { "epoch": 3.5598864104795496, "grad_norm": 0.14400845949622038, "learning_rate": 1.1285561747780966e-05, "loss": 0.389, "num_tokens": 3258473515.0, "step": 9718 }, { "epoch": 3.560252828287455, "grad_norm": 0.13989116691657624, "learning_rate": 1.1282130078734903e-05, "loss": 0.3952, "num_tokens": 3259280994.0, "step": 9719 }, { "epoch": 3.56061924609536, "grad_norm": 0.12810012553950648, "learning_rate": 1.1278699013126286e-05, "loss": 0.3909, "num_tokens": 3260107908.0, "step": 9720 }, { "epoch": 3.5609856639032658, "grad_norm": 0.14189676979486693, "learning_rate": 1.127526855114829e-05, "loss": 0.3787, "num_tokens": 3260864399.0, "step": 9721 }, { "epoch": 3.561352081711171, "grad_norm": 0.15317944980170933, "learning_rate": 1.127183869299406e-05, "loss": 0.4419, "num_tokens": 3261548299.0, "step": 9722 }, { "epoch": 3.5617184995190767, "grad_norm": 0.12683569534286518, "learning_rate": 1.1268409438856698e-05, "loss": 0.4142, "num_tokens": 3262444698.0, "step": 9723 }, { "epoch": 3.562084917326982, "grad_norm": 0.13733146637814703, "learning_rate": 1.1264980788929274e-05, "loss": 0.4016, "num_tokens": 3263193448.0, "step": 9724 }, { "epoch": 3.5624513351348877, "grad_norm": 0.13399879973863155, "learning_rate": 1.1261552743404835e-05, "loss": 0.3859, "num_tokens": 3264053326.0, "step": 9725 }, { "epoch": 3.562817752942793, "grad_norm": 0.1341150686663794, "learning_rate": 1.125812530247639e-05, "loss": 0.4371, "num_tokens": 3264908700.0, "step": 9726 }, { "epoch": 3.5631841707506986, "grad_norm": 0.14600973727675906, "learning_rate": 1.1254698466336898e-05, "loss": 0.4323, "num_tokens": 3265582609.0, "step": 9727 }, { "epoch": 3.563550588558604, "grad_norm": 0.14143350227402274, "learning_rate": 1.125127223517931e-05, "loss": 0.4244, "num_tokens": 3266374988.0, "step": 9728 }, { "epoch": 3.5639170063665095, "grad_norm": 0.14916649937553736, "learning_rate": 1.1247846609196524e-05, "loss": 0.3982, "num_tokens": 3267100732.0, "step": 9729 }, { "epoch": 3.564283424174415, "grad_norm": 0.1396122708448385, "learning_rate": 1.1244421588581402e-05, "loss": 0.4187, "num_tokens": 3267858271.0, "step": 9730 }, { "epoch": 3.5646498419823205, "grad_norm": 0.12698287561435115, "learning_rate": 1.1240997173526793e-05, "loss": 0.4396, "num_tokens": 3268810793.0, "step": 9731 }, { "epoch": 3.5650162597902257, "grad_norm": 0.14006591707690946, "learning_rate": 1.1237573364225494e-05, "loss": 0.4032, "num_tokens": 3269554078.0, "step": 9732 }, { "epoch": 3.5653826775981314, "grad_norm": 0.14792626712819348, "learning_rate": 1.123415016087027e-05, "loss": 0.4235, "num_tokens": 3270259905.0, "step": 9733 }, { "epoch": 3.5657490954060367, "grad_norm": 0.12647132304480277, "learning_rate": 1.1230727563653848e-05, "loss": 0.4158, "num_tokens": 3271083085.0, "step": 9734 }, { "epoch": 3.5661155132139424, "grad_norm": 0.137481166644567, "learning_rate": 1.1227305572768934e-05, "loss": 0.4177, "num_tokens": 3271945029.0, "step": 9735 }, { "epoch": 3.5664819310218476, "grad_norm": 0.13272322208754647, "learning_rate": 1.12238841884082e-05, "loss": 0.3968, "num_tokens": 3272791541.0, "step": 9736 }, { "epoch": 3.566848348829753, "grad_norm": 0.12554031865099924, "learning_rate": 1.1220463410764271e-05, "loss": 0.383, "num_tokens": 3273669361.0, "step": 9737 }, { "epoch": 3.5672147666376586, "grad_norm": 0.14174709593488388, "learning_rate": 1.121704324002974e-05, "loss": 0.3776, "num_tokens": 3274406452.0, "step": 9738 }, { "epoch": 3.5675811844455643, "grad_norm": 0.13545571713040874, "learning_rate": 1.1213623676397172e-05, "loss": 0.4261, "num_tokens": 3275220806.0, "step": 9739 }, { "epoch": 3.5679476022534695, "grad_norm": 0.13970525253483435, "learning_rate": 1.1210204720059087e-05, "loss": 0.4403, "num_tokens": 3275967081.0, "step": 9740 }, { "epoch": 3.5683140200613748, "grad_norm": 0.13209448275237848, "learning_rate": 1.1206786371207989e-05, "loss": 0.421, "num_tokens": 3276803400.0, "step": 9741 }, { "epoch": 3.5686804378692805, "grad_norm": 0.14750470336527033, "learning_rate": 1.1203368630036345e-05, "loss": 0.426, "num_tokens": 3277591813.0, "step": 9742 }, { "epoch": 3.569046855677186, "grad_norm": 0.14174108347379208, "learning_rate": 1.1199951496736565e-05, "loss": 0.4082, "num_tokens": 3278303421.0, "step": 9743 }, { "epoch": 3.5694132734850914, "grad_norm": 0.13977583316204462, "learning_rate": 1.1196534971501049e-05, "loss": 0.4091, "num_tokens": 3279083545.0, "step": 9744 }, { "epoch": 3.5697796912929967, "grad_norm": 0.14031243389664758, "learning_rate": 1.1193119054522152e-05, "loss": 0.386, "num_tokens": 3279911636.0, "step": 9745 }, { "epoch": 3.5701461091009024, "grad_norm": 0.1417166886452089, "learning_rate": 1.1189703745992193e-05, "loss": 0.4005, "num_tokens": 3280646774.0, "step": 9746 }, { "epoch": 3.5705125269088076, "grad_norm": 0.14085483133062962, "learning_rate": 1.118628904610347e-05, "loss": 0.4262, "num_tokens": 3281507677.0, "step": 9747 }, { "epoch": 3.5708789447167133, "grad_norm": 0.1330128775882191, "learning_rate": 1.1182874955048222e-05, "loss": 0.3852, "num_tokens": 3282332235.0, "step": 9748 }, { "epoch": 3.5712453625246185, "grad_norm": 0.13488593960355696, "learning_rate": 1.1179461473018689e-05, "loss": 0.4001, "num_tokens": 3283117836.0, "step": 9749 }, { "epoch": 3.5716117803325242, "grad_norm": 0.1452548834829334, "learning_rate": 1.1176048600207046e-05, "loss": 0.4155, "num_tokens": 3283787855.0, "step": 9750 }, { "epoch": 3.5719781981404295, "grad_norm": 0.14692992037771274, "learning_rate": 1.1172636336805447e-05, "loss": 0.3727, "num_tokens": 3284412438.0, "step": 9751 }, { "epoch": 3.572344615948335, "grad_norm": 0.13701964386953644, "learning_rate": 1.1169224683006002e-05, "loss": 0.4131, "num_tokens": 3285181743.0, "step": 9752 }, { "epoch": 3.5727110337562404, "grad_norm": 0.14504183407698443, "learning_rate": 1.1165813639000803e-05, "loss": 0.4246, "num_tokens": 3285883243.0, "step": 9753 }, { "epoch": 3.573077451564146, "grad_norm": 0.13778128647291366, "learning_rate": 1.1162403204981888e-05, "loss": 0.3831, "num_tokens": 3286675033.0, "step": 9754 }, { "epoch": 3.5734438693720514, "grad_norm": 0.14170386591615947, "learning_rate": 1.1158993381141287e-05, "loss": 0.3982, "num_tokens": 3287384184.0, "step": 9755 }, { "epoch": 3.573810287179957, "grad_norm": 0.13091900551711508, "learning_rate": 1.1155584167670975e-05, "loss": 0.3871, "num_tokens": 3288197523.0, "step": 9756 }, { "epoch": 3.5741767049878623, "grad_norm": 0.14302067479158784, "learning_rate": 1.1152175564762888e-05, "loss": 0.4304, "num_tokens": 3288919973.0, "step": 9757 }, { "epoch": 3.574543122795768, "grad_norm": 0.1272371673490465, "learning_rate": 1.1148767572608938e-05, "loss": 0.3971, "num_tokens": 3289792360.0, "step": 9758 }, { "epoch": 3.5749095406036733, "grad_norm": 0.13249181968889004, "learning_rate": 1.1145360191401014e-05, "loss": 0.3803, "num_tokens": 3290597486.0, "step": 9759 }, { "epoch": 3.575275958411579, "grad_norm": 0.1343318379879721, "learning_rate": 1.1141953421330953e-05, "loss": 0.4046, "num_tokens": 3291461228.0, "step": 9760 }, { "epoch": 3.575642376219484, "grad_norm": 0.12796513955839103, "learning_rate": 1.1138547262590551e-05, "loss": 0.398, "num_tokens": 3292287770.0, "step": 9761 }, { "epoch": 3.5760087940273895, "grad_norm": 0.1305696639537588, "learning_rate": 1.11351417153716e-05, "loss": 0.3865, "num_tokens": 3293123570.0, "step": 9762 }, { "epoch": 3.576375211835295, "grad_norm": 0.13971607180818615, "learning_rate": 1.1131736779865825e-05, "loss": 0.4112, "num_tokens": 3293817300.0, "step": 9763 }, { "epoch": 3.576741629643201, "grad_norm": 0.1429653501043895, "learning_rate": 1.1128332456264942e-05, "loss": 0.3923, "num_tokens": 3294582559.0, "step": 9764 }, { "epoch": 3.577108047451106, "grad_norm": 0.13322568400004955, "learning_rate": 1.1124928744760614e-05, "loss": 0.4107, "num_tokens": 3295356320.0, "step": 9765 }, { "epoch": 3.5774744652590114, "grad_norm": 0.1427696186194251, "learning_rate": 1.1121525645544482e-05, "loss": 0.4125, "num_tokens": 3296156002.0, "step": 9766 }, { "epoch": 3.577840883066917, "grad_norm": 0.1430541766511197, "learning_rate": 1.1118123158808141e-05, "loss": 0.4325, "num_tokens": 3296880505.0, "step": 9767 }, { "epoch": 3.5782073008748227, "grad_norm": 0.1585061988917085, "learning_rate": 1.1114721284743152e-05, "loss": 0.4519, "num_tokens": 3297531125.0, "step": 9768 }, { "epoch": 3.578573718682728, "grad_norm": 0.1432008989138194, "learning_rate": 1.111132002354106e-05, "loss": 0.4198, "num_tokens": 3298270546.0, "step": 9769 }, { "epoch": 3.5789401364906333, "grad_norm": 0.12599660057012274, "learning_rate": 1.1107919375393362e-05, "loss": 0.3968, "num_tokens": 3299126641.0, "step": 9770 }, { "epoch": 3.579306554298539, "grad_norm": 0.13862001990114933, "learning_rate": 1.1104519340491518e-05, "loss": 0.4102, "num_tokens": 3299880178.0, "step": 9771 }, { "epoch": 3.5796729721064446, "grad_norm": 0.14891675678130759, "learning_rate": 1.1101119919026956e-05, "loss": 0.4093, "num_tokens": 3300653275.0, "step": 9772 }, { "epoch": 3.58003938991435, "grad_norm": 0.14237011512513667, "learning_rate": 1.1097721111191073e-05, "loss": 0.4165, "num_tokens": 3301363964.0, "step": 9773 }, { "epoch": 3.580405807722255, "grad_norm": 0.14704215965965425, "learning_rate": 1.1094322917175217e-05, "loss": 0.4208, "num_tokens": 3302096716.0, "step": 9774 }, { "epoch": 3.580772225530161, "grad_norm": 0.14908360505201512, "learning_rate": 1.1090925337170725e-05, "loss": 0.4083, "num_tokens": 3302797985.0, "step": 9775 }, { "epoch": 3.581138643338066, "grad_norm": 0.1407198994320055, "learning_rate": 1.1087528371368892e-05, "loss": 0.4057, "num_tokens": 3303524058.0, "step": 9776 }, { "epoch": 3.581505061145972, "grad_norm": 0.1430276827649577, "learning_rate": 1.1084132019960965e-05, "loss": 0.402, "num_tokens": 3304221200.0, "step": 9777 }, { "epoch": 3.581871478953877, "grad_norm": 0.13829469388768223, "learning_rate": 1.1080736283138166e-05, "loss": 0.4317, "num_tokens": 3304960059.0, "step": 9778 }, { "epoch": 3.5822378967617827, "grad_norm": 0.13146345853431052, "learning_rate": 1.1077341161091684e-05, "loss": 0.3748, "num_tokens": 3305756864.0, "step": 9779 }, { "epoch": 3.582604314569688, "grad_norm": 0.12651010205344687, "learning_rate": 1.1073946654012663e-05, "loss": 0.3919, "num_tokens": 3306607706.0, "step": 9780 }, { "epoch": 3.5829707323775937, "grad_norm": 0.13991645662794866, "learning_rate": 1.107055276209224e-05, "loss": 0.4233, "num_tokens": 3307408641.0, "step": 9781 }, { "epoch": 3.583337150185499, "grad_norm": 0.1580519084698157, "learning_rate": 1.106715948552147e-05, "loss": 0.4177, "num_tokens": 3308053431.0, "step": 9782 }, { "epoch": 3.5837035679934046, "grad_norm": 0.13657457166811546, "learning_rate": 1.1063766824491428e-05, "loss": 0.4231, "num_tokens": 3308823831.0, "step": 9783 }, { "epoch": 3.58406998580131, "grad_norm": 0.13600680986293376, "learning_rate": 1.1060374779193117e-05, "loss": 0.405, "num_tokens": 3309638894.0, "step": 9784 }, { "epoch": 3.5844364036092156, "grad_norm": 0.1378361459738418, "learning_rate": 1.1056983349817507e-05, "loss": 0.4333, "num_tokens": 3310425917.0, "step": 9785 }, { "epoch": 3.584802821417121, "grad_norm": 0.13559223529052983, "learning_rate": 1.1053592536555557e-05, "loss": 0.3955, "num_tokens": 3311198280.0, "step": 9786 }, { "epoch": 3.585169239225026, "grad_norm": 0.1354418970456353, "learning_rate": 1.105020233959817e-05, "loss": 0.377, "num_tokens": 3311991818.0, "step": 9787 }, { "epoch": 3.5855356570329318, "grad_norm": 0.14442903393265752, "learning_rate": 1.1046812759136223e-05, "loss": 0.4008, "num_tokens": 3312682130.0, "step": 9788 }, { "epoch": 3.5859020748408375, "grad_norm": 0.13706824390987588, "learning_rate": 1.1043423795360546e-05, "loss": 0.4137, "num_tokens": 3313493285.0, "step": 9789 }, { "epoch": 3.5862684926487427, "grad_norm": 0.13995499020072258, "learning_rate": 1.1040035448461958e-05, "loss": 0.4007, "num_tokens": 3314249429.0, "step": 9790 }, { "epoch": 3.586634910456648, "grad_norm": 0.1330309548401357, "learning_rate": 1.1036647718631222e-05, "loss": 0.3971, "num_tokens": 3315106190.0, "step": 9791 }, { "epoch": 3.5870013282645536, "grad_norm": 0.12902062121401772, "learning_rate": 1.1033260606059078e-05, "loss": 0.394, "num_tokens": 3315931997.0, "step": 9792 }, { "epoch": 3.5873677460724593, "grad_norm": 0.14176596200550892, "learning_rate": 1.1029874110936225e-05, "loss": 0.4107, "num_tokens": 3316715307.0, "step": 9793 }, { "epoch": 3.5877341638803646, "grad_norm": 0.12737532391325235, "learning_rate": 1.1026488233453331e-05, "loss": 0.3838, "num_tokens": 3317501567.0, "step": 9794 }, { "epoch": 3.58810058168827, "grad_norm": 0.1317001813710587, "learning_rate": 1.1023102973801021e-05, "loss": 0.3943, "num_tokens": 3318316509.0, "step": 9795 }, { "epoch": 3.5884669994961755, "grad_norm": 0.1384561451762363, "learning_rate": 1.1019718332169896e-05, "loss": 0.3993, "num_tokens": 3319042436.0, "step": 9796 }, { "epoch": 3.5888334173040812, "grad_norm": 0.13643144245271563, "learning_rate": 1.1016334308750527e-05, "loss": 0.4104, "num_tokens": 3319882763.0, "step": 9797 }, { "epoch": 3.5891998351119865, "grad_norm": 0.1282264877224262, "learning_rate": 1.101295090373343e-05, "loss": 0.406, "num_tokens": 3320734267.0, "step": 9798 }, { "epoch": 3.5895662529198917, "grad_norm": 0.12738142037550731, "learning_rate": 1.1009568117309106e-05, "loss": 0.4013, "num_tokens": 3321545412.0, "step": 9799 }, { "epoch": 3.5899326707277974, "grad_norm": 0.1490405174581241, "learning_rate": 1.1006185949668006e-05, "loss": 0.3906, "num_tokens": 3322298340.0, "step": 9800 }, { "epoch": 3.5902990885357027, "grad_norm": 0.14588208975569125, "learning_rate": 1.1002804401000552e-05, "loss": 0.4071, "num_tokens": 3322922111.0, "step": 9801 }, { "epoch": 3.5906655063436084, "grad_norm": 0.15329381059305316, "learning_rate": 1.099942347149713e-05, "loss": 0.4247, "num_tokens": 3323606142.0, "step": 9802 }, { "epoch": 3.5910319241515136, "grad_norm": 0.13634370455065256, "learning_rate": 1.0996043161348096e-05, "loss": 0.4035, "num_tokens": 3324420256.0, "step": 9803 }, { "epoch": 3.5913983419594193, "grad_norm": 0.14445784140422885, "learning_rate": 1.0992663470743776e-05, "loss": 0.4004, "num_tokens": 3325169451.0, "step": 9804 }, { "epoch": 3.5917647597673246, "grad_norm": 0.14481798716527255, "learning_rate": 1.0989284399874449e-05, "loss": 0.4162, "num_tokens": 3325971889.0, "step": 9805 }, { "epoch": 3.5921311775752303, "grad_norm": 0.1413681033278746, "learning_rate": 1.098590594893036e-05, "loss": 0.3861, "num_tokens": 3326716210.0, "step": 9806 }, { "epoch": 3.5924975953831355, "grad_norm": 0.13370995270595182, "learning_rate": 1.0982528118101716e-05, "loss": 0.3694, "num_tokens": 3327521635.0, "step": 9807 }, { "epoch": 3.592864013191041, "grad_norm": 0.13751525756393265, "learning_rate": 1.0979150907578715e-05, "loss": 0.396, "num_tokens": 3328314371.0, "step": 9808 }, { "epoch": 3.5932304309989465, "grad_norm": 0.1498380230479296, "learning_rate": 1.0975774317551478e-05, "loss": 0.3979, "num_tokens": 3329067099.0, "step": 9809 }, { "epoch": 3.593596848806852, "grad_norm": 0.13275108364447083, "learning_rate": 1.0972398348210133e-05, "loss": 0.41, "num_tokens": 3329958247.0, "step": 9810 }, { "epoch": 3.5939632666147574, "grad_norm": 0.13419757926598874, "learning_rate": 1.0969022999744742e-05, "loss": 0.3825, "num_tokens": 3330739256.0, "step": 9811 }, { "epoch": 3.594329684422663, "grad_norm": 0.14175412048544586, "learning_rate": 1.096564827234535e-05, "loss": 0.4177, "num_tokens": 3331543722.0, "step": 9812 }, { "epoch": 3.5946961022305683, "grad_norm": 0.13846961047125655, "learning_rate": 1.0962274166201956e-05, "loss": 0.4002, "num_tokens": 3332367972.0, "step": 9813 }, { "epoch": 3.595062520038474, "grad_norm": 0.1404287034992013, "learning_rate": 1.0958900681504535e-05, "loss": 0.3606, "num_tokens": 3333059109.0, "step": 9814 }, { "epoch": 3.5954289378463793, "grad_norm": 0.13886702703653253, "learning_rate": 1.0955527818443015e-05, "loss": 0.4325, "num_tokens": 3333869039.0, "step": 9815 }, { "epoch": 3.5957953556542845, "grad_norm": 0.13310793432599902, "learning_rate": 1.0952155577207295e-05, "loss": 0.414, "num_tokens": 3334703505.0, "step": 9816 }, { "epoch": 3.5961617734621902, "grad_norm": 0.1444461246746872, "learning_rate": 1.0948783957987246e-05, "loss": 0.4043, "num_tokens": 3335417565.0, "step": 9817 }, { "epoch": 3.596528191270096, "grad_norm": 0.14771660911945292, "learning_rate": 1.0945412960972683e-05, "loss": 0.3894, "num_tokens": 3336140161.0, "step": 9818 }, { "epoch": 3.596894609078001, "grad_norm": 0.13358944957447424, "learning_rate": 1.0942042586353416e-05, "loss": 0.4383, "num_tokens": 3336959559.0, "step": 9819 }, { "epoch": 3.5972610268859064, "grad_norm": 0.13737763620339297, "learning_rate": 1.0938672834319197e-05, "loss": 0.4023, "num_tokens": 3337785081.0, "step": 9820 }, { "epoch": 3.597627444693812, "grad_norm": 0.1371405922883416, "learning_rate": 1.0935303705059753e-05, "loss": 0.4245, "num_tokens": 3338601429.0, "step": 9821 }, { "epoch": 3.597993862501718, "grad_norm": 0.13162641142865994, "learning_rate": 1.0931935198764762e-05, "loss": 0.416, "num_tokens": 3339388065.0, "step": 9822 }, { "epoch": 3.598360280309623, "grad_norm": 0.1466240992931229, "learning_rate": 1.0928567315623884e-05, "loss": 0.4287, "num_tokens": 3340183331.0, "step": 9823 }, { "epoch": 3.5987266981175283, "grad_norm": 0.13734395095786447, "learning_rate": 1.0925200055826737e-05, "loss": 0.422, "num_tokens": 3340948049.0, "step": 9824 }, { "epoch": 3.599093115925434, "grad_norm": 0.1460881014293812, "learning_rate": 1.0921833419562911e-05, "loss": 0.4017, "num_tokens": 3341657687.0, "step": 9825 }, { "epoch": 3.5994595337333397, "grad_norm": 0.14265344995354767, "learning_rate": 1.091846740702195e-05, "loss": 0.4109, "num_tokens": 3342328497.0, "step": 9826 }, { "epoch": 3.599825951541245, "grad_norm": 0.1456998019813591, "learning_rate": 1.0915102018393367e-05, "loss": 0.3974, "num_tokens": 3342979766.0, "step": 9827 }, { "epoch": 3.60019236934915, "grad_norm": 0.1441676095383605, "learning_rate": 1.0911737253866637e-05, "loss": 0.4073, "num_tokens": 3343783776.0, "step": 9828 }, { "epoch": 3.600558787157056, "grad_norm": 0.14480169517216565, "learning_rate": 1.0908373113631201e-05, "loss": 0.3978, "num_tokens": 3344504555.0, "step": 9829 }, { "epoch": 3.600925204964961, "grad_norm": 0.14748316447451482, "learning_rate": 1.0905009597876469e-05, "loss": 0.4136, "num_tokens": 3345158459.0, "step": 9830 }, { "epoch": 3.601291622772867, "grad_norm": 0.12622521888297802, "learning_rate": 1.0901646706791821e-05, "loss": 0.417, "num_tokens": 3346101444.0, "step": 9831 }, { "epoch": 3.601658040580772, "grad_norm": 0.1548514125749539, "learning_rate": 1.0898284440566593e-05, "loss": 0.4238, "num_tokens": 3346700522.0, "step": 9832 }, { "epoch": 3.602024458388678, "grad_norm": 0.1316207884120713, "learning_rate": 1.089492279939008e-05, "loss": 0.3886, "num_tokens": 3347571486.0, "step": 9833 }, { "epoch": 3.602390876196583, "grad_norm": 0.14006136188267757, "learning_rate": 1.0891561783451557e-05, "loss": 0.4233, "num_tokens": 3348356134.0, "step": 9834 }, { "epoch": 3.6027572940044887, "grad_norm": 0.14143601774159542, "learning_rate": 1.0888201392940242e-05, "loss": 0.4039, "num_tokens": 3349051628.0, "step": 9835 }, { "epoch": 3.603123711812394, "grad_norm": 0.1450483506462903, "learning_rate": 1.0884841628045349e-05, "loss": 0.4005, "num_tokens": 3349789556.0, "step": 9836 }, { "epoch": 3.6034901296202997, "grad_norm": 0.15494818511453726, "learning_rate": 1.0881482488956023e-05, "loss": 0.4136, "num_tokens": 3350470817.0, "step": 9837 }, { "epoch": 3.603856547428205, "grad_norm": 0.13678761159982897, "learning_rate": 1.0878123975861408e-05, "loss": 0.3702, "num_tokens": 3351237015.0, "step": 9838 }, { "epoch": 3.6042229652361106, "grad_norm": 0.1276927049169039, "learning_rate": 1.0874766088950584e-05, "loss": 0.4058, "num_tokens": 3352081399.0, "step": 9839 }, { "epoch": 3.604589383044016, "grad_norm": 0.13955770057198028, "learning_rate": 1.087140882841261e-05, "loss": 0.3949, "num_tokens": 3352945122.0, "step": 9840 }, { "epoch": 3.604955800851921, "grad_norm": 0.14338946706107814, "learning_rate": 1.0868052194436497e-05, "loss": 0.3783, "num_tokens": 3353678145.0, "step": 9841 }, { "epoch": 3.605322218659827, "grad_norm": 0.15705016878120193, "learning_rate": 1.0864696187211243e-05, "loss": 0.4276, "num_tokens": 3354359789.0, "step": 9842 }, { "epoch": 3.6056886364677325, "grad_norm": 0.15055675332440932, "learning_rate": 1.0861340806925795e-05, "loss": 0.4063, "num_tokens": 3355108703.0, "step": 9843 }, { "epoch": 3.6060550542756378, "grad_norm": 0.1273097250325729, "learning_rate": 1.085798605376906e-05, "loss": 0.3587, "num_tokens": 3355873673.0, "step": 9844 }, { "epoch": 3.606421472083543, "grad_norm": 0.13248272115202656, "learning_rate": 1.0854631927929931e-05, "loss": 0.4361, "num_tokens": 3356692612.0, "step": 9845 }, { "epoch": 3.6067878898914487, "grad_norm": 0.14700111981669628, "learning_rate": 1.0851278429597234e-05, "loss": 0.3962, "num_tokens": 3357474860.0, "step": 9846 }, { "epoch": 3.6071543076993544, "grad_norm": 0.12553138816918613, "learning_rate": 1.0847925558959793e-05, "loss": 0.407, "num_tokens": 3358332620.0, "step": 9847 }, { "epoch": 3.6075207255072597, "grad_norm": 0.13160794782127228, "learning_rate": 1.084457331620638e-05, "loss": 0.4075, "num_tokens": 3359213380.0, "step": 9848 }, { "epoch": 3.607887143315165, "grad_norm": 0.13066892273776542, "learning_rate": 1.0841221701525726e-05, "loss": 0.4234, "num_tokens": 3360107697.0, "step": 9849 }, { "epoch": 3.6082535611230706, "grad_norm": 0.14566988253281973, "learning_rate": 1.083787071510653e-05, "loss": 0.404, "num_tokens": 3360720884.0, "step": 9850 }, { "epoch": 3.6086199789309763, "grad_norm": 0.1454875893266458, "learning_rate": 1.0834520357137475e-05, "loss": 0.4091, "num_tokens": 3361409709.0, "step": 9851 }, { "epoch": 3.6089863967388816, "grad_norm": 0.13853615720681575, "learning_rate": 1.0831170627807177e-05, "loss": 0.4011, "num_tokens": 3362199235.0, "step": 9852 }, { "epoch": 3.609352814546787, "grad_norm": 0.14712425109429336, "learning_rate": 1.0827821527304243e-05, "loss": 0.4009, "num_tokens": 3362928021.0, "step": 9853 }, { "epoch": 3.6097192323546925, "grad_norm": 0.14701173716031737, "learning_rate": 1.0824473055817229e-05, "loss": 0.4228, "num_tokens": 3363590903.0, "step": 9854 }, { "epoch": 3.6100856501625977, "grad_norm": 0.14311990022759874, "learning_rate": 1.0821125213534662e-05, "loss": 0.3801, "num_tokens": 3364331635.0, "step": 9855 }, { "epoch": 3.6104520679705034, "grad_norm": 0.13059263562701662, "learning_rate": 1.0817778000645034e-05, "loss": 0.3973, "num_tokens": 3365239646.0, "step": 9856 }, { "epoch": 3.6108184857784087, "grad_norm": 0.1388315396277112, "learning_rate": 1.0814431417336793e-05, "loss": 0.4156, "num_tokens": 3365983576.0, "step": 9857 }, { "epoch": 3.6111849035863144, "grad_norm": 0.1400135617162392, "learning_rate": 1.0811085463798357e-05, "loss": 0.4051, "num_tokens": 3366814885.0, "step": 9858 }, { "epoch": 3.6115513213942196, "grad_norm": 0.13629376222863887, "learning_rate": 1.0807740140218126e-05, "loss": 0.4091, "num_tokens": 3367593575.0, "step": 9859 }, { "epoch": 3.6119177392021253, "grad_norm": 0.14090472657915878, "learning_rate": 1.0804395446784437e-05, "loss": 0.4285, "num_tokens": 3368465559.0, "step": 9860 }, { "epoch": 3.6122841570100306, "grad_norm": 0.14314536634827313, "learning_rate": 1.0801051383685603e-05, "loss": 0.4182, "num_tokens": 3369229991.0, "step": 9861 }, { "epoch": 3.6126505748179363, "grad_norm": 0.13739350399947858, "learning_rate": 1.0797707951109904e-05, "loss": 0.3921, "num_tokens": 3369965618.0, "step": 9862 }, { "epoch": 3.6130169926258415, "grad_norm": 0.13746356639762886, "learning_rate": 1.0794365149245574e-05, "loss": 0.4035, "num_tokens": 3370682112.0, "step": 9863 }, { "epoch": 3.6133834104337472, "grad_norm": 0.13522120118274897, "learning_rate": 1.0791022978280825e-05, "loss": 0.4086, "num_tokens": 3371497017.0, "step": 9864 }, { "epoch": 3.6137498282416525, "grad_norm": 0.15741315151581936, "learning_rate": 1.0787681438403836e-05, "loss": 0.4039, "num_tokens": 3372243301.0, "step": 9865 }, { "epoch": 3.614116246049558, "grad_norm": 0.13906757468490621, "learning_rate": 1.0784340529802732e-05, "loss": 0.3992, "num_tokens": 3372996267.0, "step": 9866 }, { "epoch": 3.6144826638574634, "grad_norm": 0.13381959132631516, "learning_rate": 1.078100025266562e-05, "loss": 0.3894, "num_tokens": 3373716139.0, "step": 9867 }, { "epoch": 3.614849081665369, "grad_norm": 0.14489280040067692, "learning_rate": 1.0777660607180551e-05, "loss": 0.4054, "num_tokens": 3374405488.0, "step": 9868 }, { "epoch": 3.6152154994732744, "grad_norm": 0.14739508315132857, "learning_rate": 1.077432159353557e-05, "loss": 0.4301, "num_tokens": 3375053944.0, "step": 9869 }, { "epoch": 3.6155819172811796, "grad_norm": 0.13069827352024188, "learning_rate": 1.0770983211918664e-05, "loss": 0.4096, "num_tokens": 3375899535.0, "step": 9870 }, { "epoch": 3.6159483350890853, "grad_norm": 0.13153270619262167, "learning_rate": 1.076764546251778e-05, "loss": 0.4021, "num_tokens": 3376736061.0, "step": 9871 }, { "epoch": 3.616314752896991, "grad_norm": 0.15285587378360152, "learning_rate": 1.076430834552086e-05, "loss": 0.4, "num_tokens": 3377449264.0, "step": 9872 }, { "epoch": 3.6166811707048963, "grad_norm": 0.14052101969714878, "learning_rate": 1.0760971861115776e-05, "loss": 0.4219, "num_tokens": 3378200927.0, "step": 9873 }, { "epoch": 3.6170475885128015, "grad_norm": 0.13290352679487025, "learning_rate": 1.075763600949038e-05, "loss": 0.4054, "num_tokens": 3378957259.0, "step": 9874 }, { "epoch": 3.617414006320707, "grad_norm": 0.13984355056741213, "learning_rate": 1.0754300790832492e-05, "loss": 0.4217, "num_tokens": 3379701347.0, "step": 9875 }, { "epoch": 3.617780424128613, "grad_norm": 0.14475415939554434, "learning_rate": 1.075096620532989e-05, "loss": 0.4012, "num_tokens": 3380484933.0, "step": 9876 }, { "epoch": 3.618146841936518, "grad_norm": 0.1552999600452534, "learning_rate": 1.0747632253170317e-05, "loss": 0.3999, "num_tokens": 3381136181.0, "step": 9877 }, { "epoch": 3.6185132597444234, "grad_norm": 0.1487965563290998, "learning_rate": 1.0744298934541475e-05, "loss": 0.3902, "num_tokens": 3381856218.0, "step": 9878 }, { "epoch": 3.618879677552329, "grad_norm": 0.15946076789175678, "learning_rate": 1.074096624963104e-05, "loss": 0.425, "num_tokens": 3382539740.0, "step": 9879 }, { "epoch": 3.619246095360235, "grad_norm": 0.14115551407305088, "learning_rate": 1.0737634198626661e-05, "loss": 0.4017, "num_tokens": 3383329904.0, "step": 9880 }, { "epoch": 3.61961251316814, "grad_norm": 0.1372260834203111, "learning_rate": 1.0734302781715928e-05, "loss": 0.4158, "num_tokens": 3384154056.0, "step": 9881 }, { "epoch": 3.6199789309760453, "grad_norm": 0.11963733071749048, "learning_rate": 1.073097199908641e-05, "loss": 0.3852, "num_tokens": 3384980420.0, "step": 9882 }, { "epoch": 3.620345348783951, "grad_norm": 0.1348659786578925, "learning_rate": 1.072764185092563e-05, "loss": 0.3915, "num_tokens": 3385770876.0, "step": 9883 }, { "epoch": 3.6207117665918562, "grad_norm": 0.13600414654892903, "learning_rate": 1.0724312337421083e-05, "loss": 0.4194, "num_tokens": 3386607405.0, "step": 9884 }, { "epoch": 3.621078184399762, "grad_norm": 0.15354492331303216, "learning_rate": 1.072098345876023e-05, "loss": 0.4162, "num_tokens": 3387160586.0, "step": 9885 }, { "epoch": 3.621444602207667, "grad_norm": 0.13137928587418485, "learning_rate": 1.07176552151305e-05, "loss": 0.4125, "num_tokens": 3387946046.0, "step": 9886 }, { "epoch": 3.621811020015573, "grad_norm": 0.13190548621197232, "learning_rate": 1.0714327606719275e-05, "loss": 0.4093, "num_tokens": 3388648733.0, "step": 9887 }, { "epoch": 3.622177437823478, "grad_norm": 0.14210010189402536, "learning_rate": 1.0711000633713904e-05, "loss": 0.3785, "num_tokens": 3389426534.0, "step": 9888 }, { "epoch": 3.622543855631384, "grad_norm": 0.15686778080102107, "learning_rate": 1.0707674296301705e-05, "loss": 0.412, "num_tokens": 3390119688.0, "step": 9889 }, { "epoch": 3.622910273439289, "grad_norm": 0.1478134714490861, "learning_rate": 1.0704348594669946e-05, "loss": 0.4115, "num_tokens": 3390785419.0, "step": 9890 }, { "epoch": 3.6232766912471948, "grad_norm": 0.13531995959658139, "learning_rate": 1.0701023529005892e-05, "loss": 0.4263, "num_tokens": 3391621680.0, "step": 9891 }, { "epoch": 3.6236431090551, "grad_norm": 0.16384767134926753, "learning_rate": 1.069769909949673e-05, "loss": 0.4227, "num_tokens": 3392236784.0, "step": 9892 }, { "epoch": 3.6240095268630057, "grad_norm": 0.1582671759151473, "learning_rate": 1.0694375306329645e-05, "loss": 0.4152, "num_tokens": 3392867284.0, "step": 9893 }, { "epoch": 3.624375944670911, "grad_norm": 0.1405269020870895, "learning_rate": 1.0691052149691773e-05, "loss": 0.3916, "num_tokens": 3393680872.0, "step": 9894 }, { "epoch": 3.624742362478816, "grad_norm": 0.14366537357347525, "learning_rate": 1.0687729629770204e-05, "loss": 0.4151, "num_tokens": 3394367534.0, "step": 9895 }, { "epoch": 3.625108780286722, "grad_norm": 0.14752086156606575, "learning_rate": 1.0684407746752008e-05, "loss": 0.4236, "num_tokens": 3395100379.0, "step": 9896 }, { "epoch": 3.6254751980946276, "grad_norm": 0.1373770175377511, "learning_rate": 1.0681086500824222e-05, "loss": 0.4129, "num_tokens": 3395835195.0, "step": 9897 }, { "epoch": 3.625841615902533, "grad_norm": 0.13505347561075254, "learning_rate": 1.067776589217383e-05, "loss": 0.4176, "num_tokens": 3396720056.0, "step": 9898 }, { "epoch": 3.626208033710438, "grad_norm": 0.14206177687334198, "learning_rate": 1.0674445920987784e-05, "loss": 0.406, "num_tokens": 3397516327.0, "step": 9899 }, { "epoch": 3.626574451518344, "grad_norm": 0.14318673546495056, "learning_rate": 1.0671126587453017e-05, "loss": 0.4171, "num_tokens": 3398237690.0, "step": 9900 }, { "epoch": 3.6269408693262495, "grad_norm": 0.12463114203604236, "learning_rate": 1.0667807891756406e-05, "loss": 0.3843, "num_tokens": 3399117917.0, "step": 9901 }, { "epoch": 3.6273072871341547, "grad_norm": 0.14234788656875202, "learning_rate": 1.0664489834084803e-05, "loss": 0.4302, "num_tokens": 3399871003.0, "step": 9902 }, { "epoch": 3.62767370494206, "grad_norm": 0.14254981503288633, "learning_rate": 1.0661172414625025e-05, "loss": 0.4084, "num_tokens": 3400594947.0, "step": 9903 }, { "epoch": 3.6280401227499657, "grad_norm": 0.1303393578404911, "learning_rate": 1.0657855633563843e-05, "loss": 0.4217, "num_tokens": 3401466211.0, "step": 9904 }, { "epoch": 3.6284065405578714, "grad_norm": 0.13653045946951423, "learning_rate": 1.0654539491087994e-05, "loss": 0.4122, "num_tokens": 3402260684.0, "step": 9905 }, { "epoch": 3.6287729583657766, "grad_norm": 0.13483085057760485, "learning_rate": 1.0651223987384198e-05, "loss": 0.3848, "num_tokens": 3403019610.0, "step": 9906 }, { "epoch": 3.629139376173682, "grad_norm": 0.14635296457072364, "learning_rate": 1.064790912263911e-05, "loss": 0.4201, "num_tokens": 3403752932.0, "step": 9907 }, { "epoch": 3.6295057939815876, "grad_norm": 0.13074225361913563, "learning_rate": 1.0644594897039377e-05, "loss": 0.4182, "num_tokens": 3404572792.0, "step": 9908 }, { "epoch": 3.629872211789493, "grad_norm": 0.1520138800401638, "learning_rate": 1.0641281310771588e-05, "loss": 0.3762, "num_tokens": 3405222012.0, "step": 9909 }, { "epoch": 3.6302386295973985, "grad_norm": 0.14429483106212967, "learning_rate": 1.0637968364022307e-05, "loss": 0.4147, "num_tokens": 3405937461.0, "step": 9910 }, { "epoch": 3.6306050474053038, "grad_norm": 0.14162665674546138, "learning_rate": 1.0634656056978056e-05, "loss": 0.407, "num_tokens": 3406697319.0, "step": 9911 }, { "epoch": 3.6309714652132095, "grad_norm": 0.14543584773978066, "learning_rate": 1.0631344389825324e-05, "loss": 0.412, "num_tokens": 3407370690.0, "step": 9912 }, { "epoch": 3.6313378830211147, "grad_norm": 0.14134478300207745, "learning_rate": 1.0628033362750568e-05, "loss": 0.4207, "num_tokens": 3408135367.0, "step": 9913 }, { "epoch": 3.6317043008290204, "grad_norm": 0.14680817669610915, "learning_rate": 1.0624722975940212e-05, "loss": 0.4219, "num_tokens": 3408877325.0, "step": 9914 }, { "epoch": 3.6320707186369257, "grad_norm": 0.13469736078645186, "learning_rate": 1.0621413229580627e-05, "loss": 0.4266, "num_tokens": 3409610159.0, "step": 9915 }, { "epoch": 3.6324371364448313, "grad_norm": 0.12929277881210413, "learning_rate": 1.0618104123858162e-05, "loss": 0.3881, "num_tokens": 3410424884.0, "step": 9916 }, { "epoch": 3.6328035542527366, "grad_norm": 0.1444082555370219, "learning_rate": 1.0614795658959127e-05, "loss": 0.418, "num_tokens": 3411156790.0, "step": 9917 }, { "epoch": 3.6331699720606423, "grad_norm": 0.13795563068517594, "learning_rate": 1.0611487835069787e-05, "loss": 0.3976, "num_tokens": 3411949397.0, "step": 9918 }, { "epoch": 3.6335363898685475, "grad_norm": 0.15592092401815563, "learning_rate": 1.0608180652376392e-05, "loss": 0.4186, "num_tokens": 3412581291.0, "step": 9919 }, { "epoch": 3.6339028076764532, "grad_norm": 0.1388286017916406, "learning_rate": 1.0604874111065131e-05, "loss": 0.3884, "num_tokens": 3413283892.0, "step": 9920 }, { "epoch": 3.6342692254843585, "grad_norm": 0.14486060240577536, "learning_rate": 1.0601568211322182e-05, "loss": 0.4035, "num_tokens": 3413977450.0, "step": 9921 }, { "epoch": 3.634635643292264, "grad_norm": 0.1453152253582251, "learning_rate": 1.0598262953333665e-05, "loss": 0.4153, "num_tokens": 3414732710.0, "step": 9922 }, { "epoch": 3.6350020611001694, "grad_norm": 0.147492987212418, "learning_rate": 1.0594958337285676e-05, "loss": 0.4346, "num_tokens": 3415469817.0, "step": 9923 }, { "epoch": 3.6353684789080747, "grad_norm": 0.14476127072624811, "learning_rate": 1.0591654363364266e-05, "loss": 0.4023, "num_tokens": 3416155262.0, "step": 9924 }, { "epoch": 3.6357348967159804, "grad_norm": 0.1383292021560014, "learning_rate": 1.0588351031755461e-05, "loss": 0.4191, "num_tokens": 3417033488.0, "step": 9925 }, { "epoch": 3.636101314523886, "grad_norm": 0.1337982836607846, "learning_rate": 1.0585048342645238e-05, "loss": 0.3885, "num_tokens": 3417895759.0, "step": 9926 }, { "epoch": 3.6364677323317913, "grad_norm": 0.14223369558801086, "learning_rate": 1.0581746296219558e-05, "loss": 0.4359, "num_tokens": 3418606131.0, "step": 9927 }, { "epoch": 3.6368341501396966, "grad_norm": 0.14200299754114723, "learning_rate": 1.0578444892664325e-05, "loss": 0.4328, "num_tokens": 3419466061.0, "step": 9928 }, { "epoch": 3.6372005679476023, "grad_norm": 0.14343251004667323, "learning_rate": 1.0575144132165407e-05, "loss": 0.4118, "num_tokens": 3420199986.0, "step": 9929 }, { "epoch": 3.637566985755508, "grad_norm": 0.13814043171092907, "learning_rate": 1.0571844014908657e-05, "loss": 0.4073, "num_tokens": 3420976266.0, "step": 9930 }, { "epoch": 3.637933403563413, "grad_norm": 0.15260267978220868, "learning_rate": 1.0568544541079872e-05, "loss": 0.3966, "num_tokens": 3421623194.0, "step": 9931 }, { "epoch": 3.6382998213713185, "grad_norm": 0.12828571622255883, "learning_rate": 1.0565245710864822e-05, "loss": 0.3663, "num_tokens": 3422496560.0, "step": 9932 }, { "epoch": 3.638666239179224, "grad_norm": 0.14010932911210008, "learning_rate": 1.0561947524449228e-05, "loss": 0.4078, "num_tokens": 3423261117.0, "step": 9933 }, { "epoch": 3.6390326569871294, "grad_norm": 0.13332816909979178, "learning_rate": 1.0558649982018796e-05, "loss": 0.4008, "num_tokens": 3424099464.0, "step": 9934 }, { "epoch": 3.639399074795035, "grad_norm": 0.13887530769433434, "learning_rate": 1.0555353083759172e-05, "loss": 0.3956, "num_tokens": 3424851837.0, "step": 9935 }, { "epoch": 3.6397654926029404, "grad_norm": 0.14809254110464332, "learning_rate": 1.0552056829855996e-05, "loss": 0.4367, "num_tokens": 3425723225.0, "step": 9936 }, { "epoch": 3.640131910410846, "grad_norm": 0.13531330826309335, "learning_rate": 1.0548761220494841e-05, "loss": 0.3917, "num_tokens": 3426458101.0, "step": 9937 }, { "epoch": 3.6404983282187513, "grad_norm": 0.13651679684958032, "learning_rate": 1.0545466255861258e-05, "loss": 0.4271, "num_tokens": 3427248420.0, "step": 9938 }, { "epoch": 3.640864746026657, "grad_norm": 0.14444140026810687, "learning_rate": 1.0542171936140755e-05, "loss": 0.3757, "num_tokens": 3427966327.0, "step": 9939 }, { "epoch": 3.6412311638345622, "grad_norm": 0.13736951038969014, "learning_rate": 1.0538878261518815e-05, "loss": 0.4009, "num_tokens": 3428713610.0, "step": 9940 }, { "epoch": 3.641597581642468, "grad_norm": 0.14046752743993154, "learning_rate": 1.0535585232180882e-05, "loss": 0.3795, "num_tokens": 3429426365.0, "step": 9941 }, { "epoch": 3.641963999450373, "grad_norm": 0.13673055122440245, "learning_rate": 1.0532292848312357e-05, "loss": 0.4366, "num_tokens": 3430252135.0, "step": 9942 }, { "epoch": 3.642330417258279, "grad_norm": 0.1483557325634885, "learning_rate": 1.0529001110098606e-05, "loss": 0.4021, "num_tokens": 3430939020.0, "step": 9943 }, { "epoch": 3.642696835066184, "grad_norm": 0.1523555582120293, "learning_rate": 1.0525710017724965e-05, "loss": 0.4274, "num_tokens": 3431595921.0, "step": 9944 }, { "epoch": 3.64306325287409, "grad_norm": 0.14570892455679696, "learning_rate": 1.0522419571376723e-05, "loss": 0.422, "num_tokens": 3432383028.0, "step": 9945 }, { "epoch": 3.643429670681995, "grad_norm": 0.15296471116170868, "learning_rate": 1.0519129771239133e-05, "loss": 0.4192, "num_tokens": 3433039039.0, "step": 9946 }, { "epoch": 3.6437960884899008, "grad_norm": 0.14397470432349677, "learning_rate": 1.0515840617497428e-05, "loss": 0.3864, "num_tokens": 3433705213.0, "step": 9947 }, { "epoch": 3.644162506297806, "grad_norm": 0.13360097746336957, "learning_rate": 1.0512552110336796e-05, "loss": 0.3852, "num_tokens": 3434465919.0, "step": 9948 }, { "epoch": 3.6445289241057113, "grad_norm": 0.13235967285425482, "learning_rate": 1.0509264249942381e-05, "loss": 0.4154, "num_tokens": 3435249766.0, "step": 9949 }, { "epoch": 3.644895341913617, "grad_norm": 0.138005989052932, "learning_rate": 1.0505977036499298e-05, "loss": 0.3828, "num_tokens": 3435989730.0, "step": 9950 }, { "epoch": 3.6452617597215227, "grad_norm": 0.1313931402797504, "learning_rate": 1.0502690470192615e-05, "loss": 0.4264, "num_tokens": 3436793479.0, "step": 9951 }, { "epoch": 3.645628177529428, "grad_norm": 0.15157235316990805, "learning_rate": 1.0499404551207384e-05, "loss": 0.4538, "num_tokens": 3437589321.0, "step": 9952 }, { "epoch": 3.645994595337333, "grad_norm": 0.12603051686259847, "learning_rate": 1.0496119279728609e-05, "loss": 0.3737, "num_tokens": 3438437597.0, "step": 9953 }, { "epoch": 3.646361013145239, "grad_norm": 0.13182046171665449, "learning_rate": 1.0492834655941244e-05, "loss": 0.4074, "num_tokens": 3439239292.0, "step": 9954 }, { "epoch": 3.6467274309531446, "grad_norm": 0.13859088503300668, "learning_rate": 1.0489550680030236e-05, "loss": 0.4161, "num_tokens": 3440053894.0, "step": 9955 }, { "epoch": 3.64709384876105, "grad_norm": 0.14849494945162917, "learning_rate": 1.048626735218047e-05, "loss": 0.3933, "num_tokens": 3440741221.0, "step": 9956 }, { "epoch": 3.647460266568955, "grad_norm": 0.1357907318330214, "learning_rate": 1.0482984672576802e-05, "loss": 0.4299, "num_tokens": 3441480993.0, "step": 9957 }, { "epoch": 3.6478266843768608, "grad_norm": 0.15022015032850294, "learning_rate": 1.0479702641404064e-05, "loss": 0.4022, "num_tokens": 3442185601.0, "step": 9958 }, { "epoch": 3.6481931021847664, "grad_norm": 0.13616831483214922, "learning_rate": 1.0476421258847034e-05, "loss": 0.3834, "num_tokens": 3442932409.0, "step": 9959 }, { "epoch": 3.6485595199926717, "grad_norm": 0.13263128755848094, "learning_rate": 1.0473140525090453e-05, "loss": 0.3991, "num_tokens": 3443728530.0, "step": 9960 }, { "epoch": 3.648925937800577, "grad_norm": 0.12694980426338676, "learning_rate": 1.0469860440319046e-05, "loss": 0.4033, "num_tokens": 3444571466.0, "step": 9961 }, { "epoch": 3.6492923556084826, "grad_norm": 0.14809303751838868, "learning_rate": 1.0466581004717478e-05, "loss": 0.434, "num_tokens": 3445264667.0, "step": 9962 }, { "epoch": 3.649658773416388, "grad_norm": 0.14823502494626584, "learning_rate": 1.0463302218470398e-05, "loss": 0.4457, "num_tokens": 3445940064.0, "step": 9963 }, { "epoch": 3.6500251912242936, "grad_norm": 0.13882375247935022, "learning_rate": 1.04600240817624e-05, "loss": 0.3862, "num_tokens": 3446694215.0, "step": 9964 }, { "epoch": 3.650391609032199, "grad_norm": 0.13698124838508685, "learning_rate": 1.0456746594778057e-05, "loss": 0.3954, "num_tokens": 3447465708.0, "step": 9965 }, { "epoch": 3.6507580268401045, "grad_norm": 0.1424301678551565, "learning_rate": 1.0453469757701896e-05, "loss": 0.435, "num_tokens": 3448253569.0, "step": 9966 }, { "epoch": 3.65112444464801, "grad_norm": 0.14310292329045304, "learning_rate": 1.0450193570718392e-05, "loss": 0.3661, "num_tokens": 3449122779.0, "step": 9967 }, { "epoch": 3.6514908624559155, "grad_norm": 0.1552479937683936, "learning_rate": 1.0446918034012021e-05, "loss": 0.4513, "num_tokens": 3449762919.0, "step": 9968 }, { "epoch": 3.6518572802638207, "grad_norm": 0.13509043849877111, "learning_rate": 1.0443643147767204e-05, "loss": 0.3907, "num_tokens": 3450642192.0, "step": 9969 }, { "epoch": 3.6522236980717264, "grad_norm": 0.14734865677750075, "learning_rate": 1.0440368912168316e-05, "loss": 0.4182, "num_tokens": 3451366392.0, "step": 9970 }, { "epoch": 3.6525901158796317, "grad_norm": 0.15657082237023187, "learning_rate": 1.0437095327399701e-05, "loss": 0.4203, "num_tokens": 3452066350.0, "step": 9971 }, { "epoch": 3.6529565336875374, "grad_norm": 0.13403440421778842, "learning_rate": 1.0433822393645674e-05, "loss": 0.4341, "num_tokens": 3452826738.0, "step": 9972 }, { "epoch": 3.6533229514954426, "grad_norm": 0.15075299113273458, "learning_rate": 1.0430550111090497e-05, "loss": 0.4164, "num_tokens": 3453564268.0, "step": 9973 }, { "epoch": 3.653689369303348, "grad_norm": 0.12891600579950604, "learning_rate": 1.0427278479918421e-05, "loss": 0.4032, "num_tokens": 3454449741.0, "step": 9974 }, { "epoch": 3.6540557871112536, "grad_norm": 0.1566437973082684, "learning_rate": 1.042400750031363e-05, "loss": 0.4356, "num_tokens": 3455060439.0, "step": 9975 }, { "epoch": 3.6544222049191593, "grad_norm": 0.13465488741350531, "learning_rate": 1.0420737172460302e-05, "loss": 0.409, "num_tokens": 3455930013.0, "step": 9976 }, { "epoch": 3.6547886227270645, "grad_norm": 0.13329560931568804, "learning_rate": 1.0417467496542555e-05, "loss": 0.3997, "num_tokens": 3456799377.0, "step": 9977 }, { "epoch": 3.6551550405349698, "grad_norm": 0.15242792020142037, "learning_rate": 1.041419847274448e-05, "loss": 0.4133, "num_tokens": 3457446564.0, "step": 9978 }, { "epoch": 3.6555214583428755, "grad_norm": 0.13833912088369502, "learning_rate": 1.0410930101250123e-05, "loss": 0.4235, "num_tokens": 3458258633.0, "step": 9979 }, { "epoch": 3.655887876150781, "grad_norm": 0.13594295981066007, "learning_rate": 1.040766238224351e-05, "loss": 0.4111, "num_tokens": 3459058203.0, "step": 9980 }, { "epoch": 3.6562542939586864, "grad_norm": 0.13620899486374147, "learning_rate": 1.0404395315908611e-05, "loss": 0.4379, "num_tokens": 3459898547.0, "step": 9981 }, { "epoch": 3.6566207117665916, "grad_norm": 0.14674655687473626, "learning_rate": 1.0401128902429379e-05, "loss": 0.3891, "num_tokens": 3460583159.0, "step": 9982 }, { "epoch": 3.6569871295744973, "grad_norm": 0.14871320410773053, "learning_rate": 1.0397863141989713e-05, "loss": 0.4186, "num_tokens": 3461288062.0, "step": 9983 }, { "epoch": 3.657353547382403, "grad_norm": 0.13725584132991567, "learning_rate": 1.0394598034773474e-05, "loss": 0.4168, "num_tokens": 3462161066.0, "step": 9984 }, { "epoch": 3.6577199651903083, "grad_norm": 0.13676539665664425, "learning_rate": 1.0391333580964508e-05, "loss": 0.4317, "num_tokens": 3462925282.0, "step": 9985 }, { "epoch": 3.6580863829982135, "grad_norm": 0.15072649524369958, "learning_rate": 1.0388069780746605e-05, "loss": 0.3872, "num_tokens": 3463726555.0, "step": 9986 }, { "epoch": 3.6584528008061192, "grad_norm": 0.15140353919748287, "learning_rate": 1.0384806634303525e-05, "loss": 0.4096, "num_tokens": 3464399173.0, "step": 9987 }, { "epoch": 3.6588192186140245, "grad_norm": 0.16575135082762973, "learning_rate": 1.0381544141818983e-05, "loss": 0.3951, "num_tokens": 3465065940.0, "step": 9988 }, { "epoch": 3.65918563642193, "grad_norm": 0.1344142780278971, "learning_rate": 1.037828230347667e-05, "loss": 0.3827, "num_tokens": 3465903128.0, "step": 9989 }, { "epoch": 3.6595520542298354, "grad_norm": 0.13892398513740967, "learning_rate": 1.0375021119460228e-05, "loss": 0.4033, "num_tokens": 3466581023.0, "step": 9990 }, { "epoch": 3.659918472037741, "grad_norm": 0.15380811830226163, "learning_rate": 1.0371760589953281e-05, "loss": 0.4144, "num_tokens": 3467330632.0, "step": 9991 }, { "epoch": 3.6602848898456464, "grad_norm": 0.1460991179247029, "learning_rate": 1.0368500715139393e-05, "loss": 0.3943, "num_tokens": 3468108753.0, "step": 9992 }, { "epoch": 3.660651307653552, "grad_norm": 0.1338585037539409, "learning_rate": 1.0365241495202106e-05, "loss": 0.3987, "num_tokens": 3468824597.0, "step": 9993 }, { "epoch": 3.6610177254614573, "grad_norm": 0.14100612779287908, "learning_rate": 1.0361982930324908e-05, "loss": 0.3864, "num_tokens": 3469575579.0, "step": 9994 }, { "epoch": 3.661384143269363, "grad_norm": 0.13565918487709505, "learning_rate": 1.035872502069128e-05, "loss": 0.418, "num_tokens": 3470409236.0, "step": 9995 }, { "epoch": 3.6617505610772683, "grad_norm": 0.14427183569264593, "learning_rate": 1.0355467766484635e-05, "loss": 0.4487, "num_tokens": 3471214018.0, "step": 9996 }, { "epoch": 3.662116978885174, "grad_norm": 0.14745488198115375, "learning_rate": 1.0352211167888377e-05, "loss": 0.4055, "num_tokens": 3471933277.0, "step": 9997 }, { "epoch": 3.662483396693079, "grad_norm": 0.14590973858363276, "learning_rate": 1.034895522508585e-05, "loss": 0.3947, "num_tokens": 3472679258.0, "step": 9998 }, { "epoch": 3.662849814500985, "grad_norm": 0.14225188765242494, "learning_rate": 1.0345699938260373e-05, "loss": 0.4164, "num_tokens": 3473472107.0, "step": 9999 }, { "epoch": 3.66321623230889, "grad_norm": 0.13953290895371923, "learning_rate": 1.0342445307595223e-05, "loss": 0.4358, "num_tokens": 3474352648.0, "step": 10000 }, { "epoch": 3.663582650116796, "grad_norm": 0.14386859890925532, "learning_rate": 1.0339191333273637e-05, "loss": 0.4217, "num_tokens": 3475098380.0, "step": 10001 }, { "epoch": 3.663949067924701, "grad_norm": 0.13052378896935407, "learning_rate": 1.0335938015478825e-05, "loss": 0.4155, "num_tokens": 3475994252.0, "step": 10002 }, { "epoch": 3.6643154857326063, "grad_norm": 0.13767102690316574, "learning_rate": 1.0332685354393963e-05, "loss": 0.3906, "num_tokens": 3476841318.0, "step": 10003 }, { "epoch": 3.664681903540512, "grad_norm": 0.14417263450509035, "learning_rate": 1.0329433350202176e-05, "loss": 0.4093, "num_tokens": 3477608268.0, "step": 10004 }, { "epoch": 3.6650483213484177, "grad_norm": 0.15024220568361157, "learning_rate": 1.0326182003086558e-05, "loss": 0.4253, "num_tokens": 3478316097.0, "step": 10005 }, { "epoch": 3.665414739156323, "grad_norm": 0.13230864322206545, "learning_rate": 1.0322931313230162e-05, "loss": 0.4032, "num_tokens": 3479155876.0, "step": 10006 }, { "epoch": 3.6657811569642282, "grad_norm": 0.13192101306758225, "learning_rate": 1.0319681280816013e-05, "loss": 0.3761, "num_tokens": 3479901226.0, "step": 10007 }, { "epoch": 3.666147574772134, "grad_norm": 0.14548243659254897, "learning_rate": 1.0316431906027096e-05, "loss": 0.4263, "num_tokens": 3480531900.0, "step": 10008 }, { "epoch": 3.6665139925800396, "grad_norm": 0.1555545595307725, "learning_rate": 1.031318318904635e-05, "loss": 0.4173, "num_tokens": 3481277624.0, "step": 10009 }, { "epoch": 3.666880410387945, "grad_norm": 0.15997218136315605, "learning_rate": 1.0309935130056696e-05, "loss": 0.3962, "num_tokens": 3481974745.0, "step": 10010 }, { "epoch": 3.66724682819585, "grad_norm": 0.14339136374920936, "learning_rate": 1.0306687729240998e-05, "loss": 0.4096, "num_tokens": 3482672383.0, "step": 10011 }, { "epoch": 3.667613246003756, "grad_norm": 0.13340493299834946, "learning_rate": 1.0303440986782089e-05, "loss": 0.3899, "num_tokens": 3483493880.0, "step": 10012 }, { "epoch": 3.6679796638116615, "grad_norm": 0.1567330891913514, "learning_rate": 1.0300194902862776e-05, "loss": 0.4197, "num_tokens": 3484170180.0, "step": 10013 }, { "epoch": 3.6683460816195668, "grad_norm": 0.13691548364198378, "learning_rate": 1.0296949477665815e-05, "loss": 0.4026, "num_tokens": 3484942104.0, "step": 10014 }, { "epoch": 3.668712499427472, "grad_norm": 0.1387100137548245, "learning_rate": 1.0293704711373924e-05, "loss": 0.4153, "num_tokens": 3485657725.0, "step": 10015 }, { "epoch": 3.6690789172353777, "grad_norm": 0.15667079917496174, "learning_rate": 1.02904606041698e-05, "loss": 0.4412, "num_tokens": 3486269602.0, "step": 10016 }, { "epoch": 3.669445335043283, "grad_norm": 0.1637931036717989, "learning_rate": 1.028721715623609e-05, "loss": 0.4214, "num_tokens": 3486966348.0, "step": 10017 }, { "epoch": 3.6698117528511887, "grad_norm": 0.12400501437352766, "learning_rate": 1.0283974367755398e-05, "loss": 0.3921, "num_tokens": 3487873581.0, "step": 10018 }, { "epoch": 3.670178170659094, "grad_norm": 0.13761309562437482, "learning_rate": 1.0280732238910309e-05, "loss": 0.3942, "num_tokens": 3488656621.0, "step": 10019 }, { "epoch": 3.6705445884669996, "grad_norm": 0.14603524097232554, "learning_rate": 1.0277490769883362e-05, "loss": 0.415, "num_tokens": 3489427149.0, "step": 10020 }, { "epoch": 3.670911006274905, "grad_norm": 0.14851567782521835, "learning_rate": 1.0274249960857055e-05, "loss": 0.4294, "num_tokens": 3490150144.0, "step": 10021 }, { "epoch": 3.6712774240828105, "grad_norm": 0.13531878626359134, "learning_rate": 1.0271009812013846e-05, "loss": 0.4022, "num_tokens": 3490919974.0, "step": 10022 }, { "epoch": 3.671643841890716, "grad_norm": 0.12334039607834889, "learning_rate": 1.0267770323536165e-05, "loss": 0.3958, "num_tokens": 3491757429.0, "step": 10023 }, { "epoch": 3.6720102596986215, "grad_norm": 0.14058916300651736, "learning_rate": 1.0264531495606414e-05, "loss": 0.4053, "num_tokens": 3492535688.0, "step": 10024 }, { "epoch": 3.6723766775065267, "grad_norm": 0.16179285672636118, "learning_rate": 1.0261293328406934e-05, "loss": 0.4117, "num_tokens": 3493223577.0, "step": 10025 }, { "epoch": 3.6727430953144324, "grad_norm": 0.14746218268921554, "learning_rate": 1.025805582212004e-05, "loss": 0.4498, "num_tokens": 3493939442.0, "step": 10026 }, { "epoch": 3.6731095131223377, "grad_norm": 0.14164128056668016, "learning_rate": 1.0254818976928014e-05, "loss": 0.4146, "num_tokens": 3494691875.0, "step": 10027 }, { "epoch": 3.673475930930243, "grad_norm": 0.12961263532343756, "learning_rate": 1.0251582793013088e-05, "loss": 0.3766, "num_tokens": 3495615121.0, "step": 10028 }, { "epoch": 3.6738423487381486, "grad_norm": 0.12791669244809756, "learning_rate": 1.024834727055748e-05, "loss": 0.421, "num_tokens": 3496568878.0, "step": 10029 }, { "epoch": 3.6742087665460543, "grad_norm": 0.14288202188236793, "learning_rate": 1.0245112409743343e-05, "loss": 0.4084, "num_tokens": 3497296425.0, "step": 10030 }, { "epoch": 3.6745751843539596, "grad_norm": 0.14114969855375392, "learning_rate": 1.0241878210752816e-05, "loss": 0.4374, "num_tokens": 3498121440.0, "step": 10031 }, { "epoch": 3.674941602161865, "grad_norm": 0.13653970177223682, "learning_rate": 1.0238644673767988e-05, "loss": 0.4206, "num_tokens": 3498931854.0, "step": 10032 }, { "epoch": 3.6753080199697705, "grad_norm": 0.12975591004713025, "learning_rate": 1.023541179897091e-05, "loss": 0.4128, "num_tokens": 3499950575.0, "step": 10033 }, { "epoch": 3.675674437777676, "grad_norm": 0.1496015244884419, "learning_rate": 1.0232179586543598e-05, "loss": 0.4319, "num_tokens": 3500650286.0, "step": 10034 }, { "epoch": 3.6760408555855815, "grad_norm": 0.13198402938841886, "learning_rate": 1.0228948036668042e-05, "loss": 0.3958, "num_tokens": 3501440754.0, "step": 10035 }, { "epoch": 3.6764072733934867, "grad_norm": 0.14585475336089876, "learning_rate": 1.0225717149526173e-05, "loss": 0.4055, "num_tokens": 3502133307.0, "step": 10036 }, { "epoch": 3.6767736912013924, "grad_norm": 0.14344602851272995, "learning_rate": 1.0222486925299909e-05, "loss": 0.4233, "num_tokens": 3502860109.0, "step": 10037 }, { "epoch": 3.677140109009298, "grad_norm": 0.15222239429922632, "learning_rate": 1.0219257364171107e-05, "loss": 0.4429, "num_tokens": 3503596646.0, "step": 10038 }, { "epoch": 3.6775065268172034, "grad_norm": 0.14090176976048238, "learning_rate": 1.0216028466321604e-05, "loss": 0.4232, "num_tokens": 3504361701.0, "step": 10039 }, { "epoch": 3.6778729446251086, "grad_norm": 0.15347118412950328, "learning_rate": 1.0212800231933184e-05, "loss": 0.4358, "num_tokens": 3505093323.0, "step": 10040 }, { "epoch": 3.6782393624330143, "grad_norm": 0.13168330397157688, "learning_rate": 1.0209572661187616e-05, "loss": 0.4066, "num_tokens": 3505972829.0, "step": 10041 }, { "epoch": 3.6786057802409196, "grad_norm": 0.14477603437527586, "learning_rate": 1.0206345754266613e-05, "loss": 0.404, "num_tokens": 3506657583.0, "step": 10042 }, { "epoch": 3.6789721980488252, "grad_norm": 0.13744310980104618, "learning_rate": 1.0203119511351848e-05, "loss": 0.4257, "num_tokens": 3507411075.0, "step": 10043 }, { "epoch": 3.6793386158567305, "grad_norm": 0.16141714138835633, "learning_rate": 1.0199893932624982e-05, "loss": 0.4357, "num_tokens": 3508115048.0, "step": 10044 }, { "epoch": 3.679705033664636, "grad_norm": 0.15934815570313224, "learning_rate": 1.0196669018267606e-05, "loss": 0.4104, "num_tokens": 3508681080.0, "step": 10045 }, { "epoch": 3.6800714514725414, "grad_norm": 0.14058727731196266, "learning_rate": 1.01934447684613e-05, "loss": 0.4061, "num_tokens": 3509513115.0, "step": 10046 }, { "epoch": 3.680437869280447, "grad_norm": 0.14193903239782096, "learning_rate": 1.019022118338759e-05, "loss": 0.4143, "num_tokens": 3510194638.0, "step": 10047 }, { "epoch": 3.6808042870883524, "grad_norm": 0.1342219655951788, "learning_rate": 1.0186998263227976e-05, "loss": 0.3976, "num_tokens": 3510999346.0, "step": 10048 }, { "epoch": 3.681170704896258, "grad_norm": 0.1466564103879957, "learning_rate": 1.0183776008163902e-05, "loss": 0.4197, "num_tokens": 3511805572.0, "step": 10049 }, { "epoch": 3.6815371227041633, "grad_norm": 0.14169755849053065, "learning_rate": 1.0180554418376801e-05, "loss": 0.4194, "num_tokens": 3512539073.0, "step": 10050 }, { "epoch": 3.681903540512069, "grad_norm": 0.1334440061174068, "learning_rate": 1.017733349404804e-05, "loss": 0.4133, "num_tokens": 3513321956.0, "step": 10051 }, { "epoch": 3.6822699583199743, "grad_norm": 0.13189515564900187, "learning_rate": 1.0174113235358984e-05, "loss": 0.3983, "num_tokens": 3514026264.0, "step": 10052 }, { "epoch": 3.68263637612788, "grad_norm": 0.15356113165799118, "learning_rate": 1.0170893642490928e-05, "loss": 0.4269, "num_tokens": 3514678321.0, "step": 10053 }, { "epoch": 3.6830027939357852, "grad_norm": 0.13431141738116456, "learning_rate": 1.0167674715625142e-05, "loss": 0.3641, "num_tokens": 3515457402.0, "step": 10054 }, { "epoch": 3.683369211743691, "grad_norm": 0.1480398717641892, "learning_rate": 1.0164456454942862e-05, "loss": 0.4345, "num_tokens": 3516170159.0, "step": 10055 }, { "epoch": 3.683735629551596, "grad_norm": 0.1427458954199695, "learning_rate": 1.0161238860625269e-05, "loss": 0.3742, "num_tokens": 3516880803.0, "step": 10056 }, { "epoch": 3.6841020473595014, "grad_norm": 0.13307287233337997, "learning_rate": 1.015802193285353e-05, "loss": 0.3923, "num_tokens": 3517682278.0, "step": 10057 }, { "epoch": 3.684468465167407, "grad_norm": 0.1319144932654777, "learning_rate": 1.0154805671808771e-05, "loss": 0.4023, "num_tokens": 3518542073.0, "step": 10058 }, { "epoch": 3.684834882975313, "grad_norm": 0.16512004916738524, "learning_rate": 1.015159007767207e-05, "loss": 0.3974, "num_tokens": 3519353084.0, "step": 10059 }, { "epoch": 3.685201300783218, "grad_norm": 0.135731312519359, "learning_rate": 1.0148375150624465e-05, "loss": 0.4141, "num_tokens": 3520169976.0, "step": 10060 }, { "epoch": 3.6855677185911233, "grad_norm": 0.13276520501429362, "learning_rate": 1.0145160890846968e-05, "loss": 0.406, "num_tokens": 3520945013.0, "step": 10061 }, { "epoch": 3.685934136399029, "grad_norm": 0.14789214804361267, "learning_rate": 1.014194729852054e-05, "loss": 0.398, "num_tokens": 3521811035.0, "step": 10062 }, { "epoch": 3.6863005542069347, "grad_norm": 0.13152089004527223, "learning_rate": 1.0138734373826127e-05, "loss": 0.4238, "num_tokens": 3522655437.0, "step": 10063 }, { "epoch": 3.68666697201484, "grad_norm": 0.1414359240142158, "learning_rate": 1.013552211694461e-05, "loss": 0.4084, "num_tokens": 3523382384.0, "step": 10064 }, { "epoch": 3.687033389822745, "grad_norm": 0.12979258148634787, "learning_rate": 1.0132310528056855e-05, "loss": 0.3969, "num_tokens": 3524208571.0, "step": 10065 }, { "epoch": 3.687399807630651, "grad_norm": 0.1344177577125199, "learning_rate": 1.0129099607343677e-05, "loss": 0.4206, "num_tokens": 3524984941.0, "step": 10066 }, { "epoch": 3.6877662254385566, "grad_norm": 0.15136778419641278, "learning_rate": 1.0125889354985859e-05, "loss": 0.3904, "num_tokens": 3525730023.0, "step": 10067 }, { "epoch": 3.688132643246462, "grad_norm": 0.1448219390930603, "learning_rate": 1.0122679771164133e-05, "loss": 0.3996, "num_tokens": 3526422178.0, "step": 10068 }, { "epoch": 3.688499061054367, "grad_norm": 0.14285190161705585, "learning_rate": 1.0119470856059222e-05, "loss": 0.3916, "num_tokens": 3527110868.0, "step": 10069 }, { "epoch": 3.688865478862273, "grad_norm": 0.14280842449885695, "learning_rate": 1.0116262609851782e-05, "loss": 0.4072, "num_tokens": 3527832260.0, "step": 10070 }, { "epoch": 3.689231896670178, "grad_norm": 0.13754009791452584, "learning_rate": 1.0113055032722455e-05, "loss": 0.4102, "num_tokens": 3528546329.0, "step": 10071 }, { "epoch": 3.6895983144780837, "grad_norm": 0.14215621905479525, "learning_rate": 1.0109848124851824e-05, "loss": 0.4243, "num_tokens": 3529274548.0, "step": 10072 }, { "epoch": 3.689964732285989, "grad_norm": 0.132459577060647, "learning_rate": 1.0106641886420448e-05, "loss": 0.3885, "num_tokens": 3530119811.0, "step": 10073 }, { "epoch": 3.6903311500938947, "grad_norm": 0.13281579017630055, "learning_rate": 1.0103436317608846e-05, "loss": 0.3995, "num_tokens": 3530906756.0, "step": 10074 }, { "epoch": 3.6906975679018, "grad_norm": 0.14619678130558747, "learning_rate": 1.0100231418597498e-05, "loss": 0.3821, "num_tokens": 3531717734.0, "step": 10075 }, { "epoch": 3.6910639857097056, "grad_norm": 0.14458509985365237, "learning_rate": 1.0097027189566843e-05, "loss": 0.3829, "num_tokens": 3532400422.0, "step": 10076 }, { "epoch": 3.691430403517611, "grad_norm": 0.1330440195897519, "learning_rate": 1.0093823630697281e-05, "loss": 0.4192, "num_tokens": 3533241437.0, "step": 10077 }, { "epoch": 3.6917968213255166, "grad_norm": 0.1325553828195781, "learning_rate": 1.0090620742169195e-05, "loss": 0.3998, "num_tokens": 3534041876.0, "step": 10078 }, { "epoch": 3.692163239133422, "grad_norm": 0.14330919377879156, "learning_rate": 1.0087418524162894e-05, "loss": 0.4258, "num_tokens": 3534910938.0, "step": 10079 }, { "epoch": 3.6925296569413275, "grad_norm": 0.12939859941207174, "learning_rate": 1.0084216976858684e-05, "loss": 0.3609, "num_tokens": 3535718360.0, "step": 10080 }, { "epoch": 3.6928960747492328, "grad_norm": 0.1351425440070934, "learning_rate": 1.0081016100436818e-05, "loss": 0.3825, "num_tokens": 3536474568.0, "step": 10081 }, { "epoch": 3.693262492557138, "grad_norm": 0.1433556682934124, "learning_rate": 1.0077815895077503e-05, "loss": 0.4139, "num_tokens": 3537254758.0, "step": 10082 }, { "epoch": 3.6936289103650437, "grad_norm": 0.1338130353291734, "learning_rate": 1.0074616360960921e-05, "loss": 0.3992, "num_tokens": 3538147467.0, "step": 10083 }, { "epoch": 3.6939953281729494, "grad_norm": 0.1378484949853717, "learning_rate": 1.0071417498267212e-05, "loss": 0.4429, "num_tokens": 3538929090.0, "step": 10084 }, { "epoch": 3.6943617459808546, "grad_norm": 0.13471879722685448, "learning_rate": 1.0068219307176476e-05, "loss": 0.4051, "num_tokens": 3539712643.0, "step": 10085 }, { "epoch": 3.69472816378876, "grad_norm": 0.13809317703008356, "learning_rate": 1.0065021787868785e-05, "loss": 0.4339, "num_tokens": 3540581502.0, "step": 10086 }, { "epoch": 3.6950945815966656, "grad_norm": 0.14384445892003764, "learning_rate": 1.0061824940524163e-05, "loss": 0.4136, "num_tokens": 3541274148.0, "step": 10087 }, { "epoch": 3.6954609994045713, "grad_norm": 0.1451563331702254, "learning_rate": 1.0058628765322593e-05, "loss": 0.4039, "num_tokens": 3542055817.0, "step": 10088 }, { "epoch": 3.6958274172124765, "grad_norm": 0.15025892535694466, "learning_rate": 1.0055433262444035e-05, "loss": 0.389, "num_tokens": 3542707541.0, "step": 10089 }, { "epoch": 3.696193835020382, "grad_norm": 0.15231238685273507, "learning_rate": 1.0052238432068387e-05, "loss": 0.4375, "num_tokens": 3543462714.0, "step": 10090 }, { "epoch": 3.6965602528282875, "grad_norm": 0.1391987599132391, "learning_rate": 1.0049044274375537e-05, "loss": 0.4179, "num_tokens": 3544281829.0, "step": 10091 }, { "epoch": 3.696926670636193, "grad_norm": 0.14483904129186095, "learning_rate": 1.0045850789545326e-05, "loss": 0.4183, "num_tokens": 3545031668.0, "step": 10092 }, { "epoch": 3.6972930884440984, "grad_norm": 0.13679336829378647, "learning_rate": 1.0042657977757546e-05, "loss": 0.4208, "num_tokens": 3545814950.0, "step": 10093 }, { "epoch": 3.6976595062520037, "grad_norm": 0.14743193591890155, "learning_rate": 1.0039465839191962e-05, "loss": 0.4344, "num_tokens": 3546576450.0, "step": 10094 }, { "epoch": 3.6980259240599094, "grad_norm": 0.14269669225098433, "learning_rate": 1.0036274374028287e-05, "loss": 0.4118, "num_tokens": 3547319109.0, "step": 10095 }, { "epoch": 3.6983923418678146, "grad_norm": 0.13930708215373933, "learning_rate": 1.0033083582446222e-05, "loss": 0.3679, "num_tokens": 3548014082.0, "step": 10096 }, { "epoch": 3.6987587596757203, "grad_norm": 0.15137094087161515, "learning_rate": 1.0029893464625411e-05, "loss": 0.3999, "num_tokens": 3548690614.0, "step": 10097 }, { "epoch": 3.6991251774836256, "grad_norm": 0.14968047926642686, "learning_rate": 1.0026704020745454e-05, "loss": 0.4131, "num_tokens": 3549396669.0, "step": 10098 }, { "epoch": 3.6994915952915313, "grad_norm": 0.12699035056028646, "learning_rate": 1.0023515250985938e-05, "loss": 0.4328, "num_tokens": 3550249593.0, "step": 10099 }, { "epoch": 3.6998580130994365, "grad_norm": 0.13948323085515663, "learning_rate": 1.0020327155526388e-05, "loss": 0.395, "num_tokens": 3550987395.0, "step": 10100 }, { "epoch": 3.700224430907342, "grad_norm": 0.14449450936615332, "learning_rate": 1.0017139734546297e-05, "loss": 0.4386, "num_tokens": 3551705939.0, "step": 10101 }, { "epoch": 3.7005908487152475, "grad_norm": 0.13859225446341442, "learning_rate": 1.0013952988225135e-05, "loss": 0.4358, "num_tokens": 3552504804.0, "step": 10102 }, { "epoch": 3.700957266523153, "grad_norm": 0.147660260816442, "learning_rate": 1.0010766916742313e-05, "loss": 0.4593, "num_tokens": 3553364132.0, "step": 10103 }, { "epoch": 3.7013236843310584, "grad_norm": 0.1388411898529176, "learning_rate": 1.0007581520277211e-05, "loss": 0.4018, "num_tokens": 3554083692.0, "step": 10104 }, { "epoch": 3.701690102138964, "grad_norm": 0.14108671463839983, "learning_rate": 1.0004396799009184e-05, "loss": 0.4222, "num_tokens": 3554800484.0, "step": 10105 }, { "epoch": 3.7020565199468694, "grad_norm": 0.14453880217906964, "learning_rate": 1.0001212753117523e-05, "loss": 0.4327, "num_tokens": 3555543676.0, "step": 10106 }, { "epoch": 3.702422937754775, "grad_norm": 0.14523542272247553, "learning_rate": 9.998029382781515e-06, "loss": 0.3977, "num_tokens": 3556361990.0, "step": 10107 }, { "epoch": 3.7027893555626803, "grad_norm": 0.1374365394596508, "learning_rate": 9.994846688180377e-06, "loss": 0.3804, "num_tokens": 3557144065.0, "step": 10108 }, { "epoch": 3.703155773370586, "grad_norm": 0.12911576411883305, "learning_rate": 9.991664669493304e-06, "loss": 0.4094, "num_tokens": 3557994526.0, "step": 10109 }, { "epoch": 3.7035221911784912, "grad_norm": 0.13604695629943175, "learning_rate": 9.988483326899452e-06, "loss": 0.3891, "num_tokens": 3558750546.0, "step": 10110 }, { "epoch": 3.7038886089863965, "grad_norm": 0.1256514260064947, "learning_rate": 9.985302660577929e-06, "loss": 0.3927, "num_tokens": 3559601932.0, "step": 10111 }, { "epoch": 3.704255026794302, "grad_norm": 0.14326569331584496, "learning_rate": 9.982122670707817e-06, "loss": 0.3929, "num_tokens": 3560312093.0, "step": 10112 }, { "epoch": 3.704621444602208, "grad_norm": 0.143243609317245, "learning_rate": 9.978943357468164e-06, "loss": 0.3866, "num_tokens": 3560975772.0, "step": 10113 }, { "epoch": 3.704987862410113, "grad_norm": 0.14422890248740375, "learning_rate": 9.975764721037965e-06, "loss": 0.4341, "num_tokens": 3561672960.0, "step": 10114 }, { "epoch": 3.7053542802180184, "grad_norm": 0.13531951026304143, "learning_rate": 9.972586761596185e-06, "loss": 0.4366, "num_tokens": 3562445669.0, "step": 10115 }, { "epoch": 3.705720698025924, "grad_norm": 0.14276661319129597, "learning_rate": 9.969409479321746e-06, "loss": 0.4074, "num_tokens": 3563262516.0, "step": 10116 }, { "epoch": 3.7060871158338298, "grad_norm": 0.13059341279232867, "learning_rate": 9.966232874393535e-06, "loss": 0.3847, "num_tokens": 3564070692.0, "step": 10117 }, { "epoch": 3.706453533641735, "grad_norm": 0.13517646076244413, "learning_rate": 9.963056946990408e-06, "loss": 0.3996, "num_tokens": 3564858604.0, "step": 10118 }, { "epoch": 3.7068199514496403, "grad_norm": 0.13466621380291424, "learning_rate": 9.959881697291167e-06, "loss": 0.4112, "num_tokens": 3565692322.0, "step": 10119 }, { "epoch": 3.707186369257546, "grad_norm": 0.1290457260227936, "learning_rate": 9.956707125474594e-06, "loss": 0.4055, "num_tokens": 3566466310.0, "step": 10120 }, { "epoch": 3.707552787065451, "grad_norm": 0.14977349406741552, "learning_rate": 9.95353323171942e-06, "loss": 0.4023, "num_tokens": 3567195925.0, "step": 10121 }, { "epoch": 3.707919204873357, "grad_norm": 0.1486959039406181, "learning_rate": 9.95036001620434e-06, "loss": 0.3998, "num_tokens": 3567909748.0, "step": 10122 }, { "epoch": 3.708285622681262, "grad_norm": 0.13071462706753295, "learning_rate": 9.947187479108008e-06, "loss": 0.3701, "num_tokens": 3568591262.0, "step": 10123 }, { "epoch": 3.708652040489168, "grad_norm": 0.14595318877161725, "learning_rate": 9.944015620609056e-06, "loss": 0.4316, "num_tokens": 3569317378.0, "step": 10124 }, { "epoch": 3.709018458297073, "grad_norm": 0.14133660605487267, "learning_rate": 9.940844440886055e-06, "loss": 0.4082, "num_tokens": 3570116609.0, "step": 10125 }, { "epoch": 3.709384876104979, "grad_norm": 0.14019150611459824, "learning_rate": 9.937673940117558e-06, "loss": 0.4208, "num_tokens": 3570856272.0, "step": 10126 }, { "epoch": 3.709751293912884, "grad_norm": 0.13408127070709433, "learning_rate": 9.934504118482063e-06, "loss": 0.4045, "num_tokens": 3571716077.0, "step": 10127 }, { "epoch": 3.7101177117207897, "grad_norm": 0.13332619951642666, "learning_rate": 9.931334976158037e-06, "loss": 0.4133, "num_tokens": 3572480691.0, "step": 10128 }, { "epoch": 3.710484129528695, "grad_norm": 0.13000862463202434, "learning_rate": 9.928166513323917e-06, "loss": 0.3923, "num_tokens": 3573365338.0, "step": 10129 }, { "epoch": 3.7108505473366007, "grad_norm": 0.13959782554587685, "learning_rate": 9.924998730158091e-06, "loss": 0.4343, "num_tokens": 3574159773.0, "step": 10130 }, { "epoch": 3.711216965144506, "grad_norm": 0.13520801112860367, "learning_rate": 9.921831626838906e-06, "loss": 0.3787, "num_tokens": 3574969278.0, "step": 10131 }, { "epoch": 3.7115833829524116, "grad_norm": 0.1420134115362045, "learning_rate": 9.918665203544677e-06, "loss": 0.4041, "num_tokens": 3575721950.0, "step": 10132 }, { "epoch": 3.711949800760317, "grad_norm": 0.14783921031022756, "learning_rate": 9.915499460453684e-06, "loss": 0.4419, "num_tokens": 3576443959.0, "step": 10133 }, { "epoch": 3.7123162185682226, "grad_norm": 0.14383673819386744, "learning_rate": 9.91233439774416e-06, "loss": 0.4175, "num_tokens": 3577135013.0, "step": 10134 }, { "epoch": 3.712682636376128, "grad_norm": 0.1342514488431519, "learning_rate": 9.909170015594313e-06, "loss": 0.3858, "num_tokens": 3577989967.0, "step": 10135 }, { "epoch": 3.713049054184033, "grad_norm": 0.1369211664375196, "learning_rate": 9.906006314182299e-06, "loss": 0.4192, "num_tokens": 3578792358.0, "step": 10136 }, { "epoch": 3.7134154719919388, "grad_norm": 0.1399018651359544, "learning_rate": 9.90284329368624e-06, "loss": 0.4196, "num_tokens": 3579555213.0, "step": 10137 }, { "epoch": 3.7137818897998445, "grad_norm": 0.13781792565247147, "learning_rate": 9.899680954284215e-06, "loss": 0.4194, "num_tokens": 3580441948.0, "step": 10138 }, { "epoch": 3.7141483076077497, "grad_norm": 0.13152028170150468, "learning_rate": 9.89651929615428e-06, "loss": 0.4136, "num_tokens": 3581253545.0, "step": 10139 }, { "epoch": 3.714514725415655, "grad_norm": 0.14356868205516884, "learning_rate": 9.893358319474433e-06, "loss": 0.429, "num_tokens": 3581945236.0, "step": 10140 }, { "epoch": 3.7148811432235607, "grad_norm": 0.15329353143256283, "learning_rate": 9.890198024422655e-06, "loss": 0.3919, "num_tokens": 3582551318.0, "step": 10141 }, { "epoch": 3.7152475610314664, "grad_norm": 0.14268888434134214, "learning_rate": 9.88703841117687e-06, "loss": 0.4374, "num_tokens": 3583313291.0, "step": 10142 }, { "epoch": 3.7156139788393716, "grad_norm": 0.13970318940953852, "learning_rate": 9.883879479914971e-06, "loss": 0.4014, "num_tokens": 3584014738.0, "step": 10143 }, { "epoch": 3.715980396647277, "grad_norm": 0.13427576706444275, "learning_rate": 9.880721230814812e-06, "loss": 0.41, "num_tokens": 3584843268.0, "step": 10144 }, { "epoch": 3.7163468144551826, "grad_norm": 0.136412563996512, "learning_rate": 9.877563664054202e-06, "loss": 0.3978, "num_tokens": 3585621482.0, "step": 10145 }, { "epoch": 3.7167132322630883, "grad_norm": 0.13904045996875772, "learning_rate": 9.874406779810926e-06, "loss": 0.4198, "num_tokens": 3586454676.0, "step": 10146 }, { "epoch": 3.7170796500709935, "grad_norm": 0.13659835109499907, "learning_rate": 9.87125057826273e-06, "loss": 0.3985, "num_tokens": 3587229727.0, "step": 10147 }, { "epoch": 3.7174460678788988, "grad_norm": 0.13216161430125087, "learning_rate": 9.868095059587303e-06, "loss": 0.3871, "num_tokens": 3588016555.0, "step": 10148 }, { "epoch": 3.7178124856868044, "grad_norm": 0.1332226926361204, "learning_rate": 9.864940223962313e-06, "loss": 0.4166, "num_tokens": 3588759594.0, "step": 10149 }, { "epoch": 3.7181789034947097, "grad_norm": 0.14992129881232205, "learning_rate": 9.86178607156538e-06, "loss": 0.4078, "num_tokens": 3589426754.0, "step": 10150 }, { "epoch": 3.7185453213026154, "grad_norm": 0.13428021225129658, "learning_rate": 9.858632602574084e-06, "loss": 0.4061, "num_tokens": 3590283951.0, "step": 10151 }, { "epoch": 3.7189117391105206, "grad_norm": 0.11818321879774385, "learning_rate": 9.855479817165983e-06, "loss": 0.404, "num_tokens": 3591208950.0, "step": 10152 }, { "epoch": 3.7192781569184263, "grad_norm": 0.1347165454654625, "learning_rate": 9.852327715518574e-06, "loss": 0.4282, "num_tokens": 3592024617.0, "step": 10153 }, { "epoch": 3.7196445747263316, "grad_norm": 0.1363679774670967, "learning_rate": 9.849176297809339e-06, "loss": 0.4281, "num_tokens": 3592788708.0, "step": 10154 }, { "epoch": 3.7200109925342373, "grad_norm": 0.13650226995107784, "learning_rate": 9.846025564215702e-06, "loss": 0.38, "num_tokens": 3593602039.0, "step": 10155 }, { "epoch": 3.7203774103421425, "grad_norm": 0.14849061314510686, "learning_rate": 9.84287551491505e-06, "loss": 0.4314, "num_tokens": 3594274363.0, "step": 10156 }, { "epoch": 3.7207438281500482, "grad_norm": 0.14014325060556068, "learning_rate": 9.83972615008475e-06, "loss": 0.4169, "num_tokens": 3595039120.0, "step": 10157 }, { "epoch": 3.7211102459579535, "grad_norm": 0.12598619301285977, "learning_rate": 9.836577469902107e-06, "loss": 0.3877, "num_tokens": 3595920218.0, "step": 10158 }, { "epoch": 3.721476663765859, "grad_norm": 0.13096223129766213, "learning_rate": 9.833429474544399e-06, "loss": 0.404, "num_tokens": 3596726102.0, "step": 10159 }, { "epoch": 3.7218430815737644, "grad_norm": 0.1465920070782167, "learning_rate": 9.830282164188872e-06, "loss": 0.4086, "num_tokens": 3597502631.0, "step": 10160 }, { "epoch": 3.7222094993816697, "grad_norm": 0.12751705414613865, "learning_rate": 9.827135539012721e-06, "loss": 0.3962, "num_tokens": 3598287580.0, "step": 10161 }, { "epoch": 3.7225759171895754, "grad_norm": 0.14997806297616167, "learning_rate": 9.8239895991931e-06, "loss": 0.4119, "num_tokens": 3599057574.0, "step": 10162 }, { "epoch": 3.722942334997481, "grad_norm": 0.14502241715256028, "learning_rate": 9.820844344907149e-06, "loss": 0.4071, "num_tokens": 3599794111.0, "step": 10163 }, { "epoch": 3.7233087528053863, "grad_norm": 0.12904743731812243, "learning_rate": 9.817699776331937e-06, "loss": 0.4071, "num_tokens": 3600659827.0, "step": 10164 }, { "epoch": 3.7236751706132916, "grad_norm": 0.13790529595883552, "learning_rate": 9.814555893644514e-06, "loss": 0.4203, "num_tokens": 3601446319.0, "step": 10165 }, { "epoch": 3.7240415884211973, "grad_norm": 0.1431360078671692, "learning_rate": 9.811412697021884e-06, "loss": 0.4204, "num_tokens": 3602150679.0, "step": 10166 }, { "epoch": 3.724408006229103, "grad_norm": 0.1408451577105873, "learning_rate": 9.808270186641018e-06, "loss": 0.3946, "num_tokens": 3602824429.0, "step": 10167 }, { "epoch": 3.724774424037008, "grad_norm": 0.1481000401673417, "learning_rate": 9.805128362678853e-06, "loss": 0.4618, "num_tokens": 3603620268.0, "step": 10168 }, { "epoch": 3.7251408418449135, "grad_norm": 0.14422030254751195, "learning_rate": 9.80198722531227e-06, "loss": 0.429, "num_tokens": 3604301769.0, "step": 10169 }, { "epoch": 3.725507259652819, "grad_norm": 0.13254905037085832, "learning_rate": 9.798846774718125e-06, "loss": 0.4273, "num_tokens": 3605102974.0, "step": 10170 }, { "epoch": 3.725873677460725, "grad_norm": 0.14827808203216802, "learning_rate": 9.795707011073232e-06, "loss": 0.3799, "num_tokens": 3605813038.0, "step": 10171 }, { "epoch": 3.72624009526863, "grad_norm": 0.12585671583005056, "learning_rate": 9.792567934554366e-06, "loss": 0.4097, "num_tokens": 3606591878.0, "step": 10172 }, { "epoch": 3.7266065130765353, "grad_norm": 0.14114827765548285, "learning_rate": 9.789429545338252e-06, "loss": 0.3884, "num_tokens": 3607385594.0, "step": 10173 }, { "epoch": 3.726972930884441, "grad_norm": 0.13840391378469416, "learning_rate": 9.786291843601602e-06, "loss": 0.4126, "num_tokens": 3608139836.0, "step": 10174 }, { "epoch": 3.7273393486923463, "grad_norm": 0.1420015805291042, "learning_rate": 9.783154829521075e-06, "loss": 0.413, "num_tokens": 3608943817.0, "step": 10175 }, { "epoch": 3.727705766500252, "grad_norm": 0.13703024675578235, "learning_rate": 9.780018503273288e-06, "loss": 0.4239, "num_tokens": 3609794056.0, "step": 10176 }, { "epoch": 3.7280721843081572, "grad_norm": 0.13543299859090208, "learning_rate": 9.776882865034818e-06, "loss": 0.4025, "num_tokens": 3610579960.0, "step": 10177 }, { "epoch": 3.728438602116063, "grad_norm": 0.138107554267116, "learning_rate": 9.773747914982206e-06, "loss": 0.4039, "num_tokens": 3611352481.0, "step": 10178 }, { "epoch": 3.728805019923968, "grad_norm": 0.1496894102071191, "learning_rate": 9.77061365329197e-06, "loss": 0.4366, "num_tokens": 3612083206.0, "step": 10179 }, { "epoch": 3.729171437731874, "grad_norm": 0.14244959369791432, "learning_rate": 9.767480080140555e-06, "loss": 0.4276, "num_tokens": 3612847587.0, "step": 10180 }, { "epoch": 3.729537855539779, "grad_norm": 0.14631867130881276, "learning_rate": 9.764347195704407e-06, "loss": 0.4145, "num_tokens": 3613550731.0, "step": 10181 }, { "epoch": 3.729904273347685, "grad_norm": 0.13926745882538086, "learning_rate": 9.761215000159905e-06, "loss": 0.415, "num_tokens": 3614383293.0, "step": 10182 }, { "epoch": 3.73027069115559, "grad_norm": 0.15853270484326992, "learning_rate": 9.758083493683396e-06, "loss": 0.4171, "num_tokens": 3615126200.0, "step": 10183 }, { "epoch": 3.7306371089634958, "grad_norm": 0.13822037281027916, "learning_rate": 9.754952676451189e-06, "loss": 0.4115, "num_tokens": 3615920463.0, "step": 10184 }, { "epoch": 3.731003526771401, "grad_norm": 0.1487555120241316, "learning_rate": 9.751822548639563e-06, "loss": 0.4095, "num_tokens": 3616681600.0, "step": 10185 }, { "epoch": 3.7313699445793067, "grad_norm": 0.1403379447310819, "learning_rate": 9.748693110424744e-06, "loss": 0.3936, "num_tokens": 3617423764.0, "step": 10186 }, { "epoch": 3.731736362387212, "grad_norm": 0.14646900906735127, "learning_rate": 9.745564361982922e-06, "loss": 0.4312, "num_tokens": 3618111492.0, "step": 10187 }, { "epoch": 3.7321027801951177, "grad_norm": 0.13609893133375975, "learning_rate": 9.742436303490262e-06, "loss": 0.4148, "num_tokens": 3618908039.0, "step": 10188 }, { "epoch": 3.732469198003023, "grad_norm": 0.15348493689128503, "learning_rate": 9.739308935122871e-06, "loss": 0.4171, "num_tokens": 3619592027.0, "step": 10189 }, { "epoch": 3.732835615810928, "grad_norm": 0.1385835097819987, "learning_rate": 9.736182257056835e-06, "loss": 0.3756, "num_tokens": 3620387460.0, "step": 10190 }, { "epoch": 3.733202033618834, "grad_norm": 0.12392396850713075, "learning_rate": 9.733056269468189e-06, "loss": 0.403, "num_tokens": 3621206950.0, "step": 10191 }, { "epoch": 3.7335684514267395, "grad_norm": 0.13680959518666913, "learning_rate": 9.729930972532927e-06, "loss": 0.423, "num_tokens": 3621993198.0, "step": 10192 }, { "epoch": 3.733934869234645, "grad_norm": 0.130913578808523, "learning_rate": 9.726806366427008e-06, "loss": 0.4043, "num_tokens": 3622852657.0, "step": 10193 }, { "epoch": 3.73430128704255, "grad_norm": 0.14877879179503448, "learning_rate": 9.723682451326367e-06, "loss": 0.4286, "num_tokens": 3623488458.0, "step": 10194 }, { "epoch": 3.7346677048504557, "grad_norm": 0.14680400465526994, "learning_rate": 9.72055922740687e-06, "loss": 0.4125, "num_tokens": 3624211543.0, "step": 10195 }, { "epoch": 3.7350341226583614, "grad_norm": 0.14711171380485583, "learning_rate": 9.717436694844374e-06, "loss": 0.3831, "num_tokens": 3624798228.0, "step": 10196 }, { "epoch": 3.7354005404662667, "grad_norm": 0.13857112125503723, "learning_rate": 9.714314853814682e-06, "loss": 0.4021, "num_tokens": 3625573079.0, "step": 10197 }, { "epoch": 3.735766958274172, "grad_norm": 0.13887156587200183, "learning_rate": 9.711193704493554e-06, "loss": 0.3987, "num_tokens": 3626404376.0, "step": 10198 }, { "epoch": 3.7361333760820776, "grad_norm": 0.13594420595217027, "learning_rate": 9.70807324705672e-06, "loss": 0.3746, "num_tokens": 3627178212.0, "step": 10199 }, { "epoch": 3.7364997938899833, "grad_norm": 0.1316491994222984, "learning_rate": 9.704953481679862e-06, "loss": 0.4186, "num_tokens": 3627863118.0, "step": 10200 }, { "epoch": 3.7368662116978886, "grad_norm": 0.14508315438247352, "learning_rate": 9.701834408538633e-06, "loss": 0.3915, "num_tokens": 3628644167.0, "step": 10201 }, { "epoch": 3.737232629505794, "grad_norm": 0.14630043977326707, "learning_rate": 9.698716027808652e-06, "loss": 0.3755, "num_tokens": 3629360057.0, "step": 10202 }, { "epoch": 3.7375990473136995, "grad_norm": 0.1437649272958445, "learning_rate": 9.695598339665482e-06, "loss": 0.4268, "num_tokens": 3630060683.0, "step": 10203 }, { "epoch": 3.7379654651216048, "grad_norm": 0.1425393761206449, "learning_rate": 9.692481344284655e-06, "loss": 0.4088, "num_tokens": 3630817575.0, "step": 10204 }, { "epoch": 3.7383318829295105, "grad_norm": 0.1412782367591934, "learning_rate": 9.689365041841666e-06, "loss": 0.3865, "num_tokens": 3631450240.0, "step": 10205 }, { "epoch": 3.7386983007374157, "grad_norm": 0.13217397184565471, "learning_rate": 9.686249432511963e-06, "loss": 0.4073, "num_tokens": 3632201884.0, "step": 10206 }, { "epoch": 3.7390647185453214, "grad_norm": 0.13402396018977353, "learning_rate": 9.683134516470967e-06, "loss": 0.405, "num_tokens": 3633074428.0, "step": 10207 }, { "epoch": 3.7394311363532267, "grad_norm": 0.13552548723083488, "learning_rate": 9.680020293894051e-06, "loss": 0.3703, "num_tokens": 3633838506.0, "step": 10208 }, { "epoch": 3.7397975541611324, "grad_norm": 0.14328505496300573, "learning_rate": 9.676906764956558e-06, "loss": 0.4083, "num_tokens": 3634577429.0, "step": 10209 }, { "epoch": 3.7401639719690376, "grad_norm": 0.13261602590012564, "learning_rate": 9.673793929833782e-06, "loss": 0.4201, "num_tokens": 3635437388.0, "step": 10210 }, { "epoch": 3.7405303897769433, "grad_norm": 0.1498317172136455, "learning_rate": 9.670681788700977e-06, "loss": 0.4214, "num_tokens": 3636183409.0, "step": 10211 }, { "epoch": 3.7408968075848485, "grad_norm": 0.12569594696942293, "learning_rate": 9.66757034173337e-06, "loss": 0.3727, "num_tokens": 3636973396.0, "step": 10212 }, { "epoch": 3.7412632253927542, "grad_norm": 0.1348328384456032, "learning_rate": 9.66445958910614e-06, "loss": 0.389, "num_tokens": 3637723793.0, "step": 10213 }, { "epoch": 3.7416296432006595, "grad_norm": 0.12926271284913407, "learning_rate": 9.661349530994422e-06, "loss": 0.4295, "num_tokens": 3638526901.0, "step": 10214 }, { "epoch": 3.7419960610085647, "grad_norm": 0.13540944429091442, "learning_rate": 9.658240167573328e-06, "loss": 0.4061, "num_tokens": 3639354150.0, "step": 10215 }, { "epoch": 3.7423624788164704, "grad_norm": 0.14790345450635192, "learning_rate": 9.65513149901792e-06, "loss": 0.431, "num_tokens": 3640036069.0, "step": 10216 }, { "epoch": 3.742728896624376, "grad_norm": 0.1470920466552597, "learning_rate": 9.652023525503211e-06, "loss": 0.3834, "num_tokens": 3640716651.0, "step": 10217 }, { "epoch": 3.7430953144322814, "grad_norm": 0.13902284274675505, "learning_rate": 9.648916247204201e-06, "loss": 0.4111, "num_tokens": 3641444874.0, "step": 10218 }, { "epoch": 3.7434617322401866, "grad_norm": 0.14113280849434512, "learning_rate": 9.645809664295831e-06, "loss": 0.4123, "num_tokens": 3642149846.0, "step": 10219 }, { "epoch": 3.7438281500480923, "grad_norm": 0.13545463483042022, "learning_rate": 9.642703776953005e-06, "loss": 0.4159, "num_tokens": 3642917288.0, "step": 10220 }, { "epoch": 3.744194567855998, "grad_norm": 0.1379519430077716, "learning_rate": 9.639598585350586e-06, "loss": 0.4175, "num_tokens": 3643730218.0, "step": 10221 }, { "epoch": 3.7445609856639033, "grad_norm": 0.151634124622074, "learning_rate": 9.636494089663411e-06, "loss": 0.4366, "num_tokens": 3644428059.0, "step": 10222 }, { "epoch": 3.7449274034718085, "grad_norm": 0.13639122292511782, "learning_rate": 9.63339029006627e-06, "loss": 0.3929, "num_tokens": 3645213513.0, "step": 10223 }, { "epoch": 3.745293821279714, "grad_norm": 0.1392553009623881, "learning_rate": 9.63028718673391e-06, "loss": 0.4176, "num_tokens": 3645891965.0, "step": 10224 }, { "epoch": 3.74566023908762, "grad_norm": 0.1380369866513793, "learning_rate": 9.627184779841042e-06, "loss": 0.3826, "num_tokens": 3646668254.0, "step": 10225 }, { "epoch": 3.746026656895525, "grad_norm": 0.12235375567020802, "learning_rate": 9.624083069562337e-06, "loss": 0.4031, "num_tokens": 3647579400.0, "step": 10226 }, { "epoch": 3.7463930747034304, "grad_norm": 0.14920029741478982, "learning_rate": 9.620982056072428e-06, "loss": 0.4328, "num_tokens": 3648322397.0, "step": 10227 }, { "epoch": 3.746759492511336, "grad_norm": 0.13482801222730612, "learning_rate": 9.617881739545904e-06, "loss": 0.3816, "num_tokens": 3649112611.0, "step": 10228 }, { "epoch": 3.7471259103192414, "grad_norm": 0.14310853748461613, "learning_rate": 9.614782120157321e-06, "loss": 0.4165, "num_tokens": 3649827686.0, "step": 10229 }, { "epoch": 3.747492328127147, "grad_norm": 0.1481666386922543, "learning_rate": 9.611683198081204e-06, "loss": 0.4187, "num_tokens": 3650538788.0, "step": 10230 }, { "epoch": 3.7478587459350523, "grad_norm": 0.14167915933628608, "learning_rate": 9.60858497349202e-06, "loss": 0.4561, "num_tokens": 3651294928.0, "step": 10231 }, { "epoch": 3.748225163742958, "grad_norm": 0.14596222023702332, "learning_rate": 9.605487446564204e-06, "loss": 0.4151, "num_tokens": 3652020032.0, "step": 10232 }, { "epoch": 3.7485915815508632, "grad_norm": 0.14756923211468684, "learning_rate": 9.602390617472156e-06, "loss": 0.4288, "num_tokens": 3652725880.0, "step": 10233 }, { "epoch": 3.748957999358769, "grad_norm": 0.1412669220416029, "learning_rate": 9.599294486390223e-06, "loss": 0.4065, "num_tokens": 3653463482.0, "step": 10234 }, { "epoch": 3.749324417166674, "grad_norm": 0.1301457924724771, "learning_rate": 9.596199053492738e-06, "loss": 0.4059, "num_tokens": 3654270924.0, "step": 10235 }, { "epoch": 3.74969083497458, "grad_norm": 0.13578820518134882, "learning_rate": 9.593104318953977e-06, "loss": 0.3841, "num_tokens": 3654996686.0, "step": 10236 }, { "epoch": 3.750057252782485, "grad_norm": 0.14546554753908644, "learning_rate": 9.590010282948178e-06, "loss": 0.453, "num_tokens": 3655739366.0, "step": 10237 }, { "epoch": 3.750423670590391, "grad_norm": 0.15038988092449074, "learning_rate": 9.586916945649541e-06, "loss": 0.4154, "num_tokens": 3656440383.0, "step": 10238 }, { "epoch": 3.750790088398296, "grad_norm": 0.1269853403391957, "learning_rate": 9.58382430723222e-06, "loss": 0.3915, "num_tokens": 3657289429.0, "step": 10239 }, { "epoch": 3.751156506206202, "grad_norm": 0.13065420645484693, "learning_rate": 9.580732367870352e-06, "loss": 0.3712, "num_tokens": 3658046883.0, "step": 10240 }, { "epoch": 3.751522924014107, "grad_norm": 0.14778777587369124, "learning_rate": 9.577641127738008e-06, "loss": 0.4152, "num_tokens": 3658779451.0, "step": 10241 }, { "epoch": 3.7518893418220127, "grad_norm": 0.14851043480340942, "learning_rate": 9.574550587009228e-06, "loss": 0.4143, "num_tokens": 3659435710.0, "step": 10242 }, { "epoch": 3.752255759629918, "grad_norm": 0.1502460311897029, "learning_rate": 9.571460745858028e-06, "loss": 0.3897, "num_tokens": 3660068889.0, "step": 10243 }, { "epoch": 3.7526221774378232, "grad_norm": 0.1361333951032252, "learning_rate": 9.568371604458366e-06, "loss": 0.3721, "num_tokens": 3660797597.0, "step": 10244 }, { "epoch": 3.752988595245729, "grad_norm": 0.13987959021056956, "learning_rate": 9.56528316298416e-06, "loss": 0.4113, "num_tokens": 3661517624.0, "step": 10245 }, { "epoch": 3.7533550130536346, "grad_norm": 0.13237129094757444, "learning_rate": 9.562195421609309e-06, "loss": 0.4288, "num_tokens": 3662398790.0, "step": 10246 }, { "epoch": 3.75372143086154, "grad_norm": 0.14451796898749575, "learning_rate": 9.55910838050765e-06, "loss": 0.4289, "num_tokens": 3663141286.0, "step": 10247 }, { "epoch": 3.754087848669445, "grad_norm": 0.13545870382084307, "learning_rate": 9.556022039852993e-06, "loss": 0.4193, "num_tokens": 3663926892.0, "step": 10248 }, { "epoch": 3.754454266477351, "grad_norm": 0.12439294365603609, "learning_rate": 9.552936399819097e-06, "loss": 0.3961, "num_tokens": 3664693049.0, "step": 10249 }, { "epoch": 3.7548206842852565, "grad_norm": 0.13508110503045326, "learning_rate": 9.549851460579697e-06, "loss": 0.3947, "num_tokens": 3665557773.0, "step": 10250 }, { "epoch": 3.7551871020931618, "grad_norm": 0.13089658960345207, "learning_rate": 9.546767222308486e-06, "loss": 0.4089, "num_tokens": 3666405499.0, "step": 10251 }, { "epoch": 3.755553519901067, "grad_norm": 0.1334436789628869, "learning_rate": 9.543683685179107e-06, "loss": 0.4145, "num_tokens": 3667217845.0, "step": 10252 }, { "epoch": 3.7559199377089727, "grad_norm": 0.13179097383838995, "learning_rate": 9.540600849365171e-06, "loss": 0.4151, "num_tokens": 3668010747.0, "step": 10253 }, { "epoch": 3.7562863555168784, "grad_norm": 0.15047576855149952, "learning_rate": 9.537518715040243e-06, "loss": 0.3839, "num_tokens": 3668643401.0, "step": 10254 }, { "epoch": 3.7566527733247836, "grad_norm": 0.1381360868781214, "learning_rate": 9.534437282377855e-06, "loss": 0.3753, "num_tokens": 3669410655.0, "step": 10255 }, { "epoch": 3.757019191132689, "grad_norm": 0.1442717352910366, "learning_rate": 9.531356551551495e-06, "loss": 0.4147, "num_tokens": 3670147594.0, "step": 10256 }, { "epoch": 3.7573856089405946, "grad_norm": 0.1408583776214885, "learning_rate": 9.528276522734628e-06, "loss": 0.3909, "num_tokens": 3670900470.0, "step": 10257 }, { "epoch": 3.7577520267485, "grad_norm": 0.13478152125855664, "learning_rate": 9.525197196100653e-06, "loss": 0.4089, "num_tokens": 3671550213.0, "step": 10258 }, { "epoch": 3.7581184445564055, "grad_norm": 0.13909155223278466, "learning_rate": 9.52211857182295e-06, "loss": 0.431, "num_tokens": 3672336977.0, "step": 10259 }, { "epoch": 3.758484862364311, "grad_norm": 0.13405309615570318, "learning_rate": 9.519040650074845e-06, "loss": 0.4188, "num_tokens": 3673144694.0, "step": 10260 }, { "epoch": 3.7588512801722165, "grad_norm": 0.13724428304391253, "learning_rate": 9.51596343102963e-06, "loss": 0.4041, "num_tokens": 3674003230.0, "step": 10261 }, { "epoch": 3.7592176979801217, "grad_norm": 0.14168819974651903, "learning_rate": 9.512886914860564e-06, "loss": 0.3769, "num_tokens": 3674710392.0, "step": 10262 }, { "epoch": 3.7595841157880274, "grad_norm": 0.136250899469335, "learning_rate": 9.509811101740856e-06, "loss": 0.4131, "num_tokens": 3675526804.0, "step": 10263 }, { "epoch": 3.7599505335959327, "grad_norm": 0.14320840055491746, "learning_rate": 9.506735991843688e-06, "loss": 0.4187, "num_tokens": 3676272212.0, "step": 10264 }, { "epoch": 3.7603169514038384, "grad_norm": 0.1522054754039764, "learning_rate": 9.503661585342191e-06, "loss": 0.4136, "num_tokens": 3676943000.0, "step": 10265 }, { "epoch": 3.7606833692117436, "grad_norm": 0.13494611842940954, "learning_rate": 9.50058788240946e-06, "loss": 0.3916, "num_tokens": 3677730636.0, "step": 10266 }, { "epoch": 3.7610497870196493, "grad_norm": 0.13824020909696655, "learning_rate": 9.497514883218546e-06, "loss": 0.4193, "num_tokens": 3678585427.0, "step": 10267 }, { "epoch": 3.7614162048275546, "grad_norm": 0.13363802010343076, "learning_rate": 9.494442587942472e-06, "loss": 0.4057, "num_tokens": 3679314578.0, "step": 10268 }, { "epoch": 3.76178262263546, "grad_norm": 0.14457600516595046, "learning_rate": 9.491370996754206e-06, "loss": 0.3841, "num_tokens": 3680013411.0, "step": 10269 }, { "epoch": 3.7621490404433655, "grad_norm": 0.13316889431146278, "learning_rate": 9.488300109826697e-06, "loss": 0.4071, "num_tokens": 3680830836.0, "step": 10270 }, { "epoch": 3.762515458251271, "grad_norm": 0.146109949319119, "learning_rate": 9.485229927332834e-06, "loss": 0.3938, "num_tokens": 3681572026.0, "step": 10271 }, { "epoch": 3.7628818760591765, "grad_norm": 0.13716809150583847, "learning_rate": 9.482160449445471e-06, "loss": 0.4148, "num_tokens": 3682379564.0, "step": 10272 }, { "epoch": 3.7632482938670817, "grad_norm": 0.13774136047507865, "learning_rate": 9.479091676337435e-06, "loss": 0.4133, "num_tokens": 3683163389.0, "step": 10273 }, { "epoch": 3.7636147116749874, "grad_norm": 0.13941646997652868, "learning_rate": 9.476023608181501e-06, "loss": 0.3791, "num_tokens": 3683881270.0, "step": 10274 }, { "epoch": 3.763981129482893, "grad_norm": 0.13777532798079284, "learning_rate": 9.472956245150405e-06, "loss": 0.388, "num_tokens": 3684550516.0, "step": 10275 }, { "epoch": 3.7643475472907983, "grad_norm": 0.1384631668709815, "learning_rate": 9.469889587416841e-06, "loss": 0.3958, "num_tokens": 3685375606.0, "step": 10276 }, { "epoch": 3.7647139650987036, "grad_norm": 0.1471098129987927, "learning_rate": 9.466823635153478e-06, "loss": 0.3954, "num_tokens": 3686059198.0, "step": 10277 }, { "epoch": 3.7650803829066093, "grad_norm": 0.13672085988494367, "learning_rate": 9.463758388532926e-06, "loss": 0.3954, "num_tokens": 3686747176.0, "step": 10278 }, { "epoch": 3.765446800714515, "grad_norm": 0.13775598669197428, "learning_rate": 9.460693847727775e-06, "loss": 0.4115, "num_tokens": 3687558679.0, "step": 10279 }, { "epoch": 3.7658132185224202, "grad_norm": 0.1305524723621592, "learning_rate": 9.457630012910559e-06, "loss": 0.4097, "num_tokens": 3688495196.0, "step": 10280 }, { "epoch": 3.7661796363303255, "grad_norm": 0.1291760695400813, "learning_rate": 9.454566884253777e-06, "loss": 0.3955, "num_tokens": 3689275364.0, "step": 10281 }, { "epoch": 3.766546054138231, "grad_norm": 0.13669870886173457, "learning_rate": 9.451504461929889e-06, "loss": 0.412, "num_tokens": 3690089706.0, "step": 10282 }, { "epoch": 3.7669124719461364, "grad_norm": 0.13308973313315267, "learning_rate": 9.448442746111312e-06, "loss": 0.4019, "num_tokens": 3691006618.0, "step": 10283 }, { "epoch": 3.767278889754042, "grad_norm": 0.14401338478014064, "learning_rate": 9.445381736970432e-06, "loss": 0.4257, "num_tokens": 3691731283.0, "step": 10284 }, { "epoch": 3.7676453075619474, "grad_norm": 0.14887168927079025, "learning_rate": 9.442321434679594e-06, "loss": 0.4121, "num_tokens": 3692421734.0, "step": 10285 }, { "epoch": 3.768011725369853, "grad_norm": 0.14046935842722452, "learning_rate": 9.439261839411095e-06, "loss": 0.395, "num_tokens": 3693231364.0, "step": 10286 }, { "epoch": 3.7683781431777583, "grad_norm": 0.13509566416868576, "learning_rate": 9.436202951337194e-06, "loss": 0.4307, "num_tokens": 3694059350.0, "step": 10287 }, { "epoch": 3.768744560985664, "grad_norm": 0.14967779759632974, "learning_rate": 9.433144770630116e-06, "loss": 0.4242, "num_tokens": 3694775472.0, "step": 10288 }, { "epoch": 3.7691109787935693, "grad_norm": 0.15040387497864022, "learning_rate": 9.430087297462034e-06, "loss": 0.4027, "num_tokens": 3695488483.0, "step": 10289 }, { "epoch": 3.769477396601475, "grad_norm": 0.13937861675289862, "learning_rate": 9.427030532005097e-06, "loss": 0.4067, "num_tokens": 3696331274.0, "step": 10290 }, { "epoch": 3.76984381440938, "grad_norm": 0.1359753175075253, "learning_rate": 9.423974474431413e-06, "loss": 0.4261, "num_tokens": 3697162413.0, "step": 10291 }, { "epoch": 3.770210232217286, "grad_norm": 0.13479554701971583, "learning_rate": 9.420919124913038e-06, "loss": 0.4252, "num_tokens": 3697978615.0, "step": 10292 }, { "epoch": 3.770576650025191, "grad_norm": 0.1256606413971783, "learning_rate": 9.417864483621992e-06, "loss": 0.3905, "num_tokens": 3698831056.0, "step": 10293 }, { "epoch": 3.770943067833097, "grad_norm": 0.1365469194885649, "learning_rate": 9.414810550730257e-06, "loss": 0.4081, "num_tokens": 3699561527.0, "step": 10294 }, { "epoch": 3.771309485641002, "grad_norm": 0.13695555430118356, "learning_rate": 9.41175732640978e-06, "loss": 0.3892, "num_tokens": 3700374227.0, "step": 10295 }, { "epoch": 3.771675903448908, "grad_norm": 0.1303265737798927, "learning_rate": 9.408704810832463e-06, "loss": 0.4144, "num_tokens": 3701148998.0, "step": 10296 }, { "epoch": 3.772042321256813, "grad_norm": 0.13542035903955973, "learning_rate": 9.405653004170162e-06, "loss": 0.3941, "num_tokens": 3701964416.0, "step": 10297 }, { "epoch": 3.7724087390647183, "grad_norm": 0.148096148520342, "learning_rate": 9.402601906594711e-06, "loss": 0.3866, "num_tokens": 3702656670.0, "step": 10298 }, { "epoch": 3.772775156872624, "grad_norm": 0.1432921507309767, "learning_rate": 9.399551518277886e-06, "loss": 0.4257, "num_tokens": 3703426567.0, "step": 10299 }, { "epoch": 3.7731415746805297, "grad_norm": 0.14907211009517488, "learning_rate": 9.396501839391426e-06, "loss": 0.429, "num_tokens": 3704104171.0, "step": 10300 }, { "epoch": 3.773507992488435, "grad_norm": 0.13393721328195976, "learning_rate": 9.393452870107042e-06, "loss": 0.4003, "num_tokens": 3704915144.0, "step": 10301 }, { "epoch": 3.77387441029634, "grad_norm": 0.12416296867769137, "learning_rate": 9.390404610596397e-06, "loss": 0.3656, "num_tokens": 3705751233.0, "step": 10302 }, { "epoch": 3.774240828104246, "grad_norm": 0.14239880550049264, "learning_rate": 9.38735706103111e-06, "loss": 0.4115, "num_tokens": 3706553529.0, "step": 10303 }, { "epoch": 3.7746072459121516, "grad_norm": 0.13761262615417963, "learning_rate": 9.38431022158276e-06, "loss": 0.4235, "num_tokens": 3707352329.0, "step": 10304 }, { "epoch": 3.774973663720057, "grad_norm": 0.13402758654516617, "learning_rate": 9.381264092422902e-06, "loss": 0.4011, "num_tokens": 3708149840.0, "step": 10305 }, { "epoch": 3.775340081527962, "grad_norm": 0.13628637363807442, "learning_rate": 9.378218673723025e-06, "loss": 0.4185, "num_tokens": 3708907258.0, "step": 10306 }, { "epoch": 3.7757064993358678, "grad_norm": 0.12961659525183283, "learning_rate": 9.375173965654603e-06, "loss": 0.3974, "num_tokens": 3709622945.0, "step": 10307 }, { "epoch": 3.776072917143773, "grad_norm": 0.14248093120549116, "learning_rate": 9.372129968389059e-06, "loss": 0.4213, "num_tokens": 3710452470.0, "step": 10308 }, { "epoch": 3.7764393349516787, "grad_norm": 0.14348286967076335, "learning_rate": 9.369086682097772e-06, "loss": 0.4278, "num_tokens": 3711246890.0, "step": 10309 }, { "epoch": 3.776805752759584, "grad_norm": 0.1313995977805631, "learning_rate": 9.36604410695208e-06, "loss": 0.3993, "num_tokens": 3712047501.0, "step": 10310 }, { "epoch": 3.7771721705674897, "grad_norm": 0.13465648340308492, "learning_rate": 9.363002243123293e-06, "loss": 0.4023, "num_tokens": 3712892198.0, "step": 10311 }, { "epoch": 3.777538588375395, "grad_norm": 0.14529775227613084, "learning_rate": 9.359961090782677e-06, "loss": 0.4063, "num_tokens": 3713554121.0, "step": 10312 }, { "epoch": 3.7779050061833006, "grad_norm": 0.13731102860625863, "learning_rate": 9.356920650101453e-06, "loss": 0.4239, "num_tokens": 3714323943.0, "step": 10313 }, { "epoch": 3.778271423991206, "grad_norm": 0.13871826770947276, "learning_rate": 9.3538809212508e-06, "loss": 0.382, "num_tokens": 3715084777.0, "step": 10314 }, { "epoch": 3.7786378417991116, "grad_norm": 0.14179107473180716, "learning_rate": 9.350841904401864e-06, "loss": 0.4204, "num_tokens": 3715879874.0, "step": 10315 }, { "epoch": 3.779004259607017, "grad_norm": 0.1633179384316153, "learning_rate": 9.347803599725746e-06, "loss": 0.408, "num_tokens": 3716448934.0, "step": 10316 }, { "epoch": 3.7793706774149225, "grad_norm": 0.1435875845350067, "learning_rate": 9.344766007393501e-06, "loss": 0.4104, "num_tokens": 3717120955.0, "step": 10317 }, { "epoch": 3.7797370952228277, "grad_norm": 0.1629402998049073, "learning_rate": 9.34172912757616e-06, "loss": 0.4142, "num_tokens": 3717671002.0, "step": 10318 }, { "epoch": 3.7801035130307334, "grad_norm": 0.1348376485207134, "learning_rate": 9.338692960444712e-06, "loss": 0.4214, "num_tokens": 3718539637.0, "step": 10319 }, { "epoch": 3.7804699308386387, "grad_norm": 0.1483391136644517, "learning_rate": 9.335657506170091e-06, "loss": 0.424, "num_tokens": 3719226367.0, "step": 10320 }, { "epoch": 3.7808363486465444, "grad_norm": 0.13150443152227642, "learning_rate": 9.332622764923201e-06, "loss": 0.3865, "num_tokens": 3719953539.0, "step": 10321 }, { "epoch": 3.7812027664544496, "grad_norm": 0.13958368604764976, "learning_rate": 9.329588736874896e-06, "loss": 0.3957, "num_tokens": 3720750452.0, "step": 10322 }, { "epoch": 3.781569184262355, "grad_norm": 0.13382972330677423, "learning_rate": 9.32655542219601e-06, "loss": 0.3929, "num_tokens": 3721489488.0, "step": 10323 }, { "epoch": 3.7819356020702606, "grad_norm": 0.1502023766317843, "learning_rate": 9.323522821057316e-06, "loss": 0.4076, "num_tokens": 3722239937.0, "step": 10324 }, { "epoch": 3.7823020198781663, "grad_norm": 0.1373079707056979, "learning_rate": 9.320490933629565e-06, "loss": 0.3656, "num_tokens": 3722991260.0, "step": 10325 }, { "epoch": 3.7826684376860715, "grad_norm": 0.11738433232635376, "learning_rate": 9.317459760083453e-06, "loss": 0.3836, "num_tokens": 3723849858.0, "step": 10326 }, { "epoch": 3.783034855493977, "grad_norm": 0.13164387306004435, "learning_rate": 9.314429300589638e-06, "loss": 0.3956, "num_tokens": 3724657876.0, "step": 10327 }, { "epoch": 3.7834012733018825, "grad_norm": 0.15562326132491694, "learning_rate": 9.31139955531874e-06, "loss": 0.3892, "num_tokens": 3725303940.0, "step": 10328 }, { "epoch": 3.783767691109788, "grad_norm": 0.14210477385517906, "learning_rate": 9.308370524441346e-06, "loss": 0.4261, "num_tokens": 3726044235.0, "step": 10329 }, { "epoch": 3.7841341089176934, "grad_norm": 0.15603378570309717, "learning_rate": 9.305342208127995e-06, "loss": 0.4489, "num_tokens": 3726693139.0, "step": 10330 }, { "epoch": 3.7845005267255987, "grad_norm": 0.14531854345222023, "learning_rate": 9.302314606549183e-06, "loss": 0.4243, "num_tokens": 3727438472.0, "step": 10331 }, { "epoch": 3.7848669445335044, "grad_norm": 0.13900616209886282, "learning_rate": 9.299287719875376e-06, "loss": 0.4289, "num_tokens": 3728202141.0, "step": 10332 }, { "epoch": 3.78523336234141, "grad_norm": 0.1401312669040653, "learning_rate": 9.296261548276982e-06, "loss": 0.4104, "num_tokens": 3728892966.0, "step": 10333 }, { "epoch": 3.7855997801493153, "grad_norm": 0.13905614310643588, "learning_rate": 9.2932360919244e-06, "loss": 0.3686, "num_tokens": 3729675400.0, "step": 10334 }, { "epoch": 3.7859661979572206, "grad_norm": 0.14783771069811158, "learning_rate": 9.290211350987952e-06, "loss": 0.439, "num_tokens": 3730457881.0, "step": 10335 }, { "epoch": 3.7863326157651263, "grad_norm": 0.13048885521093107, "learning_rate": 9.287187325637952e-06, "loss": 0.4145, "num_tokens": 3731236634.0, "step": 10336 }, { "epoch": 3.7866990335730315, "grad_norm": 0.1285311019740546, "learning_rate": 9.284164016044644e-06, "loss": 0.3844, "num_tokens": 3732073715.0, "step": 10337 }, { "epoch": 3.787065451380937, "grad_norm": 0.15486896929811472, "learning_rate": 9.281141422378248e-06, "loss": 0.4118, "num_tokens": 3732671302.0, "step": 10338 }, { "epoch": 3.7874318691888424, "grad_norm": 0.13560660159165447, "learning_rate": 9.278119544808946e-06, "loss": 0.4022, "num_tokens": 3733463513.0, "step": 10339 }, { "epoch": 3.787798286996748, "grad_norm": 0.16462737072120118, "learning_rate": 9.275098383506882e-06, "loss": 0.4402, "num_tokens": 3734098155.0, "step": 10340 }, { "epoch": 3.7881647048046534, "grad_norm": 0.14244717537459536, "learning_rate": 9.272077938642147e-06, "loss": 0.4108, "num_tokens": 3734828875.0, "step": 10341 }, { "epoch": 3.788531122612559, "grad_norm": 0.14871456186878526, "learning_rate": 9.2690582103848e-06, "loss": 0.4669, "num_tokens": 3735559642.0, "step": 10342 }, { "epoch": 3.7888975404204643, "grad_norm": 0.13657784953027166, "learning_rate": 9.266039198904853e-06, "loss": 0.414, "num_tokens": 3736290970.0, "step": 10343 }, { "epoch": 3.78926395822837, "grad_norm": 0.13893234280842387, "learning_rate": 9.263020904372283e-06, "loss": 0.4183, "num_tokens": 3737073104.0, "step": 10344 }, { "epoch": 3.7896303760362753, "grad_norm": 0.139756285691374, "learning_rate": 9.260003326957028e-06, "loss": 0.409, "num_tokens": 3737790504.0, "step": 10345 }, { "epoch": 3.789996793844181, "grad_norm": 0.1470517579610942, "learning_rate": 9.256986466828991e-06, "loss": 0.4271, "num_tokens": 3738524956.0, "step": 10346 }, { "epoch": 3.7903632116520862, "grad_norm": 0.14426497709082203, "learning_rate": 9.25397032415802e-06, "loss": 0.3964, "num_tokens": 3739313521.0, "step": 10347 }, { "epoch": 3.7907296294599915, "grad_norm": 0.14220426459983332, "learning_rate": 9.250954899113928e-06, "loss": 0.4195, "num_tokens": 3740037462.0, "step": 10348 }, { "epoch": 3.791096047267897, "grad_norm": 0.13352400343902382, "learning_rate": 9.247940191866494e-06, "loss": 0.3991, "num_tokens": 3740805118.0, "step": 10349 }, { "epoch": 3.791462465075803, "grad_norm": 0.1370157903517084, "learning_rate": 9.244926202585442e-06, "loss": 0.4204, "num_tokens": 3741613175.0, "step": 10350 }, { "epoch": 3.791828882883708, "grad_norm": 0.12812657870377414, "learning_rate": 9.241912931440482e-06, "loss": 0.4088, "num_tokens": 3742487821.0, "step": 10351 }, { "epoch": 3.7921953006916134, "grad_norm": 0.13868370265840865, "learning_rate": 9.238900378601253e-06, "loss": 0.4005, "num_tokens": 3743184212.0, "step": 10352 }, { "epoch": 3.792561718499519, "grad_norm": 0.13252408299309393, "learning_rate": 9.235888544237376e-06, "loss": 0.4013, "num_tokens": 3743924756.0, "step": 10353 }, { "epoch": 3.7929281363074248, "grad_norm": 0.14083785416582092, "learning_rate": 9.232877428518423e-06, "loss": 0.4252, "num_tokens": 3744778784.0, "step": 10354 }, { "epoch": 3.79329455411533, "grad_norm": 0.15511072042521976, "learning_rate": 9.229867031613917e-06, "loss": 0.4486, "num_tokens": 3745437909.0, "step": 10355 }, { "epoch": 3.7936609719232353, "grad_norm": 0.13062929761719516, "learning_rate": 9.226857353693361e-06, "loss": 0.4187, "num_tokens": 3746308200.0, "step": 10356 }, { "epoch": 3.794027389731141, "grad_norm": 0.1416057354609379, "learning_rate": 9.223848394926202e-06, "loss": 0.3928, "num_tokens": 3747003478.0, "step": 10357 }, { "epoch": 3.7943938075390466, "grad_norm": 0.125842880888402, "learning_rate": 9.220840155481847e-06, "loss": 0.3997, "num_tokens": 3747848459.0, "step": 10358 }, { "epoch": 3.794760225346952, "grad_norm": 0.13618468752093033, "learning_rate": 9.217832635529662e-06, "loss": 0.3546, "num_tokens": 3748615256.0, "step": 10359 }, { "epoch": 3.795126643154857, "grad_norm": 0.1351022859347134, "learning_rate": 9.21482583523899e-06, "loss": 0.4093, "num_tokens": 3749398240.0, "step": 10360 }, { "epoch": 3.795493060962763, "grad_norm": 0.1331108618284053, "learning_rate": 9.211819754779107e-06, "loss": 0.41, "num_tokens": 3750167690.0, "step": 10361 }, { "epoch": 3.795859478770668, "grad_norm": 0.14972278887514653, "learning_rate": 9.208814394319269e-06, "loss": 0.4035, "num_tokens": 3750843247.0, "step": 10362 }, { "epoch": 3.796225896578574, "grad_norm": 0.1458930693512143, "learning_rate": 9.205809754028684e-06, "loss": 0.4106, "num_tokens": 3751563009.0, "step": 10363 }, { "epoch": 3.796592314386479, "grad_norm": 0.13108415893321412, "learning_rate": 9.202805834076514e-06, "loss": 0.4278, "num_tokens": 3752364342.0, "step": 10364 }, { "epoch": 3.7969587321943847, "grad_norm": 0.13759801273008299, "learning_rate": 9.199802634631884e-06, "loss": 0.4072, "num_tokens": 3753133671.0, "step": 10365 }, { "epoch": 3.79732515000229, "grad_norm": 0.13821725672537377, "learning_rate": 9.196800155863886e-06, "loss": 0.3912, "num_tokens": 3753838261.0, "step": 10366 }, { "epoch": 3.7976915678101957, "grad_norm": 0.14049532168057538, "learning_rate": 9.193798397941565e-06, "loss": 0.3861, "num_tokens": 3754642240.0, "step": 10367 }, { "epoch": 3.798057985618101, "grad_norm": 0.13660364397855393, "learning_rate": 9.190797361033928e-06, "loss": 0.3622, "num_tokens": 3755373679.0, "step": 10368 }, { "epoch": 3.7984244034260066, "grad_norm": 0.13177287951538028, "learning_rate": 9.187797045309936e-06, "loss": 0.4095, "num_tokens": 3756125333.0, "step": 10369 }, { "epoch": 3.798790821233912, "grad_norm": 0.14010917245349014, "learning_rate": 9.18479745093851e-06, "loss": 0.3949, "num_tokens": 3756946743.0, "step": 10370 }, { "epoch": 3.7991572390418176, "grad_norm": 0.12370463852375209, "learning_rate": 9.181798578088542e-06, "loss": 0.408, "num_tokens": 3757786525.0, "step": 10371 }, { "epoch": 3.799523656849723, "grad_norm": 0.13087594331972072, "learning_rate": 9.178800426928857e-06, "loss": 0.41, "num_tokens": 3758582705.0, "step": 10372 }, { "epoch": 3.7998900746576285, "grad_norm": 0.1278391240534961, "learning_rate": 9.175802997628272e-06, "loss": 0.3948, "num_tokens": 3759372353.0, "step": 10373 }, { "epoch": 3.8002564924655338, "grad_norm": 0.12399057154634989, "learning_rate": 9.17280629035555e-06, "loss": 0.4071, "num_tokens": 3760263840.0, "step": 10374 }, { "epoch": 3.8006229102734395, "grad_norm": 0.13991104593728806, "learning_rate": 9.169810305279403e-06, "loss": 0.4366, "num_tokens": 3761057309.0, "step": 10375 }, { "epoch": 3.8009893280813447, "grad_norm": 0.1306794226626352, "learning_rate": 9.166815042568516e-06, "loss": 0.384, "num_tokens": 3761890782.0, "step": 10376 }, { "epoch": 3.80135574588925, "grad_norm": 0.13844418236019537, "learning_rate": 9.163820502391521e-06, "loss": 0.3885, "num_tokens": 3762558752.0, "step": 10377 }, { "epoch": 3.8017221636971557, "grad_norm": 0.14094479087300515, "learning_rate": 9.160826684917028e-06, "loss": 0.3925, "num_tokens": 3763347703.0, "step": 10378 }, { "epoch": 3.8020885815050613, "grad_norm": 0.13248202676702592, "learning_rate": 9.157833590313582e-06, "loss": 0.3912, "num_tokens": 3764119239.0, "step": 10379 }, { "epoch": 3.8024549993129666, "grad_norm": 0.13132449218855263, "learning_rate": 9.154841218749716e-06, "loss": 0.3953, "num_tokens": 3764886907.0, "step": 10380 }, { "epoch": 3.802821417120872, "grad_norm": 0.14824445205721665, "learning_rate": 9.151849570393894e-06, "loss": 0.4462, "num_tokens": 3765575068.0, "step": 10381 }, { "epoch": 3.8031878349287775, "grad_norm": 0.1323729892153934, "learning_rate": 9.148858645414557e-06, "loss": 0.4207, "num_tokens": 3766394650.0, "step": 10382 }, { "epoch": 3.8035542527366832, "grad_norm": 0.1441844578535824, "learning_rate": 9.145868443980092e-06, "loss": 0.4146, "num_tokens": 3767180398.0, "step": 10383 }, { "epoch": 3.8039206705445885, "grad_norm": 0.13885240006220645, "learning_rate": 9.142878966258867e-06, "loss": 0.433, "num_tokens": 3768024335.0, "step": 10384 }, { "epoch": 3.8042870883524937, "grad_norm": 0.13585839464399804, "learning_rate": 9.139890212419186e-06, "loss": 0.4168, "num_tokens": 3768735392.0, "step": 10385 }, { "epoch": 3.8046535061603994, "grad_norm": 0.14068866198896707, "learning_rate": 9.13690218262932e-06, "loss": 0.3922, "num_tokens": 3769454729.0, "step": 10386 }, { "epoch": 3.805019923968305, "grad_norm": 0.14449841621856094, "learning_rate": 9.133914877057511e-06, "loss": 0.4138, "num_tokens": 3770275926.0, "step": 10387 }, { "epoch": 3.8053863417762104, "grad_norm": 0.13804158570695496, "learning_rate": 9.130928295871943e-06, "loss": 0.4006, "num_tokens": 3771067380.0, "step": 10388 }, { "epoch": 3.8057527595841156, "grad_norm": 0.14131562753117652, "learning_rate": 9.127942439240764e-06, "loss": 0.4274, "num_tokens": 3771799349.0, "step": 10389 }, { "epoch": 3.8061191773920213, "grad_norm": 0.12974793094441694, "learning_rate": 9.124957307332092e-06, "loss": 0.3796, "num_tokens": 3772569366.0, "step": 10390 }, { "epoch": 3.8064855951999266, "grad_norm": 0.15723736494076213, "learning_rate": 9.121972900313993e-06, "loss": 0.3862, "num_tokens": 3773386740.0, "step": 10391 }, { "epoch": 3.8068520130078323, "grad_norm": 0.14299647252047246, "learning_rate": 9.11898921835449e-06, "loss": 0.4239, "num_tokens": 3774144014.0, "step": 10392 }, { "epoch": 3.8072184308157375, "grad_norm": 0.13971195609991552, "learning_rate": 9.116006261621572e-06, "loss": 0.4172, "num_tokens": 3774888580.0, "step": 10393 }, { "epoch": 3.807584848623643, "grad_norm": 0.135099601703263, "learning_rate": 9.113024030283184e-06, "loss": 0.416, "num_tokens": 3775641787.0, "step": 10394 }, { "epoch": 3.8079512664315485, "grad_norm": 0.136230175142829, "learning_rate": 9.11004252450724e-06, "loss": 0.407, "num_tokens": 3776365059.0, "step": 10395 }, { "epoch": 3.808317684239454, "grad_norm": 0.1345788412282771, "learning_rate": 9.107061744461602e-06, "loss": 0.4157, "num_tokens": 3777224579.0, "step": 10396 }, { "epoch": 3.8086841020473594, "grad_norm": 0.13183833791460603, "learning_rate": 9.104081690314091e-06, "loss": 0.3896, "num_tokens": 3777962068.0, "step": 10397 }, { "epoch": 3.809050519855265, "grad_norm": 0.1384994832460186, "learning_rate": 9.101102362232488e-06, "loss": 0.4151, "num_tokens": 3778702887.0, "step": 10398 }, { "epoch": 3.8094169376631704, "grad_norm": 0.14663294133001828, "learning_rate": 9.098123760384533e-06, "loss": 0.4211, "num_tokens": 3779477313.0, "step": 10399 }, { "epoch": 3.809783355471076, "grad_norm": 0.1382352501823561, "learning_rate": 9.095145884937933e-06, "loss": 0.4086, "num_tokens": 3780292623.0, "step": 10400 }, { "epoch": 3.8101497732789813, "grad_norm": 0.14103394112973872, "learning_rate": 9.09216873606035e-06, "loss": 0.4142, "num_tokens": 3781050750.0, "step": 10401 }, { "epoch": 3.8105161910868866, "grad_norm": 0.15086773045068885, "learning_rate": 9.089192313919403e-06, "loss": 0.452, "num_tokens": 3781756673.0, "step": 10402 }, { "epoch": 3.8108826088947922, "grad_norm": 0.1443801982787608, "learning_rate": 9.086216618682665e-06, "loss": 0.4225, "num_tokens": 3782499171.0, "step": 10403 }, { "epoch": 3.811249026702698, "grad_norm": 0.13283488703066423, "learning_rate": 9.083241650517678e-06, "loss": 0.3818, "num_tokens": 3783284860.0, "step": 10404 }, { "epoch": 3.811615444510603, "grad_norm": 0.13816128890853183, "learning_rate": 9.08026740959193e-06, "loss": 0.3795, "num_tokens": 3783944733.0, "step": 10405 }, { "epoch": 3.8119818623185084, "grad_norm": 0.14142475320090792, "learning_rate": 9.07729389607289e-06, "loss": 0.3961, "num_tokens": 3784650289.0, "step": 10406 }, { "epoch": 3.812348280126414, "grad_norm": 0.15090479759622988, "learning_rate": 9.074321110127962e-06, "loss": 0.4123, "num_tokens": 3785260891.0, "step": 10407 }, { "epoch": 3.81271469793432, "grad_norm": 0.1374587422488774, "learning_rate": 9.071349051924529e-06, "loss": 0.4011, "num_tokens": 3786060072.0, "step": 10408 }, { "epoch": 3.813081115742225, "grad_norm": 0.14250704878419182, "learning_rate": 9.06837772162992e-06, "loss": 0.4262, "num_tokens": 3786925761.0, "step": 10409 }, { "epoch": 3.8134475335501303, "grad_norm": 0.1434418898757841, "learning_rate": 9.065407119411424e-06, "loss": 0.4132, "num_tokens": 3787677451.0, "step": 10410 }, { "epoch": 3.813813951358036, "grad_norm": 0.13677539434341351, "learning_rate": 9.06243724543629e-06, "loss": 0.3998, "num_tokens": 3788468754.0, "step": 10411 }, { "epoch": 3.8141803691659417, "grad_norm": 0.1413131048144772, "learning_rate": 9.059468099871736e-06, "loss": 0.4399, "num_tokens": 3789238737.0, "step": 10412 }, { "epoch": 3.814546786973847, "grad_norm": 0.1337769947015872, "learning_rate": 9.056499682884926e-06, "loss": 0.3915, "num_tokens": 3790021103.0, "step": 10413 }, { "epoch": 3.814913204781752, "grad_norm": 0.14734176490684406, "learning_rate": 9.053531994642984e-06, "loss": 0.4476, "num_tokens": 3790720465.0, "step": 10414 }, { "epoch": 3.815279622589658, "grad_norm": 0.1493418943925569, "learning_rate": 9.050565035313005e-06, "loss": 0.382, "num_tokens": 3791372077.0, "step": 10415 }, { "epoch": 3.815646040397563, "grad_norm": 0.1394438578904959, "learning_rate": 9.047598805062027e-06, "loss": 0.3796, "num_tokens": 3792095873.0, "step": 10416 }, { "epoch": 3.816012458205469, "grad_norm": 0.14871474569639537, "learning_rate": 9.044633304057062e-06, "loss": 0.4449, "num_tokens": 3792828097.0, "step": 10417 }, { "epoch": 3.816378876013374, "grad_norm": 0.14034763465814776, "learning_rate": 9.04166853246507e-06, "loss": 0.4034, "num_tokens": 3793530469.0, "step": 10418 }, { "epoch": 3.81674529382128, "grad_norm": 0.14666493132412337, "learning_rate": 9.038704490452974e-06, "loss": 0.4219, "num_tokens": 3794277940.0, "step": 10419 }, { "epoch": 3.817111711629185, "grad_norm": 0.14181542356565868, "learning_rate": 9.035741178187649e-06, "loss": 0.4366, "num_tokens": 3795071143.0, "step": 10420 }, { "epoch": 3.8174781294370908, "grad_norm": 0.14760321641859173, "learning_rate": 9.032778595835948e-06, "loss": 0.4563, "num_tokens": 3795817624.0, "step": 10421 }, { "epoch": 3.817844547244996, "grad_norm": 0.1355879910743657, "learning_rate": 9.029816743564656e-06, "loss": 0.3875, "num_tokens": 3796708395.0, "step": 10422 }, { "epoch": 3.8182109650529017, "grad_norm": 0.12970774609295374, "learning_rate": 9.026855621540547e-06, "loss": 0.3844, "num_tokens": 3797543183.0, "step": 10423 }, { "epoch": 3.818577382860807, "grad_norm": 0.1450101259398464, "learning_rate": 9.023895229930327e-06, "loss": 0.4062, "num_tokens": 3798322783.0, "step": 10424 }, { "epoch": 3.8189438006687126, "grad_norm": 0.12993074051065312, "learning_rate": 9.020935568900678e-06, "loss": 0.4043, "num_tokens": 3799118835.0, "step": 10425 }, { "epoch": 3.819310218476618, "grad_norm": 0.14125221222224552, "learning_rate": 9.017976638618229e-06, "loss": 0.4036, "num_tokens": 3799851556.0, "step": 10426 }, { "epoch": 3.8196766362845236, "grad_norm": 0.14701709527107978, "learning_rate": 9.01501843924957e-06, "loss": 0.4593, "num_tokens": 3800574227.0, "step": 10427 }, { "epoch": 3.820043054092429, "grad_norm": 0.1454631861556882, "learning_rate": 9.012060970961262e-06, "loss": 0.384, "num_tokens": 3801291769.0, "step": 10428 }, { "epoch": 3.8204094719003345, "grad_norm": 0.1350247280964423, "learning_rate": 9.00910423391982e-06, "loss": 0.406, "num_tokens": 3802088377.0, "step": 10429 }, { "epoch": 3.82077588970824, "grad_norm": 0.15636442730072078, "learning_rate": 9.006148228291708e-06, "loss": 0.4268, "num_tokens": 3802804057.0, "step": 10430 }, { "epoch": 3.821142307516145, "grad_norm": 0.14339850326519643, "learning_rate": 9.003192954243354e-06, "loss": 0.3937, "num_tokens": 3803534609.0, "step": 10431 }, { "epoch": 3.8215087253240507, "grad_norm": 0.1380820095224565, "learning_rate": 9.00023841194115e-06, "loss": 0.3846, "num_tokens": 3804315988.0, "step": 10432 }, { "epoch": 3.8218751431319564, "grad_norm": 0.14917729617036926, "learning_rate": 8.997284601551431e-06, "loss": 0.4055, "num_tokens": 3804990869.0, "step": 10433 }, { "epoch": 3.8222415609398617, "grad_norm": 0.15604009716180256, "learning_rate": 8.994331523240513e-06, "loss": 0.4529, "num_tokens": 3805666043.0, "step": 10434 }, { "epoch": 3.822607978747767, "grad_norm": 0.1473787553497167, "learning_rate": 8.991379177174665e-06, "loss": 0.4491, "num_tokens": 3806418469.0, "step": 10435 }, { "epoch": 3.8229743965556726, "grad_norm": 0.14392432957563103, "learning_rate": 8.9884275635201e-06, "loss": 0.3944, "num_tokens": 3807131337.0, "step": 10436 }, { "epoch": 3.8233408143635783, "grad_norm": 0.14424383313016842, "learning_rate": 8.985476682443006e-06, "loss": 0.401, "num_tokens": 3807822151.0, "step": 10437 }, { "epoch": 3.8237072321714836, "grad_norm": 0.13193063018732962, "learning_rate": 8.982526534109517e-06, "loss": 0.422, "num_tokens": 3808700762.0, "step": 10438 }, { "epoch": 3.824073649979389, "grad_norm": 0.14426011173774644, "learning_rate": 8.979577118685738e-06, "loss": 0.4217, "num_tokens": 3809406966.0, "step": 10439 }, { "epoch": 3.8244400677872945, "grad_norm": 0.14478754686003287, "learning_rate": 8.976628436337725e-06, "loss": 0.401, "num_tokens": 3810104915.0, "step": 10440 }, { "epoch": 3.8248064855952, "grad_norm": 0.13200862402893257, "learning_rate": 8.97368048723149e-06, "loss": 0.4065, "num_tokens": 3810926146.0, "step": 10441 }, { "epoch": 3.8251729034031055, "grad_norm": 0.14113551426877483, "learning_rate": 8.970733271533019e-06, "loss": 0.3909, "num_tokens": 3811759806.0, "step": 10442 }, { "epoch": 3.8255393212110107, "grad_norm": 0.13254489336366987, "learning_rate": 8.967786789408238e-06, "loss": 0.3986, "num_tokens": 3812522181.0, "step": 10443 }, { "epoch": 3.8259057390189164, "grad_norm": 0.13779291150124826, "learning_rate": 8.964841041023039e-06, "loss": 0.3795, "num_tokens": 3813249796.0, "step": 10444 }, { "epoch": 3.8262721568268216, "grad_norm": 0.13792150622866614, "learning_rate": 8.96189602654328e-06, "loss": 0.4224, "num_tokens": 3814050007.0, "step": 10445 }, { "epoch": 3.8266385746347273, "grad_norm": 0.1427975181978749, "learning_rate": 8.95895174613477e-06, "loss": 0.3859, "num_tokens": 3814783074.0, "step": 10446 }, { "epoch": 3.8270049924426326, "grad_norm": 0.1327876143279324, "learning_rate": 8.956008199963271e-06, "loss": 0.44, "num_tokens": 3815607905.0, "step": 10447 }, { "epoch": 3.8273714102505383, "grad_norm": 0.14720808986516262, "learning_rate": 8.953065388194514e-06, "loss": 0.4271, "num_tokens": 3816330150.0, "step": 10448 }, { "epoch": 3.8277378280584435, "grad_norm": 0.14018988762786783, "learning_rate": 8.950123310994183e-06, "loss": 0.3879, "num_tokens": 3817053095.0, "step": 10449 }, { "epoch": 3.8281042458663492, "grad_norm": 0.15198224560463364, "learning_rate": 8.947181968527932e-06, "loss": 0.4385, "num_tokens": 3817766027.0, "step": 10450 }, { "epoch": 3.8284706636742545, "grad_norm": 0.13536879660139906, "learning_rate": 8.94424136096136e-06, "loss": 0.4088, "num_tokens": 3818494383.0, "step": 10451 }, { "epoch": 3.82883708148216, "grad_norm": 0.1303512729114302, "learning_rate": 8.941301488460026e-06, "loss": 0.3997, "num_tokens": 3819280090.0, "step": 10452 }, { "epoch": 3.8292034992900654, "grad_norm": 0.1335642271625562, "learning_rate": 8.938362351189452e-06, "loss": 0.4019, "num_tokens": 3820128826.0, "step": 10453 }, { "epoch": 3.829569917097971, "grad_norm": 0.1403382550517154, "learning_rate": 8.935423949315114e-06, "loss": 0.4033, "num_tokens": 3820854037.0, "step": 10454 }, { "epoch": 3.8299363349058764, "grad_norm": 0.14106706909404942, "learning_rate": 8.93248628300245e-06, "loss": 0.4118, "num_tokens": 3821592226.0, "step": 10455 }, { "epoch": 3.8303027527137816, "grad_norm": 0.1303640728721653, "learning_rate": 8.929549352416869e-06, "loss": 0.3972, "num_tokens": 3822440709.0, "step": 10456 }, { "epoch": 3.8306691705216873, "grad_norm": 0.13367353249244912, "learning_rate": 8.926613157723716e-06, "loss": 0.4299, "num_tokens": 3823260854.0, "step": 10457 }, { "epoch": 3.831035588329593, "grad_norm": 0.13279567228578656, "learning_rate": 8.923677699088302e-06, "loss": 0.4045, "num_tokens": 3824115938.0, "step": 10458 }, { "epoch": 3.8314020061374983, "grad_norm": 0.13353323256855681, "learning_rate": 8.920742976675904e-06, "loss": 0.4274, "num_tokens": 3824913910.0, "step": 10459 }, { "epoch": 3.8317684239454035, "grad_norm": 0.13324588454851763, "learning_rate": 8.917808990651752e-06, "loss": 0.4139, "num_tokens": 3825701949.0, "step": 10460 }, { "epoch": 3.832134841753309, "grad_norm": 0.13432122602483207, "learning_rate": 8.914875741181032e-06, "loss": 0.4018, "num_tokens": 3826487413.0, "step": 10461 }, { "epoch": 3.832501259561215, "grad_norm": 0.1321860344956753, "learning_rate": 8.91194322842889e-06, "loss": 0.3942, "num_tokens": 3827234984.0, "step": 10462 }, { "epoch": 3.83286767736912, "grad_norm": 0.14736655172561447, "learning_rate": 8.909011452560445e-06, "loss": 0.379, "num_tokens": 3828026394.0, "step": 10463 }, { "epoch": 3.8332340951770254, "grad_norm": 0.13267746229317945, "learning_rate": 8.906080413740752e-06, "loss": 0.373, "num_tokens": 3828703439.0, "step": 10464 }, { "epoch": 3.833600512984931, "grad_norm": 0.1482185374374203, "learning_rate": 8.903150112134836e-06, "loss": 0.3744, "num_tokens": 3829435387.0, "step": 10465 }, { "epoch": 3.833966930792837, "grad_norm": 0.13146723179323838, "learning_rate": 8.900220547907675e-06, "loss": 0.3951, "num_tokens": 3830215004.0, "step": 10466 }, { "epoch": 3.834333348600742, "grad_norm": 0.1313217042933228, "learning_rate": 8.897291721224215e-06, "loss": 0.3727, "num_tokens": 3830974019.0, "step": 10467 }, { "epoch": 3.8346997664086473, "grad_norm": 0.13791528341008105, "learning_rate": 8.894363632249357e-06, "loss": 0.3999, "num_tokens": 3831728702.0, "step": 10468 }, { "epoch": 3.835066184216553, "grad_norm": 0.13673989120783528, "learning_rate": 8.891436281147944e-06, "loss": 0.4057, "num_tokens": 3832538532.0, "step": 10469 }, { "epoch": 3.8354326020244582, "grad_norm": 0.14576984873303475, "learning_rate": 8.88850966808481e-06, "loss": 0.3931, "num_tokens": 3833264322.0, "step": 10470 }, { "epoch": 3.835799019832364, "grad_norm": 0.13119548555365007, "learning_rate": 8.885583793224722e-06, "loss": 0.4148, "num_tokens": 3833998152.0, "step": 10471 }, { "epoch": 3.836165437640269, "grad_norm": 0.1335627979506342, "learning_rate": 8.882658656732404e-06, "loss": 0.4149, "num_tokens": 3834886447.0, "step": 10472 }, { "epoch": 3.836531855448175, "grad_norm": 0.12951728735683155, "learning_rate": 8.879734258772559e-06, "loss": 0.3895, "num_tokens": 3835707298.0, "step": 10473 }, { "epoch": 3.83689827325608, "grad_norm": 0.12636295534129896, "learning_rate": 8.876810599509834e-06, "loss": 0.4045, "num_tokens": 3836537350.0, "step": 10474 }, { "epoch": 3.837264691063986, "grad_norm": 0.13423438278003125, "learning_rate": 8.873887679108827e-06, "loss": 0.4063, "num_tokens": 3837352445.0, "step": 10475 }, { "epoch": 3.837631108871891, "grad_norm": 0.14261913462070733, "learning_rate": 8.870965497734118e-06, "loss": 0.3986, "num_tokens": 3838091917.0, "step": 10476 }, { "epoch": 3.8379975266797968, "grad_norm": 0.12764871185532067, "learning_rate": 8.868044055550224e-06, "loss": 0.4309, "num_tokens": 3839000447.0, "step": 10477 }, { "epoch": 3.838363944487702, "grad_norm": 0.14587378894799297, "learning_rate": 8.865123352721631e-06, "loss": 0.423, "num_tokens": 3839734377.0, "step": 10478 }, { "epoch": 3.8387303622956077, "grad_norm": 0.14776074276961884, "learning_rate": 8.86220338941278e-06, "loss": 0.3991, "num_tokens": 3840473265.0, "step": 10479 }, { "epoch": 3.839096780103513, "grad_norm": 0.1360812575568098, "learning_rate": 8.859284165788071e-06, "loss": 0.4084, "num_tokens": 3841288287.0, "step": 10480 }, { "epoch": 3.8394631979114187, "grad_norm": 0.13748667379215807, "learning_rate": 8.856365682011859e-06, "loss": 0.4044, "num_tokens": 3842008669.0, "step": 10481 }, { "epoch": 3.839829615719324, "grad_norm": 0.1382999903838147, "learning_rate": 8.85344793824846e-06, "loss": 0.3842, "num_tokens": 3842754591.0, "step": 10482 }, { "epoch": 3.8401960335272296, "grad_norm": 0.14265091589434387, "learning_rate": 8.850530934662148e-06, "loss": 0.3932, "num_tokens": 3843450266.0, "step": 10483 }, { "epoch": 3.840562451335135, "grad_norm": 0.15886385228404643, "learning_rate": 8.847614671417166e-06, "loss": 0.4454, "num_tokens": 3844108639.0, "step": 10484 }, { "epoch": 3.84092886914304, "grad_norm": 0.14636797361022685, "learning_rate": 8.844699148677698e-06, "loss": 0.4078, "num_tokens": 3844799862.0, "step": 10485 }, { "epoch": 3.841295286950946, "grad_norm": 0.14328488582248927, "learning_rate": 8.841784366607894e-06, "loss": 0.4085, "num_tokens": 3845557906.0, "step": 10486 }, { "epoch": 3.8416617047588515, "grad_norm": 0.1293238761074591, "learning_rate": 8.838870325371864e-06, "loss": 0.3976, "num_tokens": 3846588203.0, "step": 10487 }, { "epoch": 3.8420281225667567, "grad_norm": 0.14427050561693303, "learning_rate": 8.835957025133665e-06, "loss": 0.4331, "num_tokens": 3847306832.0, "step": 10488 }, { "epoch": 3.842394540374662, "grad_norm": 0.15328053327415148, "learning_rate": 8.833044466057338e-06, "loss": 0.4105, "num_tokens": 3848047699.0, "step": 10489 }, { "epoch": 3.8427609581825677, "grad_norm": 0.13409976132569257, "learning_rate": 8.83013264830685e-06, "loss": 0.4078, "num_tokens": 3848796868.0, "step": 10490 }, { "epoch": 3.8431273759904734, "grad_norm": 0.1412159824583734, "learning_rate": 8.827221572046156e-06, "loss": 0.4128, "num_tokens": 3849551811.0, "step": 10491 }, { "epoch": 3.8434937937983786, "grad_norm": 0.13597469413262606, "learning_rate": 8.824311237439149e-06, "loss": 0.399, "num_tokens": 3850300886.0, "step": 10492 }, { "epoch": 3.843860211606284, "grad_norm": 0.15377570645672026, "learning_rate": 8.821401644649686e-06, "loss": 0.4349, "num_tokens": 3851029892.0, "step": 10493 }, { "epoch": 3.8442266294141896, "grad_norm": 0.15525586958855264, "learning_rate": 8.818492793841577e-06, "loss": 0.3779, "num_tokens": 3851705677.0, "step": 10494 }, { "epoch": 3.844593047222095, "grad_norm": 0.13900906921035186, "learning_rate": 8.815584685178609e-06, "loss": 0.39, "num_tokens": 3852543343.0, "step": 10495 }, { "epoch": 3.8449594650300005, "grad_norm": 0.12772146747540075, "learning_rate": 8.8126773188245e-06, "loss": 0.4259, "num_tokens": 3853406062.0, "step": 10496 }, { "epoch": 3.8453258828379058, "grad_norm": 0.14240135406015753, "learning_rate": 8.809770694942957e-06, "loss": 0.4175, "num_tokens": 3854162882.0, "step": 10497 }, { "epoch": 3.8456923006458115, "grad_norm": 0.1430452891498213, "learning_rate": 8.80686481369762e-06, "loss": 0.3621, "num_tokens": 3854938500.0, "step": 10498 }, { "epoch": 3.8460587184537167, "grad_norm": 0.12818354920227074, "learning_rate": 8.803959675252087e-06, "loss": 0.4012, "num_tokens": 3855694010.0, "step": 10499 }, { "epoch": 3.8464251362616224, "grad_norm": 0.14295685926267002, "learning_rate": 8.80105527976994e-06, "loss": 0.4079, "num_tokens": 3856527577.0, "step": 10500 }, { "epoch": 3.8467915540695277, "grad_norm": 0.1332351182166362, "learning_rate": 8.798151627414694e-06, "loss": 0.4182, "num_tokens": 3857368651.0, "step": 10501 }, { "epoch": 3.8471579718774334, "grad_norm": 0.13218532489738608, "learning_rate": 8.795248718349831e-06, "loss": 0.418, "num_tokens": 3858155841.0, "step": 10502 }, { "epoch": 3.8475243896853386, "grad_norm": 0.1384881075075517, "learning_rate": 8.792346552738785e-06, "loss": 0.4398, "num_tokens": 3858971052.0, "step": 10503 }, { "epoch": 3.8478908074932443, "grad_norm": 0.13856132843054114, "learning_rate": 8.789445130744964e-06, "loss": 0.3937, "num_tokens": 3859692223.0, "step": 10504 }, { "epoch": 3.8482572253011496, "grad_norm": 1.3755089254520265, "learning_rate": 8.786544452531713e-06, "loss": 0.4391, "num_tokens": 3860415605.0, "step": 10505 }, { "epoch": 3.8486236431090552, "grad_norm": 0.1464478196963076, "learning_rate": 8.783644518262357e-06, "loss": 0.4196, "num_tokens": 3861208947.0, "step": 10506 }, { "epoch": 3.8489900609169605, "grad_norm": 0.15253247856916274, "learning_rate": 8.780745328100164e-06, "loss": 0.4245, "num_tokens": 3861888938.0, "step": 10507 }, { "epoch": 3.849356478724866, "grad_norm": 0.13714686408772667, "learning_rate": 8.777846882208362e-06, "loss": 0.4049, "num_tokens": 3862634455.0, "step": 10508 }, { "epoch": 3.8497228965327714, "grad_norm": 0.14516559561699166, "learning_rate": 8.774949180750136e-06, "loss": 0.4047, "num_tokens": 3863415867.0, "step": 10509 }, { "epoch": 3.8500893143406767, "grad_norm": 0.13649703624959963, "learning_rate": 8.772052223888637e-06, "loss": 0.398, "num_tokens": 3864113179.0, "step": 10510 }, { "epoch": 3.8504557321485824, "grad_norm": 0.1398392658226282, "learning_rate": 8.769156011786972e-06, "loss": 0.3976, "num_tokens": 3864860241.0, "step": 10511 }, { "epoch": 3.850822149956488, "grad_norm": 0.13680283237452467, "learning_rate": 8.7662605446082e-06, "loss": 0.4182, "num_tokens": 3865612745.0, "step": 10512 }, { "epoch": 3.8511885677643933, "grad_norm": 0.14087408313351651, "learning_rate": 8.763365822515349e-06, "loss": 0.403, "num_tokens": 3866349602.0, "step": 10513 }, { "epoch": 3.8515549855722986, "grad_norm": 0.13973401402364263, "learning_rate": 8.760471845671385e-06, "loss": 0.3879, "num_tokens": 3867107070.0, "step": 10514 }, { "epoch": 3.8519214033802043, "grad_norm": 0.13085632494859867, "learning_rate": 8.75757861423925e-06, "loss": 0.4077, "num_tokens": 3867925129.0, "step": 10515 }, { "epoch": 3.85228782118811, "grad_norm": 0.12543394464090232, "learning_rate": 8.754686128381837e-06, "loss": 0.4022, "num_tokens": 3868772675.0, "step": 10516 }, { "epoch": 3.8526542389960152, "grad_norm": 0.12935667196542736, "learning_rate": 8.751794388262e-06, "loss": 0.3818, "num_tokens": 3869565392.0, "step": 10517 }, { "epoch": 3.8530206568039205, "grad_norm": 0.14235479779103655, "learning_rate": 8.748903394042555e-06, "loss": 0.4064, "num_tokens": 3870295101.0, "step": 10518 }, { "epoch": 3.853387074611826, "grad_norm": 0.1512603772085474, "learning_rate": 8.746013145886268e-06, "loss": 0.4008, "num_tokens": 3870999832.0, "step": 10519 }, { "epoch": 3.853753492419732, "grad_norm": 0.13633986871802406, "learning_rate": 8.743123643955862e-06, "loss": 0.3961, "num_tokens": 3871703443.0, "step": 10520 }, { "epoch": 3.854119910227637, "grad_norm": 0.1342509395519483, "learning_rate": 8.74023488841402e-06, "loss": 0.412, "num_tokens": 3872445168.0, "step": 10521 }, { "epoch": 3.8544863280355424, "grad_norm": 0.13978932001288805, "learning_rate": 8.737346879423393e-06, "loss": 0.4033, "num_tokens": 3873200835.0, "step": 10522 }, { "epoch": 3.854852745843448, "grad_norm": 0.14717569359947524, "learning_rate": 8.734459617146578e-06, "loss": 0.4008, "num_tokens": 3873857106.0, "step": 10523 }, { "epoch": 3.8552191636513533, "grad_norm": 0.13241339941481273, "learning_rate": 8.731573101746127e-06, "loss": 0.3888, "num_tokens": 3874587179.0, "step": 10524 }, { "epoch": 3.855585581459259, "grad_norm": 0.14579471238365707, "learning_rate": 8.728687333384568e-06, "loss": 0.427, "num_tokens": 3875280945.0, "step": 10525 }, { "epoch": 3.8559519992671643, "grad_norm": 0.14386187612269305, "learning_rate": 8.725802312224372e-06, "loss": 0.4335, "num_tokens": 3876060691.0, "step": 10526 }, { "epoch": 3.85631841707507, "grad_norm": 0.12889794871522905, "learning_rate": 8.72291803842796e-06, "loss": 0.426, "num_tokens": 3876883869.0, "step": 10527 }, { "epoch": 3.856684834882975, "grad_norm": 0.12794562487701097, "learning_rate": 8.720034512157739e-06, "loss": 0.3908, "num_tokens": 3877752372.0, "step": 10528 }, { "epoch": 3.857051252690881, "grad_norm": 0.1314029560201648, "learning_rate": 8.717151733576051e-06, "loss": 0.405, "num_tokens": 3878531251.0, "step": 10529 }, { "epoch": 3.857417670498786, "grad_norm": 0.14163417379287202, "learning_rate": 8.714269702845194e-06, "loss": 0.4222, "num_tokens": 3879278744.0, "step": 10530 }, { "epoch": 3.857784088306692, "grad_norm": 0.13993520733714257, "learning_rate": 8.711388420127447e-06, "loss": 0.4142, "num_tokens": 3880122969.0, "step": 10531 }, { "epoch": 3.858150506114597, "grad_norm": 0.13558688819364426, "learning_rate": 8.708507885585017e-06, "loss": 0.4142, "num_tokens": 3880900845.0, "step": 10532 }, { "epoch": 3.858516923922503, "grad_norm": 0.13379280931674833, "learning_rate": 8.705628099380097e-06, "loss": 0.401, "num_tokens": 3881685478.0, "step": 10533 }, { "epoch": 3.858883341730408, "grad_norm": 0.13559034500858091, "learning_rate": 8.702749061674818e-06, "loss": 0.4284, "num_tokens": 3882427421.0, "step": 10534 }, { "epoch": 3.8592497595383133, "grad_norm": 0.1481874255570238, "learning_rate": 8.699870772631278e-06, "loss": 0.3794, "num_tokens": 3883093005.0, "step": 10535 }, { "epoch": 3.859616177346219, "grad_norm": 0.14985567322143398, "learning_rate": 8.696993232411528e-06, "loss": 0.4257, "num_tokens": 3883738915.0, "step": 10536 }, { "epoch": 3.8599825951541247, "grad_norm": 0.1493760820155715, "learning_rate": 8.694116441177576e-06, "loss": 0.3925, "num_tokens": 3884397746.0, "step": 10537 }, { "epoch": 3.86034901296203, "grad_norm": 0.14990969230971998, "learning_rate": 8.691240399091393e-06, "loss": 0.4457, "num_tokens": 3885140821.0, "step": 10538 }, { "epoch": 3.860715430769935, "grad_norm": 0.13071663985011495, "learning_rate": 8.688365106314914e-06, "loss": 0.412, "num_tokens": 3885947745.0, "step": 10539 }, { "epoch": 3.861081848577841, "grad_norm": 0.14298207681693037, "learning_rate": 8.685490563010014e-06, "loss": 0.4172, "num_tokens": 3886652831.0, "step": 10540 }, { "epoch": 3.8614482663857466, "grad_norm": 0.1410277841255125, "learning_rate": 8.682616769338546e-06, "loss": 0.3767, "num_tokens": 3887466467.0, "step": 10541 }, { "epoch": 3.861814684193652, "grad_norm": 0.12782512948838864, "learning_rate": 8.6797437254623e-06, "loss": 0.4045, "num_tokens": 3888278472.0, "step": 10542 }, { "epoch": 3.862181102001557, "grad_norm": 0.12548809334989428, "learning_rate": 8.676871431543033e-06, "loss": 0.3968, "num_tokens": 3889076427.0, "step": 10543 }, { "epoch": 3.8625475198094628, "grad_norm": 0.13622777759579752, "learning_rate": 8.67399988774247e-06, "loss": 0.3846, "num_tokens": 3889771885.0, "step": 10544 }, { "epoch": 3.8629139376173685, "grad_norm": 0.1503594918457126, "learning_rate": 8.671129094222276e-06, "loss": 0.4222, "num_tokens": 3890426473.0, "step": 10545 }, { "epoch": 3.8632803554252737, "grad_norm": 0.15608664791813845, "learning_rate": 8.66825905114409e-06, "loss": 0.3996, "num_tokens": 3891104799.0, "step": 10546 }, { "epoch": 3.863646773233179, "grad_norm": 0.1220506558497231, "learning_rate": 8.6653897586695e-06, "loss": 0.4099, "num_tokens": 3891898465.0, "step": 10547 }, { "epoch": 3.8640131910410846, "grad_norm": 0.14419678146207587, "learning_rate": 8.662521216960049e-06, "loss": 0.447, "num_tokens": 3892672686.0, "step": 10548 }, { "epoch": 3.86437960884899, "grad_norm": 0.1302631306673571, "learning_rate": 8.659653426177237e-06, "loss": 0.4039, "num_tokens": 3893531148.0, "step": 10549 }, { "epoch": 3.8647460266568956, "grad_norm": 0.1386674088047999, "learning_rate": 8.65678638648254e-06, "loss": 0.4297, "num_tokens": 3894242981.0, "step": 10550 }, { "epoch": 3.865112444464801, "grad_norm": 0.11571420925636176, "learning_rate": 8.653920098037365e-06, "loss": 0.3874, "num_tokens": 3895178711.0, "step": 10551 }, { "epoch": 3.8654788622727065, "grad_norm": 0.12827430869160308, "learning_rate": 8.651054561003101e-06, "loss": 0.3975, "num_tokens": 3895998535.0, "step": 10552 }, { "epoch": 3.865845280080612, "grad_norm": 0.1449214065689055, "learning_rate": 8.648189775541076e-06, "loss": 0.4005, "num_tokens": 3896824257.0, "step": 10553 }, { "epoch": 3.8662116978885175, "grad_norm": 0.13904040011593688, "learning_rate": 8.645325741812585e-06, "loss": 0.4238, "num_tokens": 3897603413.0, "step": 10554 }, { "epoch": 3.8665781156964227, "grad_norm": 0.14509479234010292, "learning_rate": 8.642462459978875e-06, "loss": 0.4116, "num_tokens": 3898303381.0, "step": 10555 }, { "epoch": 3.8669445335043284, "grad_norm": 0.1509346224346967, "learning_rate": 8.639599930201164e-06, "loss": 0.4175, "num_tokens": 3899024628.0, "step": 10556 }, { "epoch": 3.8673109513122337, "grad_norm": 0.13719873683268932, "learning_rate": 8.63673815264061e-06, "loss": 0.3922, "num_tokens": 3899840280.0, "step": 10557 }, { "epoch": 3.8676773691201394, "grad_norm": 0.14924102621632312, "learning_rate": 8.633877127458334e-06, "loss": 0.4149, "num_tokens": 3900521370.0, "step": 10558 }, { "epoch": 3.8680437869280446, "grad_norm": 0.13995087227601774, "learning_rate": 8.63101685481543e-06, "loss": 0.3873, "num_tokens": 3901265966.0, "step": 10559 }, { "epoch": 3.8684102047359503, "grad_norm": 0.14240109672574547, "learning_rate": 8.628157334872923e-06, "loss": 0.4099, "num_tokens": 3902041692.0, "step": 10560 }, { "epoch": 3.8687766225438556, "grad_norm": 0.14198577530612064, "learning_rate": 8.62529856779182e-06, "loss": 0.417, "num_tokens": 3902782548.0, "step": 10561 }, { "epoch": 3.8691430403517613, "grad_norm": 0.14487827571592304, "learning_rate": 8.622440553733076e-06, "loss": 0.401, "num_tokens": 3903503578.0, "step": 10562 }, { "epoch": 3.8695094581596665, "grad_norm": 0.1285320294693468, "learning_rate": 8.619583292857594e-06, "loss": 0.3915, "num_tokens": 3904269086.0, "step": 10563 }, { "epoch": 3.8698758759675718, "grad_norm": 0.14378573279566034, "learning_rate": 8.616726785326246e-06, "loss": 0.4334, "num_tokens": 3905019814.0, "step": 10564 }, { "epoch": 3.8702422937754775, "grad_norm": 0.13622706230659362, "learning_rate": 8.613871031299863e-06, "loss": 0.4453, "num_tokens": 3905865888.0, "step": 10565 }, { "epoch": 3.870608711583383, "grad_norm": 0.13320140510751738, "learning_rate": 8.611016030939223e-06, "loss": 0.4104, "num_tokens": 3906658786.0, "step": 10566 }, { "epoch": 3.8709751293912884, "grad_norm": 0.1420801720994312, "learning_rate": 8.60816178440508e-06, "loss": 0.419, "num_tokens": 3907441516.0, "step": 10567 }, { "epoch": 3.8713415471991937, "grad_norm": 0.12227791646169997, "learning_rate": 8.605308291858125e-06, "loss": 0.3605, "num_tokens": 3908248152.0, "step": 10568 }, { "epoch": 3.8717079650070994, "grad_norm": 0.1551973331162061, "learning_rate": 8.602455553459017e-06, "loss": 0.4341, "num_tokens": 3908915980.0, "step": 10569 }, { "epoch": 3.872074382815005, "grad_norm": 0.14155450403648978, "learning_rate": 8.599603569368372e-06, "loss": 0.4152, "num_tokens": 3909606322.0, "step": 10570 }, { "epoch": 3.8724408006229103, "grad_norm": 0.14663602240411494, "learning_rate": 8.596752339746756e-06, "loss": 0.4068, "num_tokens": 3910275918.0, "step": 10571 }, { "epoch": 3.8728072184308155, "grad_norm": 0.14049151128486867, "learning_rate": 8.593901864754703e-06, "loss": 0.3994, "num_tokens": 3911036938.0, "step": 10572 }, { "epoch": 3.8731736362387212, "grad_norm": 0.1418365554614226, "learning_rate": 8.591052144552707e-06, "loss": 0.4115, "num_tokens": 3911767212.0, "step": 10573 }, { "epoch": 3.873540054046627, "grad_norm": 0.14097513479576543, "learning_rate": 8.588203179301208e-06, "loss": 0.4179, "num_tokens": 3912487547.0, "step": 10574 }, { "epoch": 3.873906471854532, "grad_norm": 0.13623520340896902, "learning_rate": 8.585354969160607e-06, "loss": 0.4073, "num_tokens": 3913262466.0, "step": 10575 }, { "epoch": 3.8742728896624374, "grad_norm": 0.13878142213003372, "learning_rate": 8.582507514291265e-06, "loss": 0.3917, "num_tokens": 3914000731.0, "step": 10576 }, { "epoch": 3.874639307470343, "grad_norm": 0.13671376881129485, "learning_rate": 8.579660814853494e-06, "loss": 0.4203, "num_tokens": 3914775922.0, "step": 10577 }, { "epoch": 3.8750057252782484, "grad_norm": 0.1515484890004402, "learning_rate": 8.57681487100758e-06, "loss": 0.4246, "num_tokens": 3915532915.0, "step": 10578 }, { "epoch": 3.875372143086154, "grad_norm": 0.15082634626929672, "learning_rate": 8.573969682913744e-06, "loss": 0.4271, "num_tokens": 3916192371.0, "step": 10579 }, { "epoch": 3.8757385608940593, "grad_norm": 0.1354658397791163, "learning_rate": 8.571125250732186e-06, "loss": 0.399, "num_tokens": 3916954743.0, "step": 10580 }, { "epoch": 3.876104978701965, "grad_norm": 0.1391593740766234, "learning_rate": 8.568281574623049e-06, "loss": 0.3839, "num_tokens": 3917676859.0, "step": 10581 }, { "epoch": 3.8764713965098703, "grad_norm": 0.14665181640607475, "learning_rate": 8.56543865474643e-06, "loss": 0.4017, "num_tokens": 3918519969.0, "step": 10582 }, { "epoch": 3.876837814317776, "grad_norm": 0.1346889113982512, "learning_rate": 8.562596491262403e-06, "loss": 0.4078, "num_tokens": 3919362740.0, "step": 10583 }, { "epoch": 3.877204232125681, "grad_norm": 0.14484060219525996, "learning_rate": 8.559755084330986e-06, "loss": 0.4351, "num_tokens": 3920064112.0, "step": 10584 }, { "epoch": 3.877570649933587, "grad_norm": 0.15084687351201104, "learning_rate": 8.556914434112144e-06, "loss": 0.4433, "num_tokens": 3920838751.0, "step": 10585 }, { "epoch": 3.877937067741492, "grad_norm": 0.13788986810117063, "learning_rate": 8.554074540765824e-06, "loss": 0.426, "num_tokens": 3921640048.0, "step": 10586 }, { "epoch": 3.878303485549398, "grad_norm": 0.14437072570845455, "learning_rate": 8.551235404451915e-06, "loss": 0.3977, "num_tokens": 3922326458.0, "step": 10587 }, { "epoch": 3.878669903357303, "grad_norm": 0.1286398932169989, "learning_rate": 8.548397025330259e-06, "loss": 0.3902, "num_tokens": 3923160113.0, "step": 10588 }, { "epoch": 3.8790363211652084, "grad_norm": 0.13446580762264515, "learning_rate": 8.545559403560673e-06, "loss": 0.4025, "num_tokens": 3923903352.0, "step": 10589 }, { "epoch": 3.879402738973114, "grad_norm": 0.13806122607022844, "learning_rate": 8.542722539302915e-06, "loss": 0.4045, "num_tokens": 3924740399.0, "step": 10590 }, { "epoch": 3.8797691567810197, "grad_norm": 0.13606588738856049, "learning_rate": 8.53988643271671e-06, "loss": 0.4104, "num_tokens": 3925513501.0, "step": 10591 }, { "epoch": 3.880135574588925, "grad_norm": 0.14255133580700632, "learning_rate": 8.537051083961723e-06, "loss": 0.4086, "num_tokens": 3926236822.0, "step": 10592 }, { "epoch": 3.8805019923968302, "grad_norm": 0.13102544527855153, "learning_rate": 8.534216493197601e-06, "loss": 0.4153, "num_tokens": 3927054785.0, "step": 10593 }, { "epoch": 3.880868410204736, "grad_norm": 0.15093831866654103, "learning_rate": 8.531382660583942e-06, "loss": 0.4292, "num_tokens": 3927688173.0, "step": 10594 }, { "epoch": 3.8812348280126416, "grad_norm": 0.14301404776493395, "learning_rate": 8.52854958628029e-06, "loss": 0.4112, "num_tokens": 3928434914.0, "step": 10595 }, { "epoch": 3.881601245820547, "grad_norm": 0.13155512934263497, "learning_rate": 8.525717270446151e-06, "loss": 0.409, "num_tokens": 3929309395.0, "step": 10596 }, { "epoch": 3.881967663628452, "grad_norm": 0.13819241012973763, "learning_rate": 8.522885713240996e-06, "loss": 0.4063, "num_tokens": 3930006811.0, "step": 10597 }, { "epoch": 3.882334081436358, "grad_norm": 0.1303614206204856, "learning_rate": 8.520054914824236e-06, "loss": 0.4033, "num_tokens": 3930781577.0, "step": 10598 }, { "epoch": 3.8827004992442635, "grad_norm": 0.15080212105201812, "learning_rate": 8.517224875355263e-06, "loss": 0.4137, "num_tokens": 3931399391.0, "step": 10599 }, { "epoch": 3.8830669170521688, "grad_norm": 0.14228369359264395, "learning_rate": 8.514395594993405e-06, "loss": 0.4088, "num_tokens": 3932137245.0, "step": 10600 }, { "epoch": 3.883433334860074, "grad_norm": 0.1481383098312527, "learning_rate": 8.511567073897964e-06, "loss": 0.4097, "num_tokens": 3932874524.0, "step": 10601 }, { "epoch": 3.8837997526679797, "grad_norm": 0.13457217030585, "learning_rate": 8.50873931222819e-06, "loss": 0.4285, "num_tokens": 3933637029.0, "step": 10602 }, { "epoch": 3.884166170475885, "grad_norm": 0.1412464206154295, "learning_rate": 8.505912310143288e-06, "loss": 0.398, "num_tokens": 3934482725.0, "step": 10603 }, { "epoch": 3.8845325882837907, "grad_norm": 0.14086419404422268, "learning_rate": 8.503086067802416e-06, "loss": 0.4028, "num_tokens": 3935321129.0, "step": 10604 }, { "epoch": 3.884899006091696, "grad_norm": 0.12898060139323753, "learning_rate": 8.500260585364713e-06, "loss": 0.3852, "num_tokens": 3936095593.0, "step": 10605 }, { "epoch": 3.8852654238996016, "grad_norm": 0.13508243095577901, "learning_rate": 8.497435862989249e-06, "loss": 0.3917, "num_tokens": 3936875811.0, "step": 10606 }, { "epoch": 3.885631841707507, "grad_norm": 0.13851033139001287, "learning_rate": 8.494611900835069e-06, "loss": 0.4209, "num_tokens": 3937612022.0, "step": 10607 }, { "epoch": 3.8859982595154126, "grad_norm": 0.14219083095472068, "learning_rate": 8.491788699061163e-06, "loss": 0.4062, "num_tokens": 3938328830.0, "step": 10608 }, { "epoch": 3.886364677323318, "grad_norm": 0.1403196228794501, "learning_rate": 8.488966257826483e-06, "loss": 0.4302, "num_tokens": 3939044792.0, "step": 10609 }, { "epoch": 3.8867310951312235, "grad_norm": 0.14233106155005065, "learning_rate": 8.486144577289935e-06, "loss": 0.4472, "num_tokens": 3939811307.0, "step": 10610 }, { "epoch": 3.8870975129391288, "grad_norm": 0.13500339631183375, "learning_rate": 8.483323657610392e-06, "loss": 0.3608, "num_tokens": 3940593789.0, "step": 10611 }, { "epoch": 3.8874639307470344, "grad_norm": 0.12741737245714335, "learning_rate": 8.480503498946675e-06, "loss": 0.424, "num_tokens": 3941405805.0, "step": 10612 }, { "epoch": 3.8878303485549397, "grad_norm": 0.1379444845961976, "learning_rate": 8.477684101457559e-06, "loss": 0.4105, "num_tokens": 3942252476.0, "step": 10613 }, { "epoch": 3.8881967663628454, "grad_norm": 0.13242030769688598, "learning_rate": 8.47486546530179e-06, "loss": 0.4212, "num_tokens": 3943062012.0, "step": 10614 }, { "epoch": 3.8885631841707506, "grad_norm": 0.13509509987078883, "learning_rate": 8.472047590638054e-06, "loss": 0.4126, "num_tokens": 3943800129.0, "step": 10615 }, { "epoch": 3.8889296019786563, "grad_norm": 0.14739664459355065, "learning_rate": 8.469230477625012e-06, "loss": 0.4086, "num_tokens": 3944539810.0, "step": 10616 }, { "epoch": 3.8892960197865616, "grad_norm": 0.1365789507553111, "learning_rate": 8.466414126421272e-06, "loss": 0.4176, "num_tokens": 3945298266.0, "step": 10617 }, { "epoch": 3.889662437594467, "grad_norm": 0.14514938869589492, "learning_rate": 8.463598537185398e-06, "loss": 0.4125, "num_tokens": 3946100621.0, "step": 10618 }, { "epoch": 3.8900288554023725, "grad_norm": 0.14309174622033033, "learning_rate": 8.460783710075904e-06, "loss": 0.4207, "num_tokens": 3946801342.0, "step": 10619 }, { "epoch": 3.8903952732102782, "grad_norm": 0.14363901068352822, "learning_rate": 8.457969645251284e-06, "loss": 0.3859, "num_tokens": 3947516185.0, "step": 10620 }, { "epoch": 3.8907616910181835, "grad_norm": 0.153442349510685, "learning_rate": 8.455156342869968e-06, "loss": 0.4141, "num_tokens": 3948174949.0, "step": 10621 }, { "epoch": 3.8911281088260887, "grad_norm": 0.13727108622394443, "learning_rate": 8.45234380309036e-06, "loss": 0.3959, "num_tokens": 3948959668.0, "step": 10622 }, { "epoch": 3.8914945266339944, "grad_norm": 0.14511274107407554, "learning_rate": 8.449532026070802e-06, "loss": 0.4398, "num_tokens": 3949721547.0, "step": 10623 }, { "epoch": 3.8918609444419, "grad_norm": 0.14834596522876078, "learning_rate": 8.446721011969603e-06, "loss": 0.3964, "num_tokens": 3950394010.0, "step": 10624 }, { "epoch": 3.8922273622498054, "grad_norm": 0.14418888800800891, "learning_rate": 8.443910760945034e-06, "loss": 0.4102, "num_tokens": 3951055089.0, "step": 10625 }, { "epoch": 3.8925937800577106, "grad_norm": 0.14330643817915603, "learning_rate": 8.441101273155307e-06, "loss": 0.4213, "num_tokens": 3951783608.0, "step": 10626 }, { "epoch": 3.8929601978656163, "grad_norm": 0.14006040231678574, "learning_rate": 8.438292548758613e-06, "loss": 0.4038, "num_tokens": 3952530428.0, "step": 10627 }, { "epoch": 3.893326615673522, "grad_norm": 0.13963146723626219, "learning_rate": 8.43548458791309e-06, "loss": 0.3993, "num_tokens": 3953291745.0, "step": 10628 }, { "epoch": 3.8936930334814273, "grad_norm": 0.14267114016235724, "learning_rate": 8.432677390776824e-06, "loss": 0.4125, "num_tokens": 3954053978.0, "step": 10629 }, { "epoch": 3.8940594512893325, "grad_norm": 0.14131747096779876, "learning_rate": 8.429870957507872e-06, "loss": 0.4016, "num_tokens": 3954845429.0, "step": 10630 }, { "epoch": 3.894425869097238, "grad_norm": 0.1330795695345817, "learning_rate": 8.427065288264235e-06, "loss": 0.3859, "num_tokens": 3955691687.0, "step": 10631 }, { "epoch": 3.8947922869051435, "grad_norm": 0.13108965002571535, "learning_rate": 8.424260383203879e-06, "loss": 0.3911, "num_tokens": 3956483108.0, "step": 10632 }, { "epoch": 3.895158704713049, "grad_norm": 0.13326532574945466, "learning_rate": 8.421456242484733e-06, "loss": 0.411, "num_tokens": 3957335775.0, "step": 10633 }, { "epoch": 3.8955251225209544, "grad_norm": 0.14729256924257986, "learning_rate": 8.418652866264664e-06, "loss": 0.4014, "num_tokens": 3958037801.0, "step": 10634 }, { "epoch": 3.89589154032886, "grad_norm": 0.14118361114173078, "learning_rate": 8.415850254701522e-06, "loss": 0.429, "num_tokens": 3958763822.0, "step": 10635 }, { "epoch": 3.8962579581367653, "grad_norm": 0.15124034376987208, "learning_rate": 8.41304840795309e-06, "loss": 0.3929, "num_tokens": 3959496094.0, "step": 10636 }, { "epoch": 3.896624375944671, "grad_norm": 0.13888280694335656, "learning_rate": 8.41024732617712e-06, "loss": 0.4273, "num_tokens": 3960300046.0, "step": 10637 }, { "epoch": 3.8969907937525763, "grad_norm": 0.13566969313606927, "learning_rate": 8.407447009531315e-06, "loss": 0.3926, "num_tokens": 3960973243.0, "step": 10638 }, { "epoch": 3.897357211560482, "grad_norm": 0.14099369449341956, "learning_rate": 8.404647458173348e-06, "loss": 0.4041, "num_tokens": 3961824518.0, "step": 10639 }, { "epoch": 3.8977236293683872, "grad_norm": 0.1318727066539514, "learning_rate": 8.401848672260824e-06, "loss": 0.4129, "num_tokens": 3962637049.0, "step": 10640 }, { "epoch": 3.898090047176293, "grad_norm": 0.14095306804480517, "learning_rate": 8.399050651951336e-06, "loss": 0.4117, "num_tokens": 3963461411.0, "step": 10641 }, { "epoch": 3.898456464984198, "grad_norm": 0.1454590595167768, "learning_rate": 8.39625339740241e-06, "loss": 0.4058, "num_tokens": 3964112848.0, "step": 10642 }, { "epoch": 3.8988228827921034, "grad_norm": 0.1499624061812279, "learning_rate": 8.393456908771534e-06, "loss": 0.428, "num_tokens": 3964821394.0, "step": 10643 }, { "epoch": 3.899189300600009, "grad_norm": 0.1356529678816996, "learning_rate": 8.390661186216164e-06, "loss": 0.4273, "num_tokens": 3965629660.0, "step": 10644 }, { "epoch": 3.899555718407915, "grad_norm": 0.14244796781904168, "learning_rate": 8.387866229893702e-06, "loss": 0.4412, "num_tokens": 3966268060.0, "step": 10645 }, { "epoch": 3.89992213621582, "grad_norm": 0.1531840903670438, "learning_rate": 8.38507203996151e-06, "loss": 0.4143, "num_tokens": 3967019763.0, "step": 10646 }, { "epoch": 3.9002885540237253, "grad_norm": 0.14057952069489457, "learning_rate": 8.3822786165769e-06, "loss": 0.3759, "num_tokens": 3967748570.0, "step": 10647 }, { "epoch": 3.900654971831631, "grad_norm": 0.15118826375429215, "learning_rate": 8.379485959897154e-06, "loss": 0.42, "num_tokens": 3968467454.0, "step": 10648 }, { "epoch": 3.9010213896395367, "grad_norm": 0.14303389980508305, "learning_rate": 8.3766940700795e-06, "loss": 0.4125, "num_tokens": 3969180360.0, "step": 10649 }, { "epoch": 3.901387807447442, "grad_norm": 0.13766875262518724, "learning_rate": 8.373902947281135e-06, "loss": 0.4164, "num_tokens": 3969918430.0, "step": 10650 }, { "epoch": 3.901754225255347, "grad_norm": 0.13259499330777758, "learning_rate": 8.371112591659198e-06, "loss": 0.3918, "num_tokens": 3970715432.0, "step": 10651 }, { "epoch": 3.902120643063253, "grad_norm": 0.1426646398301901, "learning_rate": 8.368323003370792e-06, "loss": 0.4082, "num_tokens": 3971579726.0, "step": 10652 }, { "epoch": 3.9024870608711586, "grad_norm": 0.12741779412448448, "learning_rate": 8.365534182572974e-06, "loss": 0.3994, "num_tokens": 3972469818.0, "step": 10653 }, { "epoch": 3.902853478679064, "grad_norm": 0.13686824918274665, "learning_rate": 8.362746129422766e-06, "loss": 0.3997, "num_tokens": 3973260601.0, "step": 10654 }, { "epoch": 3.903219896486969, "grad_norm": 0.15307379979356148, "learning_rate": 8.359958844077134e-06, "loss": 0.4269, "num_tokens": 3973915408.0, "step": 10655 }, { "epoch": 3.903586314294875, "grad_norm": 0.1398416982104177, "learning_rate": 8.357172326693017e-06, "loss": 0.4239, "num_tokens": 3974665909.0, "step": 10656 }, { "epoch": 3.90395273210278, "grad_norm": 0.1324993913267776, "learning_rate": 8.354386577427298e-06, "loss": 0.3898, "num_tokens": 3975420087.0, "step": 10657 }, { "epoch": 3.9043191499106857, "grad_norm": 0.1382095001600576, "learning_rate": 8.351601596436818e-06, "loss": 0.4165, "num_tokens": 3976171522.0, "step": 10658 }, { "epoch": 3.904685567718591, "grad_norm": 0.1403673271709278, "learning_rate": 8.348817383878376e-06, "loss": 0.3994, "num_tokens": 3976999983.0, "step": 10659 }, { "epoch": 3.9050519855264967, "grad_norm": 0.1277459217379234, "learning_rate": 8.346033939908727e-06, "loss": 0.4043, "num_tokens": 3977722224.0, "step": 10660 }, { "epoch": 3.905418403334402, "grad_norm": 0.1342591494186383, "learning_rate": 8.343251264684589e-06, "loss": 0.3961, "num_tokens": 3978491031.0, "step": 10661 }, { "epoch": 3.9057848211423076, "grad_norm": 0.13878103404804878, "learning_rate": 8.340469358362634e-06, "loss": 0.4235, "num_tokens": 3979240251.0, "step": 10662 }, { "epoch": 3.906151238950213, "grad_norm": 0.14211965653593786, "learning_rate": 8.337688221099486e-06, "loss": 0.3841, "num_tokens": 3980009950.0, "step": 10663 }, { "epoch": 3.9065176567581186, "grad_norm": 0.145376708797404, "learning_rate": 8.334907853051727e-06, "loss": 0.4334, "num_tokens": 3980750274.0, "step": 10664 }, { "epoch": 3.906884074566024, "grad_norm": 0.13293669729103266, "learning_rate": 8.332128254375898e-06, "loss": 0.4102, "num_tokens": 3981573467.0, "step": 10665 }, { "epoch": 3.9072504923739295, "grad_norm": 0.14105443540997073, "learning_rate": 8.329349425228497e-06, "loss": 0.4028, "num_tokens": 3982275764.0, "step": 10666 }, { "epoch": 3.9076169101818348, "grad_norm": 0.13999947547373673, "learning_rate": 8.32657136576598e-06, "loss": 0.392, "num_tokens": 3982972117.0, "step": 10667 }, { "epoch": 3.9079833279897405, "grad_norm": 0.1317934904105156, "learning_rate": 8.323794076144749e-06, "loss": 0.4072, "num_tokens": 3983785076.0, "step": 10668 }, { "epoch": 3.9083497457976457, "grad_norm": 0.12961334203575986, "learning_rate": 8.321017556521182e-06, "loss": 0.4161, "num_tokens": 3984581261.0, "step": 10669 }, { "epoch": 3.9087161636055514, "grad_norm": 0.1327672933964032, "learning_rate": 8.318241807051596e-06, "loss": 0.4015, "num_tokens": 3985362901.0, "step": 10670 }, { "epoch": 3.9090825814134567, "grad_norm": 0.12313649365423343, "learning_rate": 8.315466827892268e-06, "loss": 0.3992, "num_tokens": 3986230515.0, "step": 10671 }, { "epoch": 3.909448999221362, "grad_norm": 0.14028710004821654, "learning_rate": 8.312692619199445e-06, "loss": 0.4085, "num_tokens": 3987020493.0, "step": 10672 }, { "epoch": 3.9098154170292676, "grad_norm": 0.12089622093002235, "learning_rate": 8.309919181129314e-06, "loss": 0.4122, "num_tokens": 3987935379.0, "step": 10673 }, { "epoch": 3.9101818348371733, "grad_norm": 0.13691519629895557, "learning_rate": 8.30714651383802e-06, "loss": 0.412, "num_tokens": 3988772945.0, "step": 10674 }, { "epoch": 3.9105482526450785, "grad_norm": 0.1303161171913966, "learning_rate": 8.30437461748168e-06, "loss": 0.4116, "num_tokens": 3989584069.0, "step": 10675 }, { "epoch": 3.910914670452984, "grad_norm": 0.1282644157377575, "learning_rate": 8.30160349221635e-06, "loss": 0.399, "num_tokens": 3990404435.0, "step": 10676 }, { "epoch": 3.9112810882608895, "grad_norm": 0.142341150079264, "learning_rate": 8.298833138198056e-06, "loss": 0.4153, "num_tokens": 3991141305.0, "step": 10677 }, { "epoch": 3.911647506068795, "grad_norm": 0.13411087654555023, "learning_rate": 8.296063555582771e-06, "loss": 0.4238, "num_tokens": 3991998111.0, "step": 10678 }, { "epoch": 3.9120139238767004, "grad_norm": 0.12621729357191722, "learning_rate": 8.293294744526432e-06, "loss": 0.3964, "num_tokens": 3992852899.0, "step": 10679 }, { "epoch": 3.9123803416846057, "grad_norm": 0.1509214289175215, "learning_rate": 8.290526705184921e-06, "loss": 0.415, "num_tokens": 3993518092.0, "step": 10680 }, { "epoch": 3.9127467594925114, "grad_norm": 0.13496969815799564, "learning_rate": 8.287759437714082e-06, "loss": 0.4021, "num_tokens": 3994287809.0, "step": 10681 }, { "epoch": 3.9131131773004166, "grad_norm": 0.14567969094685146, "learning_rate": 8.284992942269724e-06, "loss": 0.4212, "num_tokens": 3994944906.0, "step": 10682 }, { "epoch": 3.9134795951083223, "grad_norm": 0.13993714681902886, "learning_rate": 8.282227219007613e-06, "loss": 0.4176, "num_tokens": 3995797132.0, "step": 10683 }, { "epoch": 3.9138460129162276, "grad_norm": 0.1377232272940956, "learning_rate": 8.279462268083453e-06, "loss": 0.3984, "num_tokens": 3996582781.0, "step": 10684 }, { "epoch": 3.9142124307241333, "grad_norm": 0.14576887443052547, "learning_rate": 8.276698089652926e-06, "loss": 0.4614, "num_tokens": 3997311637.0, "step": 10685 }, { "epoch": 3.9145788485320385, "grad_norm": 0.1354580222055181, "learning_rate": 8.273934683871654e-06, "loss": 0.4187, "num_tokens": 3998118803.0, "step": 10686 }, { "epoch": 3.914945266339944, "grad_norm": 0.14890078008531454, "learning_rate": 8.271172050895216e-06, "loss": 0.4188, "num_tokens": 3998842634.0, "step": 10687 }, { "epoch": 3.9153116841478495, "grad_norm": 0.13988346238210775, "learning_rate": 8.268410190879169e-06, "loss": 0.4024, "num_tokens": 3999519494.0, "step": 10688 }, { "epoch": 3.915678101955755, "grad_norm": 0.13493844253684145, "learning_rate": 8.265649103978996e-06, "loss": 0.3773, "num_tokens": 4000263287.0, "step": 10689 }, { "epoch": 3.9160445197636604, "grad_norm": 0.1385560817593866, "learning_rate": 8.262888790350165e-06, "loss": 0.4003, "num_tokens": 4001031056.0, "step": 10690 }, { "epoch": 3.916410937571566, "grad_norm": 0.13280097969221114, "learning_rate": 8.260129250148086e-06, "loss": 0.3922, "num_tokens": 4001886640.0, "step": 10691 }, { "epoch": 3.9167773553794714, "grad_norm": 0.12617465864393995, "learning_rate": 8.257370483528117e-06, "loss": 0.4133, "num_tokens": 4002721457.0, "step": 10692 }, { "epoch": 3.917143773187377, "grad_norm": 0.1495484492711858, "learning_rate": 8.254612490645583e-06, "loss": 0.4016, "num_tokens": 4003389709.0, "step": 10693 }, { "epoch": 3.9175101909952823, "grad_norm": 0.16033779133677112, "learning_rate": 8.251855271655775e-06, "loss": 0.4046, "num_tokens": 4003995771.0, "step": 10694 }, { "epoch": 3.917876608803188, "grad_norm": 0.13850183257430612, "learning_rate": 8.24909882671392e-06, "loss": 0.4059, "num_tokens": 4004820527.0, "step": 10695 }, { "epoch": 3.9182430266110932, "grad_norm": 0.1324871027717381, "learning_rate": 8.246343155975217e-06, "loss": 0.403, "num_tokens": 4005629373.0, "step": 10696 }, { "epoch": 3.9186094444189985, "grad_norm": 0.13934098563348282, "learning_rate": 8.243588259594816e-06, "loss": 0.3954, "num_tokens": 4006410707.0, "step": 10697 }, { "epoch": 3.918975862226904, "grad_norm": 0.1253046739152016, "learning_rate": 8.24083413772782e-06, "loss": 0.378, "num_tokens": 4007268720.0, "step": 10698 }, { "epoch": 3.91934228003481, "grad_norm": 0.13669681044553483, "learning_rate": 8.238080790529289e-06, "loss": 0.4014, "num_tokens": 4008151901.0, "step": 10699 }, { "epoch": 3.919708697842715, "grad_norm": 0.15927135441773466, "learning_rate": 8.235328218154246e-06, "loss": 0.4195, "num_tokens": 4008711689.0, "step": 10700 }, { "epoch": 3.9200751156506204, "grad_norm": 0.15408220883553309, "learning_rate": 8.23257642075767e-06, "loss": 0.3987, "num_tokens": 4009356177.0, "step": 10701 }, { "epoch": 3.920441533458526, "grad_norm": 0.13943115413432425, "learning_rate": 8.22982539849448e-06, "loss": 0.419, "num_tokens": 4010140284.0, "step": 10702 }, { "epoch": 3.920807951266432, "grad_norm": 0.13138375767296032, "learning_rate": 8.22707515151958e-06, "loss": 0.3809, "num_tokens": 4010922375.0, "step": 10703 }, { "epoch": 3.921174369074337, "grad_norm": 0.1435868082675954, "learning_rate": 8.224325679987801e-06, "loss": 0.4234, "num_tokens": 4011695170.0, "step": 10704 }, { "epoch": 3.9215407868822423, "grad_norm": 0.13453015851548192, "learning_rate": 8.221576984053954e-06, "loss": 0.3903, "num_tokens": 4012458660.0, "step": 10705 }, { "epoch": 3.921907204690148, "grad_norm": 0.12958224662472156, "learning_rate": 8.218829063872792e-06, "loss": 0.3882, "num_tokens": 4013216202.0, "step": 10706 }, { "epoch": 3.9222736224980537, "grad_norm": 0.13917101740113774, "learning_rate": 8.21608191959903e-06, "loss": 0.4099, "num_tokens": 4013965430.0, "step": 10707 }, { "epoch": 3.922640040305959, "grad_norm": 0.14449124811091998, "learning_rate": 8.213335551387328e-06, "loss": 0.43, "num_tokens": 4014777696.0, "step": 10708 }, { "epoch": 3.923006458113864, "grad_norm": 0.13696261236755816, "learning_rate": 8.210589959392324e-06, "loss": 0.3997, "num_tokens": 4015511056.0, "step": 10709 }, { "epoch": 3.92337287592177, "grad_norm": 0.139393930677804, "learning_rate": 8.207845143768592e-06, "loss": 0.4121, "num_tokens": 4016216741.0, "step": 10710 }, { "epoch": 3.923739293729675, "grad_norm": 0.1337276998181964, "learning_rate": 8.20510110467068e-06, "loss": 0.3841, "num_tokens": 4017010146.0, "step": 10711 }, { "epoch": 3.924105711537581, "grad_norm": 0.13333478441864707, "learning_rate": 8.20235784225308e-06, "loss": 0.4082, "num_tokens": 4017821994.0, "step": 10712 }, { "epoch": 3.924472129345486, "grad_norm": 0.12999237716773993, "learning_rate": 8.199615356670236e-06, "loss": 0.4467, "num_tokens": 4018581862.0, "step": 10713 }, { "epoch": 3.9248385471533918, "grad_norm": 0.15019736999629224, "learning_rate": 8.196873648076562e-06, "loss": 0.4479, "num_tokens": 4019219984.0, "step": 10714 }, { "epoch": 3.925204964961297, "grad_norm": 0.14268010810771126, "learning_rate": 8.194132716626414e-06, "loss": 0.4148, "num_tokens": 4020041582.0, "step": 10715 }, { "epoch": 3.9255713827692027, "grad_norm": 0.14364370464266946, "learning_rate": 8.191392562474118e-06, "loss": 0.4132, "num_tokens": 4020698157.0, "step": 10716 }, { "epoch": 3.925937800577108, "grad_norm": 0.13883865632426157, "learning_rate": 8.188653185773954e-06, "loss": 0.3883, "num_tokens": 4021401243.0, "step": 10717 }, { "epoch": 3.9263042183850136, "grad_norm": 0.12893688509011939, "learning_rate": 8.185914586680151e-06, "loss": 0.3751, "num_tokens": 4022188111.0, "step": 10718 }, { "epoch": 3.926670636192919, "grad_norm": 0.15239240225778966, "learning_rate": 8.183176765346897e-06, "loss": 0.4086, "num_tokens": 4022863558.0, "step": 10719 }, { "epoch": 3.9270370540008246, "grad_norm": 0.1275716470522923, "learning_rate": 8.180439721928336e-06, "loss": 0.3872, "num_tokens": 4023677350.0, "step": 10720 }, { "epoch": 3.92740347180873, "grad_norm": 0.13758635926807888, "learning_rate": 8.177703456578564e-06, "loss": 0.4143, "num_tokens": 4024458658.0, "step": 10721 }, { "epoch": 3.9277698896166355, "grad_norm": 0.14621706872137302, "learning_rate": 8.17496796945165e-06, "loss": 0.3933, "num_tokens": 4025135464.0, "step": 10722 }, { "epoch": 3.928136307424541, "grad_norm": 0.13608279397694442, "learning_rate": 8.172233260701594e-06, "loss": 0.4131, "num_tokens": 4025901490.0, "step": 10723 }, { "epoch": 3.9285027252324465, "grad_norm": 0.1467479398499563, "learning_rate": 8.169499330482379e-06, "loss": 0.386, "num_tokens": 4026513577.0, "step": 10724 }, { "epoch": 3.9288691430403517, "grad_norm": 0.14126140274488752, "learning_rate": 8.166766178947924e-06, "loss": 0.4273, "num_tokens": 4027297110.0, "step": 10725 }, { "epoch": 3.929235560848257, "grad_norm": 0.14841491672783597, "learning_rate": 8.164033806252106e-06, "loss": 0.4187, "num_tokens": 4028024410.0, "step": 10726 }, { "epoch": 3.9296019786561627, "grad_norm": 0.14363838682074176, "learning_rate": 8.161302212548772e-06, "loss": 0.4248, "num_tokens": 4028752714.0, "step": 10727 }, { "epoch": 3.9299683964640684, "grad_norm": 0.1409737196083824, "learning_rate": 8.158571397991714e-06, "loss": 0.3829, "num_tokens": 4029458631.0, "step": 10728 }, { "epoch": 3.9303348142719736, "grad_norm": 0.152369386440053, "learning_rate": 8.155841362734675e-06, "loss": 0.4326, "num_tokens": 4030086978.0, "step": 10729 }, { "epoch": 3.930701232079879, "grad_norm": 0.1531443326952177, "learning_rate": 8.153112106931373e-06, "loss": 0.4116, "num_tokens": 4030718489.0, "step": 10730 }, { "epoch": 3.9310676498877846, "grad_norm": 0.1412801146345387, "learning_rate": 8.150383630735465e-06, "loss": 0.392, "num_tokens": 4031468889.0, "step": 10731 }, { "epoch": 3.9314340676956903, "grad_norm": 0.14782398939413474, "learning_rate": 8.147655934300565e-06, "loss": 0.4157, "num_tokens": 4032122564.0, "step": 10732 }, { "epoch": 3.9318004855035955, "grad_norm": 0.16038199201603623, "learning_rate": 8.144929017780255e-06, "loss": 0.4583, "num_tokens": 4032806392.0, "step": 10733 }, { "epoch": 3.9321669033115008, "grad_norm": 0.1483490813150187, "learning_rate": 8.142202881328061e-06, "loss": 0.4256, "num_tokens": 4033533012.0, "step": 10734 }, { "epoch": 3.9325333211194065, "grad_norm": 0.12822702143117873, "learning_rate": 8.139477525097475e-06, "loss": 0.4058, "num_tokens": 4034357412.0, "step": 10735 }, { "epoch": 3.9328997389273117, "grad_norm": 0.14511917801005864, "learning_rate": 8.136752949241933e-06, "loss": 0.3792, "num_tokens": 4035015904.0, "step": 10736 }, { "epoch": 3.9332661567352174, "grad_norm": 0.15135584711873923, "learning_rate": 8.134029153914833e-06, "loss": 0.3985, "num_tokens": 4035680951.0, "step": 10737 }, { "epoch": 3.9336325745431227, "grad_norm": 0.14239895389383553, "learning_rate": 8.131306139269544e-06, "loss": 0.4137, "num_tokens": 4036402390.0, "step": 10738 }, { "epoch": 3.9339989923510283, "grad_norm": 0.14920361387368322, "learning_rate": 8.128583905459366e-06, "loss": 0.4116, "num_tokens": 4037120576.0, "step": 10739 }, { "epoch": 3.9343654101589336, "grad_norm": 0.13270132478812965, "learning_rate": 8.12586245263757e-06, "loss": 0.4151, "num_tokens": 4037957274.0, "step": 10740 }, { "epoch": 3.9347318279668393, "grad_norm": 0.1488589759343597, "learning_rate": 8.123141780957375e-06, "loss": 0.4238, "num_tokens": 4038665765.0, "step": 10741 }, { "epoch": 3.9350982457747445, "grad_norm": 0.1431982007326699, "learning_rate": 8.120421890571966e-06, "loss": 0.4093, "num_tokens": 4039378896.0, "step": 10742 }, { "epoch": 3.9354646635826502, "grad_norm": 0.13264440811020006, "learning_rate": 8.117702781634466e-06, "loss": 0.3954, "num_tokens": 4040220097.0, "step": 10743 }, { "epoch": 3.9358310813905555, "grad_norm": 0.14859635343813313, "learning_rate": 8.114984454297979e-06, "loss": 0.4232, "num_tokens": 4040953854.0, "step": 10744 }, { "epoch": 3.936197499198461, "grad_norm": 0.13124758709218443, "learning_rate": 8.112266908715552e-06, "loss": 0.4056, "num_tokens": 4041776663.0, "step": 10745 }, { "epoch": 3.9365639170063664, "grad_norm": 0.14902947284151077, "learning_rate": 8.109550145040185e-06, "loss": 0.4198, "num_tokens": 4042409057.0, "step": 10746 }, { "epoch": 3.936930334814272, "grad_norm": 0.13012382752757948, "learning_rate": 8.106834163424837e-06, "loss": 0.374, "num_tokens": 4043199745.0, "step": 10747 }, { "epoch": 3.9372967526221774, "grad_norm": 0.14176076170022617, "learning_rate": 8.104118964022419e-06, "loss": 0.4096, "num_tokens": 4043960678.0, "step": 10748 }, { "epoch": 3.937663170430083, "grad_norm": 0.13293524032569176, "learning_rate": 8.10140454698581e-06, "loss": 0.3916, "num_tokens": 4044728275.0, "step": 10749 }, { "epoch": 3.9380295882379883, "grad_norm": 0.1391903689332839, "learning_rate": 8.09869091246783e-06, "loss": 0.4025, "num_tokens": 4045521706.0, "step": 10750 }, { "epoch": 3.9383960060458936, "grad_norm": 0.13810429539486932, "learning_rate": 8.095978060621269e-06, "loss": 0.4132, "num_tokens": 4046329967.0, "step": 10751 }, { "epoch": 3.9387624238537993, "grad_norm": 0.1334688100060501, "learning_rate": 8.093265991598863e-06, "loss": 0.4253, "num_tokens": 4047185827.0, "step": 10752 }, { "epoch": 3.939128841661705, "grad_norm": 0.14829246093564832, "learning_rate": 8.090554705553304e-06, "loss": 0.4335, "num_tokens": 4047914191.0, "step": 10753 }, { "epoch": 3.93949525946961, "grad_norm": 0.1293768907203021, "learning_rate": 8.08784420263724e-06, "loss": 0.4104, "num_tokens": 4048767290.0, "step": 10754 }, { "epoch": 3.9398616772775155, "grad_norm": 0.15028686430396604, "learning_rate": 8.085134483003292e-06, "loss": 0.4136, "num_tokens": 4049441970.0, "step": 10755 }, { "epoch": 3.940228095085421, "grad_norm": 0.14173950270612165, "learning_rate": 8.082425546804009e-06, "loss": 0.3965, "num_tokens": 4050172686.0, "step": 10756 }, { "epoch": 3.940594512893327, "grad_norm": 0.14762421271260714, "learning_rate": 8.079717394191913e-06, "loss": 0.4484, "num_tokens": 4050869009.0, "step": 10757 }, { "epoch": 3.940960930701232, "grad_norm": 0.14912596397902794, "learning_rate": 8.07701002531948e-06, "loss": 0.3967, "num_tokens": 4051560807.0, "step": 10758 }, { "epoch": 3.9413273485091374, "grad_norm": 0.1428157011720083, "learning_rate": 8.074303440339137e-06, "loss": 0.4223, "num_tokens": 4052242383.0, "step": 10759 }, { "epoch": 3.941693766317043, "grad_norm": 0.13208118340174171, "learning_rate": 8.071597639403279e-06, "loss": 0.4187, "num_tokens": 4052973365.0, "step": 10760 }, { "epoch": 3.9420601841249487, "grad_norm": 0.14589197460358622, "learning_rate": 8.068892622664238e-06, "loss": 0.4056, "num_tokens": 4053624658.0, "step": 10761 }, { "epoch": 3.942426601932854, "grad_norm": 0.1324755391230932, "learning_rate": 8.066188390274318e-06, "loss": 0.3955, "num_tokens": 4054372655.0, "step": 10762 }, { "epoch": 3.9427930197407592, "grad_norm": 0.13903315178315687, "learning_rate": 8.063484942385767e-06, "loss": 0.3967, "num_tokens": 4055183495.0, "step": 10763 }, { "epoch": 3.943159437548665, "grad_norm": 0.12845516987311004, "learning_rate": 8.060782279150797e-06, "loss": 0.4014, "num_tokens": 4055995776.0, "step": 10764 }, { "epoch": 3.94352585535657, "grad_norm": 0.13197905000941976, "learning_rate": 8.058080400721572e-06, "loss": 0.3951, "num_tokens": 4056744779.0, "step": 10765 }, { "epoch": 3.943892273164476, "grad_norm": 0.153744576745795, "learning_rate": 8.055379307250222e-06, "loss": 0.4403, "num_tokens": 4057377055.0, "step": 10766 }, { "epoch": 3.944258690972381, "grad_norm": 0.14843600684502176, "learning_rate": 8.052678998888817e-06, "loss": 0.386, "num_tokens": 4058079986.0, "step": 10767 }, { "epoch": 3.944625108780287, "grad_norm": 0.13673638211246525, "learning_rate": 8.049979475789387e-06, "loss": 0.3935, "num_tokens": 4058891163.0, "step": 10768 }, { "epoch": 3.944991526588192, "grad_norm": 0.13710857744400354, "learning_rate": 8.047280738103926e-06, "loss": 0.4167, "num_tokens": 4059684505.0, "step": 10769 }, { "epoch": 3.9453579443960978, "grad_norm": 0.1370392975689825, "learning_rate": 8.044582785984369e-06, "loss": 0.4204, "num_tokens": 4060461623.0, "step": 10770 }, { "epoch": 3.945724362204003, "grad_norm": 0.1384250659368667, "learning_rate": 8.041885619582624e-06, "loss": 0.4134, "num_tokens": 4061261245.0, "step": 10771 }, { "epoch": 3.9460907800119087, "grad_norm": 0.1341954452346986, "learning_rate": 8.03918923905055e-06, "loss": 0.4034, "num_tokens": 4062062769.0, "step": 10772 }, { "epoch": 3.946457197819814, "grad_norm": 0.13610089661215183, "learning_rate": 8.036493644539952e-06, "loss": 0.4022, "num_tokens": 4062837511.0, "step": 10773 }, { "epoch": 3.9468236156277197, "grad_norm": 0.13458925730468324, "learning_rate": 8.033798836202605e-06, "loss": 0.3995, "num_tokens": 4063620045.0, "step": 10774 }, { "epoch": 3.947190033435625, "grad_norm": 0.13636479553937095, "learning_rate": 8.031104814190223e-06, "loss": 0.3882, "num_tokens": 4064399841.0, "step": 10775 }, { "epoch": 3.94755645124353, "grad_norm": 0.1500078955273994, "learning_rate": 8.028411578654484e-06, "loss": 0.4131, "num_tokens": 4065043947.0, "step": 10776 }, { "epoch": 3.947922869051436, "grad_norm": 0.1350551709339732, "learning_rate": 8.025719129747032e-06, "loss": 0.4187, "num_tokens": 4065834753.0, "step": 10777 }, { "epoch": 3.9482892868593416, "grad_norm": 0.14197058106059188, "learning_rate": 8.023027467619448e-06, "loss": 0.4302, "num_tokens": 4066549638.0, "step": 10778 }, { "epoch": 3.948655704667247, "grad_norm": 0.133322538559368, "learning_rate": 8.020336592423285e-06, "loss": 0.4018, "num_tokens": 4067395638.0, "step": 10779 }, { "epoch": 3.949022122475152, "grad_norm": 0.14918843936312193, "learning_rate": 8.017646504310046e-06, "loss": 0.4114, "num_tokens": 4068087906.0, "step": 10780 }, { "epoch": 3.9493885402830577, "grad_norm": 0.14298790172898548, "learning_rate": 8.014957203431179e-06, "loss": 0.4158, "num_tokens": 4068875243.0, "step": 10781 }, { "epoch": 3.9497549580909634, "grad_norm": 0.11521122122103138, "learning_rate": 8.0122686899381e-06, "loss": 0.3683, "num_tokens": 4069822496.0, "step": 10782 }, { "epoch": 3.9501213758988687, "grad_norm": 0.13300059574213924, "learning_rate": 8.009580963982187e-06, "loss": 0.4073, "num_tokens": 4070645606.0, "step": 10783 }, { "epoch": 3.950487793706774, "grad_norm": 0.14262772175402982, "learning_rate": 8.006894025714748e-06, "loss": 0.4348, "num_tokens": 4071381238.0, "step": 10784 }, { "epoch": 3.9508542115146796, "grad_norm": 0.14023350324528663, "learning_rate": 8.00420787528708e-06, "loss": 0.4204, "num_tokens": 4072117997.0, "step": 10785 }, { "epoch": 3.9512206293225853, "grad_norm": 0.13196187932323988, "learning_rate": 8.001522512850407e-06, "loss": 0.3723, "num_tokens": 4072955220.0, "step": 10786 }, { "epoch": 3.9515870471304906, "grad_norm": 0.14507449551664536, "learning_rate": 7.998837938555921e-06, "loss": 0.3848, "num_tokens": 4073593211.0, "step": 10787 }, { "epoch": 3.951953464938396, "grad_norm": 0.12854311730433818, "learning_rate": 7.99615415255478e-06, "loss": 0.3965, "num_tokens": 4074558893.0, "step": 10788 }, { "epoch": 3.9523198827463015, "grad_norm": 0.12924109972846598, "learning_rate": 7.993471154998076e-06, "loss": 0.4303, "num_tokens": 4075350956.0, "step": 10789 }, { "epoch": 3.9526863005542068, "grad_norm": 0.1397154066456824, "learning_rate": 7.990788946036867e-06, "loss": 0.418, "num_tokens": 4076146442.0, "step": 10790 }, { "epoch": 3.9530527183621125, "grad_norm": 0.13229449817167294, "learning_rate": 7.988107525822166e-06, "loss": 0.4253, "num_tokens": 4076951308.0, "step": 10791 }, { "epoch": 3.9534191361700177, "grad_norm": 0.1445722830795326, "learning_rate": 7.985426894504952e-06, "loss": 0.3832, "num_tokens": 4077681965.0, "step": 10792 }, { "epoch": 3.9537855539779234, "grad_norm": 0.15848282118320212, "learning_rate": 7.982747052236137e-06, "loss": 0.4169, "num_tokens": 4078251635.0, "step": 10793 }, { "epoch": 3.9541519717858287, "grad_norm": 0.14881839602670455, "learning_rate": 7.980067999166614e-06, "loss": 0.4279, "num_tokens": 4078950390.0, "step": 10794 }, { "epoch": 3.9545183895937344, "grad_norm": 0.13732274166881564, "learning_rate": 7.977389735447213e-06, "loss": 0.3941, "num_tokens": 4079698451.0, "step": 10795 }, { "epoch": 3.9548848074016396, "grad_norm": 0.14161787187629124, "learning_rate": 7.974712261228723e-06, "loss": 0.3975, "num_tokens": 4080510847.0, "step": 10796 }, { "epoch": 3.9552512252095453, "grad_norm": 0.13135198591908806, "learning_rate": 7.972035576661896e-06, "loss": 0.4186, "num_tokens": 4081221564.0, "step": 10797 }, { "epoch": 3.9556176430174506, "grad_norm": 0.13282332678749015, "learning_rate": 7.969359681897428e-06, "loss": 0.3944, "num_tokens": 4082071012.0, "step": 10798 }, { "epoch": 3.9559840608253563, "grad_norm": 0.14054948493129737, "learning_rate": 7.966684577085979e-06, "loss": 0.4184, "num_tokens": 4082866582.0, "step": 10799 }, { "epoch": 3.9563504786332615, "grad_norm": 0.15193724488641958, "learning_rate": 7.964010262378172e-06, "loss": 0.403, "num_tokens": 4083578704.0, "step": 10800 }, { "epoch": 3.956716896441167, "grad_norm": 0.12049500953899606, "learning_rate": 7.961336737924568e-06, "loss": 0.406, "num_tokens": 4084420455.0, "step": 10801 }, { "epoch": 3.9570833142490724, "grad_norm": 0.13081410481938938, "learning_rate": 7.958664003875695e-06, "loss": 0.4395, "num_tokens": 4085358636.0, "step": 10802 }, { "epoch": 3.957449732056978, "grad_norm": 0.13813205001296505, "learning_rate": 7.955992060382029e-06, "loss": 0.4214, "num_tokens": 4086133437.0, "step": 10803 }, { "epoch": 3.9578161498648834, "grad_norm": 0.13481037227000459, "learning_rate": 7.953320907594004e-06, "loss": 0.4079, "num_tokens": 4086886229.0, "step": 10804 }, { "epoch": 3.9581825676727886, "grad_norm": 0.15076557891511255, "learning_rate": 7.95065054566202e-06, "loss": 0.419, "num_tokens": 4087585633.0, "step": 10805 }, { "epoch": 3.9585489854806943, "grad_norm": 0.13672460623501875, "learning_rate": 7.94798097473642e-06, "loss": 0.4258, "num_tokens": 4088437326.0, "step": 10806 }, { "epoch": 3.9589154032886, "grad_norm": 0.14234962251057737, "learning_rate": 7.945312194967504e-06, "loss": 0.4146, "num_tokens": 4089144103.0, "step": 10807 }, { "epoch": 3.9592818210965053, "grad_norm": 0.12386616405178083, "learning_rate": 7.942644206505533e-06, "loss": 0.4103, "num_tokens": 4090103908.0, "step": 10808 }, { "epoch": 3.9596482389044105, "grad_norm": 0.13226031337394875, "learning_rate": 7.939977009500712e-06, "loss": 0.4109, "num_tokens": 4090993129.0, "step": 10809 }, { "epoch": 3.9600146567123162, "grad_norm": 0.14021935953879858, "learning_rate": 7.937310604103224e-06, "loss": 0.4083, "num_tokens": 4091683973.0, "step": 10810 }, { "epoch": 3.960381074520222, "grad_norm": 0.15350917562864255, "learning_rate": 7.93464499046318e-06, "loss": 0.4101, "num_tokens": 4092320066.0, "step": 10811 }, { "epoch": 3.960747492328127, "grad_norm": 0.1451242523993081, "learning_rate": 7.931980168730662e-06, "loss": 0.388, "num_tokens": 4093081852.0, "step": 10812 }, { "epoch": 3.9611139101360324, "grad_norm": 0.13072274289584782, "learning_rate": 7.92931613905571e-06, "loss": 0.3876, "num_tokens": 4093812906.0, "step": 10813 }, { "epoch": 3.961480327943938, "grad_norm": 0.13785549434739489, "learning_rate": 7.926652901588312e-06, "loss": 0.4469, "num_tokens": 4094605717.0, "step": 10814 }, { "epoch": 3.961846745751844, "grad_norm": 0.13242903744223283, "learning_rate": 7.923990456478405e-06, "loss": 0.4211, "num_tokens": 4095389512.0, "step": 10815 }, { "epoch": 3.962213163559749, "grad_norm": 0.1437405707898514, "learning_rate": 7.921328803875905e-06, "loss": 0.4191, "num_tokens": 4096127469.0, "step": 10816 }, { "epoch": 3.9625795813676543, "grad_norm": 0.1458603389957632, "learning_rate": 7.918667943930658e-06, "loss": 0.4306, "num_tokens": 4096756842.0, "step": 10817 }, { "epoch": 3.96294599917556, "grad_norm": 0.14432763634141785, "learning_rate": 7.916007876792482e-06, "loss": 0.4064, "num_tokens": 4097543361.0, "step": 10818 }, { "epoch": 3.9633124169834653, "grad_norm": 0.14238806873040474, "learning_rate": 7.913348602611133e-06, "loss": 0.3952, "num_tokens": 4098278828.0, "step": 10819 }, { "epoch": 3.963678834791371, "grad_norm": 0.14735884664177043, "learning_rate": 7.910690121536343e-06, "loss": 0.3947, "num_tokens": 4098907304.0, "step": 10820 }, { "epoch": 3.964045252599276, "grad_norm": 0.1543718174934982, "learning_rate": 7.90803243371779e-06, "loss": 0.4028, "num_tokens": 4099568345.0, "step": 10821 }, { "epoch": 3.964411670407182, "grad_norm": 0.14682797779802217, "learning_rate": 7.905375539305104e-06, "loss": 0.4342, "num_tokens": 4100264331.0, "step": 10822 }, { "epoch": 3.964778088215087, "grad_norm": 0.1411571397329282, "learning_rate": 7.902719438447879e-06, "loss": 0.4033, "num_tokens": 4101002552.0, "step": 10823 }, { "epoch": 3.965144506022993, "grad_norm": 0.12232974812771558, "learning_rate": 7.90006413129565e-06, "loss": 0.4053, "num_tokens": 4101904636.0, "step": 10824 }, { "epoch": 3.965510923830898, "grad_norm": 0.14238375459051983, "learning_rate": 7.897409617997918e-06, "loss": 0.432, "num_tokens": 4102615615.0, "step": 10825 }, { "epoch": 3.965877341638804, "grad_norm": 0.12658735834494692, "learning_rate": 7.894755898704139e-06, "loss": 0.39, "num_tokens": 4103420703.0, "step": 10826 }, { "epoch": 3.966243759446709, "grad_norm": 0.13179246138055353, "learning_rate": 7.892102973563727e-06, "loss": 0.3922, "num_tokens": 4104260415.0, "step": 10827 }, { "epoch": 3.9666101772546147, "grad_norm": 0.12242486770145043, "learning_rate": 7.889450842726043e-06, "loss": 0.4145, "num_tokens": 4105147627.0, "step": 10828 }, { "epoch": 3.96697659506252, "grad_norm": 0.14007560332158495, "learning_rate": 7.886799506340408e-06, "loss": 0.4141, "num_tokens": 4105789669.0, "step": 10829 }, { "epoch": 3.9673430128704252, "grad_norm": 0.1425944748761641, "learning_rate": 7.884148964556096e-06, "loss": 0.4261, "num_tokens": 4106609504.0, "step": 10830 }, { "epoch": 3.967709430678331, "grad_norm": 0.13119200907103182, "learning_rate": 7.881499217522335e-06, "loss": 0.3924, "num_tokens": 4107376608.0, "step": 10831 }, { "epoch": 3.9680758484862366, "grad_norm": 0.13700334645428758, "learning_rate": 7.878850265388321e-06, "loss": 0.4054, "num_tokens": 4108113217.0, "step": 10832 }, { "epoch": 3.968442266294142, "grad_norm": 0.12490977922793628, "learning_rate": 7.87620210830318e-06, "loss": 0.4342, "num_tokens": 4109035039.0, "step": 10833 }, { "epoch": 3.968808684102047, "grad_norm": 0.14252043939986694, "learning_rate": 7.873554746416027e-06, "loss": 0.4384, "num_tokens": 4109802701.0, "step": 10834 }, { "epoch": 3.969175101909953, "grad_norm": 0.13947334988597765, "learning_rate": 7.870908179875903e-06, "loss": 0.3803, "num_tokens": 4110555239.0, "step": 10835 }, { "epoch": 3.9695415197178585, "grad_norm": 0.15354274818431393, "learning_rate": 7.868262408831817e-06, "loss": 0.4169, "num_tokens": 4111207646.0, "step": 10836 }, { "epoch": 3.9699079375257638, "grad_norm": 0.13909585207516068, "learning_rate": 7.865617433432723e-06, "loss": 0.3981, "num_tokens": 4111954285.0, "step": 10837 }, { "epoch": 3.970274355333669, "grad_norm": 0.1455415589180617, "learning_rate": 7.862973253827553e-06, "loss": 0.4131, "num_tokens": 4112636648.0, "step": 10838 }, { "epoch": 3.9706407731415747, "grad_norm": 0.13813966471337344, "learning_rate": 7.860329870165169e-06, "loss": 0.3913, "num_tokens": 4113350954.0, "step": 10839 }, { "epoch": 3.9710071909494804, "grad_norm": 0.13277000965783595, "learning_rate": 7.857687282594403e-06, "loss": 0.3959, "num_tokens": 4114165887.0, "step": 10840 }, { "epoch": 3.9713736087573857, "grad_norm": 0.12471917261762264, "learning_rate": 7.855045491264039e-06, "loss": 0.3846, "num_tokens": 4114998183.0, "step": 10841 }, { "epoch": 3.971740026565291, "grad_norm": 0.1470863971187596, "learning_rate": 7.852404496322812e-06, "loss": 0.4231, "num_tokens": 4115652008.0, "step": 10842 }, { "epoch": 3.9721064443731966, "grad_norm": 0.1335271676867278, "learning_rate": 7.849764297919416e-06, "loss": 0.4082, "num_tokens": 4116455486.0, "step": 10843 }, { "epoch": 3.972472862181102, "grad_norm": 0.13851206098979865, "learning_rate": 7.847124896202504e-06, "loss": 0.4019, "num_tokens": 4117215378.0, "step": 10844 }, { "epoch": 3.9728392799890075, "grad_norm": 0.14013130630019144, "learning_rate": 7.844486291320674e-06, "loss": 0.3802, "num_tokens": 4117948365.0, "step": 10845 }, { "epoch": 3.973205697796913, "grad_norm": 0.13798064116948058, "learning_rate": 7.841848483422482e-06, "loss": 0.4123, "num_tokens": 4118683286.0, "step": 10846 }, { "epoch": 3.9735721156048185, "grad_norm": 0.12277358760690489, "learning_rate": 7.83921147265645e-06, "loss": 0.4221, "num_tokens": 4119611579.0, "step": 10847 }, { "epoch": 3.9739385334127237, "grad_norm": 0.12718084167182864, "learning_rate": 7.83657525917104e-06, "loss": 0.3724, "num_tokens": 4120477627.0, "step": 10848 }, { "epoch": 3.9743049512206294, "grad_norm": 0.1364924443822937, "learning_rate": 7.833939843114684e-06, "loss": 0.4197, "num_tokens": 4121298774.0, "step": 10849 }, { "epoch": 3.9746713690285347, "grad_norm": 0.1300412442458436, "learning_rate": 7.831305224635754e-06, "loss": 0.4056, "num_tokens": 4122083243.0, "step": 10850 }, { "epoch": 3.9750377868364404, "grad_norm": 0.11727388624738486, "learning_rate": 7.828671403882588e-06, "loss": 0.3928, "num_tokens": 4123142984.0, "step": 10851 }, { "epoch": 3.9754042046443456, "grad_norm": 0.13393617621182366, "learning_rate": 7.826038381003475e-06, "loss": 0.4036, "num_tokens": 4123949175.0, "step": 10852 }, { "epoch": 3.9757706224522513, "grad_norm": 0.11971725837781749, "learning_rate": 7.823406156146651e-06, "loss": 0.3811, "num_tokens": 4124780428.0, "step": 10853 }, { "epoch": 3.9761370402601566, "grad_norm": 0.14186509410003229, "learning_rate": 7.820774729460328e-06, "loss": 0.3963, "num_tokens": 4125563164.0, "step": 10854 }, { "epoch": 3.9765034580680623, "grad_norm": 0.13135767288588293, "learning_rate": 7.818144101092655e-06, "loss": 0.4092, "num_tokens": 4126398541.0, "step": 10855 }, { "epoch": 3.9768698758759675, "grad_norm": 0.12901917351745285, "learning_rate": 7.815514271191745e-06, "loss": 0.4354, "num_tokens": 4127257042.0, "step": 10856 }, { "epoch": 3.977236293683873, "grad_norm": 0.13966246283317074, "learning_rate": 7.812885239905658e-06, "loss": 0.4111, "num_tokens": 4127994775.0, "step": 10857 }, { "epoch": 3.9776027114917785, "grad_norm": 0.13601979194704628, "learning_rate": 7.810257007382418e-06, "loss": 0.4019, "num_tokens": 4128720856.0, "step": 10858 }, { "epoch": 3.9779691292996837, "grad_norm": 0.13806279775035682, "learning_rate": 7.807629573769992e-06, "loss": 0.4201, "num_tokens": 4129505161.0, "step": 10859 }, { "epoch": 3.9783355471075894, "grad_norm": 0.13657856297871956, "learning_rate": 7.805002939216313e-06, "loss": 0.3798, "num_tokens": 4130202363.0, "step": 10860 }, { "epoch": 3.978701964915495, "grad_norm": 0.13744806911488047, "learning_rate": 7.802377103869274e-06, "loss": 0.4215, "num_tokens": 4131034154.0, "step": 10861 }, { "epoch": 3.9790683827234004, "grad_norm": 0.14383206215466554, "learning_rate": 7.799752067876709e-06, "loss": 0.4036, "num_tokens": 4131798516.0, "step": 10862 }, { "epoch": 3.9794348005313056, "grad_norm": 0.132791612960825, "learning_rate": 7.797127831386411e-06, "loss": 0.3999, "num_tokens": 4132694681.0, "step": 10863 }, { "epoch": 3.9798012183392113, "grad_norm": 0.13330623430133495, "learning_rate": 7.794504394546131e-06, "loss": 0.3962, "num_tokens": 4133467135.0, "step": 10864 }, { "epoch": 3.980167636147117, "grad_norm": 0.15593377810437173, "learning_rate": 7.791881757503571e-06, "loss": 0.4233, "num_tokens": 4134111664.0, "step": 10865 }, { "epoch": 3.9805340539550222, "grad_norm": 0.14973577511416, "learning_rate": 7.789259920406398e-06, "loss": 0.4094, "num_tokens": 4134842224.0, "step": 10866 }, { "epoch": 3.9809004717629275, "grad_norm": 0.1484911457466406, "learning_rate": 7.786638883402216e-06, "loss": 0.4259, "num_tokens": 4135600627.0, "step": 10867 }, { "epoch": 3.981266889570833, "grad_norm": 0.12963766409977187, "learning_rate": 7.784018646638606e-06, "loss": 0.3899, "num_tokens": 4136372395.0, "step": 10868 }, { "epoch": 3.981633307378739, "grad_norm": 0.1376290293012942, "learning_rate": 7.781399210263087e-06, "loss": 0.417, "num_tokens": 4137154783.0, "step": 10869 }, { "epoch": 3.981999725186644, "grad_norm": 0.14856101202903027, "learning_rate": 7.778780574423134e-06, "loss": 0.4273, "num_tokens": 4137895175.0, "step": 10870 }, { "epoch": 3.9823661429945494, "grad_norm": 0.14585139871344474, "learning_rate": 7.77616273926619e-06, "loss": 0.4139, "num_tokens": 4138606686.0, "step": 10871 }, { "epoch": 3.982732560802455, "grad_norm": 0.1478709775747222, "learning_rate": 7.773545704939644e-06, "loss": 0.4021, "num_tokens": 4139266491.0, "step": 10872 }, { "epoch": 3.9830989786103603, "grad_norm": 0.13834098449220428, "learning_rate": 7.770929471590835e-06, "loss": 0.3975, "num_tokens": 4139959603.0, "step": 10873 }, { "epoch": 3.983465396418266, "grad_norm": 0.13864067310654585, "learning_rate": 7.768314039367059e-06, "loss": 0.3799, "num_tokens": 4140704832.0, "step": 10874 }, { "epoch": 3.9838318142261713, "grad_norm": 0.14406088333913303, "learning_rate": 7.765699408415579e-06, "loss": 0.4201, "num_tokens": 4141550474.0, "step": 10875 }, { "epoch": 3.984198232034077, "grad_norm": 0.14584149403666852, "learning_rate": 7.763085578883595e-06, "loss": 0.4103, "num_tokens": 4142218054.0, "step": 10876 }, { "epoch": 3.984564649841982, "grad_norm": 0.13463350389744608, "learning_rate": 7.76047255091828e-06, "loss": 0.4171, "num_tokens": 4143116235.0, "step": 10877 }, { "epoch": 3.984931067649888, "grad_norm": 0.14036109766276522, "learning_rate": 7.75786032466675e-06, "loss": 0.406, "num_tokens": 4143960615.0, "step": 10878 }, { "epoch": 3.985297485457793, "grad_norm": 0.1330657242404053, "learning_rate": 7.755248900276077e-06, "loss": 0.4068, "num_tokens": 4144773136.0, "step": 10879 }, { "epoch": 3.985663903265699, "grad_norm": 0.14989441408495116, "learning_rate": 7.752638277893285e-06, "loss": 0.3725, "num_tokens": 4145498640.0, "step": 10880 }, { "epoch": 3.986030321073604, "grad_norm": 0.12688282818941177, "learning_rate": 7.75002845766536e-06, "loss": 0.3925, "num_tokens": 4146396658.0, "step": 10881 }, { "epoch": 3.98639673888151, "grad_norm": 0.13239987981532286, "learning_rate": 7.747419439739248e-06, "loss": 0.3982, "num_tokens": 4147218901.0, "step": 10882 }, { "epoch": 3.986763156689415, "grad_norm": 0.13245965737299262, "learning_rate": 7.744811224261835e-06, "loss": 0.3743, "num_tokens": 4147954587.0, "step": 10883 }, { "epoch": 3.9871295744973203, "grad_norm": 0.1370162138915451, "learning_rate": 7.74220381137997e-06, "loss": 0.4262, "num_tokens": 4148789985.0, "step": 10884 }, { "epoch": 3.987495992305226, "grad_norm": 0.13118539327221174, "learning_rate": 7.739597201240454e-06, "loss": 0.407, "num_tokens": 4149626091.0, "step": 10885 }, { "epoch": 3.9878624101131317, "grad_norm": 0.12744739027417745, "learning_rate": 7.736991393990046e-06, "loss": 0.4013, "num_tokens": 4150569311.0, "step": 10886 }, { "epoch": 3.988228827921037, "grad_norm": 0.12880252044808668, "learning_rate": 7.73438638977545e-06, "loss": 0.4289, "num_tokens": 4151373995.0, "step": 10887 }, { "epoch": 3.988595245728942, "grad_norm": 0.1420341894497333, "learning_rate": 7.731782188743342e-06, "loss": 0.4051, "num_tokens": 4152058219.0, "step": 10888 }, { "epoch": 3.988961663536848, "grad_norm": 0.13976439271954166, "learning_rate": 7.729178791040348e-06, "loss": 0.394, "num_tokens": 4152797120.0, "step": 10889 }, { "epoch": 3.9893280813447536, "grad_norm": 0.1318185896611007, "learning_rate": 7.726576196813036e-06, "loss": 0.4173, "num_tokens": 4153618247.0, "step": 10890 }, { "epoch": 3.989694499152659, "grad_norm": 0.1355547839365489, "learning_rate": 7.723974406207943e-06, "loss": 0.4213, "num_tokens": 4154391417.0, "step": 10891 }, { "epoch": 3.990060916960564, "grad_norm": 0.13272949097653397, "learning_rate": 7.721373419371545e-06, "loss": 0.4155, "num_tokens": 4155205689.0, "step": 10892 }, { "epoch": 3.99042733476847, "grad_norm": 0.13688813434719366, "learning_rate": 7.718773236450295e-06, "loss": 0.412, "num_tokens": 4156051481.0, "step": 10893 }, { "epoch": 3.9907937525763755, "grad_norm": 0.1385212159632511, "learning_rate": 7.716173857590576e-06, "loss": 0.402, "num_tokens": 4156781167.0, "step": 10894 }, { "epoch": 3.9911601703842807, "grad_norm": 0.13783009636914023, "learning_rate": 7.713575282938754e-06, "loss": 0.4322, "num_tokens": 4157549801.0, "step": 10895 }, { "epoch": 3.991526588192186, "grad_norm": 0.15077754379617178, "learning_rate": 7.71097751264112e-06, "loss": 0.4415, "num_tokens": 4158190673.0, "step": 10896 }, { "epoch": 3.9918930060000917, "grad_norm": 0.1346706256212071, "learning_rate": 7.708380546843943e-06, "loss": 0.4299, "num_tokens": 4159071621.0, "step": 10897 }, { "epoch": 3.992259423807997, "grad_norm": 0.1332985844859147, "learning_rate": 7.705784385693426e-06, "loss": 0.387, "num_tokens": 4159806231.0, "step": 10898 }, { "epoch": 3.9926258416159026, "grad_norm": 0.13462220951234016, "learning_rate": 7.703189029335752e-06, "loss": 0.4342, "num_tokens": 4160480989.0, "step": 10899 }, { "epoch": 3.992992259423808, "grad_norm": 0.13885902162875227, "learning_rate": 7.700594477917037e-06, "loss": 0.3987, "num_tokens": 4161299725.0, "step": 10900 }, { "epoch": 3.9933586772317136, "grad_norm": 0.15339768512144006, "learning_rate": 7.698000731583354e-06, "loss": 0.4502, "num_tokens": 4161913614.0, "step": 10901 }, { "epoch": 3.993725095039619, "grad_norm": 0.13483166544178796, "learning_rate": 7.695407790480749e-06, "loss": 0.4048, "num_tokens": 4162756786.0, "step": 10902 }, { "epoch": 3.9940915128475245, "grad_norm": 0.13562341735479783, "learning_rate": 7.692815654755197e-06, "loss": 0.3988, "num_tokens": 4163501943.0, "step": 10903 }, { "epoch": 3.9944579306554298, "grad_norm": 0.13837977294386095, "learning_rate": 7.690224324552653e-06, "loss": 0.4155, "num_tokens": 4164274705.0, "step": 10904 }, { "epoch": 3.9948243484633355, "grad_norm": 0.13517998894356867, "learning_rate": 7.687633800019005e-06, "loss": 0.4121, "num_tokens": 4165068446.0, "step": 10905 }, { "epoch": 3.9951907662712407, "grad_norm": 0.13091767562077966, "learning_rate": 7.68504408130011e-06, "loss": 0.3949, "num_tokens": 4165905913.0, "step": 10906 }, { "epoch": 3.9955571840791464, "grad_norm": 0.13907350355415646, "learning_rate": 7.682455168541772e-06, "loss": 0.4216, "num_tokens": 4166632182.0, "step": 10907 }, { "epoch": 3.9959236018870516, "grad_norm": 0.13183419013657965, "learning_rate": 7.679867061889745e-06, "loss": 0.4041, "num_tokens": 4167418917.0, "step": 10908 }, { "epoch": 3.9962900196949573, "grad_norm": 0.1418367612513874, "learning_rate": 7.677279761489751e-06, "loss": 0.401, "num_tokens": 4168085432.0, "step": 10909 }, { "epoch": 3.9966564375028626, "grad_norm": 0.14368732603315815, "learning_rate": 7.674693267487466e-06, "loss": 0.4076, "num_tokens": 4168867463.0, "step": 10910 }, { "epoch": 3.9970228553107683, "grad_norm": 0.1369546217167295, "learning_rate": 7.672107580028507e-06, "loss": 0.4213, "num_tokens": 4169595254.0, "step": 10911 }, { "epoch": 3.9973892731186735, "grad_norm": 0.12897784928703068, "learning_rate": 7.669522699258458e-06, "loss": 0.3903, "num_tokens": 4170377491.0, "step": 10912 }, { "epoch": 3.997755690926579, "grad_norm": 0.14461126885934564, "learning_rate": 7.666938625322851e-06, "loss": 0.4025, "num_tokens": 4171092214.0, "step": 10913 }, { "epoch": 3.9981221087344845, "grad_norm": 0.14778202141107424, "learning_rate": 7.664355358367167e-06, "loss": 0.391, "num_tokens": 4171726454.0, "step": 10914 }, { "epoch": 3.99848852654239, "grad_norm": 0.14102799674468536, "learning_rate": 7.661772898536859e-06, "loss": 0.4221, "num_tokens": 4172547703.0, "step": 10915 }, { "epoch": 3.9988549443502954, "grad_norm": 0.14355418309775494, "learning_rate": 7.659191245977322e-06, "loss": 0.4164, "num_tokens": 4173258048.0, "step": 10916 }, { "epoch": 3.9992213621582007, "grad_norm": 0.12689490772899648, "learning_rate": 7.65661040083391e-06, "loss": 0.4005, "num_tokens": 4174166382.0, "step": 10917 }, { "epoch": 3.9995877799661064, "grad_norm": 0.14909355934363416, "learning_rate": 7.654030363251928e-06, "loss": 0.3817, "num_tokens": 4174863690.0, "step": 10918 }, { "epoch": 3.999954197774012, "grad_norm": 0.14045755768013726, "learning_rate": 7.651451133376636e-06, "loss": 0.4212, "num_tokens": 4175591822.0, "step": 10919 }, { "epoch": 4.0, "grad_norm": 0.14045755768013726, "learning_rate": 7.648872711353245e-06, "loss": 0.4788, "num_tokens": 4175638850.0, "step": 10920 }, { "epoch": 4.000366417807905, "grad_norm": 0.5615678270351072, "learning_rate": 7.64629509732694e-06, "loss": 0.4034, "num_tokens": 4176398431.0, "step": 10921 }, { "epoch": 4.0007328356158105, "grad_norm": 0.20249440313265143, "learning_rate": 7.643718291442828e-06, "loss": 0.3593, "num_tokens": 4177108491.0, "step": 10922 }, { "epoch": 4.001099253423717, "grad_norm": 0.17279825554798056, "learning_rate": 7.641142293846e-06, "loss": 0.4165, "num_tokens": 4177863362.0, "step": 10923 }, { "epoch": 4.001465671231622, "grad_norm": 0.1698986329002521, "learning_rate": 7.63856710468149e-06, "loss": 0.3707, "num_tokens": 4178606429.0, "step": 10924 }, { "epoch": 4.001832089039527, "grad_norm": 0.18472066977593768, "learning_rate": 7.63599272409428e-06, "loss": 0.3744, "num_tokens": 4179341925.0, "step": 10925 }, { "epoch": 4.002198506847432, "grad_norm": 0.18185349232756975, "learning_rate": 7.633419152229316e-06, "loss": 0.3439, "num_tokens": 4180086658.0, "step": 10926 }, { "epoch": 4.0025649246553385, "grad_norm": 0.18196679159751805, "learning_rate": 7.6308463892315e-06, "loss": 0.3649, "num_tokens": 4180905092.0, "step": 10927 }, { "epoch": 4.002931342463244, "grad_norm": 0.15134313669421937, "learning_rate": 7.628274435245675e-06, "loss": 0.368, "num_tokens": 4181622279.0, "step": 10928 }, { "epoch": 4.003297760271149, "grad_norm": 0.1606241380088707, "learning_rate": 7.625703290416646e-06, "loss": 0.3577, "num_tokens": 4182307847.0, "step": 10929 }, { "epoch": 4.003664178079054, "grad_norm": 0.2112454194227112, "learning_rate": 7.623132954889183e-06, "loss": 0.3711, "num_tokens": 4183016870.0, "step": 10930 }, { "epoch": 4.00403059588696, "grad_norm": 0.19518328415995106, "learning_rate": 7.620563428807994e-06, "loss": 0.3763, "num_tokens": 4183725739.0, "step": 10931 }, { "epoch": 4.004397013694866, "grad_norm": 0.17454161527676906, "learning_rate": 7.6179947123177534e-06, "loss": 0.3375, "num_tokens": 4184361723.0, "step": 10932 }, { "epoch": 4.004763431502771, "grad_norm": 0.17572934642373325, "learning_rate": 7.615426805563084e-06, "loss": 0.3944, "num_tokens": 4185029458.0, "step": 10933 }, { "epoch": 4.005129849310676, "grad_norm": 0.1705090984142197, "learning_rate": 7.612859708688564e-06, "loss": 0.3734, "num_tokens": 4185807700.0, "step": 10934 }, { "epoch": 4.005496267118582, "grad_norm": 0.1859372546359862, "learning_rate": 7.610293421838718e-06, "loss": 0.3894, "num_tokens": 4186623214.0, "step": 10935 }, { "epoch": 4.005862684926488, "grad_norm": 0.1545453256610198, "learning_rate": 7.607727945158045e-06, "loss": 0.355, "num_tokens": 4187398477.0, "step": 10936 }, { "epoch": 4.006229102734393, "grad_norm": 0.14724177190717655, "learning_rate": 7.605163278790977e-06, "loss": 0.364, "num_tokens": 4188145556.0, "step": 10937 }, { "epoch": 4.006595520542298, "grad_norm": 0.15977559549383857, "learning_rate": 7.602599422881918e-06, "loss": 0.3414, "num_tokens": 4188891727.0, "step": 10938 }, { "epoch": 4.006961938350204, "grad_norm": 0.1411514827622127, "learning_rate": 7.600036377575215e-06, "loss": 0.35, "num_tokens": 4189742962.0, "step": 10939 }, { "epoch": 4.0073283561581095, "grad_norm": 0.15695562647899625, "learning_rate": 7.597474143015173e-06, "loss": 0.3734, "num_tokens": 4190513374.0, "step": 10940 }, { "epoch": 4.007694773966015, "grad_norm": 0.15329370192228944, "learning_rate": 7.594912719346047e-06, "loss": 0.3526, "num_tokens": 4191209746.0, "step": 10941 }, { "epoch": 4.00806119177392, "grad_norm": 0.14729960974930492, "learning_rate": 7.592352106712049e-06, "loss": 0.364, "num_tokens": 4192021694.0, "step": 10942 }, { "epoch": 4.008427609581826, "grad_norm": 0.15742202029392324, "learning_rate": 7.589792305257353e-06, "loss": 0.3842, "num_tokens": 4192862421.0, "step": 10943 }, { "epoch": 4.008794027389731, "grad_norm": 0.16000061934803148, "learning_rate": 7.58723331512608e-06, "loss": 0.372, "num_tokens": 4193625745.0, "step": 10944 }, { "epoch": 4.009160445197637, "grad_norm": 0.1596186040035408, "learning_rate": 7.584675136462307e-06, "loss": 0.3615, "num_tokens": 4194335396.0, "step": 10945 }, { "epoch": 4.009526863005542, "grad_norm": 0.14077696464847816, "learning_rate": 7.58211776941006e-06, "loss": 0.3576, "num_tokens": 4195087739.0, "step": 10946 }, { "epoch": 4.009893280813447, "grad_norm": 0.1522909918355517, "learning_rate": 7.579561214113328e-06, "loss": 0.3782, "num_tokens": 4195951589.0, "step": 10947 }, { "epoch": 4.010259698621353, "grad_norm": 0.15669168963183788, "learning_rate": 7.577005470716044e-06, "loss": 0.3866, "num_tokens": 4196695885.0, "step": 10948 }, { "epoch": 4.0106261164292585, "grad_norm": 0.14001327750452816, "learning_rate": 7.574450539362106e-06, "loss": 0.3867, "num_tokens": 4197531278.0, "step": 10949 }, { "epoch": 4.010992534237164, "grad_norm": 0.15529016679171143, "learning_rate": 7.571896420195366e-06, "loss": 0.3493, "num_tokens": 4198242969.0, "step": 10950 }, { "epoch": 4.011358952045069, "grad_norm": 0.15027587004297172, "learning_rate": 7.569343113359619e-06, "loss": 0.3773, "num_tokens": 4198938515.0, "step": 10951 }, { "epoch": 4.011725369852975, "grad_norm": 0.15546533959978115, "learning_rate": 7.566790618998627e-06, "loss": 0.3673, "num_tokens": 4199631841.0, "step": 10952 }, { "epoch": 4.01209178766088, "grad_norm": 0.13869960212600552, "learning_rate": 7.564238937256091e-06, "loss": 0.368, "num_tokens": 4200569549.0, "step": 10953 }, { "epoch": 4.012458205468786, "grad_norm": 0.16153668725985124, "learning_rate": 7.561688068275687e-06, "loss": 0.3499, "num_tokens": 4201296802.0, "step": 10954 }, { "epoch": 4.012824623276691, "grad_norm": 0.1421875756169137, "learning_rate": 7.5591380122010285e-06, "loss": 0.3583, "num_tokens": 4202111756.0, "step": 10955 }, { "epoch": 4.013191041084597, "grad_norm": 0.1517013892198122, "learning_rate": 7.556588769175686e-06, "loss": 0.3886, "num_tokens": 4202854851.0, "step": 10956 }, { "epoch": 4.013557458892502, "grad_norm": 0.14427749622563932, "learning_rate": 7.554040339343194e-06, "loss": 0.3733, "num_tokens": 4203640697.0, "step": 10957 }, { "epoch": 4.0139238767004075, "grad_norm": 0.14191392541008782, "learning_rate": 7.551492722847029e-06, "loss": 0.3491, "num_tokens": 4204485672.0, "step": 10958 }, { "epoch": 4.014290294508313, "grad_norm": 0.14678017567386056, "learning_rate": 7.548945919830627e-06, "loss": 0.3816, "num_tokens": 4205254198.0, "step": 10959 }, { "epoch": 4.014656712316219, "grad_norm": 0.147602814385427, "learning_rate": 7.546399930437382e-06, "loss": 0.3687, "num_tokens": 4206016860.0, "step": 10960 }, { "epoch": 4.015023130124124, "grad_norm": 0.1444457203386432, "learning_rate": 7.5438547548106375e-06, "loss": 0.3524, "num_tokens": 4206701159.0, "step": 10961 }, { "epoch": 4.015389547932029, "grad_norm": 0.1633035023116164, "learning_rate": 7.541310393093688e-06, "loss": 0.3492, "num_tokens": 4207368493.0, "step": 10962 }, { "epoch": 4.015755965739935, "grad_norm": 0.150485693319318, "learning_rate": 7.538766845429786e-06, "loss": 0.3847, "num_tokens": 4208107430.0, "step": 10963 }, { "epoch": 4.016122383547841, "grad_norm": 0.1425082299642064, "learning_rate": 7.536224111962138e-06, "loss": 0.3526, "num_tokens": 4208868595.0, "step": 10964 }, { "epoch": 4.016488801355746, "grad_norm": 0.1596538354177553, "learning_rate": 7.533682192833913e-06, "loss": 0.3728, "num_tokens": 4209557234.0, "step": 10965 }, { "epoch": 4.016855219163651, "grad_norm": 0.14960692396852066, "learning_rate": 7.531141088188223e-06, "loss": 0.3716, "num_tokens": 4210330156.0, "step": 10966 }, { "epoch": 4.0172216369715565, "grad_norm": 0.13825181721193588, "learning_rate": 7.5286007981681334e-06, "loss": 0.3838, "num_tokens": 4211170633.0, "step": 10967 }, { "epoch": 4.017588054779463, "grad_norm": 0.15288944387918196, "learning_rate": 7.526061322916669e-06, "loss": 0.3606, "num_tokens": 4211946813.0, "step": 10968 }, { "epoch": 4.017954472587368, "grad_norm": 0.15427694897774544, "learning_rate": 7.5235226625768035e-06, "loss": 0.3805, "num_tokens": 4212658039.0, "step": 10969 }, { "epoch": 4.018320890395273, "grad_norm": 0.17421874818044314, "learning_rate": 7.5209848172914725e-06, "loss": 0.4005, "num_tokens": 4213366243.0, "step": 10970 }, { "epoch": 4.018687308203178, "grad_norm": 0.14713837273432898, "learning_rate": 7.5184477872035654e-06, "loss": 0.3726, "num_tokens": 4214110175.0, "step": 10971 }, { "epoch": 4.019053726011085, "grad_norm": 0.153191393545158, "learning_rate": 7.515911572455918e-06, "loss": 0.3685, "num_tokens": 4214856024.0, "step": 10972 }, { "epoch": 4.01942014381899, "grad_norm": 0.1443771100158673, "learning_rate": 7.513376173191327e-06, "loss": 0.3694, "num_tokens": 4215607791.0, "step": 10973 }, { "epoch": 4.019786561626895, "grad_norm": 0.15438673871329422, "learning_rate": 7.510841589552536e-06, "loss": 0.347, "num_tokens": 4216338671.0, "step": 10974 }, { "epoch": 4.0201529794348, "grad_norm": 0.14504736012276304, "learning_rate": 7.5083078216822444e-06, "loss": 0.3784, "num_tokens": 4217154850.0, "step": 10975 }, { "epoch": 4.020519397242706, "grad_norm": 0.14837775336614303, "learning_rate": 7.505774869723119e-06, "loss": 0.3632, "num_tokens": 4218004274.0, "step": 10976 }, { "epoch": 4.020885815050612, "grad_norm": 0.150745116861637, "learning_rate": 7.503242733817759e-06, "loss": 0.3786, "num_tokens": 4218799857.0, "step": 10977 }, { "epoch": 4.021252232858517, "grad_norm": 0.14344056445360553, "learning_rate": 7.500711414108739e-06, "loss": 0.3607, "num_tokens": 4219535596.0, "step": 10978 }, { "epoch": 4.021618650666422, "grad_norm": 0.13733068428288864, "learning_rate": 7.498180910738573e-06, "loss": 0.3514, "num_tokens": 4220422321.0, "step": 10979 }, { "epoch": 4.0219850684743275, "grad_norm": 0.15337504129823404, "learning_rate": 7.4956512238497314e-06, "loss": 0.3226, "num_tokens": 4221080382.0, "step": 10980 }, { "epoch": 4.022351486282234, "grad_norm": 0.14722221530472177, "learning_rate": 7.493122353584639e-06, "loss": 0.387, "num_tokens": 4221770122.0, "step": 10981 }, { "epoch": 4.022717904090139, "grad_norm": 0.15282732145686104, "learning_rate": 7.490594300085683e-06, "loss": 0.3683, "num_tokens": 4222495459.0, "step": 10982 }, { "epoch": 4.023084321898044, "grad_norm": 0.1510888831234609, "learning_rate": 7.488067063495192e-06, "loss": 0.3632, "num_tokens": 4223197963.0, "step": 10983 }, { "epoch": 4.023450739705949, "grad_norm": 0.1502845603112606, "learning_rate": 7.485540643955453e-06, "loss": 0.3863, "num_tokens": 4224039529.0, "step": 10984 }, { "epoch": 4.0238171575138555, "grad_norm": 0.13903593752337895, "learning_rate": 7.483015041608718e-06, "loss": 0.3394, "num_tokens": 4224854165.0, "step": 10985 }, { "epoch": 4.024183575321761, "grad_norm": 0.14200589212922982, "learning_rate": 7.48049025659717e-06, "loss": 0.333, "num_tokens": 4225610604.0, "step": 10986 }, { "epoch": 4.024549993129666, "grad_norm": 0.1484671724954071, "learning_rate": 7.477966289062975e-06, "loss": 0.3591, "num_tokens": 4226464805.0, "step": 10987 }, { "epoch": 4.024916410937571, "grad_norm": 0.13988473509810256, "learning_rate": 7.475443139148224e-06, "loss": 0.3542, "num_tokens": 4227271993.0, "step": 10988 }, { "epoch": 4.025282828745477, "grad_norm": 0.142194891599292, "learning_rate": 7.472920806994985e-06, "loss": 0.3636, "num_tokens": 4228025467.0, "step": 10989 }, { "epoch": 4.025649246553383, "grad_norm": 0.15659880747641547, "learning_rate": 7.4703992927452595e-06, "loss": 0.3761, "num_tokens": 4228718995.0, "step": 10990 }, { "epoch": 4.026015664361288, "grad_norm": 0.15703036287380015, "learning_rate": 7.467878596541025e-06, "loss": 0.3501, "num_tokens": 4229411950.0, "step": 10991 }, { "epoch": 4.026382082169193, "grad_norm": 0.14055418452172394, "learning_rate": 7.465358718524194e-06, "loss": 0.3923, "num_tokens": 4230283186.0, "step": 10992 }, { "epoch": 4.026748499977099, "grad_norm": 0.14190283199734363, "learning_rate": 7.4628396588366445e-06, "loss": 0.3416, "num_tokens": 4231073408.0, "step": 10993 }, { "epoch": 4.0271149177850045, "grad_norm": 0.14157294140234009, "learning_rate": 7.460321417620207e-06, "loss": 0.3483, "num_tokens": 4231843700.0, "step": 10994 }, { "epoch": 4.02748133559291, "grad_norm": 0.15666861565578638, "learning_rate": 7.457803995016659e-06, "loss": 0.4059, "num_tokens": 4232515895.0, "step": 10995 }, { "epoch": 4.027847753400815, "grad_norm": 0.161842430129606, "learning_rate": 7.455287391167737e-06, "loss": 0.3901, "num_tokens": 4233211866.0, "step": 10996 }, { "epoch": 4.028214171208721, "grad_norm": 0.15899929003806476, "learning_rate": 7.4527716062151275e-06, "loss": 0.3633, "num_tokens": 4233946323.0, "step": 10997 }, { "epoch": 4.028580589016626, "grad_norm": 0.1452714485074158, "learning_rate": 7.450256640300477e-06, "loss": 0.398, "num_tokens": 4234817140.0, "step": 10998 }, { "epoch": 4.028947006824532, "grad_norm": 0.14285689751838765, "learning_rate": 7.447742493565391e-06, "loss": 0.3988, "num_tokens": 4235575824.0, "step": 10999 }, { "epoch": 4.029313424632437, "grad_norm": 0.14369826106016853, "learning_rate": 7.445229166151411e-06, "loss": 0.3789, "num_tokens": 4236365086.0, "step": 11000 }, { "epoch": 4.029679842440342, "grad_norm": 0.15515289372885122, "learning_rate": 7.442716658200046e-06, "loss": 0.3469, "num_tokens": 4237111476.0, "step": 11001 }, { "epoch": 4.030046260248248, "grad_norm": 0.144415568990116, "learning_rate": 7.4402049698527556e-06, "loss": 0.3646, "num_tokens": 4237868902.0, "step": 11002 }, { "epoch": 4.0304126780561536, "grad_norm": 0.1430674197614954, "learning_rate": 7.437694101250949e-06, "loss": 0.3851, "num_tokens": 4238631642.0, "step": 11003 }, { "epoch": 4.030779095864059, "grad_norm": 0.15516873434309542, "learning_rate": 7.435184052535997e-06, "loss": 0.3907, "num_tokens": 4239310572.0, "step": 11004 }, { "epoch": 4.031145513671964, "grad_norm": 0.14997797128140403, "learning_rate": 7.432674823849217e-06, "loss": 0.3618, "num_tokens": 4240120525.0, "step": 11005 }, { "epoch": 4.03151193147987, "grad_norm": 0.14072779633032664, "learning_rate": 7.430166415331892e-06, "loss": 0.3634, "num_tokens": 4241001095.0, "step": 11006 }, { "epoch": 4.031878349287775, "grad_norm": 0.1442850012557405, "learning_rate": 7.427658827125244e-06, "loss": 0.3745, "num_tokens": 4241760701.0, "step": 11007 }, { "epoch": 4.032244767095681, "grad_norm": 0.15263822055472479, "learning_rate": 7.4251520593704485e-06, "loss": 0.3588, "num_tokens": 4242503541.0, "step": 11008 }, { "epoch": 4.032611184903586, "grad_norm": 0.15231917414824722, "learning_rate": 7.4226461122086525e-06, "loss": 0.3466, "num_tokens": 4243380737.0, "step": 11009 }, { "epoch": 4.032977602711492, "grad_norm": 0.13636977742673304, "learning_rate": 7.4201409857809434e-06, "loss": 0.3733, "num_tokens": 4244135925.0, "step": 11010 }, { "epoch": 4.033344020519397, "grad_norm": 0.14213792910470519, "learning_rate": 7.417636680228362e-06, "loss": 0.3616, "num_tokens": 4245022074.0, "step": 11011 }, { "epoch": 4.033710438327303, "grad_norm": 0.14054986712886675, "learning_rate": 7.415133195691907e-06, "loss": 0.354, "num_tokens": 4245836562.0, "step": 11012 }, { "epoch": 4.034076856135208, "grad_norm": 0.14758942533910546, "learning_rate": 7.4126305323125325e-06, "loss": 0.3537, "num_tokens": 4246707486.0, "step": 11013 }, { "epoch": 4.034443273943114, "grad_norm": 0.15229954546391064, "learning_rate": 7.410128690231133e-06, "loss": 0.3839, "num_tokens": 4247448553.0, "step": 11014 }, { "epoch": 4.034809691751019, "grad_norm": 0.13806427227574228, "learning_rate": 7.4076276695885815e-06, "loss": 0.3392, "num_tokens": 4248220022.0, "step": 11015 }, { "epoch": 4.0351761095589245, "grad_norm": 0.16192543025032585, "learning_rate": 7.4051274705256834e-06, "loss": 0.3579, "num_tokens": 4248875775.0, "step": 11016 }, { "epoch": 4.03554252736683, "grad_norm": 0.1472932021595326, "learning_rate": 7.402628093183206e-06, "loss": 0.3514, "num_tokens": 4249680071.0, "step": 11017 }, { "epoch": 4.035908945174736, "grad_norm": 0.1493925058377855, "learning_rate": 7.400129537701861e-06, "loss": 0.3807, "num_tokens": 4250405668.0, "step": 11018 }, { "epoch": 4.036275362982641, "grad_norm": 0.14275747910026299, "learning_rate": 7.397631804222336e-06, "loss": 0.3703, "num_tokens": 4251243009.0, "step": 11019 }, { "epoch": 4.036641780790546, "grad_norm": 0.13868456721101977, "learning_rate": 7.395134892885247e-06, "loss": 0.3542, "num_tokens": 4252023809.0, "step": 11020 }, { "epoch": 4.037008198598452, "grad_norm": 0.15569821194151767, "learning_rate": 7.3926388038311845e-06, "loss": 0.3996, "num_tokens": 4252697891.0, "step": 11021 }, { "epoch": 4.037374616406358, "grad_norm": 0.16150457841595856, "learning_rate": 7.39014353720068e-06, "loss": 0.385, "num_tokens": 4253424048.0, "step": 11022 }, { "epoch": 4.037741034214263, "grad_norm": 0.15075604161481448, "learning_rate": 7.3876490931342195e-06, "loss": 0.37, "num_tokens": 4254174154.0, "step": 11023 }, { "epoch": 4.038107452022168, "grad_norm": 0.15317769834125436, "learning_rate": 7.385155471772241e-06, "loss": 0.3903, "num_tokens": 4254939062.0, "step": 11024 }, { "epoch": 4.0384738698300735, "grad_norm": 0.15553609454484696, "learning_rate": 7.382662673255147e-06, "loss": 0.352, "num_tokens": 4255704924.0, "step": 11025 }, { "epoch": 4.03884028763798, "grad_norm": 0.13869652146591938, "learning_rate": 7.38017069772329e-06, "loss": 0.3617, "num_tokens": 4256572789.0, "step": 11026 }, { "epoch": 4.039206705445885, "grad_norm": 0.14707540863341867, "learning_rate": 7.377679545316969e-06, "loss": 0.3605, "num_tokens": 4257285810.0, "step": 11027 }, { "epoch": 4.03957312325379, "grad_norm": 0.13971511556564983, "learning_rate": 7.3751892161764395e-06, "loss": 0.3755, "num_tokens": 4258092618.0, "step": 11028 }, { "epoch": 4.039939541061695, "grad_norm": 0.1573847269963395, "learning_rate": 7.3726997104419155e-06, "loss": 0.3571, "num_tokens": 4258799521.0, "step": 11029 }, { "epoch": 4.040305958869601, "grad_norm": 0.168050428168057, "learning_rate": 7.3702110282535576e-06, "loss": 0.3763, "num_tokens": 4259439091.0, "step": 11030 }, { "epoch": 4.040672376677507, "grad_norm": 0.1578472825019834, "learning_rate": 7.36772316975148e-06, "loss": 0.3664, "num_tokens": 4260170733.0, "step": 11031 }, { "epoch": 4.041038794485412, "grad_norm": 0.1446746564012249, "learning_rate": 7.365236135075759e-06, "loss": 0.364, "num_tokens": 4260905987.0, "step": 11032 }, { "epoch": 4.041405212293317, "grad_norm": 0.1536149047608413, "learning_rate": 7.362749924366426e-06, "loss": 0.3642, "num_tokens": 4261590866.0, "step": 11033 }, { "epoch": 4.0417716301012225, "grad_norm": 0.1683446341049728, "learning_rate": 7.36026453776345e-06, "loss": 0.4092, "num_tokens": 4262281329.0, "step": 11034 }, { "epoch": 4.042138047909129, "grad_norm": 0.15344949816713405, "learning_rate": 7.357779975406769e-06, "loss": 0.3565, "num_tokens": 4263099892.0, "step": 11035 }, { "epoch": 4.042504465717034, "grad_norm": 0.1408150769871818, "learning_rate": 7.355296237436261e-06, "loss": 0.3494, "num_tokens": 4263894085.0, "step": 11036 }, { "epoch": 4.042870883524939, "grad_norm": 0.14085355151621923, "learning_rate": 7.352813323991775e-06, "loss": 0.3631, "num_tokens": 4264697093.0, "step": 11037 }, { "epoch": 4.043237301332844, "grad_norm": 0.14760425123079007, "learning_rate": 7.3503312352131e-06, "loss": 0.3596, "num_tokens": 4265454647.0, "step": 11038 }, { "epoch": 4.043603719140751, "grad_norm": 0.15198915425607112, "learning_rate": 7.347849971239977e-06, "loss": 0.3527, "num_tokens": 4266252716.0, "step": 11039 }, { "epoch": 4.043970136948656, "grad_norm": 0.14478160681099256, "learning_rate": 7.3453695322121166e-06, "loss": 0.3339, "num_tokens": 4267064923.0, "step": 11040 }, { "epoch": 4.044336554756561, "grad_norm": 0.13851787886303224, "learning_rate": 7.342889918269167e-06, "loss": 0.3414, "num_tokens": 4267878882.0, "step": 11041 }, { "epoch": 4.044702972564466, "grad_norm": 0.1476971159192775, "learning_rate": 7.340411129550728e-06, "loss": 0.3595, "num_tokens": 4268597005.0, "step": 11042 }, { "epoch": 4.0450693903723725, "grad_norm": 0.1665544256233841, "learning_rate": 7.337933166196377e-06, "loss": 0.3934, "num_tokens": 4269255489.0, "step": 11043 }, { "epoch": 4.045435808180278, "grad_norm": 0.1405895539766762, "learning_rate": 7.335456028345615e-06, "loss": 0.3831, "num_tokens": 4270210743.0, "step": 11044 }, { "epoch": 4.045802225988183, "grad_norm": 0.15499942714925982, "learning_rate": 7.3329797161379135e-06, "loss": 0.3886, "num_tokens": 4271027521.0, "step": 11045 }, { "epoch": 4.046168643796088, "grad_norm": 0.14515683381960587, "learning_rate": 7.330504229712697e-06, "loss": 0.3681, "num_tokens": 4271909399.0, "step": 11046 }, { "epoch": 4.046535061603994, "grad_norm": 0.1456154670034025, "learning_rate": 7.328029569209334e-06, "loss": 0.36, "num_tokens": 4272604054.0, "step": 11047 }, { "epoch": 4.0469014794119, "grad_norm": 0.14948364705371892, "learning_rate": 7.32555573476716e-06, "loss": 0.3653, "num_tokens": 4273440875.0, "step": 11048 }, { "epoch": 4.047267897219805, "grad_norm": 0.1515553668896924, "learning_rate": 7.323082726525452e-06, "loss": 0.4015, "num_tokens": 4274226028.0, "step": 11049 }, { "epoch": 4.04763431502771, "grad_norm": 0.14177429807154704, "learning_rate": 7.320610544623448e-06, "loss": 0.3861, "num_tokens": 4275080607.0, "step": 11050 }, { "epoch": 4.048000732835616, "grad_norm": 0.14157057412293786, "learning_rate": 7.318139189200335e-06, "loss": 0.3637, "num_tokens": 4275915672.0, "step": 11051 }, { "epoch": 4.0483671506435215, "grad_norm": 0.13869919305198894, "learning_rate": 7.315668660395251e-06, "loss": 0.3603, "num_tokens": 4276766316.0, "step": 11052 }, { "epoch": 4.048733568451427, "grad_norm": 0.16186477126500248, "learning_rate": 7.313198958347297e-06, "loss": 0.366, "num_tokens": 4277468320.0, "step": 11053 }, { "epoch": 4.049099986259332, "grad_norm": 0.1484064607251117, "learning_rate": 7.310730083195524e-06, "loss": 0.3756, "num_tokens": 4278238721.0, "step": 11054 }, { "epoch": 4.049466404067237, "grad_norm": 0.15102111903978155, "learning_rate": 7.308262035078935e-06, "loss": 0.382, "num_tokens": 4279031092.0, "step": 11055 }, { "epoch": 4.049832821875143, "grad_norm": 0.14054981293336333, "learning_rate": 7.3057948141364795e-06, "loss": 0.3751, "num_tokens": 4279835334.0, "step": 11056 }, { "epoch": 4.050199239683049, "grad_norm": 0.15096573981145814, "learning_rate": 7.303328420507072e-06, "loss": 0.3736, "num_tokens": 4280604976.0, "step": 11057 }, { "epoch": 4.050565657490954, "grad_norm": 0.14215230020270878, "learning_rate": 7.300862854329569e-06, "loss": 0.3776, "num_tokens": 4281362801.0, "step": 11058 }, { "epoch": 4.050932075298859, "grad_norm": 0.15641636117989652, "learning_rate": 7.298398115742798e-06, "loss": 0.3525, "num_tokens": 4282108870.0, "step": 11059 }, { "epoch": 4.051298493106765, "grad_norm": 0.1432091045647732, "learning_rate": 7.295934204885516e-06, "loss": 0.3673, "num_tokens": 4283008391.0, "step": 11060 }, { "epoch": 4.0516649109146705, "grad_norm": 0.13588708208084535, "learning_rate": 7.2934711218964585e-06, "loss": 0.3347, "num_tokens": 4283860423.0, "step": 11061 }, { "epoch": 4.052031328722576, "grad_norm": 0.1361880881739578, "learning_rate": 7.291008866914293e-06, "loss": 0.3597, "num_tokens": 4284653004.0, "step": 11062 }, { "epoch": 4.052397746530481, "grad_norm": 0.15331282082308373, "learning_rate": 7.288547440077653e-06, "loss": 0.3416, "num_tokens": 4285417023.0, "step": 11063 }, { "epoch": 4.052764164338387, "grad_norm": 0.12911991870092426, "learning_rate": 7.2860868415251185e-06, "loss": 0.3709, "num_tokens": 4286345195.0, "step": 11064 }, { "epoch": 4.053130582146292, "grad_norm": 0.14984053725133759, "learning_rate": 7.283627071395234e-06, "loss": 0.3887, "num_tokens": 4287055437.0, "step": 11065 }, { "epoch": 4.053496999954198, "grad_norm": 0.16038122588878606, "learning_rate": 7.281168129826476e-06, "loss": 0.3634, "num_tokens": 4287802162.0, "step": 11066 }, { "epoch": 4.053863417762103, "grad_norm": 0.147589101574098, "learning_rate": 7.278710016957301e-06, "loss": 0.3462, "num_tokens": 4288584351.0, "step": 11067 }, { "epoch": 4.054229835570009, "grad_norm": 0.14145528003119887, "learning_rate": 7.276252732926104e-06, "loss": 0.351, "num_tokens": 4289388527.0, "step": 11068 }, { "epoch": 4.054596253377914, "grad_norm": 0.14348002020531306, "learning_rate": 7.273796277871222e-06, "loss": 0.3818, "num_tokens": 4290199961.0, "step": 11069 }, { "epoch": 4.0549626711858195, "grad_norm": 0.16438844737286806, "learning_rate": 7.271340651930976e-06, "loss": 0.3993, "num_tokens": 4290931853.0, "step": 11070 }, { "epoch": 4.055329088993725, "grad_norm": 0.15144224641576456, "learning_rate": 7.268885855243613e-06, "loss": 0.3815, "num_tokens": 4291618594.0, "step": 11071 }, { "epoch": 4.055695506801631, "grad_norm": 0.15669194935143813, "learning_rate": 7.266431887947345e-06, "loss": 0.3817, "num_tokens": 4292390777.0, "step": 11072 }, { "epoch": 4.056061924609536, "grad_norm": 0.13922461682916543, "learning_rate": 7.2639787501803295e-06, "loss": 0.3677, "num_tokens": 4293217451.0, "step": 11073 }, { "epoch": 4.056428342417441, "grad_norm": 0.14340850416807735, "learning_rate": 7.261526442080693e-06, "loss": 0.3429, "num_tokens": 4294018312.0, "step": 11074 }, { "epoch": 4.056794760225347, "grad_norm": 0.1439000114633739, "learning_rate": 7.259074963786497e-06, "loss": 0.3469, "num_tokens": 4294772680.0, "step": 11075 }, { "epoch": 4.057161178033253, "grad_norm": 0.159609147639435, "learning_rate": 7.256624315435772e-06, "loss": 0.3569, "num_tokens": 4295454855.0, "step": 11076 }, { "epoch": 4.057527595841158, "grad_norm": 0.1596117144777813, "learning_rate": 7.25417449716649e-06, "loss": 0.3516, "num_tokens": 4296116095.0, "step": 11077 }, { "epoch": 4.057894013649063, "grad_norm": 0.1667653826795939, "learning_rate": 7.251725509116582e-06, "loss": 0.3862, "num_tokens": 4296804229.0, "step": 11078 }, { "epoch": 4.058260431456969, "grad_norm": 0.14822539897929835, "learning_rate": 7.249277351423925e-06, "loss": 0.4084, "num_tokens": 4297619991.0, "step": 11079 }, { "epoch": 4.058626849264874, "grad_norm": 0.15189217235307784, "learning_rate": 7.246830024226361e-06, "loss": 0.3572, "num_tokens": 4298381539.0, "step": 11080 }, { "epoch": 4.05899326707278, "grad_norm": 0.15967431724472766, "learning_rate": 7.244383527661682e-06, "loss": 0.3919, "num_tokens": 4299099384.0, "step": 11081 }, { "epoch": 4.059359684880685, "grad_norm": 0.15237789474383284, "learning_rate": 7.2419378618676295e-06, "loss": 0.3755, "num_tokens": 4299844083.0, "step": 11082 }, { "epoch": 4.0597261026885905, "grad_norm": 0.14897919946282198, "learning_rate": 7.2394930269818965e-06, "loss": 0.3503, "num_tokens": 4300589907.0, "step": 11083 }, { "epoch": 4.060092520496496, "grad_norm": 0.14810980436078197, "learning_rate": 7.2370490231421296e-06, "loss": 0.3555, "num_tokens": 4301326944.0, "step": 11084 }, { "epoch": 4.060458938304402, "grad_norm": 0.15192256604205562, "learning_rate": 7.2346058504859386e-06, "loss": 0.3579, "num_tokens": 4302051869.0, "step": 11085 }, { "epoch": 4.060825356112307, "grad_norm": 0.15928290646059312, "learning_rate": 7.232163509150867e-06, "loss": 0.3675, "num_tokens": 4302823828.0, "step": 11086 }, { "epoch": 4.061191773920212, "grad_norm": 0.1448512153168942, "learning_rate": 7.229721999274433e-06, "loss": 0.3405, "num_tokens": 4303490392.0, "step": 11087 }, { "epoch": 4.061558191728118, "grad_norm": 0.14894897267793172, "learning_rate": 7.227281320994101e-06, "loss": 0.3704, "num_tokens": 4304316046.0, "step": 11088 }, { "epoch": 4.061924609536024, "grad_norm": 0.1393480689041161, "learning_rate": 7.224841474447281e-06, "loss": 0.3587, "num_tokens": 4305132408.0, "step": 11089 }, { "epoch": 4.062291027343929, "grad_norm": 0.15003425173200244, "learning_rate": 7.222402459771341e-06, "loss": 0.3774, "num_tokens": 4305939412.0, "step": 11090 }, { "epoch": 4.062657445151834, "grad_norm": 0.16027131816205073, "learning_rate": 7.219964277103604e-06, "loss": 0.3421, "num_tokens": 4306577146.0, "step": 11091 }, { "epoch": 4.0630238629597395, "grad_norm": 0.15217395883248858, "learning_rate": 7.2175269265813394e-06, "loss": 0.3662, "num_tokens": 4307342066.0, "step": 11092 }, { "epoch": 4.063390280767646, "grad_norm": 0.13985390138371814, "learning_rate": 7.2150904083417826e-06, "loss": 0.3461, "num_tokens": 4308138683.0, "step": 11093 }, { "epoch": 4.063756698575551, "grad_norm": 0.16366403363114543, "learning_rate": 7.21265472252211e-06, "loss": 0.3714, "num_tokens": 4308819611.0, "step": 11094 }, { "epoch": 4.064123116383456, "grad_norm": 0.14916441458061563, "learning_rate": 7.210219869259458e-06, "loss": 0.3722, "num_tokens": 4309669187.0, "step": 11095 }, { "epoch": 4.064489534191361, "grad_norm": 0.15659384566895135, "learning_rate": 7.207785848690914e-06, "loss": 0.3415, "num_tokens": 4310404890.0, "step": 11096 }, { "epoch": 4.0648559519992675, "grad_norm": 0.14142476918012073, "learning_rate": 7.205352660953513e-06, "loss": 0.3884, "num_tokens": 4311279074.0, "step": 11097 }, { "epoch": 4.065222369807173, "grad_norm": 0.1486549792754081, "learning_rate": 7.202920306184253e-06, "loss": 0.3846, "num_tokens": 4312042120.0, "step": 11098 }, { "epoch": 4.065588787615078, "grad_norm": 0.14251733625588653, "learning_rate": 7.200488784520082e-06, "loss": 0.3616, "num_tokens": 4312842656.0, "step": 11099 }, { "epoch": 4.065955205422983, "grad_norm": 0.15063928306555469, "learning_rate": 7.198058096097891e-06, "loss": 0.3673, "num_tokens": 4313685332.0, "step": 11100 }, { "epoch": 4.066321623230889, "grad_norm": 0.1483815337839596, "learning_rate": 7.195628241054544e-06, "loss": 0.4151, "num_tokens": 4314388846.0, "step": 11101 }, { "epoch": 4.066688041038795, "grad_norm": 0.15697977476770697, "learning_rate": 7.193199219526841e-06, "loss": 0.3501, "num_tokens": 4315158099.0, "step": 11102 }, { "epoch": 4.0670544588467, "grad_norm": 0.1455412372071258, "learning_rate": 7.1907710316515355e-06, "loss": 0.3917, "num_tokens": 4315994234.0, "step": 11103 }, { "epoch": 4.067420876654605, "grad_norm": 0.151755783233286, "learning_rate": 7.1883436775653515e-06, "loss": 0.3325, "num_tokens": 4316651459.0, "step": 11104 }, { "epoch": 4.067787294462511, "grad_norm": 0.14772920343064658, "learning_rate": 7.185917157404945e-06, "loss": 0.3876, "num_tokens": 4317389758.0, "step": 11105 }, { "epoch": 4.0681537122704166, "grad_norm": 0.1619984720397729, "learning_rate": 7.183491471306936e-06, "loss": 0.3775, "num_tokens": 4318133991.0, "step": 11106 }, { "epoch": 4.068520130078322, "grad_norm": 0.14040429302686852, "learning_rate": 7.18106661940789e-06, "loss": 0.3551, "num_tokens": 4318966735.0, "step": 11107 }, { "epoch": 4.068886547886227, "grad_norm": 0.14190949654760673, "learning_rate": 7.1786426018443386e-06, "loss": 0.3592, "num_tokens": 4319836371.0, "step": 11108 }, { "epoch": 4.069252965694132, "grad_norm": 0.13850853900062618, "learning_rate": 7.1762194187527615e-06, "loss": 0.3604, "num_tokens": 4320635441.0, "step": 11109 }, { "epoch": 4.0696193835020384, "grad_norm": 0.14484238611121913, "learning_rate": 7.173797070269584e-06, "loss": 0.3436, "num_tokens": 4321474473.0, "step": 11110 }, { "epoch": 4.069985801309944, "grad_norm": 0.13006609117328194, "learning_rate": 7.171375556531187e-06, "loss": 0.3378, "num_tokens": 4322252969.0, "step": 11111 }, { "epoch": 4.070352219117849, "grad_norm": 0.15730545210881733, "learning_rate": 7.16895487767391e-06, "loss": 0.3591, "num_tokens": 4322987239.0, "step": 11112 }, { "epoch": 4.070718636925754, "grad_norm": 0.14301156586949632, "learning_rate": 7.1665350338340355e-06, "loss": 0.3709, "num_tokens": 4323804802.0, "step": 11113 }, { "epoch": 4.07108505473366, "grad_norm": 0.1576906331741126, "learning_rate": 7.164116025147817e-06, "loss": 0.3709, "num_tokens": 4324510961.0, "step": 11114 }, { "epoch": 4.071451472541566, "grad_norm": 0.14712666009129344, "learning_rate": 7.161697851751437e-06, "loss": 0.3435, "num_tokens": 4325329988.0, "step": 11115 }, { "epoch": 4.071817890349471, "grad_norm": 0.1501552788580012, "learning_rate": 7.159280513781055e-06, "loss": 0.3822, "num_tokens": 4326028239.0, "step": 11116 }, { "epoch": 4.072184308157376, "grad_norm": 0.15054868838884589, "learning_rate": 7.156864011372766e-06, "loss": 0.3461, "num_tokens": 4326760349.0, "step": 11117 }, { "epoch": 4.072550725965282, "grad_norm": 0.1453858499403267, "learning_rate": 7.154448344662626e-06, "loss": 0.3892, "num_tokens": 4327597654.0, "step": 11118 }, { "epoch": 4.0729171437731875, "grad_norm": 0.14929212540007275, "learning_rate": 7.152033513786635e-06, "loss": 0.3428, "num_tokens": 4328484211.0, "step": 11119 }, { "epoch": 4.073283561581093, "grad_norm": 0.1484221106283863, "learning_rate": 7.149619518880762e-06, "loss": 0.354, "num_tokens": 4329270404.0, "step": 11120 }, { "epoch": 4.073649979388998, "grad_norm": 0.14487068744726242, "learning_rate": 7.147206360080911e-06, "loss": 0.3572, "num_tokens": 4330054382.0, "step": 11121 }, { "epoch": 4.074016397196904, "grad_norm": 0.13727152606487966, "learning_rate": 7.144794037522957e-06, "loss": 0.3325, "num_tokens": 4330899751.0, "step": 11122 }, { "epoch": 4.074382815004809, "grad_norm": 0.15069389613439726, "learning_rate": 7.142382551342717e-06, "loss": 0.3858, "num_tokens": 4331708866.0, "step": 11123 }, { "epoch": 4.074749232812715, "grad_norm": 0.14348339811464703, "learning_rate": 7.1399719016759576e-06, "loss": 0.3662, "num_tokens": 4332537043.0, "step": 11124 }, { "epoch": 4.07511565062062, "grad_norm": 0.14515861429720375, "learning_rate": 7.137562088658401e-06, "loss": 0.3601, "num_tokens": 4333430501.0, "step": 11125 }, { "epoch": 4.075482068428526, "grad_norm": 0.14469193713015968, "learning_rate": 7.135153112425733e-06, "loss": 0.3726, "num_tokens": 4334179116.0, "step": 11126 }, { "epoch": 4.075848486236431, "grad_norm": 0.15227409343050838, "learning_rate": 7.1327449731135785e-06, "loss": 0.3331, "num_tokens": 4334827176.0, "step": 11127 }, { "epoch": 4.0762149040443365, "grad_norm": 0.1562305819847885, "learning_rate": 7.130337670857518e-06, "loss": 0.3545, "num_tokens": 4335607983.0, "step": 11128 }, { "epoch": 4.076581321852242, "grad_norm": 0.13282958876808482, "learning_rate": 7.127931205793097e-06, "loss": 0.3713, "num_tokens": 4336494239.0, "step": 11129 }, { "epoch": 4.076947739660148, "grad_norm": 0.1539277015603467, "learning_rate": 7.12552557805579e-06, "loss": 0.3603, "num_tokens": 4337187700.0, "step": 11130 }, { "epoch": 4.077314157468053, "grad_norm": 0.14917892135187752, "learning_rate": 7.123120787781054e-06, "loss": 0.342, "num_tokens": 4337958720.0, "step": 11131 }, { "epoch": 4.077680575275958, "grad_norm": 0.1560130229390143, "learning_rate": 7.120716835104275e-06, "loss": 0.4112, "num_tokens": 4338690420.0, "step": 11132 }, { "epoch": 4.078046993083864, "grad_norm": 0.14677589308452157, "learning_rate": 7.118313720160803e-06, "loss": 0.366, "num_tokens": 4339513564.0, "step": 11133 }, { "epoch": 4.07841341089177, "grad_norm": 0.15442909192613954, "learning_rate": 7.115911443085932e-06, "loss": 0.3948, "num_tokens": 4340246726.0, "step": 11134 }, { "epoch": 4.078779828699675, "grad_norm": 0.1635091148240302, "learning_rate": 7.113510004014926e-06, "loss": 0.3782, "num_tokens": 4340921707.0, "step": 11135 }, { "epoch": 4.07914624650758, "grad_norm": 0.146279817713336, "learning_rate": 7.111109403082976e-06, "loss": 0.3619, "num_tokens": 4341700728.0, "step": 11136 }, { "epoch": 4.0795126643154855, "grad_norm": 0.1610278690513346, "learning_rate": 7.108709640425258e-06, "loss": 0.3716, "num_tokens": 4342366045.0, "step": 11137 }, { "epoch": 4.079879082123391, "grad_norm": 0.1694145704213424, "learning_rate": 7.106310716176872e-06, "loss": 0.354, "num_tokens": 4342962292.0, "step": 11138 }, { "epoch": 4.080245499931297, "grad_norm": 0.14542796825200585, "learning_rate": 7.103912630472884e-06, "loss": 0.3947, "num_tokens": 4343871009.0, "step": 11139 }, { "epoch": 4.080611917739202, "grad_norm": 0.13695738346543784, "learning_rate": 7.101515383448314e-06, "loss": 0.3287, "num_tokens": 4344649715.0, "step": 11140 }, { "epoch": 4.080978335547107, "grad_norm": 0.14690266177510228, "learning_rate": 7.099118975238124e-06, "loss": 0.3579, "num_tokens": 4345471541.0, "step": 11141 }, { "epoch": 4.081344753355013, "grad_norm": 0.1474973525512381, "learning_rate": 7.0967234059772425e-06, "loss": 0.3814, "num_tokens": 4346277360.0, "step": 11142 }, { "epoch": 4.081711171162919, "grad_norm": 0.1431694502543626, "learning_rate": 7.09432867580055e-06, "loss": 0.3845, "num_tokens": 4347079861.0, "step": 11143 }, { "epoch": 4.082077588970824, "grad_norm": 0.15782011469627658, "learning_rate": 7.091934784842869e-06, "loss": 0.3957, "num_tokens": 4347733083.0, "step": 11144 }, { "epoch": 4.082444006778729, "grad_norm": 0.14514783558282626, "learning_rate": 7.08954173323898e-06, "loss": 0.3816, "num_tokens": 4348480729.0, "step": 11145 }, { "epoch": 4.082810424586635, "grad_norm": 0.13744439194968563, "learning_rate": 7.0871495211236175e-06, "loss": 0.3401, "num_tokens": 4349389602.0, "step": 11146 }, { "epoch": 4.083176842394541, "grad_norm": 0.14036823864856424, "learning_rate": 7.084758148631461e-06, "loss": 0.3521, "num_tokens": 4350155186.0, "step": 11147 }, { "epoch": 4.083543260202446, "grad_norm": 0.15759291931441974, "learning_rate": 7.082367615897161e-06, "loss": 0.3733, "num_tokens": 4350826924.0, "step": 11148 }, { "epoch": 4.083909678010351, "grad_norm": 0.15988268113994614, "learning_rate": 7.0799779230553016e-06, "loss": 0.3912, "num_tokens": 4351593499.0, "step": 11149 }, { "epoch": 4.0842760958182565, "grad_norm": 0.15493087959519022, "learning_rate": 7.077589070240431e-06, "loss": 0.3874, "num_tokens": 4352353925.0, "step": 11150 }, { "epoch": 4.084642513626163, "grad_norm": 0.1421249128023942, "learning_rate": 7.075201057587047e-06, "loss": 0.3435, "num_tokens": 4353245853.0, "step": 11151 }, { "epoch": 4.085008931434068, "grad_norm": 0.15758418365035268, "learning_rate": 7.072813885229594e-06, "loss": 0.3747, "num_tokens": 4353930562.0, "step": 11152 }, { "epoch": 4.085375349241973, "grad_norm": 0.15355126125067733, "learning_rate": 7.070427553302481e-06, "loss": 0.3997, "num_tokens": 4354669397.0, "step": 11153 }, { "epoch": 4.085741767049878, "grad_norm": 0.15229882629076405, "learning_rate": 7.068042061940063e-06, "loss": 0.3655, "num_tokens": 4355390007.0, "step": 11154 }, { "epoch": 4.0861081848577845, "grad_norm": 0.14525410301491257, "learning_rate": 7.065657411276638e-06, "loss": 0.3568, "num_tokens": 4356171853.0, "step": 11155 }, { "epoch": 4.08647460266569, "grad_norm": 0.14972110981811568, "learning_rate": 7.063273601446481e-06, "loss": 0.3681, "num_tokens": 4356917118.0, "step": 11156 }, { "epoch": 4.086841020473595, "grad_norm": 0.1539490489447025, "learning_rate": 7.060890632583797e-06, "loss": 0.3798, "num_tokens": 4357601804.0, "step": 11157 }, { "epoch": 4.0872074382815, "grad_norm": 0.15078786049719498, "learning_rate": 7.0585085048227474e-06, "loss": 0.3719, "num_tokens": 4358420947.0, "step": 11158 }, { "epoch": 4.087573856089406, "grad_norm": 0.13828262459846033, "learning_rate": 7.056127218297464e-06, "loss": 0.3661, "num_tokens": 4359289653.0, "step": 11159 }, { "epoch": 4.087940273897312, "grad_norm": 0.15651345419443785, "learning_rate": 7.053746773142008e-06, "loss": 0.3752, "num_tokens": 4360033007.0, "step": 11160 }, { "epoch": 4.088306691705217, "grad_norm": 0.13905336449371214, "learning_rate": 7.051367169490406e-06, "loss": 0.3704, "num_tokens": 4360935896.0, "step": 11161 }, { "epoch": 4.088673109513122, "grad_norm": 0.13780621884486974, "learning_rate": 7.04898840747663e-06, "loss": 0.3938, "num_tokens": 4361781251.0, "step": 11162 }, { "epoch": 4.089039527321027, "grad_norm": 0.1526467588778363, "learning_rate": 7.046610487234613e-06, "loss": 0.3659, "num_tokens": 4362481825.0, "step": 11163 }, { "epoch": 4.0894059451289335, "grad_norm": 0.15217279425353775, "learning_rate": 7.044233408898242e-06, "loss": 0.356, "num_tokens": 4363184521.0, "step": 11164 }, { "epoch": 4.089772362936839, "grad_norm": 0.158298118809397, "learning_rate": 7.0418571726013455e-06, "loss": 0.3763, "num_tokens": 4363961989.0, "step": 11165 }, { "epoch": 4.090138780744744, "grad_norm": 0.14997234206816928, "learning_rate": 7.03948177847771e-06, "loss": 0.389, "num_tokens": 4364708740.0, "step": 11166 }, { "epoch": 4.090505198552649, "grad_norm": 0.15197173475262007, "learning_rate": 7.0371072266610775e-06, "loss": 0.3502, "num_tokens": 4365454941.0, "step": 11167 }, { "epoch": 4.090871616360555, "grad_norm": 0.14302612190070071, "learning_rate": 7.034733517285131e-06, "loss": 0.371, "num_tokens": 4366215552.0, "step": 11168 }, { "epoch": 4.091238034168461, "grad_norm": 0.14629068157721659, "learning_rate": 7.03236065048353e-06, "loss": 0.3685, "num_tokens": 4366996326.0, "step": 11169 }, { "epoch": 4.091604451976366, "grad_norm": 0.15495332417093483, "learning_rate": 7.029988626389857e-06, "loss": 0.3691, "num_tokens": 4367718935.0, "step": 11170 }, { "epoch": 4.091970869784271, "grad_norm": 0.15884020720095923, "learning_rate": 7.027617445137676e-06, "loss": 0.3648, "num_tokens": 4368496439.0, "step": 11171 }, { "epoch": 4.092337287592177, "grad_norm": 0.14059426813306125, "learning_rate": 7.02524710686048e-06, "loss": 0.3463, "num_tokens": 4369290921.0, "step": 11172 }, { "epoch": 4.0927037054000825, "grad_norm": 0.15318747617869924, "learning_rate": 7.0228776116917256e-06, "loss": 0.3645, "num_tokens": 4370039383.0, "step": 11173 }, { "epoch": 4.093070123207988, "grad_norm": 0.1541372068364107, "learning_rate": 7.02050895976482e-06, "loss": 0.3474, "num_tokens": 4370810196.0, "step": 11174 }, { "epoch": 4.093436541015893, "grad_norm": 0.15176840494952026, "learning_rate": 7.01814115121312e-06, "loss": 0.3649, "num_tokens": 4371661677.0, "step": 11175 }, { "epoch": 4.093802958823799, "grad_norm": 0.1514947774654771, "learning_rate": 7.01577418616994e-06, "loss": 0.3741, "num_tokens": 4372386096.0, "step": 11176 }, { "epoch": 4.094169376631704, "grad_norm": 0.15540416446794048, "learning_rate": 7.013408064768551e-06, "loss": 0.384, "num_tokens": 4373057013.0, "step": 11177 }, { "epoch": 4.09453579443961, "grad_norm": 0.14729117030360506, "learning_rate": 7.011042787142164e-06, "loss": 0.3663, "num_tokens": 4373846366.0, "step": 11178 }, { "epoch": 4.094902212247515, "grad_norm": 0.15550142118651294, "learning_rate": 7.008678353423952e-06, "loss": 0.341, "num_tokens": 4374544068.0, "step": 11179 }, { "epoch": 4.095268630055421, "grad_norm": 0.16191370529389332, "learning_rate": 7.006314763747031e-06, "loss": 0.3819, "num_tokens": 4375340631.0, "step": 11180 }, { "epoch": 4.095635047863326, "grad_norm": 0.1416454190922513, "learning_rate": 7.003952018244485e-06, "loss": 0.3564, "num_tokens": 4376146213.0, "step": 11181 }, { "epoch": 4.096001465671232, "grad_norm": 0.15141344849077812, "learning_rate": 7.001590117049335e-06, "loss": 0.4236, "num_tokens": 4376923623.0, "step": 11182 }, { "epoch": 4.096367883479137, "grad_norm": 0.1434079907985159, "learning_rate": 6.999229060294558e-06, "loss": 0.3641, "num_tokens": 4377746428.0, "step": 11183 }, { "epoch": 4.096734301287043, "grad_norm": 0.14940105565195425, "learning_rate": 6.996868848113096e-06, "loss": 0.3449, "num_tokens": 4378562762.0, "step": 11184 }, { "epoch": 4.097100719094948, "grad_norm": 0.14656804961652733, "learning_rate": 6.994509480637828e-06, "loss": 0.3703, "num_tokens": 4379283350.0, "step": 11185 }, { "epoch": 4.0974671369028535, "grad_norm": 0.16011181014818712, "learning_rate": 6.992150958001586e-06, "loss": 0.3662, "num_tokens": 4380051340.0, "step": 11186 }, { "epoch": 4.097833554710759, "grad_norm": 0.1466471944280294, "learning_rate": 6.989793280337171e-06, "loss": 0.3724, "num_tokens": 4380812617.0, "step": 11187 }, { "epoch": 4.098199972518664, "grad_norm": 0.15626175368839648, "learning_rate": 6.987436447777318e-06, "loss": 0.3597, "num_tokens": 4381522272.0, "step": 11188 }, { "epoch": 4.09856639032657, "grad_norm": 0.1458832861357641, "learning_rate": 6.985080460454716e-06, "loss": 0.3555, "num_tokens": 4382302631.0, "step": 11189 }, { "epoch": 4.098932808134475, "grad_norm": 0.1486865927192739, "learning_rate": 6.982725318502026e-06, "loss": 0.3832, "num_tokens": 4383084516.0, "step": 11190 }, { "epoch": 4.099299225942381, "grad_norm": 0.15418396020957947, "learning_rate": 6.980371022051833e-06, "loss": 0.3655, "num_tokens": 4383778189.0, "step": 11191 }, { "epoch": 4.099665643750286, "grad_norm": 0.14720624813384128, "learning_rate": 6.978017571236697e-06, "loss": 0.3319, "num_tokens": 4384569457.0, "step": 11192 }, { "epoch": 4.100032061558192, "grad_norm": 0.15299862439467687, "learning_rate": 6.9756649661891214e-06, "loss": 0.3671, "num_tokens": 4385301722.0, "step": 11193 }, { "epoch": 4.100398479366097, "grad_norm": 0.15357428224239453, "learning_rate": 6.973313207041561e-06, "loss": 0.3639, "num_tokens": 4385984518.0, "step": 11194 }, { "epoch": 4.1007648971740025, "grad_norm": 0.15034337785185956, "learning_rate": 6.970962293926425e-06, "loss": 0.3643, "num_tokens": 4386755152.0, "step": 11195 }, { "epoch": 4.101131314981908, "grad_norm": 0.15186835784543842, "learning_rate": 6.9686122269760705e-06, "loss": 0.3615, "num_tokens": 4387521046.0, "step": 11196 }, { "epoch": 4.101497732789814, "grad_norm": 0.15480013606024537, "learning_rate": 6.9662630063228106e-06, "loss": 0.3724, "num_tokens": 4388191790.0, "step": 11197 }, { "epoch": 4.101864150597719, "grad_norm": 0.15443703380087656, "learning_rate": 6.9639146320989225e-06, "loss": 0.3735, "num_tokens": 4389000860.0, "step": 11198 }, { "epoch": 4.102230568405624, "grad_norm": 0.1451629562513015, "learning_rate": 6.9615671044366155e-06, "loss": 0.3608, "num_tokens": 4389755790.0, "step": 11199 }, { "epoch": 4.10259698621353, "grad_norm": 0.1485317097356651, "learning_rate": 6.959220423468061e-06, "loss": 0.3489, "num_tokens": 4390424551.0, "step": 11200 }, { "epoch": 4.102963404021436, "grad_norm": 0.14795897068476938, "learning_rate": 6.956874589325383e-06, "loss": 0.3646, "num_tokens": 4391234321.0, "step": 11201 }, { "epoch": 4.103329821829341, "grad_norm": 0.15696266536405495, "learning_rate": 6.95452960214065e-06, "loss": 0.3479, "num_tokens": 4391937477.0, "step": 11202 }, { "epoch": 4.103696239637246, "grad_norm": 0.14104518201841282, "learning_rate": 6.952185462045899e-06, "loss": 0.3271, "num_tokens": 4392720613.0, "step": 11203 }, { "epoch": 4.1040626574451515, "grad_norm": 0.15223810586551997, "learning_rate": 6.949842169173105e-06, "loss": 0.3818, "num_tokens": 4393456529.0, "step": 11204 }, { "epoch": 4.104429075253058, "grad_norm": 0.15340198850543943, "learning_rate": 6.947499723654202e-06, "loss": 0.3825, "num_tokens": 4394218333.0, "step": 11205 }, { "epoch": 4.104795493060963, "grad_norm": 0.14028835843338633, "learning_rate": 6.945158125621075e-06, "loss": 0.3703, "num_tokens": 4395037898.0, "step": 11206 }, { "epoch": 4.105161910868868, "grad_norm": 0.1605305181683422, "learning_rate": 6.942817375205557e-06, "loss": 0.3712, "num_tokens": 4395772973.0, "step": 11207 }, { "epoch": 4.105528328676773, "grad_norm": 0.13988259562865168, "learning_rate": 6.9404774725394375e-06, "loss": 0.3648, "num_tokens": 4396643644.0, "step": 11208 }, { "epoch": 4.10589474648468, "grad_norm": 0.15160918049143315, "learning_rate": 6.938138417754462e-06, "loss": 0.3659, "num_tokens": 4397374158.0, "step": 11209 }, { "epoch": 4.106261164292585, "grad_norm": 0.14972580465555696, "learning_rate": 6.935800210982317e-06, "loss": 0.402, "num_tokens": 4398106551.0, "step": 11210 }, { "epoch": 4.10662758210049, "grad_norm": 0.151771523399074, "learning_rate": 6.933462852354656e-06, "loss": 0.3654, "num_tokens": 4398883429.0, "step": 11211 }, { "epoch": 4.106993999908395, "grad_norm": 0.15515204257957457, "learning_rate": 6.9311263420030735e-06, "loss": 0.3824, "num_tokens": 4399740212.0, "step": 11212 }, { "epoch": 4.1073604177163014, "grad_norm": 0.1583555459215043, "learning_rate": 6.928790680059114e-06, "loss": 0.3709, "num_tokens": 4400451433.0, "step": 11213 }, { "epoch": 4.107726835524207, "grad_norm": 0.15631585614567764, "learning_rate": 6.926455866654293e-06, "loss": 0.359, "num_tokens": 4401151451.0, "step": 11214 }, { "epoch": 4.108093253332112, "grad_norm": 0.15823680602818474, "learning_rate": 6.924121901920053e-06, "loss": 0.3298, "num_tokens": 4401852758.0, "step": 11215 }, { "epoch": 4.108459671140017, "grad_norm": 0.15273525994693216, "learning_rate": 6.921788785987809e-06, "loss": 0.3422, "num_tokens": 4402592297.0, "step": 11216 }, { "epoch": 4.1088260889479225, "grad_norm": 0.15828879470504026, "learning_rate": 6.91945651898891e-06, "loss": 0.3546, "num_tokens": 4403334225.0, "step": 11217 }, { "epoch": 4.109192506755829, "grad_norm": 0.15646806920500392, "learning_rate": 6.917125101054678e-06, "loss": 0.3794, "num_tokens": 4404055632.0, "step": 11218 }, { "epoch": 4.109558924563734, "grad_norm": 0.13902337491660144, "learning_rate": 6.914794532316368e-06, "loss": 0.3792, "num_tokens": 4404909122.0, "step": 11219 }, { "epoch": 4.109925342371639, "grad_norm": 0.14180643076884308, "learning_rate": 6.912464812905204e-06, "loss": 0.3679, "num_tokens": 4405741024.0, "step": 11220 }, { "epoch": 4.110291760179544, "grad_norm": 0.14893014729160525, "learning_rate": 6.910135942952352e-06, "loss": 0.3449, "num_tokens": 4406471730.0, "step": 11221 }, { "epoch": 4.1106581779874505, "grad_norm": 0.1439187651129514, "learning_rate": 6.9078079225889275e-06, "loss": 0.3776, "num_tokens": 4407288823.0, "step": 11222 }, { "epoch": 4.111024595795356, "grad_norm": 0.15389714355959166, "learning_rate": 6.905480751946e-06, "loss": 0.3728, "num_tokens": 4407954679.0, "step": 11223 }, { "epoch": 4.111391013603261, "grad_norm": 0.15794562469822623, "learning_rate": 6.903154431154604e-06, "loss": 0.3496, "num_tokens": 4408833710.0, "step": 11224 }, { "epoch": 4.111757431411166, "grad_norm": 0.13572691540254408, "learning_rate": 6.900828960345705e-06, "loss": 0.3763, "num_tokens": 4409690677.0, "step": 11225 }, { "epoch": 4.112123849219072, "grad_norm": 0.1499748267845677, "learning_rate": 6.898504339650244e-06, "loss": 0.377, "num_tokens": 4410410852.0, "step": 11226 }, { "epoch": 4.112490267026978, "grad_norm": 0.14796092894651505, "learning_rate": 6.896180569199095e-06, "loss": 0.3693, "num_tokens": 4411227219.0, "step": 11227 }, { "epoch": 4.112856684834883, "grad_norm": 0.15808777939691124, "learning_rate": 6.893857649123089e-06, "loss": 0.3999, "num_tokens": 4411913140.0, "step": 11228 }, { "epoch": 4.113223102642788, "grad_norm": 0.16739649331478, "learning_rate": 6.891535579553015e-06, "loss": 0.3655, "num_tokens": 4412696978.0, "step": 11229 }, { "epoch": 4.113589520450694, "grad_norm": 0.1599530024393047, "learning_rate": 6.8892143606196025e-06, "loss": 0.385, "num_tokens": 4413441355.0, "step": 11230 }, { "epoch": 4.1139559382585995, "grad_norm": 0.14798175463562407, "learning_rate": 6.886893992453548e-06, "loss": 0.3729, "num_tokens": 4414235172.0, "step": 11231 }, { "epoch": 4.114322356066505, "grad_norm": 0.1510956889921689, "learning_rate": 6.884574475185495e-06, "loss": 0.3462, "num_tokens": 4414963599.0, "step": 11232 }, { "epoch": 4.11468877387441, "grad_norm": 0.14972000518629378, "learning_rate": 6.882255808946032e-06, "loss": 0.371, "num_tokens": 4415717311.0, "step": 11233 }, { "epoch": 4.115055191682316, "grad_norm": 0.1627545022672666, "learning_rate": 6.879937993865708e-06, "loss": 0.4028, "num_tokens": 4416431535.0, "step": 11234 }, { "epoch": 4.115421609490221, "grad_norm": 0.15599607872266807, "learning_rate": 6.877621030075013e-06, "loss": 0.3752, "num_tokens": 4417175910.0, "step": 11235 }, { "epoch": 4.115788027298127, "grad_norm": 0.15503847514777605, "learning_rate": 6.875304917704406e-06, "loss": 0.3968, "num_tokens": 4417880767.0, "step": 11236 }, { "epoch": 4.116154445106032, "grad_norm": 0.15837901762346787, "learning_rate": 6.872989656884286e-06, "loss": 0.3592, "num_tokens": 4418572711.0, "step": 11237 }, { "epoch": 4.116520862913938, "grad_norm": 0.15096341674147945, "learning_rate": 6.870675247745e-06, "loss": 0.354, "num_tokens": 4419256002.0, "step": 11238 }, { "epoch": 4.116887280721843, "grad_norm": 0.14857027813463736, "learning_rate": 6.868361690416865e-06, "loss": 0.3994, "num_tokens": 4419955503.0, "step": 11239 }, { "epoch": 4.1172536985297485, "grad_norm": 0.16240519550240945, "learning_rate": 6.866048985030133e-06, "loss": 0.3501, "num_tokens": 4420610948.0, "step": 11240 }, { "epoch": 4.117620116337654, "grad_norm": 0.1535928469529613, "learning_rate": 6.8637371317150095e-06, "loss": 0.3602, "num_tokens": 4421435963.0, "step": 11241 }, { "epoch": 4.117986534145559, "grad_norm": 0.14681916873114614, "learning_rate": 6.861426130601666e-06, "loss": 0.3726, "num_tokens": 4422257020.0, "step": 11242 }, { "epoch": 4.118352951953465, "grad_norm": 0.13780973832702487, "learning_rate": 6.859115981820212e-06, "loss": 0.3484, "num_tokens": 4423220654.0, "step": 11243 }, { "epoch": 4.11871936976137, "grad_norm": 0.15095346647590732, "learning_rate": 6.856806685500708e-06, "loss": 0.3678, "num_tokens": 4423922112.0, "step": 11244 }, { "epoch": 4.119085787569276, "grad_norm": 0.14446273520504074, "learning_rate": 6.8544982417731844e-06, "loss": 0.3742, "num_tokens": 4424770377.0, "step": 11245 }, { "epoch": 4.119452205377181, "grad_norm": 0.13001890832626964, "learning_rate": 6.8521906507675965e-06, "loss": 0.3655, "num_tokens": 4425591038.0, "step": 11246 }, { "epoch": 4.119818623185087, "grad_norm": 0.15243789998337906, "learning_rate": 6.849883912613879e-06, "loss": 0.3574, "num_tokens": 4426289194.0, "step": 11247 }, { "epoch": 4.120185040992992, "grad_norm": 0.15066201960032902, "learning_rate": 6.8475780274419e-06, "loss": 0.3521, "num_tokens": 4426955721.0, "step": 11248 }, { "epoch": 4.120551458800898, "grad_norm": 0.15605699548213411, "learning_rate": 6.845272995381487e-06, "loss": 0.363, "num_tokens": 4427712442.0, "step": 11249 }, { "epoch": 4.120917876608803, "grad_norm": 0.14096067893832584, "learning_rate": 6.842968816562416e-06, "loss": 0.3636, "num_tokens": 4428545455.0, "step": 11250 }, { "epoch": 4.121284294416709, "grad_norm": 0.14588331851688793, "learning_rate": 6.840665491114415e-06, "loss": 0.359, "num_tokens": 4429342949.0, "step": 11251 }, { "epoch": 4.121650712224614, "grad_norm": 0.14649774905384644, "learning_rate": 6.838363019167169e-06, "loss": 0.3721, "num_tokens": 4430060917.0, "step": 11252 }, { "epoch": 4.1220171300325195, "grad_norm": 0.1481419056161328, "learning_rate": 6.836061400850314e-06, "loss": 0.3679, "num_tokens": 4430794674.0, "step": 11253 }, { "epoch": 4.122383547840425, "grad_norm": 0.16418480633377117, "learning_rate": 6.833760636293431e-06, "loss": 0.3831, "num_tokens": 4431467463.0, "step": 11254 }, { "epoch": 4.122749965648331, "grad_norm": 0.15914412169042624, "learning_rate": 6.831460725626064e-06, "loss": 0.4046, "num_tokens": 4432210519.0, "step": 11255 }, { "epoch": 4.123116383456236, "grad_norm": 0.1546017895715576, "learning_rate": 6.829161668977696e-06, "loss": 0.3687, "num_tokens": 4433032155.0, "step": 11256 }, { "epoch": 4.123482801264141, "grad_norm": 0.15501602211211332, "learning_rate": 6.826863466477771e-06, "loss": 0.3649, "num_tokens": 4433802354.0, "step": 11257 }, { "epoch": 4.123849219072047, "grad_norm": 0.15785570623344863, "learning_rate": 6.82456611825568e-06, "loss": 0.382, "num_tokens": 4434552991.0, "step": 11258 }, { "epoch": 4.124215636879953, "grad_norm": 0.16109873884842144, "learning_rate": 6.822269624440767e-06, "loss": 0.3645, "num_tokens": 4435280624.0, "step": 11259 }, { "epoch": 4.124582054687858, "grad_norm": 0.14439300079012518, "learning_rate": 6.8199739851623405e-06, "loss": 0.3769, "num_tokens": 4436115593.0, "step": 11260 }, { "epoch": 4.124948472495763, "grad_norm": 0.14916793384347118, "learning_rate": 6.817679200549639e-06, "loss": 0.3691, "num_tokens": 4436804398.0, "step": 11261 }, { "epoch": 4.1253148903036685, "grad_norm": 0.15874594881502146, "learning_rate": 6.815385270731867e-06, "loss": 0.3461, "num_tokens": 4437642593.0, "step": 11262 }, { "epoch": 4.125681308111575, "grad_norm": 0.15421072908262826, "learning_rate": 6.81309219583817e-06, "loss": 0.3958, "num_tokens": 4438413549.0, "step": 11263 }, { "epoch": 4.12604772591948, "grad_norm": 0.15736770593820537, "learning_rate": 6.810799975997666e-06, "loss": 0.3808, "num_tokens": 4439156868.0, "step": 11264 }, { "epoch": 4.126414143727385, "grad_norm": 0.14216480902791556, "learning_rate": 6.8085086113393994e-06, "loss": 0.398, "num_tokens": 4439970903.0, "step": 11265 }, { "epoch": 4.12678056153529, "grad_norm": 0.15858474085762364, "learning_rate": 6.806218101992388e-06, "loss": 0.3699, "num_tokens": 4440807217.0, "step": 11266 }, { "epoch": 4.1271469793431965, "grad_norm": 0.14289506237608227, "learning_rate": 6.80392844808559e-06, "loss": 0.3664, "num_tokens": 4441585433.0, "step": 11267 }, { "epoch": 4.127513397151102, "grad_norm": 0.1668166474601792, "learning_rate": 6.8016396497479135e-06, "loss": 0.3769, "num_tokens": 4442350658.0, "step": 11268 }, { "epoch": 4.127879814959007, "grad_norm": 0.14609359473158373, "learning_rate": 6.799351707108219e-06, "loss": 0.3607, "num_tokens": 4443039018.0, "step": 11269 }, { "epoch": 4.128246232766912, "grad_norm": 0.17415754285932497, "learning_rate": 6.797064620295334e-06, "loss": 0.3954, "num_tokens": 4443760422.0, "step": 11270 }, { "epoch": 4.1286126505748175, "grad_norm": 0.15690138735391476, "learning_rate": 6.794778389438019e-06, "loss": 0.415, "num_tokens": 4444488649.0, "step": 11271 }, { "epoch": 4.128979068382724, "grad_norm": 0.15694048078601844, "learning_rate": 6.7924930146649895e-06, "loss": 0.385, "num_tokens": 4445284834.0, "step": 11272 }, { "epoch": 4.129345486190629, "grad_norm": 0.157411492177394, "learning_rate": 6.790208496104924e-06, "loss": 0.3621, "num_tokens": 4446202112.0, "step": 11273 }, { "epoch": 4.129711903998534, "grad_norm": 0.16469597334631814, "learning_rate": 6.787924833886439e-06, "loss": 0.3977, "num_tokens": 4446946049.0, "step": 11274 }, { "epoch": 4.130078321806439, "grad_norm": 0.15385136508531075, "learning_rate": 6.7856420281381175e-06, "loss": 0.3491, "num_tokens": 4447716468.0, "step": 11275 }, { "epoch": 4.1304447396143456, "grad_norm": 0.15052975314133912, "learning_rate": 6.783360078988481e-06, "loss": 0.3726, "num_tokens": 4448447590.0, "step": 11276 }, { "epoch": 4.130811157422251, "grad_norm": 0.15829864127551224, "learning_rate": 6.781078986566009e-06, "loss": 0.3667, "num_tokens": 4449105201.0, "step": 11277 }, { "epoch": 4.131177575230156, "grad_norm": 0.15787322653306962, "learning_rate": 6.778798750999125e-06, "loss": 0.3493, "num_tokens": 4449809292.0, "step": 11278 }, { "epoch": 4.131543993038061, "grad_norm": 0.1547603140357158, "learning_rate": 6.776519372416222e-06, "loss": 0.3557, "num_tokens": 4450589285.0, "step": 11279 }, { "epoch": 4.131910410845967, "grad_norm": 0.13824866169172656, "learning_rate": 6.774240850945623e-06, "loss": 0.3745, "num_tokens": 4451483046.0, "step": 11280 }, { "epoch": 4.132276828653873, "grad_norm": 0.14610981175977752, "learning_rate": 6.771963186715622e-06, "loss": 0.3794, "num_tokens": 4452274615.0, "step": 11281 }, { "epoch": 4.132643246461778, "grad_norm": 0.1450089802851971, "learning_rate": 6.769686379854454e-06, "loss": 0.3458, "num_tokens": 4453086435.0, "step": 11282 }, { "epoch": 4.133009664269683, "grad_norm": 0.13717841478109202, "learning_rate": 6.767410430490305e-06, "loss": 0.3746, "num_tokens": 4453916835.0, "step": 11283 }, { "epoch": 4.133376082077589, "grad_norm": 0.1619964606832097, "learning_rate": 6.765135338751317e-06, "loss": 0.363, "num_tokens": 4454633668.0, "step": 11284 }, { "epoch": 4.133742499885495, "grad_norm": 0.16395665073016844, "learning_rate": 6.762861104765577e-06, "loss": 0.3449, "num_tokens": 4455262753.0, "step": 11285 }, { "epoch": 4.1341089176934, "grad_norm": 0.15956969201163138, "learning_rate": 6.760587728661134e-06, "loss": 0.3723, "num_tokens": 4456003639.0, "step": 11286 }, { "epoch": 4.134475335501305, "grad_norm": 0.1566486589006082, "learning_rate": 6.758315210565989e-06, "loss": 0.3714, "num_tokens": 4456655256.0, "step": 11287 }, { "epoch": 4.134841753309211, "grad_norm": 0.15957953222010995, "learning_rate": 6.75604355060808e-06, "loss": 0.3951, "num_tokens": 4457397382.0, "step": 11288 }, { "epoch": 4.1352081711171165, "grad_norm": 0.16014611175660973, "learning_rate": 6.75377274891531e-06, "loss": 0.356, "num_tokens": 4458071786.0, "step": 11289 }, { "epoch": 4.135574588925022, "grad_norm": 0.14443202829881915, "learning_rate": 6.751502805615533e-06, "loss": 0.3798, "num_tokens": 4458941894.0, "step": 11290 }, { "epoch": 4.135941006732927, "grad_norm": 0.152340517359646, "learning_rate": 6.749233720836541e-06, "loss": 0.3678, "num_tokens": 4459775002.0, "step": 11291 }, { "epoch": 4.136307424540833, "grad_norm": 0.14741842185733345, "learning_rate": 6.746965494706099e-06, "loss": 0.396, "num_tokens": 4460514329.0, "step": 11292 }, { "epoch": 4.136673842348738, "grad_norm": 0.14707431502430265, "learning_rate": 6.744698127351903e-06, "loss": 0.3439, "num_tokens": 4461254409.0, "step": 11293 }, { "epoch": 4.137040260156644, "grad_norm": 0.15658949746873843, "learning_rate": 6.742431618901622e-06, "loss": 0.3734, "num_tokens": 4462101092.0, "step": 11294 }, { "epoch": 4.137406677964549, "grad_norm": 0.1445558198148399, "learning_rate": 6.740165969482857e-06, "loss": 0.3748, "num_tokens": 4462911983.0, "step": 11295 }, { "epoch": 4.137773095772454, "grad_norm": 0.14988709124877897, "learning_rate": 6.737901179223167e-06, "loss": 0.3889, "num_tokens": 4463736343.0, "step": 11296 }, { "epoch": 4.13813951358036, "grad_norm": 0.16156062032544796, "learning_rate": 6.735637248250069e-06, "loss": 0.3569, "num_tokens": 4464421023.0, "step": 11297 }, { "epoch": 4.1385059313882655, "grad_norm": 0.1595673354042253, "learning_rate": 6.733374176691027e-06, "loss": 0.4046, "num_tokens": 4465140308.0, "step": 11298 }, { "epoch": 4.138872349196171, "grad_norm": 0.15912170772182133, "learning_rate": 6.731111964673451e-06, "loss": 0.3426, "num_tokens": 4465877288.0, "step": 11299 }, { "epoch": 4.139238767004076, "grad_norm": 0.14984077635786724, "learning_rate": 6.7288506123247135e-06, "loss": 0.3676, "num_tokens": 4466607192.0, "step": 11300 }, { "epoch": 4.139605184811982, "grad_norm": 0.13494369037952628, "learning_rate": 6.726590119772134e-06, "loss": 0.3643, "num_tokens": 4467423216.0, "step": 11301 }, { "epoch": 4.139971602619887, "grad_norm": 0.15332871910431778, "learning_rate": 6.724330487142973e-06, "loss": 0.3903, "num_tokens": 4468209276.0, "step": 11302 }, { "epoch": 4.140338020427793, "grad_norm": 0.15220331945875815, "learning_rate": 6.7220717145644645e-06, "loss": 0.3858, "num_tokens": 4469041493.0, "step": 11303 }, { "epoch": 4.140704438235698, "grad_norm": 0.1473193469971481, "learning_rate": 6.719813802163777e-06, "loss": 0.3964, "num_tokens": 4469803224.0, "step": 11304 }, { "epoch": 4.141070856043604, "grad_norm": 0.1455261768787373, "learning_rate": 6.717556750068035e-06, "loss": 0.3764, "num_tokens": 4470685633.0, "step": 11305 }, { "epoch": 4.141437273851509, "grad_norm": 0.16666466917418102, "learning_rate": 6.715300558404312e-06, "loss": 0.3745, "num_tokens": 4471320950.0, "step": 11306 }, { "epoch": 4.1418036916594145, "grad_norm": 0.15825732748915072, "learning_rate": 6.713045227299637e-06, "loss": 0.3618, "num_tokens": 4472074825.0, "step": 11307 }, { "epoch": 4.14217010946732, "grad_norm": 0.1548928186522187, "learning_rate": 6.710790756880998e-06, "loss": 0.3892, "num_tokens": 4472894814.0, "step": 11308 }, { "epoch": 4.142536527275226, "grad_norm": 0.14639346626321106, "learning_rate": 6.708537147275318e-06, "loss": 0.3789, "num_tokens": 4473631466.0, "step": 11309 }, { "epoch": 4.142902945083131, "grad_norm": 0.13372089711257093, "learning_rate": 6.706284398609485e-06, "loss": 0.3434, "num_tokens": 4474561369.0, "step": 11310 }, { "epoch": 4.143269362891036, "grad_norm": 0.1495973545979914, "learning_rate": 6.7040325110103274e-06, "loss": 0.3671, "num_tokens": 4475356851.0, "step": 11311 }, { "epoch": 4.143635780698942, "grad_norm": 0.14818585274309654, "learning_rate": 6.701781484604635e-06, "loss": 0.366, "num_tokens": 4476123615.0, "step": 11312 }, { "epoch": 4.144002198506848, "grad_norm": 0.14581350053179915, "learning_rate": 6.699531319519138e-06, "loss": 0.3469, "num_tokens": 4476957579.0, "step": 11313 }, { "epoch": 4.144368616314753, "grad_norm": 0.15898414897775, "learning_rate": 6.697282015880532e-06, "loss": 0.3981, "num_tokens": 4477726336.0, "step": 11314 }, { "epoch": 4.144735034122658, "grad_norm": 0.14993601517916083, "learning_rate": 6.695033573815461e-06, "loss": 0.3658, "num_tokens": 4478492352.0, "step": 11315 }, { "epoch": 4.145101451930564, "grad_norm": 0.15949543574401537, "learning_rate": 6.692785993450512e-06, "loss": 0.3681, "num_tokens": 4479171199.0, "step": 11316 }, { "epoch": 4.14546786973847, "grad_norm": 0.16136154695307267, "learning_rate": 6.690539274912225e-06, "loss": 0.3768, "num_tokens": 4479825066.0, "step": 11317 }, { "epoch": 4.145834287546375, "grad_norm": 0.17014315302906555, "learning_rate": 6.688293418327098e-06, "loss": 0.3495, "num_tokens": 4480464482.0, "step": 11318 }, { "epoch": 4.14620070535428, "grad_norm": 0.16330608368714297, "learning_rate": 6.686048423821579e-06, "loss": 0.3597, "num_tokens": 4481210996.0, "step": 11319 }, { "epoch": 4.1465671231621855, "grad_norm": 0.1493273452902541, "learning_rate": 6.683804291522058e-06, "loss": 0.3533, "num_tokens": 4482021592.0, "step": 11320 }, { "epoch": 4.146933540970091, "grad_norm": 0.13971455723219203, "learning_rate": 6.681561021554897e-06, "loss": 0.3706, "num_tokens": 4482804203.0, "step": 11321 }, { "epoch": 4.147299958777997, "grad_norm": 0.15180001618212866, "learning_rate": 6.679318614046388e-06, "loss": 0.3368, "num_tokens": 4483481167.0, "step": 11322 }, { "epoch": 4.147666376585902, "grad_norm": 0.1601349909866811, "learning_rate": 6.677077069122786e-06, "loss": 0.3452, "num_tokens": 4484273253.0, "step": 11323 }, { "epoch": 4.148032794393807, "grad_norm": 0.16187557787486284, "learning_rate": 6.6748363869102865e-06, "loss": 0.3568, "num_tokens": 4485033302.0, "step": 11324 }, { "epoch": 4.148399212201713, "grad_norm": 0.14355536637373081, "learning_rate": 6.672596567535055e-06, "loss": 0.3583, "num_tokens": 4485837795.0, "step": 11325 }, { "epoch": 4.148765630009619, "grad_norm": 0.14404070515934564, "learning_rate": 6.670357611123197e-06, "loss": 0.3694, "num_tokens": 4486634640.0, "step": 11326 }, { "epoch": 4.149132047817524, "grad_norm": 0.1562266565930678, "learning_rate": 6.66811951780076e-06, "loss": 0.3704, "num_tokens": 4487420636.0, "step": 11327 }, { "epoch": 4.149498465625429, "grad_norm": 0.1507534361119331, "learning_rate": 6.665882287693764e-06, "loss": 0.324, "num_tokens": 4488233778.0, "step": 11328 }, { "epoch": 4.1498648834333345, "grad_norm": 0.14971798501824887, "learning_rate": 6.663645920928165e-06, "loss": 0.3661, "num_tokens": 4488979321.0, "step": 11329 }, { "epoch": 4.150231301241241, "grad_norm": 0.1491073936409162, "learning_rate": 6.661410417629871e-06, "loss": 0.3602, "num_tokens": 4489757818.0, "step": 11330 }, { "epoch": 4.150597719049146, "grad_norm": 0.14460237020679018, "learning_rate": 6.659175777924756e-06, "loss": 0.3594, "num_tokens": 4490646467.0, "step": 11331 }, { "epoch": 4.150964136857051, "grad_norm": 0.147021546524286, "learning_rate": 6.656942001938627e-06, "loss": 0.3751, "num_tokens": 4491355530.0, "step": 11332 }, { "epoch": 4.151330554664956, "grad_norm": 0.15309573937565096, "learning_rate": 6.654709089797246e-06, "loss": 0.3886, "num_tokens": 4492130188.0, "step": 11333 }, { "epoch": 4.1516969724728625, "grad_norm": 0.1526408495250534, "learning_rate": 6.652477041626342e-06, "loss": 0.323, "num_tokens": 4492833257.0, "step": 11334 }, { "epoch": 4.152063390280768, "grad_norm": 0.14902414091333155, "learning_rate": 6.650245857551573e-06, "loss": 0.3495, "num_tokens": 4493563832.0, "step": 11335 }, { "epoch": 4.152429808088673, "grad_norm": 0.14634389158891598, "learning_rate": 6.648015537698566e-06, "loss": 0.3588, "num_tokens": 4494311757.0, "step": 11336 }, { "epoch": 4.152796225896578, "grad_norm": 0.1507964856903119, "learning_rate": 6.645786082192891e-06, "loss": 0.4, "num_tokens": 4495239705.0, "step": 11337 }, { "epoch": 4.153162643704484, "grad_norm": 0.1501379755317081, "learning_rate": 6.643557491160071e-06, "loss": 0.3608, "num_tokens": 4496013577.0, "step": 11338 }, { "epoch": 4.15352906151239, "grad_norm": 0.15890208558473173, "learning_rate": 6.641329764725579e-06, "loss": 0.3669, "num_tokens": 4496741770.0, "step": 11339 }, { "epoch": 4.153895479320295, "grad_norm": 0.13640013653265323, "learning_rate": 6.639102903014835e-06, "loss": 0.364, "num_tokens": 4497553791.0, "step": 11340 }, { "epoch": 4.1542618971282, "grad_norm": 0.15017440673684343, "learning_rate": 6.6368769061532204e-06, "loss": 0.3566, "num_tokens": 4498322815.0, "step": 11341 }, { "epoch": 4.154628314936106, "grad_norm": 0.1631950522910812, "learning_rate": 6.634651774266071e-06, "loss": 0.3691, "num_tokens": 4499101749.0, "step": 11342 }, { "epoch": 4.1549947327440115, "grad_norm": 0.16794452148609573, "learning_rate": 6.632427507478657e-06, "loss": 0.3926, "num_tokens": 4499772493.0, "step": 11343 }, { "epoch": 4.155361150551917, "grad_norm": 0.15139564980210385, "learning_rate": 6.6302041059162114e-06, "loss": 0.3696, "num_tokens": 4500494994.0, "step": 11344 }, { "epoch": 4.155727568359822, "grad_norm": 0.1675561445966282, "learning_rate": 6.627981569703916e-06, "loss": 0.3695, "num_tokens": 4501162166.0, "step": 11345 }, { "epoch": 4.156093986167727, "grad_norm": 0.13981888474274412, "learning_rate": 6.625759898966902e-06, "loss": 0.3514, "num_tokens": 4502012109.0, "step": 11346 }, { "epoch": 4.156460403975633, "grad_norm": 0.16625013828453689, "learning_rate": 6.623539093830257e-06, "loss": 0.3373, "num_tokens": 4502669642.0, "step": 11347 }, { "epoch": 4.156826821783539, "grad_norm": 0.1598557227589832, "learning_rate": 6.621319154419012e-06, "loss": 0.3313, "num_tokens": 4503494866.0, "step": 11348 }, { "epoch": 4.157193239591444, "grad_norm": 0.13357597453320935, "learning_rate": 6.619100080858163e-06, "loss": 0.3732, "num_tokens": 4504313857.0, "step": 11349 }, { "epoch": 4.157559657399349, "grad_norm": 0.15000055840979526, "learning_rate": 6.616881873272643e-06, "loss": 0.3427, "num_tokens": 4505055483.0, "step": 11350 }, { "epoch": 4.157926075207255, "grad_norm": 0.1426202557496576, "learning_rate": 6.6146645317873406e-06, "loss": 0.3687, "num_tokens": 4505851877.0, "step": 11351 }, { "epoch": 4.158292493015161, "grad_norm": 0.14962336458726547, "learning_rate": 6.6124480565270945e-06, "loss": 0.3507, "num_tokens": 4506584521.0, "step": 11352 }, { "epoch": 4.158658910823066, "grad_norm": 0.15566739897556212, "learning_rate": 6.610232447616702e-06, "loss": 0.3418, "num_tokens": 4507243202.0, "step": 11353 }, { "epoch": 4.159025328630971, "grad_norm": 0.16585159619572223, "learning_rate": 6.608017705180901e-06, "loss": 0.3384, "num_tokens": 4507946435.0, "step": 11354 }, { "epoch": 4.159391746438877, "grad_norm": 0.15141201370596663, "learning_rate": 6.605803829344392e-06, "loss": 0.377, "num_tokens": 4508736743.0, "step": 11355 }, { "epoch": 4.1597581642467825, "grad_norm": 0.15429072948050537, "learning_rate": 6.603590820231818e-06, "loss": 0.3715, "num_tokens": 4509558509.0, "step": 11356 }, { "epoch": 4.160124582054688, "grad_norm": 0.16345448201967394, "learning_rate": 6.601378677967771e-06, "loss": 0.3609, "num_tokens": 4510220204.0, "step": 11357 }, { "epoch": 4.160490999862593, "grad_norm": 0.16314294717100863, "learning_rate": 6.599167402676807e-06, "loss": 0.3644, "num_tokens": 4510927176.0, "step": 11358 }, { "epoch": 4.160857417670499, "grad_norm": 0.16390075545889965, "learning_rate": 6.596956994483422e-06, "loss": 0.3721, "num_tokens": 4511597659.0, "step": 11359 }, { "epoch": 4.161223835478404, "grad_norm": 0.1412467934443927, "learning_rate": 6.594747453512067e-06, "loss": 0.3596, "num_tokens": 4512457897.0, "step": 11360 }, { "epoch": 4.16159025328631, "grad_norm": 0.17607934030371802, "learning_rate": 6.592538779887137e-06, "loss": 0.3928, "num_tokens": 4513097750.0, "step": 11361 }, { "epoch": 4.161956671094215, "grad_norm": 0.15309166018720585, "learning_rate": 6.590330973732994e-06, "loss": 0.3515, "num_tokens": 4513916688.0, "step": 11362 }, { "epoch": 4.162323088902121, "grad_norm": 0.15182069202827372, "learning_rate": 6.588124035173935e-06, "loss": 0.3779, "num_tokens": 4514692895.0, "step": 11363 }, { "epoch": 4.162689506710026, "grad_norm": 0.14394721693098392, "learning_rate": 6.585917964334221e-06, "loss": 0.3708, "num_tokens": 4515508520.0, "step": 11364 }, { "epoch": 4.1630559245179315, "grad_norm": 0.1433304775944581, "learning_rate": 6.5837127613380544e-06, "loss": 0.3416, "num_tokens": 4516337671.0, "step": 11365 }, { "epoch": 4.163422342325837, "grad_norm": 0.16330539673106256, "learning_rate": 6.581508426309595e-06, "loss": 0.3425, "num_tokens": 4517046595.0, "step": 11366 }, { "epoch": 4.163788760133743, "grad_norm": 0.1402015904058946, "learning_rate": 6.57930495937295e-06, "loss": 0.3451, "num_tokens": 4517819864.0, "step": 11367 }, { "epoch": 4.164155177941648, "grad_norm": 0.16045626557632517, "learning_rate": 6.577102360652173e-06, "loss": 0.3788, "num_tokens": 4518578213.0, "step": 11368 }, { "epoch": 4.164521595749553, "grad_norm": 0.1413999720583992, "learning_rate": 6.574900630271281e-06, "loss": 0.3806, "num_tokens": 4519396251.0, "step": 11369 }, { "epoch": 4.164888013557459, "grad_norm": 0.14771453109087385, "learning_rate": 6.572699768354241e-06, "loss": 0.3631, "num_tokens": 4520189951.0, "step": 11370 }, { "epoch": 4.165254431365365, "grad_norm": 0.14740752178576016, "learning_rate": 6.5704997750249576e-06, "loss": 0.3502, "num_tokens": 4521011535.0, "step": 11371 }, { "epoch": 4.16562084917327, "grad_norm": 0.14254916869292764, "learning_rate": 6.568300650407297e-06, "loss": 0.3394, "num_tokens": 4521856420.0, "step": 11372 }, { "epoch": 4.165987266981175, "grad_norm": 0.14398594383133598, "learning_rate": 6.566102394625075e-06, "loss": 0.3764, "num_tokens": 4522692392.0, "step": 11373 }, { "epoch": 4.1663536847890805, "grad_norm": 0.1490121070511275, "learning_rate": 6.563905007802054e-06, "loss": 0.3163, "num_tokens": 4523439156.0, "step": 11374 }, { "epoch": 4.166720102596987, "grad_norm": 0.1609044977059741, "learning_rate": 6.561708490061956e-06, "loss": 0.383, "num_tokens": 4524166557.0, "step": 11375 }, { "epoch": 4.167086520404892, "grad_norm": 0.14657071050256157, "learning_rate": 6.5595128415284485e-06, "loss": 0.3711, "num_tokens": 4525006262.0, "step": 11376 }, { "epoch": 4.167452938212797, "grad_norm": 0.15305489161327243, "learning_rate": 6.5573180623251535e-06, "loss": 0.358, "num_tokens": 4525686300.0, "step": 11377 }, { "epoch": 4.167819356020702, "grad_norm": 0.16160722749175438, "learning_rate": 6.555124152575636e-06, "loss": 0.358, "num_tokens": 4526405054.0, "step": 11378 }, { "epoch": 4.168185773828608, "grad_norm": 0.16233598925013834, "learning_rate": 6.552931112403415e-06, "loss": 0.3618, "num_tokens": 4527127412.0, "step": 11379 }, { "epoch": 4.168552191636514, "grad_norm": 0.14588226900340492, "learning_rate": 6.5507389419319755e-06, "loss": 0.352, "num_tokens": 4527906745.0, "step": 11380 }, { "epoch": 4.168918609444419, "grad_norm": 0.14178765715248434, "learning_rate": 6.5485476412847305e-06, "loss": 0.338, "num_tokens": 4528715537.0, "step": 11381 }, { "epoch": 4.169285027252324, "grad_norm": 0.14319235676757114, "learning_rate": 6.546357210585055e-06, "loss": 0.3474, "num_tokens": 4529528233.0, "step": 11382 }, { "epoch": 4.1696514450602296, "grad_norm": 0.14130539247625784, "learning_rate": 6.54416764995628e-06, "loss": 0.3421, "num_tokens": 4530266361.0, "step": 11383 }, { "epoch": 4.170017862868136, "grad_norm": 0.14386347049479697, "learning_rate": 6.54197895952168e-06, "loss": 0.3718, "num_tokens": 4531151456.0, "step": 11384 }, { "epoch": 4.170384280676041, "grad_norm": 0.16742363144453876, "learning_rate": 6.5397911394044775e-06, "loss": 0.3764, "num_tokens": 4531790326.0, "step": 11385 }, { "epoch": 4.170750698483946, "grad_norm": 0.1581363387677994, "learning_rate": 6.537604189727859e-06, "loss": 0.3506, "num_tokens": 4532546587.0, "step": 11386 }, { "epoch": 4.1711171162918514, "grad_norm": 0.15825964424813566, "learning_rate": 6.535418110614951e-06, "loss": 0.3664, "num_tokens": 4533240621.0, "step": 11387 }, { "epoch": 4.171483534099758, "grad_norm": 0.15629826713793463, "learning_rate": 6.533232902188833e-06, "loss": 0.3594, "num_tokens": 4533892239.0, "step": 11388 }, { "epoch": 4.171849951907663, "grad_norm": 0.15536890610164017, "learning_rate": 6.531048564572535e-06, "loss": 0.3509, "num_tokens": 4534631785.0, "step": 11389 }, { "epoch": 4.172216369715568, "grad_norm": 0.1473157034498054, "learning_rate": 6.52886509788904e-06, "loss": 0.3795, "num_tokens": 4535362839.0, "step": 11390 }, { "epoch": 4.172582787523473, "grad_norm": 0.14736347875331077, "learning_rate": 6.526682502261288e-06, "loss": 0.3593, "num_tokens": 4536210992.0, "step": 11391 }, { "epoch": 4.1729492053313795, "grad_norm": 0.14024726073021654, "learning_rate": 6.5245007778121595e-06, "loss": 0.3662, "num_tokens": 4536952131.0, "step": 11392 }, { "epoch": 4.173315623139285, "grad_norm": 0.15445366799805207, "learning_rate": 6.522319924664489e-06, "loss": 0.353, "num_tokens": 4537670816.0, "step": 11393 }, { "epoch": 4.17368204094719, "grad_norm": 0.1518801327958471, "learning_rate": 6.520139942941062e-06, "loss": 0.3773, "num_tokens": 4538484806.0, "step": 11394 }, { "epoch": 4.174048458755095, "grad_norm": 0.1364955463577584, "learning_rate": 6.517960832764614e-06, "loss": 0.3618, "num_tokens": 4539429541.0, "step": 11395 }, { "epoch": 4.174414876563001, "grad_norm": 0.13663189471786405, "learning_rate": 6.515782594257836e-06, "loss": 0.3462, "num_tokens": 4540278718.0, "step": 11396 }, { "epoch": 4.174781294370907, "grad_norm": 0.14568087358164045, "learning_rate": 6.513605227543372e-06, "loss": 0.3641, "num_tokens": 4541153032.0, "step": 11397 }, { "epoch": 4.175147712178812, "grad_norm": 0.15937173268516713, "learning_rate": 6.511428732743807e-06, "loss": 0.388, "num_tokens": 4541952248.0, "step": 11398 }, { "epoch": 4.175514129986717, "grad_norm": 0.17248569845589323, "learning_rate": 6.509253109981681e-06, "loss": 0.3913, "num_tokens": 4542666097.0, "step": 11399 }, { "epoch": 4.175880547794623, "grad_norm": 0.14772843728776497, "learning_rate": 6.5070783593794884e-06, "loss": 0.3161, "num_tokens": 4543334784.0, "step": 11400 }, { "epoch": 4.1762469656025285, "grad_norm": 0.15591003107128712, "learning_rate": 6.50490448105967e-06, "loss": 0.3488, "num_tokens": 4544108909.0, "step": 11401 }, { "epoch": 4.176613383410434, "grad_norm": 0.14236222417122313, "learning_rate": 6.50273147514462e-06, "loss": 0.3777, "num_tokens": 4544865554.0, "step": 11402 }, { "epoch": 4.176979801218339, "grad_norm": 0.1612188623196885, "learning_rate": 6.500559341756682e-06, "loss": 0.3544, "num_tokens": 4545516070.0, "step": 11403 }, { "epoch": 4.177346219026244, "grad_norm": 0.15447508469705443, "learning_rate": 6.498388081018157e-06, "loss": 0.3653, "num_tokens": 4546286845.0, "step": 11404 }, { "epoch": 4.17771263683415, "grad_norm": 0.1496718329927978, "learning_rate": 6.496217693051286e-06, "loss": 0.3594, "num_tokens": 4546994408.0, "step": 11405 }, { "epoch": 4.178079054642056, "grad_norm": 0.14749891957570369, "learning_rate": 6.494048177978267e-06, "loss": 0.3177, "num_tokens": 4547716092.0, "step": 11406 }, { "epoch": 4.178445472449961, "grad_norm": 0.16115274367733595, "learning_rate": 6.491879535921245e-06, "loss": 0.3453, "num_tokens": 4548328692.0, "step": 11407 }, { "epoch": 4.178811890257866, "grad_norm": 0.1525389007436776, "learning_rate": 6.489711767002327e-06, "loss": 0.4025, "num_tokens": 4549014842.0, "step": 11408 }, { "epoch": 4.179178308065772, "grad_norm": 0.14880820911762913, "learning_rate": 6.487544871343553e-06, "loss": 0.3605, "num_tokens": 4549745394.0, "step": 11409 }, { "epoch": 4.1795447258736775, "grad_norm": 0.15701622833312767, "learning_rate": 6.485378849066933e-06, "loss": 0.3545, "num_tokens": 4550444275.0, "step": 11410 }, { "epoch": 4.179911143681583, "grad_norm": 0.1604812115009535, "learning_rate": 6.483213700294415e-06, "loss": 0.3773, "num_tokens": 4551202169.0, "step": 11411 }, { "epoch": 4.180277561489488, "grad_norm": 0.1502718799576095, "learning_rate": 6.481049425147898e-06, "loss": 0.3468, "num_tokens": 4551934548.0, "step": 11412 }, { "epoch": 4.180643979297394, "grad_norm": 0.14110787411756512, "learning_rate": 6.478886023749233e-06, "loss": 0.3581, "num_tokens": 4552677425.0, "step": 11413 }, { "epoch": 4.181010397105299, "grad_norm": 0.16234391150023944, "learning_rate": 6.476723496220233e-06, "loss": 0.3653, "num_tokens": 4553315754.0, "step": 11414 }, { "epoch": 4.181376814913205, "grad_norm": 0.15119156204886064, "learning_rate": 6.474561842682648e-06, "loss": 0.3607, "num_tokens": 4554084625.0, "step": 11415 }, { "epoch": 4.18174323272111, "grad_norm": 0.15065737023592807, "learning_rate": 6.4724010632581776e-06, "loss": 0.3723, "num_tokens": 4554875298.0, "step": 11416 }, { "epoch": 4.182109650529016, "grad_norm": 0.1556419391416368, "learning_rate": 6.470241158068488e-06, "loss": 0.3737, "num_tokens": 4555571453.0, "step": 11417 }, { "epoch": 4.182476068336921, "grad_norm": 0.13579878843473203, "learning_rate": 6.468082127235177e-06, "loss": 0.3296, "num_tokens": 4556360207.0, "step": 11418 }, { "epoch": 4.182842486144827, "grad_norm": 0.15663272773333856, "learning_rate": 6.465923970879812e-06, "loss": 0.3863, "num_tokens": 4557150565.0, "step": 11419 }, { "epoch": 4.183208903952732, "grad_norm": 0.14738294338183902, "learning_rate": 6.463766689123895e-06, "loss": 0.3554, "num_tokens": 4557885852.0, "step": 11420 }, { "epoch": 4.183575321760638, "grad_norm": 0.14259143618398137, "learning_rate": 6.461610282088886e-06, "loss": 0.3739, "num_tokens": 4558668161.0, "step": 11421 }, { "epoch": 4.183941739568543, "grad_norm": 0.15148029114245123, "learning_rate": 6.459454749896199e-06, "loss": 0.351, "num_tokens": 4559400111.0, "step": 11422 }, { "epoch": 4.1843081573764485, "grad_norm": 0.13651659139646843, "learning_rate": 6.457300092667183e-06, "loss": 0.3934, "num_tokens": 4560295979.0, "step": 11423 }, { "epoch": 4.184674575184354, "grad_norm": 0.1467201859574472, "learning_rate": 6.45514631052316e-06, "loss": 0.3741, "num_tokens": 4561114273.0, "step": 11424 }, { "epoch": 4.18504099299226, "grad_norm": 0.13661654665530545, "learning_rate": 6.4529934035853936e-06, "loss": 0.3537, "num_tokens": 4561938320.0, "step": 11425 }, { "epoch": 4.185407410800165, "grad_norm": 0.15426531846756816, "learning_rate": 6.450841371975093e-06, "loss": 0.3692, "num_tokens": 4562683916.0, "step": 11426 }, { "epoch": 4.18577382860807, "grad_norm": 0.1585824639514531, "learning_rate": 6.448690215813422e-06, "loss": 0.3602, "num_tokens": 4563418982.0, "step": 11427 }, { "epoch": 4.186140246415976, "grad_norm": 0.1541768105708757, "learning_rate": 6.446539935221496e-06, "loss": 0.39, "num_tokens": 4564237664.0, "step": 11428 }, { "epoch": 4.186506664223881, "grad_norm": 0.15586475783496903, "learning_rate": 6.4443905303203765e-06, "loss": 0.3707, "num_tokens": 4565001474.0, "step": 11429 }, { "epoch": 4.186873082031787, "grad_norm": 0.1574532839479236, "learning_rate": 6.442242001231081e-06, "loss": 0.348, "num_tokens": 4565671361.0, "step": 11430 }, { "epoch": 4.187239499839692, "grad_norm": 0.14337873075082092, "learning_rate": 6.440094348074579e-06, "loss": 0.3507, "num_tokens": 4566481789.0, "step": 11431 }, { "epoch": 4.1876059176475975, "grad_norm": 0.15681449462860766, "learning_rate": 6.437947570971787e-06, "loss": 0.3607, "num_tokens": 4567180575.0, "step": 11432 }, { "epoch": 4.187972335455503, "grad_norm": 0.163355353769998, "learning_rate": 6.435801670043571e-06, "loss": 0.3414, "num_tokens": 4567906322.0, "step": 11433 }, { "epoch": 4.188338753263409, "grad_norm": 0.1329164010995483, "learning_rate": 6.433656645410751e-06, "loss": 0.3514, "num_tokens": 4568745484.0, "step": 11434 }, { "epoch": 4.188705171071314, "grad_norm": 0.1528930235371282, "learning_rate": 6.4315124971940875e-06, "loss": 0.3717, "num_tokens": 4569487363.0, "step": 11435 }, { "epoch": 4.189071588879219, "grad_norm": 0.16131949648526905, "learning_rate": 6.4293692255143145e-06, "loss": 0.4019, "num_tokens": 4570132575.0, "step": 11436 }, { "epoch": 4.189438006687125, "grad_norm": 0.15051327913423673, "learning_rate": 6.4272268304920924e-06, "loss": 0.3644, "num_tokens": 4570988411.0, "step": 11437 }, { "epoch": 4.189804424495031, "grad_norm": 0.14748995082460642, "learning_rate": 6.425085312248051e-06, "loss": 0.353, "num_tokens": 4571746727.0, "step": 11438 }, { "epoch": 4.190170842302936, "grad_norm": 0.1358588338605338, "learning_rate": 6.422944670902753e-06, "loss": 0.3664, "num_tokens": 4572664750.0, "step": 11439 }, { "epoch": 4.190537260110841, "grad_norm": 0.15047715276115012, "learning_rate": 6.420804906576723e-06, "loss": 0.3686, "num_tokens": 4573386266.0, "step": 11440 }, { "epoch": 4.1909036779187465, "grad_norm": 0.15978476665113386, "learning_rate": 6.418666019390438e-06, "loss": 0.339, "num_tokens": 4574013809.0, "step": 11441 }, { "epoch": 4.191270095726653, "grad_norm": 0.15024381211633614, "learning_rate": 6.416528009464322e-06, "loss": 0.37, "num_tokens": 4574804140.0, "step": 11442 }, { "epoch": 4.191636513534558, "grad_norm": 0.14626517149830018, "learning_rate": 6.414390876918745e-06, "loss": 0.372, "num_tokens": 4575572204.0, "step": 11443 }, { "epoch": 4.192002931342463, "grad_norm": 0.1618184743477846, "learning_rate": 6.412254621874028e-06, "loss": 0.3587, "num_tokens": 4576281918.0, "step": 11444 }, { "epoch": 4.192369349150368, "grad_norm": 0.13600057958896686, "learning_rate": 6.410119244450455e-06, "loss": 0.3536, "num_tokens": 4577103610.0, "step": 11445 }, { "epoch": 4.1927357669582745, "grad_norm": 0.1509133550090022, "learning_rate": 6.407984744768248e-06, "loss": 0.3759, "num_tokens": 4577896286.0, "step": 11446 }, { "epoch": 4.19310218476618, "grad_norm": 0.1555675654059727, "learning_rate": 6.405851122947586e-06, "loss": 0.3501, "num_tokens": 4578621865.0, "step": 11447 }, { "epoch": 4.193468602574085, "grad_norm": 0.14409675055464066, "learning_rate": 6.403718379108596e-06, "loss": 0.3259, "num_tokens": 4579299275.0, "step": 11448 }, { "epoch": 4.19383502038199, "grad_norm": 0.15250456555547576, "learning_rate": 6.401586513371353e-06, "loss": 0.3845, "num_tokens": 4579997262.0, "step": 11449 }, { "epoch": 4.194201438189896, "grad_norm": 0.13653434749620574, "learning_rate": 6.3994555258558826e-06, "loss": 0.3614, "num_tokens": 4580864136.0, "step": 11450 }, { "epoch": 4.194567855997802, "grad_norm": 0.1484528787644955, "learning_rate": 6.397325416682165e-06, "loss": 0.3664, "num_tokens": 4581673846.0, "step": 11451 }, { "epoch": 4.194934273805707, "grad_norm": 0.14649093165118426, "learning_rate": 6.395196185970138e-06, "loss": 0.3782, "num_tokens": 4582456029.0, "step": 11452 }, { "epoch": 4.195300691613612, "grad_norm": 0.15920377290029072, "learning_rate": 6.393067833839678e-06, "loss": 0.3734, "num_tokens": 4583211537.0, "step": 11453 }, { "epoch": 4.195667109421517, "grad_norm": 0.16061559872323203, "learning_rate": 6.39094036041061e-06, "loss": 0.3719, "num_tokens": 4583841115.0, "step": 11454 }, { "epoch": 4.196033527229424, "grad_norm": 0.15429266625777802, "learning_rate": 6.388813765802721e-06, "loss": 0.3895, "num_tokens": 4584660960.0, "step": 11455 }, { "epoch": 4.196399945037329, "grad_norm": 0.14436969063603403, "learning_rate": 6.386688050135737e-06, "loss": 0.3528, "num_tokens": 4585459796.0, "step": 11456 }, { "epoch": 4.196766362845234, "grad_norm": 0.15172157782225584, "learning_rate": 6.38456321352934e-06, "loss": 0.367, "num_tokens": 4586246645.0, "step": 11457 }, { "epoch": 4.197132780653139, "grad_norm": 0.13842534047050403, "learning_rate": 6.382439256103164e-06, "loss": 0.361, "num_tokens": 4587095355.0, "step": 11458 }, { "epoch": 4.1974991984610455, "grad_norm": 0.136981912698006, "learning_rate": 6.380316177976798e-06, "loss": 0.3681, "num_tokens": 4588017969.0, "step": 11459 }, { "epoch": 4.197865616268951, "grad_norm": 0.14580824698173403, "learning_rate": 6.378193979269769e-06, "loss": 0.3653, "num_tokens": 4588718591.0, "step": 11460 }, { "epoch": 4.198232034076856, "grad_norm": 0.15900541090221446, "learning_rate": 6.3760726601015645e-06, "loss": 0.3651, "num_tokens": 4589469405.0, "step": 11461 }, { "epoch": 4.198598451884761, "grad_norm": 0.13106964681143196, "learning_rate": 6.373952220591613e-06, "loss": 0.3614, "num_tokens": 4590389250.0, "step": 11462 }, { "epoch": 4.198964869692667, "grad_norm": 0.14947421484342766, "learning_rate": 6.3718326608593065e-06, "loss": 0.3733, "num_tokens": 4591211330.0, "step": 11463 }, { "epoch": 4.199331287500573, "grad_norm": 0.14034020166445665, "learning_rate": 6.3697139810239735e-06, "loss": 0.3845, "num_tokens": 4591977935.0, "step": 11464 }, { "epoch": 4.199697705308478, "grad_norm": 0.16003283302010804, "learning_rate": 6.367596181204911e-06, "loss": 0.363, "num_tokens": 4592785599.0, "step": 11465 }, { "epoch": 4.200064123116383, "grad_norm": 0.14824389464133464, "learning_rate": 6.365479261521346e-06, "loss": 0.3515, "num_tokens": 4593605663.0, "step": 11466 }, { "epoch": 4.200430540924289, "grad_norm": 0.15304454960590375, "learning_rate": 6.3633632220924694e-06, "loss": 0.3885, "num_tokens": 4594407392.0, "step": 11467 }, { "epoch": 4.2007969587321945, "grad_norm": 0.15570018268075306, "learning_rate": 6.36124806303741e-06, "loss": 0.377, "num_tokens": 4595144529.0, "step": 11468 }, { "epoch": 4.2011633765401, "grad_norm": 0.14203546944093365, "learning_rate": 6.35913378447527e-06, "loss": 0.3684, "num_tokens": 4596010019.0, "step": 11469 }, { "epoch": 4.201529794348005, "grad_norm": 0.1490727447659847, "learning_rate": 6.357020386525077e-06, "loss": 0.3553, "num_tokens": 4596731058.0, "step": 11470 }, { "epoch": 4.201896212155911, "grad_norm": 0.14796538778721263, "learning_rate": 6.354907869305817e-06, "loss": 0.3964, "num_tokens": 4597525961.0, "step": 11471 }, { "epoch": 4.202262629963816, "grad_norm": 0.16286790787162095, "learning_rate": 6.35279623293644e-06, "loss": 0.3894, "num_tokens": 4598317314.0, "step": 11472 }, { "epoch": 4.202629047771722, "grad_norm": 0.1423926892009017, "learning_rate": 6.350685477535826e-06, "loss": 0.3474, "num_tokens": 4599024355.0, "step": 11473 }, { "epoch": 4.202995465579627, "grad_norm": 0.16774469797592884, "learning_rate": 6.348575603222821e-06, "loss": 0.4204, "num_tokens": 4599753520.0, "step": 11474 }, { "epoch": 4.203361883387533, "grad_norm": 0.15815823697088802, "learning_rate": 6.346466610116213e-06, "loss": 0.3766, "num_tokens": 4600511665.0, "step": 11475 }, { "epoch": 4.203728301195438, "grad_norm": 0.1441220074312367, "learning_rate": 6.344358498334742e-06, "loss": 0.3503, "num_tokens": 4601336745.0, "step": 11476 }, { "epoch": 4.2040947190033435, "grad_norm": 0.1704422700580494, "learning_rate": 6.342251267997099e-06, "loss": 0.3867, "num_tokens": 4602064535.0, "step": 11477 }, { "epoch": 4.204461136811249, "grad_norm": 0.13130805390449912, "learning_rate": 6.34014491922192e-06, "loss": 0.3433, "num_tokens": 4602933269.0, "step": 11478 }, { "epoch": 4.204827554619155, "grad_norm": 0.14846273005515775, "learning_rate": 6.3380394521278025e-06, "loss": 0.3721, "num_tokens": 4603678501.0, "step": 11479 }, { "epoch": 4.20519397242706, "grad_norm": 0.1634741344128555, "learning_rate": 6.335934866833291e-06, "loss": 0.3168, "num_tokens": 4604391525.0, "step": 11480 }, { "epoch": 4.205560390234965, "grad_norm": 0.14723920897391243, "learning_rate": 6.333831163456875e-06, "loss": 0.348, "num_tokens": 4605267853.0, "step": 11481 }, { "epoch": 4.205926808042871, "grad_norm": 0.15251295158588157, "learning_rate": 6.3317283421169985e-06, "loss": 0.3528, "num_tokens": 4606021255.0, "step": 11482 }, { "epoch": 4.206293225850776, "grad_norm": 0.15216270719151506, "learning_rate": 6.32962640293205e-06, "loss": 0.3558, "num_tokens": 4606742601.0, "step": 11483 }, { "epoch": 4.206659643658682, "grad_norm": 0.15888176818048552, "learning_rate": 6.327525346020371e-06, "loss": 0.3594, "num_tokens": 4607399381.0, "step": 11484 }, { "epoch": 4.207026061466587, "grad_norm": 0.15284394085963474, "learning_rate": 6.3254251715002614e-06, "loss": 0.3781, "num_tokens": 4608178713.0, "step": 11485 }, { "epoch": 4.207392479274493, "grad_norm": 0.154998472589671, "learning_rate": 6.323325879489968e-06, "loss": 0.3984, "num_tokens": 4608990335.0, "step": 11486 }, { "epoch": 4.207758897082398, "grad_norm": 0.14684563619662822, "learning_rate": 6.32122747010768e-06, "loss": 0.3832, "num_tokens": 4609761315.0, "step": 11487 }, { "epoch": 4.208125314890304, "grad_norm": 0.15238523559956277, "learning_rate": 6.3191299434715425e-06, "loss": 0.3676, "num_tokens": 4610534980.0, "step": 11488 }, { "epoch": 4.208491732698209, "grad_norm": 0.14639409569093828, "learning_rate": 6.317033299699652e-06, "loss": 0.3719, "num_tokens": 4611296476.0, "step": 11489 }, { "epoch": 4.2088581505061144, "grad_norm": 0.14644979204103487, "learning_rate": 6.314937538910046e-06, "loss": 0.345, "num_tokens": 4612066225.0, "step": 11490 }, { "epoch": 4.20922456831402, "grad_norm": 0.13817832231806673, "learning_rate": 6.312842661220733e-06, "loss": 0.3527, "num_tokens": 4612924887.0, "step": 11491 }, { "epoch": 4.209590986121926, "grad_norm": 0.15054323265316236, "learning_rate": 6.310748666749647e-06, "loss": 0.3824, "num_tokens": 4613729294.0, "step": 11492 }, { "epoch": 4.209957403929831, "grad_norm": 0.17466695472566823, "learning_rate": 6.308655555614694e-06, "loss": 0.3882, "num_tokens": 4614310561.0, "step": 11493 }, { "epoch": 4.210323821737736, "grad_norm": 0.14333883126120242, "learning_rate": 6.3065633279337144e-06, "loss": 0.3594, "num_tokens": 4615179134.0, "step": 11494 }, { "epoch": 4.210690239545642, "grad_norm": 0.16264166242475583, "learning_rate": 6.304471983824507e-06, "loss": 0.3792, "num_tokens": 4615899001.0, "step": 11495 }, { "epoch": 4.211056657353548, "grad_norm": 0.14276014297910214, "learning_rate": 6.302381523404815e-06, "loss": 0.3523, "num_tokens": 4616622237.0, "step": 11496 }, { "epoch": 4.211423075161453, "grad_norm": 0.1728733690676493, "learning_rate": 6.30029194679234e-06, "loss": 0.367, "num_tokens": 4617277495.0, "step": 11497 }, { "epoch": 4.211789492969358, "grad_norm": 0.1519370260892895, "learning_rate": 6.2982032541047266e-06, "loss": 0.3416, "num_tokens": 4618002848.0, "step": 11498 }, { "epoch": 4.2121559107772635, "grad_norm": 0.15816962560973866, "learning_rate": 6.296115445459572e-06, "loss": 0.3648, "num_tokens": 4618761034.0, "step": 11499 }, { "epoch": 4.21252232858517, "grad_norm": 0.1437466954778708, "learning_rate": 6.294028520974428e-06, "loss": 0.3818, "num_tokens": 4619544839.0, "step": 11500 }, { "epoch": 4.212888746393075, "grad_norm": 0.15832155759732322, "learning_rate": 6.2919424807667865e-06, "loss": 0.3756, "num_tokens": 4620243891.0, "step": 11501 }, { "epoch": 4.21325516420098, "grad_norm": 0.16202207321534529, "learning_rate": 6.289857324954102e-06, "loss": 0.3642, "num_tokens": 4620996196.0, "step": 11502 }, { "epoch": 4.213621582008885, "grad_norm": 0.14474870871654474, "learning_rate": 6.2877730536537686e-06, "loss": 0.3491, "num_tokens": 4621738408.0, "step": 11503 }, { "epoch": 4.2139879998167915, "grad_norm": 0.15619954772161257, "learning_rate": 6.2856896669831395e-06, "loss": 0.349, "num_tokens": 4622390864.0, "step": 11504 }, { "epoch": 4.214354417624697, "grad_norm": 0.13978228623362052, "learning_rate": 6.283607165059503e-06, "loss": 0.3673, "num_tokens": 4623181358.0, "step": 11505 }, { "epoch": 4.214720835432602, "grad_norm": 0.15546495514911135, "learning_rate": 6.28152554800012e-06, "loss": 0.3828, "num_tokens": 4623888308.0, "step": 11506 }, { "epoch": 4.215087253240507, "grad_norm": 0.15274653585950976, "learning_rate": 6.279444815922184e-06, "loss": 0.3535, "num_tokens": 4624619703.0, "step": 11507 }, { "epoch": 4.215453671048413, "grad_norm": 0.1464342537024719, "learning_rate": 6.277364968942847e-06, "loss": 0.3521, "num_tokens": 4625455971.0, "step": 11508 }, { "epoch": 4.215820088856319, "grad_norm": 0.1609262964872918, "learning_rate": 6.275286007179209e-06, "loss": 0.3925, "num_tokens": 4626080938.0, "step": 11509 }, { "epoch": 4.216186506664224, "grad_norm": 0.16440183234918443, "learning_rate": 6.273207930748317e-06, "loss": 0.3947, "num_tokens": 4626876725.0, "step": 11510 }, { "epoch": 4.216552924472129, "grad_norm": 0.15416940909124127, "learning_rate": 6.27113073976717e-06, "loss": 0.3636, "num_tokens": 4627607796.0, "step": 11511 }, { "epoch": 4.216919342280034, "grad_norm": 0.166985781972723, "learning_rate": 6.269054434352717e-06, "loss": 0.3768, "num_tokens": 4628444760.0, "step": 11512 }, { "epoch": 4.2172857600879405, "grad_norm": 0.1477501777656695, "learning_rate": 6.266979014621861e-06, "loss": 0.3546, "num_tokens": 4629215514.0, "step": 11513 }, { "epoch": 4.217652177895846, "grad_norm": 0.15913569948618345, "learning_rate": 6.264904480691454e-06, "loss": 0.3721, "num_tokens": 4630017928.0, "step": 11514 }, { "epoch": 4.218018595703751, "grad_norm": 0.13995694757670663, "learning_rate": 6.262830832678297e-06, "loss": 0.3572, "num_tokens": 4630806651.0, "step": 11515 }, { "epoch": 4.218385013511656, "grad_norm": 0.1546200491878713, "learning_rate": 6.260758070699137e-06, "loss": 0.3796, "num_tokens": 4631498429.0, "step": 11516 }, { "epoch": 4.218751431319562, "grad_norm": 0.1554986732981462, "learning_rate": 6.258686194870675e-06, "loss": 0.3748, "num_tokens": 4632216667.0, "step": 11517 }, { "epoch": 4.219117849127468, "grad_norm": 0.16131005311958968, "learning_rate": 6.256615205309558e-06, "loss": 0.3721, "num_tokens": 4632961811.0, "step": 11518 }, { "epoch": 4.219484266935373, "grad_norm": 0.1536146240065346, "learning_rate": 6.254545102132391e-06, "loss": 0.3401, "num_tokens": 4633758165.0, "step": 11519 }, { "epoch": 4.219850684743278, "grad_norm": 0.1422593181502851, "learning_rate": 6.25247588545573e-06, "loss": 0.3762, "num_tokens": 4634554295.0, "step": 11520 }, { "epoch": 4.220217102551184, "grad_norm": 0.15974801200165611, "learning_rate": 6.25040755539607e-06, "loss": 0.3885, "num_tokens": 4635248736.0, "step": 11521 }, { "epoch": 4.22058352035909, "grad_norm": 0.15860112222647008, "learning_rate": 6.248340112069864e-06, "loss": 0.3968, "num_tokens": 4635956668.0, "step": 11522 }, { "epoch": 4.220949938166995, "grad_norm": 0.1544620404806736, "learning_rate": 6.246273555593508e-06, "loss": 0.3784, "num_tokens": 4636695475.0, "step": 11523 }, { "epoch": 4.2213163559749, "grad_norm": 0.13653255278708684, "learning_rate": 6.244207886083361e-06, "loss": 0.3687, "num_tokens": 4637513954.0, "step": 11524 }, { "epoch": 4.221682773782806, "grad_norm": 0.14955134475415988, "learning_rate": 6.242143103655722e-06, "loss": 0.3588, "num_tokens": 4638264811.0, "step": 11525 }, { "epoch": 4.2220491915907115, "grad_norm": 0.1558769952381204, "learning_rate": 6.2400792084268366e-06, "loss": 0.3954, "num_tokens": 4639035327.0, "step": 11526 }, { "epoch": 4.222415609398617, "grad_norm": 0.15857072253879492, "learning_rate": 6.238016200512913e-06, "loss": 0.3855, "num_tokens": 4639742645.0, "step": 11527 }, { "epoch": 4.222782027206522, "grad_norm": 0.14556861572138938, "learning_rate": 6.235954080030105e-06, "loss": 0.3362, "num_tokens": 4640404029.0, "step": 11528 }, { "epoch": 4.223148445014428, "grad_norm": 0.15215872463689822, "learning_rate": 6.2338928470945015e-06, "loss": 0.3872, "num_tokens": 4641171156.0, "step": 11529 }, { "epoch": 4.223514862822333, "grad_norm": 0.16921897066912914, "learning_rate": 6.231832501822169e-06, "loss": 0.3478, "num_tokens": 4641759596.0, "step": 11530 }, { "epoch": 4.223881280630239, "grad_norm": 0.15539147195313546, "learning_rate": 6.229773044329101e-06, "loss": 0.3804, "num_tokens": 4642435375.0, "step": 11531 }, { "epoch": 4.224247698438144, "grad_norm": 0.1506051796149451, "learning_rate": 6.22771447473125e-06, "loss": 0.343, "num_tokens": 4643252421.0, "step": 11532 }, { "epoch": 4.22461411624605, "grad_norm": 0.155768754800799, "learning_rate": 6.225656793144515e-06, "loss": 0.363, "num_tokens": 4643979817.0, "step": 11533 }, { "epoch": 4.224980534053955, "grad_norm": 0.14623747693814365, "learning_rate": 6.223599999684749e-06, "loss": 0.3562, "num_tokens": 4644737260.0, "step": 11534 }, { "epoch": 4.2253469518618605, "grad_norm": 0.16201729854427843, "learning_rate": 6.221544094467759e-06, "loss": 0.3636, "num_tokens": 4645416677.0, "step": 11535 }, { "epoch": 4.225713369669766, "grad_norm": 0.15239446169886856, "learning_rate": 6.219489077609293e-06, "loss": 0.3579, "num_tokens": 4646131357.0, "step": 11536 }, { "epoch": 4.226079787477671, "grad_norm": 0.15115874321582756, "learning_rate": 6.217434949225051e-06, "loss": 0.3555, "num_tokens": 4646847932.0, "step": 11537 }, { "epoch": 4.226446205285577, "grad_norm": 0.13470746119044955, "learning_rate": 6.215381709430687e-06, "loss": 0.3576, "num_tokens": 4647611503.0, "step": 11538 }, { "epoch": 4.226812623093482, "grad_norm": 0.16719977242479042, "learning_rate": 6.2133293583417976e-06, "loss": 0.3745, "num_tokens": 4648347161.0, "step": 11539 }, { "epoch": 4.227179040901388, "grad_norm": 0.15854858420020498, "learning_rate": 6.211277896073937e-06, "loss": 0.4051, "num_tokens": 4649080494.0, "step": 11540 }, { "epoch": 4.227545458709293, "grad_norm": 0.15017996641702275, "learning_rate": 6.209227322742611e-06, "loss": 0.3716, "num_tokens": 4649767936.0, "step": 11541 }, { "epoch": 4.227911876517199, "grad_norm": 0.15182287326035726, "learning_rate": 6.207177638463268e-06, "loss": 0.3852, "num_tokens": 4650680123.0, "step": 11542 }, { "epoch": 4.228278294325104, "grad_norm": 0.14949656679614157, "learning_rate": 6.2051288433513076e-06, "loss": 0.3551, "num_tokens": 4651415656.0, "step": 11543 }, { "epoch": 4.2286447121330095, "grad_norm": 0.14244950688001223, "learning_rate": 6.203080937522084e-06, "loss": 0.3851, "num_tokens": 4652245826.0, "step": 11544 }, { "epoch": 4.229011129940915, "grad_norm": 0.16091749993963717, "learning_rate": 6.201033921090892e-06, "loss": 0.412, "num_tokens": 4653003609.0, "step": 11545 }, { "epoch": 4.229377547748821, "grad_norm": 0.16201207817111637, "learning_rate": 6.198987794172989e-06, "loss": 0.3853, "num_tokens": 4653678437.0, "step": 11546 }, { "epoch": 4.229743965556726, "grad_norm": 0.14599883786630913, "learning_rate": 6.196942556883572e-06, "loss": 0.3391, "num_tokens": 4654471110.0, "step": 11547 }, { "epoch": 4.230110383364631, "grad_norm": 0.15379647653860537, "learning_rate": 6.194898209337799e-06, "loss": 0.374, "num_tokens": 4655255910.0, "step": 11548 }, { "epoch": 4.230476801172537, "grad_norm": 0.155601272641377, "learning_rate": 6.1928547516507656e-06, "loss": 0.3834, "num_tokens": 4656049997.0, "step": 11549 }, { "epoch": 4.230843218980443, "grad_norm": 0.1581448743710269, "learning_rate": 6.190812183937521e-06, "loss": 0.3979, "num_tokens": 4656754640.0, "step": 11550 }, { "epoch": 4.231209636788348, "grad_norm": 0.15487210724670902, "learning_rate": 6.1887705063130645e-06, "loss": 0.4026, "num_tokens": 4657481654.0, "step": 11551 }, { "epoch": 4.231576054596253, "grad_norm": 0.1635384742859752, "learning_rate": 6.186729718892355e-06, "loss": 0.35, "num_tokens": 4658195503.0, "step": 11552 }, { "epoch": 4.2319424724041586, "grad_norm": 0.13808425262150417, "learning_rate": 6.184689821790287e-06, "loss": 0.3572, "num_tokens": 4658950629.0, "step": 11553 }, { "epoch": 4.232308890212065, "grad_norm": 0.13237874982276907, "learning_rate": 6.182650815121707e-06, "loss": 0.3465, "num_tokens": 4659824909.0, "step": 11554 }, { "epoch": 4.23267530801997, "grad_norm": 0.14432311731104874, "learning_rate": 6.180612699001426e-06, "loss": 0.3632, "num_tokens": 4660616560.0, "step": 11555 }, { "epoch": 4.233041725827875, "grad_norm": 0.16178458211871263, "learning_rate": 6.178575473544183e-06, "loss": 0.4064, "num_tokens": 4661296499.0, "step": 11556 }, { "epoch": 4.23340814363578, "grad_norm": 0.1468153790699997, "learning_rate": 6.176539138864685e-06, "loss": 0.3316, "num_tokens": 4662079897.0, "step": 11557 }, { "epoch": 4.233774561443687, "grad_norm": 0.1435052935159774, "learning_rate": 6.174503695077579e-06, "loss": 0.3499, "num_tokens": 4662898985.0, "step": 11558 }, { "epoch": 4.234140979251592, "grad_norm": 0.17136371702819334, "learning_rate": 6.172469142297466e-06, "loss": 0.3943, "num_tokens": 4663605644.0, "step": 11559 }, { "epoch": 4.234507397059497, "grad_norm": 0.1485090943810682, "learning_rate": 6.17043548063889e-06, "loss": 0.3488, "num_tokens": 4664373965.0, "step": 11560 }, { "epoch": 4.234873814867402, "grad_norm": 0.13978189698952392, "learning_rate": 6.168402710216359e-06, "loss": 0.3579, "num_tokens": 4665269781.0, "step": 11561 }, { "epoch": 4.235240232675308, "grad_norm": 0.15106133720679346, "learning_rate": 6.166370831144314e-06, "loss": 0.4115, "num_tokens": 4666019224.0, "step": 11562 }, { "epoch": 4.235606650483214, "grad_norm": 0.1628259710606393, "learning_rate": 6.164339843537158e-06, "loss": 0.3814, "num_tokens": 4666660783.0, "step": 11563 }, { "epoch": 4.235973068291119, "grad_norm": 0.17598246451453167, "learning_rate": 6.162309747509242e-06, "loss": 0.3687, "num_tokens": 4667442172.0, "step": 11564 }, { "epoch": 4.236339486099024, "grad_norm": 0.15866512455142662, "learning_rate": 6.160280543174861e-06, "loss": 0.3922, "num_tokens": 4668180841.0, "step": 11565 }, { "epoch": 4.2367059039069295, "grad_norm": 0.14498868145495233, "learning_rate": 6.158252230648263e-06, "loss": 0.3833, "num_tokens": 4669104226.0, "step": 11566 }, { "epoch": 4.237072321714836, "grad_norm": 0.1335924654372111, "learning_rate": 6.156224810043643e-06, "loss": 0.3583, "num_tokens": 4669990612.0, "step": 11567 }, { "epoch": 4.237438739522741, "grad_norm": 0.14924253736785342, "learning_rate": 6.154198281475152e-06, "loss": 0.4115, "num_tokens": 4670717707.0, "step": 11568 }, { "epoch": 4.237805157330646, "grad_norm": 0.14429095313808016, "learning_rate": 6.1521726450568934e-06, "loss": 0.3694, "num_tokens": 4671711126.0, "step": 11569 }, { "epoch": 4.238171575138551, "grad_norm": 0.14540616674838705, "learning_rate": 6.150147900902906e-06, "loss": 0.3878, "num_tokens": 4672597461.0, "step": 11570 }, { "epoch": 4.2385379929464575, "grad_norm": 0.13544103985538228, "learning_rate": 6.148124049127192e-06, "loss": 0.3496, "num_tokens": 4673503102.0, "step": 11571 }, { "epoch": 4.238904410754363, "grad_norm": 0.1420565885916495, "learning_rate": 6.146101089843697e-06, "loss": 0.3577, "num_tokens": 4674256508.0, "step": 11572 }, { "epoch": 4.239270828562268, "grad_norm": 0.15826243832773765, "learning_rate": 6.144079023166314e-06, "loss": 0.3549, "num_tokens": 4675004255.0, "step": 11573 }, { "epoch": 4.239637246370173, "grad_norm": 0.1469697811866412, "learning_rate": 6.142057849208895e-06, "loss": 0.3749, "num_tokens": 4675782499.0, "step": 11574 }, { "epoch": 4.240003664178079, "grad_norm": 0.15051315475927138, "learning_rate": 6.1400375680852306e-06, "loss": 0.375, "num_tokens": 4676574127.0, "step": 11575 }, { "epoch": 4.240370081985985, "grad_norm": 0.1445779048345714, "learning_rate": 6.138018179909073e-06, "loss": 0.3767, "num_tokens": 4677361864.0, "step": 11576 }, { "epoch": 4.24073649979389, "grad_norm": 0.1565705356861603, "learning_rate": 6.135999684794114e-06, "loss": 0.3627, "num_tokens": 4678070473.0, "step": 11577 }, { "epoch": 4.241102917601795, "grad_norm": 0.14379730108650113, "learning_rate": 6.133982082854e-06, "loss": 0.3687, "num_tokens": 4678869837.0, "step": 11578 }, { "epoch": 4.241469335409701, "grad_norm": 0.14790849550699603, "learning_rate": 6.131965374202322e-06, "loss": 0.367, "num_tokens": 4679575648.0, "step": 11579 }, { "epoch": 4.2418357532176065, "grad_norm": 0.16037666801368455, "learning_rate": 6.129949558952632e-06, "loss": 0.3871, "num_tokens": 4680313229.0, "step": 11580 }, { "epoch": 4.242202171025512, "grad_norm": 0.15641120946514045, "learning_rate": 6.127934637218414e-06, "loss": 0.3774, "num_tokens": 4681094441.0, "step": 11581 }, { "epoch": 4.242568588833417, "grad_norm": 0.14609081123937184, "learning_rate": 6.125920609113125e-06, "loss": 0.3583, "num_tokens": 4681867071.0, "step": 11582 }, { "epoch": 4.242935006641323, "grad_norm": 0.15539731556582317, "learning_rate": 6.123907474750151e-06, "loss": 0.3939, "num_tokens": 4682603258.0, "step": 11583 }, { "epoch": 4.243301424449228, "grad_norm": 0.1486316299884644, "learning_rate": 6.121895234242834e-06, "loss": 0.3665, "num_tokens": 4683344796.0, "step": 11584 }, { "epoch": 4.243667842257134, "grad_norm": 0.16950109371966196, "learning_rate": 6.119883887704474e-06, "loss": 0.3925, "num_tokens": 4684040238.0, "step": 11585 }, { "epoch": 4.244034260065039, "grad_norm": 0.14660896712132765, "learning_rate": 6.11787343524831e-06, "loss": 0.3718, "num_tokens": 4684845821.0, "step": 11586 }, { "epoch": 4.244400677872944, "grad_norm": 0.14714558235124842, "learning_rate": 6.1158638769875345e-06, "loss": 0.3879, "num_tokens": 4685576761.0, "step": 11587 }, { "epoch": 4.24476709568085, "grad_norm": 0.15637583657137788, "learning_rate": 6.113855213035283e-06, "loss": 0.3511, "num_tokens": 4686294935.0, "step": 11588 }, { "epoch": 4.245133513488756, "grad_norm": 0.14249089126568587, "learning_rate": 6.111847443504661e-06, "loss": 0.3318, "num_tokens": 4687066910.0, "step": 11589 }, { "epoch": 4.245499931296661, "grad_norm": 0.1418810132516303, "learning_rate": 6.1098405685086985e-06, "loss": 0.3378, "num_tokens": 4687802329.0, "step": 11590 }, { "epoch": 4.245866349104566, "grad_norm": 0.15163135898207097, "learning_rate": 6.107834588160393e-06, "loss": 0.3506, "num_tokens": 4688581041.0, "step": 11591 }, { "epoch": 4.246232766912472, "grad_norm": 0.14674564017708747, "learning_rate": 6.105829502572682e-06, "loss": 0.3584, "num_tokens": 4689331757.0, "step": 11592 }, { "epoch": 4.2465991847203775, "grad_norm": 0.15947156745928695, "learning_rate": 6.10382531185846e-06, "loss": 0.3614, "num_tokens": 4690019848.0, "step": 11593 }, { "epoch": 4.246965602528283, "grad_norm": 0.1514406364792899, "learning_rate": 6.101822016130557e-06, "loss": 0.3634, "num_tokens": 4690755656.0, "step": 11594 }, { "epoch": 4.247332020336188, "grad_norm": 0.13745725695815755, "learning_rate": 6.099819615501771e-06, "loss": 0.3817, "num_tokens": 4691561685.0, "step": 11595 }, { "epoch": 4.247698438144094, "grad_norm": 0.14556437752226778, "learning_rate": 6.097818110084843e-06, "loss": 0.3897, "num_tokens": 4692390457.0, "step": 11596 }, { "epoch": 4.248064855951999, "grad_norm": 0.15619380629640664, "learning_rate": 6.095817499992457e-06, "loss": 0.3628, "num_tokens": 4693093628.0, "step": 11597 }, { "epoch": 4.248431273759905, "grad_norm": 0.14958332654951625, "learning_rate": 6.093817785337253e-06, "loss": 0.3453, "num_tokens": 4693859358.0, "step": 11598 }, { "epoch": 4.24879769156781, "grad_norm": 0.1507586246589163, "learning_rate": 6.091818966231818e-06, "loss": 0.3475, "num_tokens": 4694707878.0, "step": 11599 }, { "epoch": 4.249164109375716, "grad_norm": 0.13788568998165712, "learning_rate": 6.0898210427886914e-06, "loss": 0.366, "num_tokens": 4695602835.0, "step": 11600 }, { "epoch": 4.249530527183621, "grad_norm": 0.14353905451874588, "learning_rate": 6.087824015120354e-06, "loss": 0.3646, "num_tokens": 4696400002.0, "step": 11601 }, { "epoch": 4.2498969449915265, "grad_norm": 0.1370140455268736, "learning_rate": 6.085827883339248e-06, "loss": 0.3697, "num_tokens": 4697357324.0, "step": 11602 }, { "epoch": 4.250263362799432, "grad_norm": 0.15551730197544253, "learning_rate": 6.083832647557761e-06, "loss": 0.348, "num_tokens": 4698055563.0, "step": 11603 }, { "epoch": 4.250629780607338, "grad_norm": 0.14176447776611553, "learning_rate": 6.0818383078882285e-06, "loss": 0.3836, "num_tokens": 4698935812.0, "step": 11604 }, { "epoch": 4.250996198415243, "grad_norm": 0.1514386308921839, "learning_rate": 6.079844864442932e-06, "loss": 0.3748, "num_tokens": 4699643558.0, "step": 11605 }, { "epoch": 4.251362616223148, "grad_norm": 0.15767255959871665, "learning_rate": 6.077852317334105e-06, "loss": 0.3858, "num_tokens": 4700376104.0, "step": 11606 }, { "epoch": 4.251729034031054, "grad_norm": 0.151117031477035, "learning_rate": 6.075860666673939e-06, "loss": 0.3838, "num_tokens": 4701148291.0, "step": 11607 }, { "epoch": 4.25209545183896, "grad_norm": 0.1584456571592457, "learning_rate": 6.073869912574563e-06, "loss": 0.3862, "num_tokens": 4701905049.0, "step": 11608 }, { "epoch": 4.252461869646865, "grad_norm": 0.14820997419972418, "learning_rate": 6.071880055148059e-06, "loss": 0.3578, "num_tokens": 4702760618.0, "step": 11609 }, { "epoch": 4.25282828745477, "grad_norm": 0.14604754414380444, "learning_rate": 6.069891094506466e-06, "loss": 0.3513, "num_tokens": 4703543301.0, "step": 11610 }, { "epoch": 4.2531947052626755, "grad_norm": 0.14298392277963634, "learning_rate": 6.067903030761762e-06, "loss": 0.3595, "num_tokens": 4704344525.0, "step": 11611 }, { "epoch": 4.253561123070581, "grad_norm": 0.13280054872769392, "learning_rate": 6.065915864025877e-06, "loss": 0.3664, "num_tokens": 4705270869.0, "step": 11612 }, { "epoch": 4.253927540878487, "grad_norm": 0.14013979016736636, "learning_rate": 6.063929594410697e-06, "loss": 0.3645, "num_tokens": 4706095996.0, "step": 11613 }, { "epoch": 4.254293958686392, "grad_norm": 0.16386535474043032, "learning_rate": 6.061944222028053e-06, "loss": 0.3729, "num_tokens": 4706817532.0, "step": 11614 }, { "epoch": 4.254660376494297, "grad_norm": 0.1471563378531824, "learning_rate": 6.059959746989719e-06, "loss": 0.3535, "num_tokens": 4707512871.0, "step": 11615 }, { "epoch": 4.2550267943022035, "grad_norm": 0.14336758107406447, "learning_rate": 6.057976169407433e-06, "loss": 0.3682, "num_tokens": 4708405581.0, "step": 11616 }, { "epoch": 4.255393212110109, "grad_norm": 0.13839451850097081, "learning_rate": 6.055993489392867e-06, "loss": 0.3689, "num_tokens": 4709208156.0, "step": 11617 }, { "epoch": 4.255759629918014, "grad_norm": 0.1484851530285983, "learning_rate": 6.0540117070576586e-06, "loss": 0.349, "num_tokens": 4709887495.0, "step": 11618 }, { "epoch": 4.256126047725919, "grad_norm": 0.1525480541213304, "learning_rate": 6.052030822513381e-06, "loss": 0.3941, "num_tokens": 4710617215.0, "step": 11619 }, { "epoch": 4.2564924655338245, "grad_norm": 0.16308916569007145, "learning_rate": 6.050050835871562e-06, "loss": 0.3706, "num_tokens": 4711333684.0, "step": 11620 }, { "epoch": 4.256858883341731, "grad_norm": 0.15449222113829802, "learning_rate": 6.048071747243677e-06, "loss": 0.3509, "num_tokens": 4712071101.0, "step": 11621 }, { "epoch": 4.257225301149636, "grad_norm": 0.15488938774113464, "learning_rate": 6.046093556741154e-06, "loss": 0.373, "num_tokens": 4712789981.0, "step": 11622 }, { "epoch": 4.257591718957541, "grad_norm": 0.13911017139860818, "learning_rate": 6.044116264475367e-06, "loss": 0.3648, "num_tokens": 4713714622.0, "step": 11623 }, { "epoch": 4.257958136765446, "grad_norm": 0.15752129199227016, "learning_rate": 6.042139870557648e-06, "loss": 0.3742, "num_tokens": 4714441760.0, "step": 11624 }, { "epoch": 4.258324554573353, "grad_norm": 0.15811869882968813, "learning_rate": 6.0401643750992674e-06, "loss": 0.3747, "num_tokens": 4715238133.0, "step": 11625 }, { "epoch": 4.258690972381258, "grad_norm": 0.15441670721974593, "learning_rate": 6.0381897782114525e-06, "loss": 0.3692, "num_tokens": 4715986525.0, "step": 11626 }, { "epoch": 4.259057390189163, "grad_norm": 0.15458846800622672, "learning_rate": 6.036216080005375e-06, "loss": 0.3616, "num_tokens": 4716787087.0, "step": 11627 }, { "epoch": 4.259423807997068, "grad_norm": 0.13753224092176908, "learning_rate": 6.034243280592152e-06, "loss": 0.354, "num_tokens": 4717548819.0, "step": 11628 }, { "epoch": 4.2597902258049745, "grad_norm": 0.1477474767690884, "learning_rate": 6.032271380082866e-06, "loss": 0.354, "num_tokens": 4718275092.0, "step": 11629 }, { "epoch": 4.26015664361288, "grad_norm": 0.14287763608581427, "learning_rate": 6.03030037858853e-06, "loss": 0.3746, "num_tokens": 4719094235.0, "step": 11630 }, { "epoch": 4.260523061420785, "grad_norm": 0.13774774121347358, "learning_rate": 6.028330276220125e-06, "loss": 0.3587, "num_tokens": 4719896193.0, "step": 11631 }, { "epoch": 4.26088947922869, "grad_norm": 0.1411820800563574, "learning_rate": 6.026361073088567e-06, "loss": 0.3418, "num_tokens": 4720735093.0, "step": 11632 }, { "epoch": 4.261255897036596, "grad_norm": 0.1538710966414636, "learning_rate": 6.024392769304726e-06, "loss": 0.3972, "num_tokens": 4721454936.0, "step": 11633 }, { "epoch": 4.261622314844502, "grad_norm": 0.15325033696885276, "learning_rate": 6.022425364979415e-06, "loss": 0.3821, "num_tokens": 4722277232.0, "step": 11634 }, { "epoch": 4.261988732652407, "grad_norm": 0.1426701125996947, "learning_rate": 6.020458860223415e-06, "loss": 0.3596, "num_tokens": 4723047850.0, "step": 11635 }, { "epoch": 4.262355150460312, "grad_norm": 0.16520613949877258, "learning_rate": 6.018493255147435e-06, "loss": 0.3628, "num_tokens": 4723698687.0, "step": 11636 }, { "epoch": 4.262721568268218, "grad_norm": 0.15114125607256532, "learning_rate": 6.016528549862147e-06, "loss": 0.3978, "num_tokens": 4724571311.0, "step": 11637 }, { "epoch": 4.2630879860761235, "grad_norm": 0.14164780076902786, "learning_rate": 6.014564744478167e-06, "loss": 0.3678, "num_tokens": 4725266389.0, "step": 11638 }, { "epoch": 4.263454403884029, "grad_norm": 0.17234267754426516, "learning_rate": 6.012601839106058e-06, "loss": 0.3833, "num_tokens": 4725983646.0, "step": 11639 }, { "epoch": 4.263820821691934, "grad_norm": 0.14353765210006342, "learning_rate": 6.010639833856341e-06, "loss": 0.3761, "num_tokens": 4726777456.0, "step": 11640 }, { "epoch": 4.26418723949984, "grad_norm": 0.14542255613714422, "learning_rate": 6.008678728839478e-06, "loss": 0.3559, "num_tokens": 4727554354.0, "step": 11641 }, { "epoch": 4.264553657307745, "grad_norm": 0.15021087430429395, "learning_rate": 6.006718524165883e-06, "loss": 0.384, "num_tokens": 4728315610.0, "step": 11642 }, { "epoch": 4.264920075115651, "grad_norm": 0.1618593050864786, "learning_rate": 6.004759219945916e-06, "loss": 0.3571, "num_tokens": 4729027089.0, "step": 11643 }, { "epoch": 4.265286492923556, "grad_norm": 0.1435818015008688, "learning_rate": 6.002800816289898e-06, "loss": 0.3685, "num_tokens": 4729911918.0, "step": 11644 }, { "epoch": 4.265652910731461, "grad_norm": 0.15323233365368455, "learning_rate": 6.000843313308081e-06, "loss": 0.3617, "num_tokens": 4730613153.0, "step": 11645 }, { "epoch": 4.266019328539367, "grad_norm": 0.15776455504595768, "learning_rate": 5.998886711110685e-06, "loss": 0.3777, "num_tokens": 4731311859.0, "step": 11646 }, { "epoch": 4.2663857463472725, "grad_norm": 0.1538151723222894, "learning_rate": 5.996931009807867e-06, "loss": 0.3889, "num_tokens": 4731995194.0, "step": 11647 }, { "epoch": 4.266752164155178, "grad_norm": 0.16824954306814438, "learning_rate": 5.994976209509737e-06, "loss": 0.3819, "num_tokens": 4732757601.0, "step": 11648 }, { "epoch": 4.267118581963083, "grad_norm": 0.1559394501617228, "learning_rate": 5.9930223103263515e-06, "loss": 0.3288, "num_tokens": 4733449655.0, "step": 11649 }, { "epoch": 4.267484999770989, "grad_norm": 0.1528504787160408, "learning_rate": 5.9910693123677256e-06, "loss": 0.3905, "num_tokens": 4734208515.0, "step": 11650 }, { "epoch": 4.267851417578894, "grad_norm": 0.1499069194539014, "learning_rate": 5.9891172157438095e-06, "loss": 0.3598, "num_tokens": 4735014139.0, "step": 11651 }, { "epoch": 4.2682178353868, "grad_norm": 0.15100431341024653, "learning_rate": 5.9871660205645144e-06, "loss": 0.3526, "num_tokens": 4735771267.0, "step": 11652 }, { "epoch": 4.268584253194705, "grad_norm": 0.15052264369551144, "learning_rate": 5.985215726939697e-06, "loss": 0.4054, "num_tokens": 4736534654.0, "step": 11653 }, { "epoch": 4.268950671002611, "grad_norm": 0.16155701996082225, "learning_rate": 5.983266334979163e-06, "loss": 0.3836, "num_tokens": 4737249351.0, "step": 11654 }, { "epoch": 4.269317088810516, "grad_norm": 0.15586455237793873, "learning_rate": 5.981317844792664e-06, "loss": 0.3673, "num_tokens": 4737984735.0, "step": 11655 }, { "epoch": 4.2696835066184216, "grad_norm": 0.15550612347013443, "learning_rate": 5.979370256489901e-06, "loss": 0.3257, "num_tokens": 4738710668.0, "step": 11656 }, { "epoch": 4.270049924426327, "grad_norm": 0.1556968428552108, "learning_rate": 5.977423570180533e-06, "loss": 0.386, "num_tokens": 4739424708.0, "step": 11657 }, { "epoch": 4.270416342234233, "grad_norm": 0.14943280265791467, "learning_rate": 5.975477785974164e-06, "loss": 0.3751, "num_tokens": 4740270345.0, "step": 11658 }, { "epoch": 4.270782760042138, "grad_norm": 0.1521625143679055, "learning_rate": 5.9735329039803425e-06, "loss": 0.359, "num_tokens": 4740983825.0, "step": 11659 }, { "epoch": 4.271149177850043, "grad_norm": 0.16288892812914904, "learning_rate": 5.971588924308568e-06, "loss": 0.4052, "num_tokens": 4741733813.0, "step": 11660 }, { "epoch": 4.271515595657949, "grad_norm": 0.15564937400120057, "learning_rate": 5.969645847068292e-06, "loss": 0.3618, "num_tokens": 4742456764.0, "step": 11661 }, { "epoch": 4.271882013465855, "grad_norm": 0.16157975034876473, "learning_rate": 5.9677036723689085e-06, "loss": 0.3543, "num_tokens": 4743197187.0, "step": 11662 }, { "epoch": 4.27224843127376, "grad_norm": 0.14994106143106692, "learning_rate": 5.965762400319777e-06, "loss": 0.3461, "num_tokens": 4743904162.0, "step": 11663 }, { "epoch": 4.272614849081665, "grad_norm": 0.14891689734298083, "learning_rate": 5.963822031030181e-06, "loss": 0.3769, "num_tokens": 4744678353.0, "step": 11664 }, { "epoch": 4.272981266889571, "grad_norm": 0.15773423040054038, "learning_rate": 5.96188256460938e-06, "loss": 0.3679, "num_tokens": 4745460804.0, "step": 11665 }, { "epoch": 4.273347684697477, "grad_norm": 0.16412586906479784, "learning_rate": 5.959944001166566e-06, "loss": 0.3713, "num_tokens": 4746067373.0, "step": 11666 }, { "epoch": 4.273714102505382, "grad_norm": 0.16409785701684126, "learning_rate": 5.958006340810875e-06, "loss": 0.399, "num_tokens": 4746797678.0, "step": 11667 }, { "epoch": 4.274080520313287, "grad_norm": 0.15729735997548622, "learning_rate": 5.956069583651414e-06, "loss": 0.3819, "num_tokens": 4747524983.0, "step": 11668 }, { "epoch": 4.2744469381211925, "grad_norm": 0.1457083654298674, "learning_rate": 5.95413372979722e-06, "loss": 0.385, "num_tokens": 4748341716.0, "step": 11669 }, { "epoch": 4.274813355929098, "grad_norm": 0.14490978151436623, "learning_rate": 5.952198779357283e-06, "loss": 0.3782, "num_tokens": 4749184704.0, "step": 11670 }, { "epoch": 4.275179773737004, "grad_norm": 0.15460211638251214, "learning_rate": 5.950264732440553e-06, "loss": 0.3823, "num_tokens": 4749908351.0, "step": 11671 }, { "epoch": 4.275546191544909, "grad_norm": 0.16566053100682718, "learning_rate": 5.948331589155914e-06, "loss": 0.364, "num_tokens": 4750518009.0, "step": 11672 }, { "epoch": 4.275912609352814, "grad_norm": 0.15871649744660077, "learning_rate": 5.946399349612203e-06, "loss": 0.3913, "num_tokens": 4751333708.0, "step": 11673 }, { "epoch": 4.27627902716072, "grad_norm": 0.13975712068120288, "learning_rate": 5.944468013918219e-06, "loss": 0.3587, "num_tokens": 4752102378.0, "step": 11674 }, { "epoch": 4.276645444968626, "grad_norm": 0.15051783181097128, "learning_rate": 5.94253758218269e-06, "loss": 0.3702, "num_tokens": 4752922243.0, "step": 11675 }, { "epoch": 4.277011862776531, "grad_norm": 0.17450353294171633, "learning_rate": 5.940608054514312e-06, "loss": 0.4075, "num_tokens": 4753600945.0, "step": 11676 }, { "epoch": 4.277378280584436, "grad_norm": 0.15466024648163532, "learning_rate": 5.938679431021711e-06, "loss": 0.3746, "num_tokens": 4754336867.0, "step": 11677 }, { "epoch": 4.2777446983923415, "grad_norm": 0.15474383223242982, "learning_rate": 5.936751711813477e-06, "loss": 0.3497, "num_tokens": 4755130510.0, "step": 11678 }, { "epoch": 4.278111116200248, "grad_norm": 0.1516420807468328, "learning_rate": 5.934824896998151e-06, "loss": 0.3583, "num_tokens": 4755803831.0, "step": 11679 }, { "epoch": 4.278477534008153, "grad_norm": 0.14530163562540574, "learning_rate": 5.93289898668421e-06, "loss": 0.3667, "num_tokens": 4756575948.0, "step": 11680 }, { "epoch": 4.278843951816058, "grad_norm": 0.15486219237080392, "learning_rate": 5.930973980980088e-06, "loss": 0.3657, "num_tokens": 4757282211.0, "step": 11681 }, { "epoch": 4.279210369623963, "grad_norm": 0.1537010613768924, "learning_rate": 5.929049879994167e-06, "loss": 0.3781, "num_tokens": 4758076322.0, "step": 11682 }, { "epoch": 4.2795767874318695, "grad_norm": 0.15145463284064883, "learning_rate": 5.927126683834772e-06, "loss": 0.3614, "num_tokens": 4758836150.0, "step": 11683 }, { "epoch": 4.279943205239775, "grad_norm": 0.15473900808776164, "learning_rate": 5.9252043926101935e-06, "loss": 0.3844, "num_tokens": 4759589930.0, "step": 11684 }, { "epoch": 4.28030962304768, "grad_norm": 0.14749730445415307, "learning_rate": 5.923283006428649e-06, "loss": 0.3741, "num_tokens": 4760338043.0, "step": 11685 }, { "epoch": 4.280676040855585, "grad_norm": 0.14826175076802134, "learning_rate": 5.921362525398324e-06, "loss": 0.3816, "num_tokens": 4761206686.0, "step": 11686 }, { "epoch": 4.281042458663491, "grad_norm": 0.15106070225786605, "learning_rate": 5.919442949627345e-06, "loss": 0.3637, "num_tokens": 4761948951.0, "step": 11687 }, { "epoch": 4.281408876471397, "grad_norm": 0.15082247496684503, "learning_rate": 5.917524279223786e-06, "loss": 0.3626, "num_tokens": 4762697630.0, "step": 11688 }, { "epoch": 4.281775294279302, "grad_norm": 0.14152659148203828, "learning_rate": 5.915606514295669e-06, "loss": 0.3426, "num_tokens": 4763549702.0, "step": 11689 }, { "epoch": 4.282141712087207, "grad_norm": 0.133549119688273, "learning_rate": 5.9136896549509755e-06, "loss": 0.3678, "num_tokens": 4764390506.0, "step": 11690 }, { "epoch": 4.282508129895113, "grad_norm": 0.1614414349005145, "learning_rate": 5.9117737012976195e-06, "loss": 0.38, "num_tokens": 4765142393.0, "step": 11691 }, { "epoch": 4.282874547703019, "grad_norm": 0.14443237176069965, "learning_rate": 5.909858653443482e-06, "loss": 0.3519, "num_tokens": 4766053900.0, "step": 11692 }, { "epoch": 4.283240965510924, "grad_norm": 0.1471236436038069, "learning_rate": 5.907944511496381e-06, "loss": 0.3953, "num_tokens": 4766822170.0, "step": 11693 }, { "epoch": 4.283607383318829, "grad_norm": 0.15737225342604377, "learning_rate": 5.9060312755640815e-06, "loss": 0.384, "num_tokens": 4767563601.0, "step": 11694 }, { "epoch": 4.283973801126734, "grad_norm": 0.13726971930465212, "learning_rate": 5.904118945754307e-06, "loss": 0.3768, "num_tokens": 4768560711.0, "step": 11695 }, { "epoch": 4.2843402189346405, "grad_norm": 0.1469390099036011, "learning_rate": 5.902207522174725e-06, "loss": 0.3471, "num_tokens": 4769371956.0, "step": 11696 }, { "epoch": 4.284706636742546, "grad_norm": 0.15672877917247777, "learning_rate": 5.900297004932953e-06, "loss": 0.3953, "num_tokens": 4770087854.0, "step": 11697 }, { "epoch": 4.285073054550451, "grad_norm": 0.1432542270891961, "learning_rate": 5.898387394136554e-06, "loss": 0.3502, "num_tokens": 4770890144.0, "step": 11698 }, { "epoch": 4.285439472358356, "grad_norm": 0.15060700729170506, "learning_rate": 5.896478689893048e-06, "loss": 0.352, "num_tokens": 4771589791.0, "step": 11699 }, { "epoch": 4.285805890166262, "grad_norm": 0.14490612361268115, "learning_rate": 5.89457089230989e-06, "loss": 0.3661, "num_tokens": 4772400844.0, "step": 11700 }, { "epoch": 4.286172307974168, "grad_norm": 0.1510098244992695, "learning_rate": 5.892664001494506e-06, "loss": 0.3281, "num_tokens": 4773097218.0, "step": 11701 }, { "epoch": 4.286538725782073, "grad_norm": 0.15259420124706563, "learning_rate": 5.890758017554248e-06, "loss": 0.3599, "num_tokens": 4773834281.0, "step": 11702 }, { "epoch": 4.286905143589978, "grad_norm": 0.15022254788974682, "learning_rate": 5.888852940596429e-06, "loss": 0.3709, "num_tokens": 4774648643.0, "step": 11703 }, { "epoch": 4.287271561397884, "grad_norm": 0.14432230961545428, "learning_rate": 5.886948770728306e-06, "loss": 0.3779, "num_tokens": 4775455977.0, "step": 11704 }, { "epoch": 4.2876379792057895, "grad_norm": 0.15186859430066788, "learning_rate": 5.885045508057094e-06, "loss": 0.3544, "num_tokens": 4776186250.0, "step": 11705 }, { "epoch": 4.288004397013695, "grad_norm": 0.15426251779964367, "learning_rate": 5.883143152689943e-06, "loss": 0.3491, "num_tokens": 4776835484.0, "step": 11706 }, { "epoch": 4.2883708148216, "grad_norm": 0.16344714259000187, "learning_rate": 5.881241704733966e-06, "loss": 0.3814, "num_tokens": 4777471126.0, "step": 11707 }, { "epoch": 4.288737232629506, "grad_norm": 0.14946356946572484, "learning_rate": 5.879341164296218e-06, "loss": 0.3907, "num_tokens": 4778264946.0, "step": 11708 }, { "epoch": 4.289103650437411, "grad_norm": 0.15334541859640943, "learning_rate": 5.877441531483699e-06, "loss": 0.3906, "num_tokens": 4779113120.0, "step": 11709 }, { "epoch": 4.289470068245317, "grad_norm": 0.15441499602304762, "learning_rate": 5.875542806403363e-06, "loss": 0.3949, "num_tokens": 4779795179.0, "step": 11710 }, { "epoch": 4.289836486053222, "grad_norm": 0.14796144125159505, "learning_rate": 5.873644989162111e-06, "loss": 0.3799, "num_tokens": 4780597883.0, "step": 11711 }, { "epoch": 4.290202903861128, "grad_norm": 0.138295161161626, "learning_rate": 5.871748079866795e-06, "loss": 0.3155, "num_tokens": 4781400165.0, "step": 11712 }, { "epoch": 4.290569321669033, "grad_norm": 0.14687816311670618, "learning_rate": 5.869852078624219e-06, "loss": 0.3593, "num_tokens": 4782186192.0, "step": 11713 }, { "epoch": 4.2909357394769385, "grad_norm": 0.15125534302185517, "learning_rate": 5.8679569855411275e-06, "loss": 0.3923, "num_tokens": 4782991740.0, "step": 11714 }, { "epoch": 4.291302157284844, "grad_norm": 0.1530620406157555, "learning_rate": 5.86606280072422e-06, "loss": 0.3522, "num_tokens": 4783702743.0, "step": 11715 }, { "epoch": 4.29166857509275, "grad_norm": 0.15196302535929657, "learning_rate": 5.8641695242801414e-06, "loss": 0.3459, "num_tokens": 4784480970.0, "step": 11716 }, { "epoch": 4.292034992900655, "grad_norm": 0.13731195732499543, "learning_rate": 5.862277156315484e-06, "loss": 0.3451, "num_tokens": 4785285308.0, "step": 11717 }, { "epoch": 4.29240141070856, "grad_norm": 0.14052512208619689, "learning_rate": 5.860385696936797e-06, "loss": 0.3674, "num_tokens": 4786170472.0, "step": 11718 }, { "epoch": 4.292767828516466, "grad_norm": 0.1497487146404167, "learning_rate": 5.858495146250568e-06, "loss": 0.3903, "num_tokens": 4786993799.0, "step": 11719 }, { "epoch": 4.293134246324371, "grad_norm": 0.153192953277636, "learning_rate": 5.856605504363246e-06, "loss": 0.3782, "num_tokens": 4787729876.0, "step": 11720 }, { "epoch": 4.293500664132277, "grad_norm": 0.1506619093750465, "learning_rate": 5.854716771381217e-06, "loss": 0.3675, "num_tokens": 4788498424.0, "step": 11721 }, { "epoch": 4.293867081940182, "grad_norm": 0.1515930770038812, "learning_rate": 5.852828947410822e-06, "loss": 0.3939, "num_tokens": 4789264394.0, "step": 11722 }, { "epoch": 4.2942334997480875, "grad_norm": 0.1526786487869732, "learning_rate": 5.8509420325583465e-06, "loss": 0.3684, "num_tokens": 4790053430.0, "step": 11723 }, { "epoch": 4.294599917555994, "grad_norm": 0.1585828649669157, "learning_rate": 5.849056026930031e-06, "loss": 0.3387, "num_tokens": 4790733517.0, "step": 11724 }, { "epoch": 4.294966335363899, "grad_norm": 0.14241092027370036, "learning_rate": 5.847170930632056e-06, "loss": 0.355, "num_tokens": 4791569962.0, "step": 11725 }, { "epoch": 4.295332753171804, "grad_norm": 0.14827830005002537, "learning_rate": 5.845286743770563e-06, "loss": 0.395, "num_tokens": 4792337057.0, "step": 11726 }, { "epoch": 4.295699170979709, "grad_norm": 0.15365774634816906, "learning_rate": 5.843403466451635e-06, "loss": 0.358, "num_tokens": 4793139279.0, "step": 11727 }, { "epoch": 4.296065588787615, "grad_norm": 0.14016357738046367, "learning_rate": 5.841521098781296e-06, "loss": 0.3683, "num_tokens": 4794075142.0, "step": 11728 }, { "epoch": 4.296432006595521, "grad_norm": 0.15012341425311462, "learning_rate": 5.839639640865538e-06, "loss": 0.377, "num_tokens": 4794803327.0, "step": 11729 }, { "epoch": 4.296798424403426, "grad_norm": 0.15538136389869786, "learning_rate": 5.837759092810285e-06, "loss": 0.3484, "num_tokens": 4795596051.0, "step": 11730 }, { "epoch": 4.297164842211331, "grad_norm": 0.14188341776235439, "learning_rate": 5.835879454721416e-06, "loss": 0.3524, "num_tokens": 4796381846.0, "step": 11731 }, { "epoch": 4.297531260019237, "grad_norm": 0.13789623942693954, "learning_rate": 5.834000726704756e-06, "loss": 0.355, "num_tokens": 4797225269.0, "step": 11732 }, { "epoch": 4.297897677827143, "grad_norm": 0.1506912486258156, "learning_rate": 5.832122908866087e-06, "loss": 0.3468, "num_tokens": 4797953286.0, "step": 11733 }, { "epoch": 4.298264095635048, "grad_norm": 0.15828001781119766, "learning_rate": 5.830246001311126e-06, "loss": 0.3962, "num_tokens": 4798612346.0, "step": 11734 }, { "epoch": 4.298630513442953, "grad_norm": 0.1421576225512271, "learning_rate": 5.828370004145556e-06, "loss": 0.3731, "num_tokens": 4799491870.0, "step": 11735 }, { "epoch": 4.2989969312508585, "grad_norm": 0.1665013387967133, "learning_rate": 5.826494917474993e-06, "loss": 0.3683, "num_tokens": 4800161156.0, "step": 11736 }, { "epoch": 4.299363349058765, "grad_norm": 0.14971990860236603, "learning_rate": 5.824620741405011e-06, "loss": 0.3784, "num_tokens": 4800941935.0, "step": 11737 }, { "epoch": 4.29972976686667, "grad_norm": 0.14822096226288026, "learning_rate": 5.822747476041126e-06, "loss": 0.3541, "num_tokens": 4801734868.0, "step": 11738 }, { "epoch": 4.300096184674575, "grad_norm": 0.15240598212783518, "learning_rate": 5.820875121488813e-06, "loss": 0.3663, "num_tokens": 4802528774.0, "step": 11739 }, { "epoch": 4.30046260248248, "grad_norm": 0.1479169574703296, "learning_rate": 5.819003677853478e-06, "loss": 0.3691, "num_tokens": 4803347156.0, "step": 11740 }, { "epoch": 4.3008290202903865, "grad_norm": 0.1534527038487273, "learning_rate": 5.817133145240503e-06, "loss": 0.3768, "num_tokens": 4804084091.0, "step": 11741 }, { "epoch": 4.301195438098292, "grad_norm": 0.1547546664809204, "learning_rate": 5.8152635237551925e-06, "loss": 0.3603, "num_tokens": 4804802221.0, "step": 11742 }, { "epoch": 4.301561855906197, "grad_norm": 0.15002885250644343, "learning_rate": 5.8133948135028104e-06, "loss": 0.3877, "num_tokens": 4805568641.0, "step": 11743 }, { "epoch": 4.301928273714102, "grad_norm": 0.16358148431358133, "learning_rate": 5.8115270145885694e-06, "loss": 0.3843, "num_tokens": 4806329015.0, "step": 11744 }, { "epoch": 4.302294691522008, "grad_norm": 0.14904527849106516, "learning_rate": 5.80966012711763e-06, "loss": 0.364, "num_tokens": 4807106109.0, "step": 11745 }, { "epoch": 4.302661109329914, "grad_norm": 0.16129749358993448, "learning_rate": 5.807794151195101e-06, "loss": 0.3736, "num_tokens": 4807880082.0, "step": 11746 }, { "epoch": 4.303027527137819, "grad_norm": 0.1418044774445683, "learning_rate": 5.805929086926046e-06, "loss": 0.3551, "num_tokens": 4808696544.0, "step": 11747 }, { "epoch": 4.303393944945724, "grad_norm": 0.14545513069663182, "learning_rate": 5.80406493441547e-06, "loss": 0.3928, "num_tokens": 4809448395.0, "step": 11748 }, { "epoch": 4.30376036275363, "grad_norm": 0.14498323904856358, "learning_rate": 5.802201693768324e-06, "loss": 0.3914, "num_tokens": 4810270010.0, "step": 11749 }, { "epoch": 4.3041267805615355, "grad_norm": 0.15580088268087686, "learning_rate": 5.8003393650895125e-06, "loss": 0.3559, "num_tokens": 4811039214.0, "step": 11750 }, { "epoch": 4.304493198369441, "grad_norm": 0.1356926326015555, "learning_rate": 5.7984779484838935e-06, "loss": 0.3713, "num_tokens": 4811803661.0, "step": 11751 }, { "epoch": 4.304859616177346, "grad_norm": 0.15053661132524723, "learning_rate": 5.796617444056265e-06, "loss": 0.3741, "num_tokens": 4812560723.0, "step": 11752 }, { "epoch": 4.305226033985251, "grad_norm": 0.1555654603533533, "learning_rate": 5.794757851911375e-06, "loss": 0.359, "num_tokens": 4813291614.0, "step": 11753 }, { "epoch": 4.305592451793157, "grad_norm": 0.1444019339628544, "learning_rate": 5.792899172153929e-06, "loss": 0.3563, "num_tokens": 4814020617.0, "step": 11754 }, { "epoch": 4.305958869601063, "grad_norm": 0.16367924756781851, "learning_rate": 5.791041404888566e-06, "loss": 0.3762, "num_tokens": 4814742503.0, "step": 11755 }, { "epoch": 4.306325287408968, "grad_norm": 0.1360871382931965, "learning_rate": 5.7891845502198885e-06, "loss": 0.3688, "num_tokens": 4815550736.0, "step": 11756 }, { "epoch": 4.306691705216873, "grad_norm": 0.16745856249335261, "learning_rate": 5.787328608252438e-06, "loss": 0.3951, "num_tokens": 4816241301.0, "step": 11757 }, { "epoch": 4.307058123024779, "grad_norm": 0.1625077926579329, "learning_rate": 5.785473579090711e-06, "loss": 0.3981, "num_tokens": 4816915554.0, "step": 11758 }, { "epoch": 4.307424540832685, "grad_norm": 0.1510705300314115, "learning_rate": 5.7836194628391425e-06, "loss": 0.3811, "num_tokens": 4817720806.0, "step": 11759 }, { "epoch": 4.30779095864059, "grad_norm": 0.14537435804594456, "learning_rate": 5.781766259602132e-06, "loss": 0.3922, "num_tokens": 4818517717.0, "step": 11760 }, { "epoch": 4.308157376448495, "grad_norm": 0.15470883543082248, "learning_rate": 5.779913969484008e-06, "loss": 0.3596, "num_tokens": 4819198189.0, "step": 11761 }, { "epoch": 4.308523794256401, "grad_norm": 0.14481273751906745, "learning_rate": 5.7780625925890695e-06, "loss": 0.338, "num_tokens": 4819991265.0, "step": 11762 }, { "epoch": 4.3088902120643064, "grad_norm": 0.14276672986872366, "learning_rate": 5.776212129021548e-06, "loss": 0.3338, "num_tokens": 4820811893.0, "step": 11763 }, { "epoch": 4.309256629872212, "grad_norm": 0.15347284432456668, "learning_rate": 5.774362578885625e-06, "loss": 0.3655, "num_tokens": 4821498647.0, "step": 11764 }, { "epoch": 4.309623047680117, "grad_norm": 0.1414786891224998, "learning_rate": 5.772513942285438e-06, "loss": 0.3394, "num_tokens": 4822299152.0, "step": 11765 }, { "epoch": 4.309989465488023, "grad_norm": 0.16192258098769677, "learning_rate": 5.7706662193250625e-06, "loss": 0.3835, "num_tokens": 4822948271.0, "step": 11766 }, { "epoch": 4.310355883295928, "grad_norm": 0.15725993849731648, "learning_rate": 5.768819410108536e-06, "loss": 0.3648, "num_tokens": 4823737900.0, "step": 11767 }, { "epoch": 4.310722301103834, "grad_norm": 0.15514596849590287, "learning_rate": 5.766973514739838e-06, "loss": 0.3525, "num_tokens": 4824512592.0, "step": 11768 }, { "epoch": 4.311088718911739, "grad_norm": 0.14497992406490018, "learning_rate": 5.7651285333228925e-06, "loss": 0.3489, "num_tokens": 4825257443.0, "step": 11769 }, { "epoch": 4.311455136719645, "grad_norm": 0.1597065049133204, "learning_rate": 5.763284465961578e-06, "loss": 0.362, "num_tokens": 4826023592.0, "step": 11770 }, { "epoch": 4.31182155452755, "grad_norm": 0.1528295096387372, "learning_rate": 5.761441312759717e-06, "loss": 0.3736, "num_tokens": 4826757380.0, "step": 11771 }, { "epoch": 4.3121879723354555, "grad_norm": 0.15599439988776925, "learning_rate": 5.759599073821078e-06, "loss": 0.376, "num_tokens": 4827502670.0, "step": 11772 }, { "epoch": 4.312554390143361, "grad_norm": 0.15142979655387087, "learning_rate": 5.757757749249393e-06, "loss": 0.3721, "num_tokens": 4828218428.0, "step": 11773 }, { "epoch": 4.312920807951267, "grad_norm": 0.1563156569954204, "learning_rate": 5.7559173391483245e-06, "loss": 0.3631, "num_tokens": 4829045921.0, "step": 11774 }, { "epoch": 4.313287225759172, "grad_norm": 0.14912983832138257, "learning_rate": 5.754077843621498e-06, "loss": 0.3663, "num_tokens": 4829773142.0, "step": 11775 }, { "epoch": 4.313653643567077, "grad_norm": 0.15353483257649572, "learning_rate": 5.752239262772476e-06, "loss": 0.3498, "num_tokens": 4830548540.0, "step": 11776 }, { "epoch": 4.314020061374983, "grad_norm": 0.14448552450104823, "learning_rate": 5.750401596704776e-06, "loss": 0.3493, "num_tokens": 4831269139.0, "step": 11777 }, { "epoch": 4.314386479182888, "grad_norm": 0.14437277490982506, "learning_rate": 5.748564845521857e-06, "loss": 0.3742, "num_tokens": 4832063005.0, "step": 11778 }, { "epoch": 4.314752896990794, "grad_norm": 0.15348953029078288, "learning_rate": 5.746729009327141e-06, "loss": 0.3618, "num_tokens": 4832820346.0, "step": 11779 }, { "epoch": 4.315119314798699, "grad_norm": 0.1706840695436928, "learning_rate": 5.7448940882239805e-06, "loss": 0.3885, "num_tokens": 4833557445.0, "step": 11780 }, { "epoch": 4.3154857326066045, "grad_norm": 0.14577643104401483, "learning_rate": 5.743060082315691e-06, "loss": 0.3633, "num_tokens": 4834233617.0, "step": 11781 }, { "epoch": 4.31585215041451, "grad_norm": 0.15487530620308637, "learning_rate": 5.74122699170553e-06, "loss": 0.3739, "num_tokens": 4835078894.0, "step": 11782 }, { "epoch": 4.316218568222416, "grad_norm": 0.1619178338470848, "learning_rate": 5.7393948164967004e-06, "loss": 0.3642, "num_tokens": 4835711236.0, "step": 11783 }, { "epoch": 4.316584986030321, "grad_norm": 0.15540433666260065, "learning_rate": 5.7375635567923615e-06, "loss": 0.3833, "num_tokens": 4836454146.0, "step": 11784 }, { "epoch": 4.316951403838226, "grad_norm": 0.1435075114029182, "learning_rate": 5.735733212695616e-06, "loss": 0.3781, "num_tokens": 4837302570.0, "step": 11785 }, { "epoch": 4.317317821646132, "grad_norm": 0.14716229514342596, "learning_rate": 5.733903784309517e-06, "loss": 0.3683, "num_tokens": 4838001653.0, "step": 11786 }, { "epoch": 4.317684239454038, "grad_norm": 0.16021193255862573, "learning_rate": 5.732075271737058e-06, "loss": 0.3775, "num_tokens": 4838769011.0, "step": 11787 }, { "epoch": 4.318050657261943, "grad_norm": 0.1501084842730658, "learning_rate": 5.730247675081195e-06, "loss": 0.3851, "num_tokens": 4839599318.0, "step": 11788 }, { "epoch": 4.318417075069848, "grad_norm": 0.1382663902515773, "learning_rate": 5.728420994444823e-06, "loss": 0.346, "num_tokens": 4840381708.0, "step": 11789 }, { "epoch": 4.3187834928777535, "grad_norm": 0.1584760494622142, "learning_rate": 5.726595229930789e-06, "loss": 0.354, "num_tokens": 4841127444.0, "step": 11790 }, { "epoch": 4.31914991068566, "grad_norm": 0.15377388014025845, "learning_rate": 5.724770381641889e-06, "loss": 0.3904, "num_tokens": 4841947995.0, "step": 11791 }, { "epoch": 4.319516328493565, "grad_norm": 0.15339896003935763, "learning_rate": 5.7229464496808605e-06, "loss": 0.3564, "num_tokens": 4842748131.0, "step": 11792 }, { "epoch": 4.31988274630147, "grad_norm": 0.15992808290852123, "learning_rate": 5.721123434150395e-06, "loss": 0.3594, "num_tokens": 4843521656.0, "step": 11793 }, { "epoch": 4.320249164109375, "grad_norm": 0.14371607155995283, "learning_rate": 5.7193013351531375e-06, "loss": 0.388, "num_tokens": 4844345062.0, "step": 11794 }, { "epoch": 4.320615581917282, "grad_norm": 0.14753489041893905, "learning_rate": 5.717480152791669e-06, "loss": 0.3369, "num_tokens": 4845087903.0, "step": 11795 }, { "epoch": 4.320981999725187, "grad_norm": 0.15182798786079252, "learning_rate": 5.715659887168531e-06, "loss": 0.3971, "num_tokens": 4845852162.0, "step": 11796 }, { "epoch": 4.321348417533092, "grad_norm": 0.15648014571516541, "learning_rate": 5.713840538386207e-06, "loss": 0.3595, "num_tokens": 4846585486.0, "step": 11797 }, { "epoch": 4.321714835340997, "grad_norm": 0.1571996913456621, "learning_rate": 5.712022106547128e-06, "loss": 0.3421, "num_tokens": 4847296458.0, "step": 11798 }, { "epoch": 4.3220812531489035, "grad_norm": 0.15308749136798733, "learning_rate": 5.710204591753677e-06, "loss": 0.3594, "num_tokens": 4848075051.0, "step": 11799 }, { "epoch": 4.322447670956809, "grad_norm": 0.148105715647843, "learning_rate": 5.7083879941081785e-06, "loss": 0.3616, "num_tokens": 4848887625.0, "step": 11800 }, { "epoch": 4.322814088764714, "grad_norm": 0.14307165197010366, "learning_rate": 5.706572313712915e-06, "loss": 0.3752, "num_tokens": 4849709485.0, "step": 11801 }, { "epoch": 4.323180506572619, "grad_norm": 0.14031073912176548, "learning_rate": 5.704757550670117e-06, "loss": 0.3825, "num_tokens": 4850608420.0, "step": 11802 }, { "epoch": 4.3235469243805245, "grad_norm": 0.14406395844783323, "learning_rate": 5.702943705081956e-06, "loss": 0.3624, "num_tokens": 4851418634.0, "step": 11803 }, { "epoch": 4.323913342188431, "grad_norm": 0.15964479970540135, "learning_rate": 5.701130777050554e-06, "loss": 0.3959, "num_tokens": 4852117706.0, "step": 11804 }, { "epoch": 4.324279759996336, "grad_norm": 0.15968621365729552, "learning_rate": 5.699318766677982e-06, "loss": 0.3602, "num_tokens": 4852800151.0, "step": 11805 }, { "epoch": 4.324646177804241, "grad_norm": 0.155810805746463, "learning_rate": 5.697507674066257e-06, "loss": 0.3717, "num_tokens": 4853482734.0, "step": 11806 }, { "epoch": 4.325012595612146, "grad_norm": 0.14279860738145583, "learning_rate": 5.695697499317355e-06, "loss": 0.3489, "num_tokens": 4854296018.0, "step": 11807 }, { "epoch": 4.3253790134200525, "grad_norm": 0.15790167911078518, "learning_rate": 5.6938882425331845e-06, "loss": 0.3655, "num_tokens": 4854987885.0, "step": 11808 }, { "epoch": 4.325745431227958, "grad_norm": 0.15479401022513056, "learning_rate": 5.692079903815618e-06, "loss": 0.3727, "num_tokens": 4855762407.0, "step": 11809 }, { "epoch": 4.326111849035863, "grad_norm": 0.14089620300525788, "learning_rate": 5.690272483266464e-06, "loss": 0.3867, "num_tokens": 4856533517.0, "step": 11810 }, { "epoch": 4.326478266843768, "grad_norm": 0.1482900874161289, "learning_rate": 5.688465980987481e-06, "loss": 0.3571, "num_tokens": 4857388294.0, "step": 11811 }, { "epoch": 4.326844684651674, "grad_norm": 0.1465070676460418, "learning_rate": 5.6866603970803855e-06, "loss": 0.3609, "num_tokens": 4858200243.0, "step": 11812 }, { "epoch": 4.32721110245958, "grad_norm": 0.1568097691445163, "learning_rate": 5.6848557316468325e-06, "loss": 0.3481, "num_tokens": 4858881858.0, "step": 11813 }, { "epoch": 4.327577520267485, "grad_norm": 0.1494839331054563, "learning_rate": 5.6830519847884235e-06, "loss": 0.4041, "num_tokens": 4859723245.0, "step": 11814 }, { "epoch": 4.32794393807539, "grad_norm": 0.1475204260896081, "learning_rate": 5.68124915660672e-06, "loss": 0.3569, "num_tokens": 4860493765.0, "step": 11815 }, { "epoch": 4.328310355883296, "grad_norm": 0.1652656224548081, "learning_rate": 5.679447247203224e-06, "loss": 0.3713, "num_tokens": 4861195167.0, "step": 11816 }, { "epoch": 4.3286767736912015, "grad_norm": 0.15344403373264343, "learning_rate": 5.677646256679381e-06, "loss": 0.3728, "num_tokens": 4861944789.0, "step": 11817 }, { "epoch": 4.329043191499107, "grad_norm": 0.13768141218330773, "learning_rate": 5.675846185136598e-06, "loss": 0.3727, "num_tokens": 4862835243.0, "step": 11818 }, { "epoch": 4.329409609307012, "grad_norm": 0.14343483305838317, "learning_rate": 5.6740470326762175e-06, "loss": 0.39, "num_tokens": 4863629645.0, "step": 11819 }, { "epoch": 4.329776027114918, "grad_norm": 0.15077413863524683, "learning_rate": 5.6722487993995355e-06, "loss": 0.3853, "num_tokens": 4864404499.0, "step": 11820 }, { "epoch": 4.330142444922823, "grad_norm": 0.1498330056202651, "learning_rate": 5.670451485407795e-06, "loss": 0.3555, "num_tokens": 4865132751.0, "step": 11821 }, { "epoch": 4.330508862730729, "grad_norm": 0.16030202566919421, "learning_rate": 5.6686550908021935e-06, "loss": 0.374, "num_tokens": 4865786884.0, "step": 11822 }, { "epoch": 4.330875280538634, "grad_norm": 0.15776865765089018, "learning_rate": 5.666859615683869e-06, "loss": 0.3947, "num_tokens": 4866508580.0, "step": 11823 }, { "epoch": 4.33124169834654, "grad_norm": 0.15598523723710936, "learning_rate": 5.665065060153909e-06, "loss": 0.3927, "num_tokens": 4867301793.0, "step": 11824 }, { "epoch": 4.331608116154445, "grad_norm": 0.14637395343848278, "learning_rate": 5.663271424313355e-06, "loss": 0.3378, "num_tokens": 4868026321.0, "step": 11825 }, { "epoch": 4.3319745339623505, "grad_norm": 0.13619530721752462, "learning_rate": 5.661478708263186e-06, "loss": 0.3516, "num_tokens": 4868767612.0, "step": 11826 }, { "epoch": 4.332340951770256, "grad_norm": 0.16573764156074944, "learning_rate": 5.659686912104339e-06, "loss": 0.3589, "num_tokens": 4869622650.0, "step": 11827 }, { "epoch": 4.332707369578161, "grad_norm": 0.15219543930896304, "learning_rate": 5.657896035937694e-06, "loss": 0.3683, "num_tokens": 4870319636.0, "step": 11828 }, { "epoch": 4.333073787386067, "grad_norm": 0.168307012185628, "learning_rate": 5.656106079864079e-06, "loss": 0.4235, "num_tokens": 4871026545.0, "step": 11829 }, { "epoch": 4.333440205193972, "grad_norm": 0.15685673893572677, "learning_rate": 5.654317043984279e-06, "loss": 0.3733, "num_tokens": 4871753765.0, "step": 11830 }, { "epoch": 4.333806623001878, "grad_norm": 0.16816860673138, "learning_rate": 5.652528928399017e-06, "loss": 0.411, "num_tokens": 4872443991.0, "step": 11831 }, { "epoch": 4.334173040809783, "grad_norm": 0.14968606770416396, "learning_rate": 5.6507417332089686e-06, "loss": 0.378, "num_tokens": 4873259125.0, "step": 11832 }, { "epoch": 4.334539458617689, "grad_norm": 0.1456990516268805, "learning_rate": 5.648955458514749e-06, "loss": 0.3448, "num_tokens": 4874001799.0, "step": 11833 }, { "epoch": 4.334905876425594, "grad_norm": 0.14874244396764538, "learning_rate": 5.647170104416937e-06, "loss": 0.3534, "num_tokens": 4874756478.0, "step": 11834 }, { "epoch": 4.3352722942335, "grad_norm": 0.15485267504246653, "learning_rate": 5.6453856710160475e-06, "loss": 0.3755, "num_tokens": 4875473684.0, "step": 11835 }, { "epoch": 4.335638712041405, "grad_norm": 0.15949036813548653, "learning_rate": 5.643602158412551e-06, "loss": 0.3537, "num_tokens": 4876242860.0, "step": 11836 }, { "epoch": 4.336005129849311, "grad_norm": 0.14776100947081033, "learning_rate": 5.641819566706864e-06, "loss": 0.3812, "num_tokens": 4876927987.0, "step": 11837 }, { "epoch": 4.336371547657216, "grad_norm": 0.15631831349678318, "learning_rate": 5.6400378959993465e-06, "loss": 0.4044, "num_tokens": 4877757261.0, "step": 11838 }, { "epoch": 4.3367379654651215, "grad_norm": 0.15255125517437226, "learning_rate": 5.638257146390306e-06, "loss": 0.3847, "num_tokens": 4878527083.0, "step": 11839 }, { "epoch": 4.337104383273027, "grad_norm": 0.14276173742098777, "learning_rate": 5.63647731798001e-06, "loss": 0.3644, "num_tokens": 4879302983.0, "step": 11840 }, { "epoch": 4.337470801080933, "grad_norm": 0.14564180658124656, "learning_rate": 5.6346984108686666e-06, "loss": 0.3616, "num_tokens": 4880106743.0, "step": 11841 }, { "epoch": 4.337837218888838, "grad_norm": 0.16100584669817955, "learning_rate": 5.632920425156423e-06, "loss": 0.3387, "num_tokens": 4880760548.0, "step": 11842 }, { "epoch": 4.338203636696743, "grad_norm": 0.1504293248755805, "learning_rate": 5.631143360943393e-06, "loss": 0.3341, "num_tokens": 4881459624.0, "step": 11843 }, { "epoch": 4.338570054504649, "grad_norm": 0.14577788199532474, "learning_rate": 5.629367218329622e-06, "loss": 0.376, "num_tokens": 4882264390.0, "step": 11844 }, { "epoch": 4.338936472312555, "grad_norm": 0.1734414720067439, "learning_rate": 5.627591997415115e-06, "loss": 0.4051, "num_tokens": 4882919282.0, "step": 11845 }, { "epoch": 4.33930289012046, "grad_norm": 0.14439338973703772, "learning_rate": 5.62581769829982e-06, "loss": 0.3517, "num_tokens": 4883765499.0, "step": 11846 }, { "epoch": 4.339669307928365, "grad_norm": 0.13780009061660842, "learning_rate": 5.624044321083632e-06, "loss": 0.3571, "num_tokens": 4884507545.0, "step": 11847 }, { "epoch": 4.3400357257362705, "grad_norm": 0.152381151884452, "learning_rate": 5.622271865866392e-06, "loss": 0.3478, "num_tokens": 4885156507.0, "step": 11848 }, { "epoch": 4.340402143544177, "grad_norm": 0.14876959792976435, "learning_rate": 5.6205003327479e-06, "loss": 0.3361, "num_tokens": 4885952421.0, "step": 11849 }, { "epoch": 4.340768561352082, "grad_norm": 0.15450308926650666, "learning_rate": 5.61872972182789e-06, "loss": 0.3385, "num_tokens": 4886697376.0, "step": 11850 }, { "epoch": 4.341134979159987, "grad_norm": 0.14588374405102433, "learning_rate": 5.61696003320606e-06, "loss": 0.3689, "num_tokens": 4887515540.0, "step": 11851 }, { "epoch": 4.341501396967892, "grad_norm": 0.14755169642162347, "learning_rate": 5.615191266982039e-06, "loss": 0.3722, "num_tokens": 4888305768.0, "step": 11852 }, { "epoch": 4.341867814775798, "grad_norm": 0.14962275239182596, "learning_rate": 5.613423423255416e-06, "loss": 0.3661, "num_tokens": 4889092997.0, "step": 11853 }, { "epoch": 4.342234232583704, "grad_norm": 0.14350506543809205, "learning_rate": 5.611656502125723e-06, "loss": 0.3571, "num_tokens": 4889809942.0, "step": 11854 }, { "epoch": 4.342600650391609, "grad_norm": 0.16040345256583885, "learning_rate": 5.609890503692438e-06, "loss": 0.3693, "num_tokens": 4890533238.0, "step": 11855 }, { "epoch": 4.342967068199514, "grad_norm": 0.1626593496164288, "learning_rate": 5.608125428054991e-06, "loss": 0.3712, "num_tokens": 4891283805.0, "step": 11856 }, { "epoch": 4.34333348600742, "grad_norm": 0.1637899806902431, "learning_rate": 5.606361275312766e-06, "loss": 0.3566, "num_tokens": 4891944502.0, "step": 11857 }, { "epoch": 4.343699903815326, "grad_norm": 0.15255045188727995, "learning_rate": 5.604598045565084e-06, "loss": 0.3408, "num_tokens": 4892658979.0, "step": 11858 }, { "epoch": 4.344066321623231, "grad_norm": 0.1572558588074398, "learning_rate": 5.602835738911217e-06, "loss": 0.3662, "num_tokens": 4893431569.0, "step": 11859 }, { "epoch": 4.344432739431136, "grad_norm": 0.14662702243278125, "learning_rate": 5.6010743554503885e-06, "loss": 0.3651, "num_tokens": 4894212392.0, "step": 11860 }, { "epoch": 4.344799157239041, "grad_norm": 0.14182188227905515, "learning_rate": 5.599313895281761e-06, "loss": 0.4075, "num_tokens": 4895074461.0, "step": 11861 }, { "epoch": 4.345165575046948, "grad_norm": 0.15933958748374114, "learning_rate": 5.597554358504464e-06, "loss": 0.3732, "num_tokens": 4895862229.0, "step": 11862 }, { "epoch": 4.345531992854853, "grad_norm": 0.15598940833127917, "learning_rate": 5.595795745217554e-06, "loss": 0.341, "num_tokens": 4896510940.0, "step": 11863 }, { "epoch": 4.345898410662758, "grad_norm": 0.145618377342274, "learning_rate": 5.594038055520047e-06, "loss": 0.3579, "num_tokens": 4897350118.0, "step": 11864 }, { "epoch": 4.346264828470663, "grad_norm": 0.1423741053706633, "learning_rate": 5.5922812895109066e-06, "loss": 0.3614, "num_tokens": 4898144252.0, "step": 11865 }, { "epoch": 4.3466312462785694, "grad_norm": 0.15349580980221447, "learning_rate": 5.590525447289037e-06, "loss": 0.4043, "num_tokens": 4898886535.0, "step": 11866 }, { "epoch": 4.346997664086475, "grad_norm": 0.13587505712506734, "learning_rate": 5.5887705289533e-06, "loss": 0.3557, "num_tokens": 4899755354.0, "step": 11867 }, { "epoch": 4.34736408189438, "grad_norm": 0.14715970015716276, "learning_rate": 5.587016534602503e-06, "loss": 0.3855, "num_tokens": 4900662615.0, "step": 11868 }, { "epoch": 4.347730499702285, "grad_norm": 0.15034395820798438, "learning_rate": 5.585263464335391e-06, "loss": 0.3598, "num_tokens": 4901380701.0, "step": 11869 }, { "epoch": 4.348096917510191, "grad_norm": 0.14237127192831192, "learning_rate": 5.583511318250674e-06, "loss": 0.376, "num_tokens": 4902201333.0, "step": 11870 }, { "epoch": 4.348463335318097, "grad_norm": 0.15711760504522168, "learning_rate": 5.581760096446998e-06, "loss": 0.391, "num_tokens": 4902940119.0, "step": 11871 }, { "epoch": 4.348829753126002, "grad_norm": 0.16147430089978912, "learning_rate": 5.5800097990229576e-06, "loss": 0.3687, "num_tokens": 4903612221.0, "step": 11872 }, { "epoch": 4.349196170933907, "grad_norm": 0.14818812476097243, "learning_rate": 5.578260426077103e-06, "loss": 0.3682, "num_tokens": 4904418636.0, "step": 11873 }, { "epoch": 4.349562588741813, "grad_norm": 0.14440079395545033, "learning_rate": 5.576511977707924e-06, "loss": 0.3372, "num_tokens": 4905198493.0, "step": 11874 }, { "epoch": 4.3499290065497185, "grad_norm": 0.14911231995092458, "learning_rate": 5.574764454013864e-06, "loss": 0.3798, "num_tokens": 4905946835.0, "step": 11875 }, { "epoch": 4.350295424357624, "grad_norm": 0.14572863230260416, "learning_rate": 5.573017855093308e-06, "loss": 0.3278, "num_tokens": 4906703062.0, "step": 11876 }, { "epoch": 4.350661842165529, "grad_norm": 0.15226250511166114, "learning_rate": 5.571272181044594e-06, "loss": 0.37, "num_tokens": 4907491091.0, "step": 11877 }, { "epoch": 4.351028259973435, "grad_norm": 0.15406560880328898, "learning_rate": 5.569527431966012e-06, "loss": 0.3555, "num_tokens": 4908238316.0, "step": 11878 }, { "epoch": 4.35139467778134, "grad_norm": 0.16728357523749976, "learning_rate": 5.567783607955793e-06, "loss": 0.4228, "num_tokens": 4908946787.0, "step": 11879 }, { "epoch": 4.351761095589246, "grad_norm": 0.16262333428465642, "learning_rate": 5.566040709112115e-06, "loss": 0.3497, "num_tokens": 4909700450.0, "step": 11880 }, { "epoch": 4.352127513397151, "grad_norm": 0.12968691455958314, "learning_rate": 5.56429873553311e-06, "loss": 0.361, "num_tokens": 4910632600.0, "step": 11881 }, { "epoch": 4.352493931205057, "grad_norm": 0.144141718708763, "learning_rate": 5.562557687316851e-06, "loss": 0.3542, "num_tokens": 4911477133.0, "step": 11882 }, { "epoch": 4.352860349012962, "grad_norm": 0.13717997408441648, "learning_rate": 5.560817564561359e-06, "loss": 0.379, "num_tokens": 4912248853.0, "step": 11883 }, { "epoch": 4.3532267668208675, "grad_norm": 0.15855156856603358, "learning_rate": 5.5590783673646145e-06, "loss": 0.3539, "num_tokens": 4912942231.0, "step": 11884 }, { "epoch": 4.353593184628773, "grad_norm": 0.16029642639034164, "learning_rate": 5.557340095824536e-06, "loss": 0.3709, "num_tokens": 4913665230.0, "step": 11885 }, { "epoch": 4.353959602436678, "grad_norm": 0.15238782165194062, "learning_rate": 5.5556027500389904e-06, "loss": 0.3456, "num_tokens": 4914445990.0, "step": 11886 }, { "epoch": 4.354326020244584, "grad_norm": 0.14816045464376248, "learning_rate": 5.5538663301057935e-06, "loss": 0.3319, "num_tokens": 4915162075.0, "step": 11887 }, { "epoch": 4.354692438052489, "grad_norm": 0.14762290872806255, "learning_rate": 5.5521308361227124e-06, "loss": 0.3876, "num_tokens": 4916022606.0, "step": 11888 }, { "epoch": 4.355058855860395, "grad_norm": 0.14624482785972073, "learning_rate": 5.550396268187449e-06, "loss": 0.3938, "num_tokens": 4916729381.0, "step": 11889 }, { "epoch": 4.3554252736683, "grad_norm": 0.14814131171864944, "learning_rate": 5.5486626263976715e-06, "loss": 0.3608, "num_tokens": 4917557387.0, "step": 11890 }, { "epoch": 4.355791691476206, "grad_norm": 0.16007257997635926, "learning_rate": 5.546929910850989e-06, "loss": 0.3799, "num_tokens": 4918304445.0, "step": 11891 }, { "epoch": 4.356158109284111, "grad_norm": 0.1517403032209832, "learning_rate": 5.545198121644952e-06, "loss": 0.3703, "num_tokens": 4919116295.0, "step": 11892 }, { "epoch": 4.3565245270920165, "grad_norm": 0.13670221956277775, "learning_rate": 5.543467258877065e-06, "loss": 0.3659, "num_tokens": 4919895777.0, "step": 11893 }, { "epoch": 4.356890944899922, "grad_norm": 0.15504131634352253, "learning_rate": 5.541737322644778e-06, "loss": 0.3688, "num_tokens": 4920630590.0, "step": 11894 }, { "epoch": 4.357257362707828, "grad_norm": 0.16124405084474125, "learning_rate": 5.5400083130454944e-06, "loss": 0.3893, "num_tokens": 4921383830.0, "step": 11895 }, { "epoch": 4.357623780515733, "grad_norm": 0.13859051397083133, "learning_rate": 5.538280230176556e-06, "loss": 0.3728, "num_tokens": 4922231954.0, "step": 11896 }, { "epoch": 4.357990198323638, "grad_norm": 0.14364242335256042, "learning_rate": 5.536553074135255e-06, "loss": 0.3661, "num_tokens": 4923052982.0, "step": 11897 }, { "epoch": 4.358356616131544, "grad_norm": 0.13707964312244947, "learning_rate": 5.534826845018844e-06, "loss": 0.3745, "num_tokens": 4923912604.0, "step": 11898 }, { "epoch": 4.35872303393945, "grad_norm": 0.15444060465566026, "learning_rate": 5.533101542924505e-06, "loss": 0.3475, "num_tokens": 4924608248.0, "step": 11899 }, { "epoch": 4.359089451747355, "grad_norm": 0.1575222445260667, "learning_rate": 5.531377167949373e-06, "loss": 0.3766, "num_tokens": 4925349353.0, "step": 11900 }, { "epoch": 4.35945586955526, "grad_norm": 0.15073463370450013, "learning_rate": 5.529653720190543e-06, "loss": 0.3697, "num_tokens": 4926115474.0, "step": 11901 }, { "epoch": 4.359822287363166, "grad_norm": 0.15690310352728187, "learning_rate": 5.527931199745044e-06, "loss": 0.3852, "num_tokens": 4926824627.0, "step": 11902 }, { "epoch": 4.360188705171072, "grad_norm": 0.14597254725326778, "learning_rate": 5.526209606709857e-06, "loss": 0.3515, "num_tokens": 4927574713.0, "step": 11903 }, { "epoch": 4.360555122978977, "grad_norm": 0.1484241101717143, "learning_rate": 5.5244889411819094e-06, "loss": 0.3691, "num_tokens": 4928386242.0, "step": 11904 }, { "epoch": 4.360921540786882, "grad_norm": 0.15097987658443007, "learning_rate": 5.522769203258078e-06, "loss": 0.3658, "num_tokens": 4929238202.0, "step": 11905 }, { "epoch": 4.3612879585947875, "grad_norm": 0.144211881040267, "learning_rate": 5.5210503930351945e-06, "loss": 0.3673, "num_tokens": 4930040250.0, "step": 11906 }, { "epoch": 4.361654376402694, "grad_norm": 0.14286459036478646, "learning_rate": 5.519332510610026e-06, "loss": 0.3554, "num_tokens": 4930856295.0, "step": 11907 }, { "epoch": 4.362020794210599, "grad_norm": 0.1407692494298541, "learning_rate": 5.517615556079292e-06, "loss": 0.3993, "num_tokens": 4931700755.0, "step": 11908 }, { "epoch": 4.362387212018504, "grad_norm": 0.14266266381701181, "learning_rate": 5.515899529539664e-06, "loss": 0.3998, "num_tokens": 4932504057.0, "step": 11909 }, { "epoch": 4.362753629826409, "grad_norm": 0.16264658437926865, "learning_rate": 5.514184431087749e-06, "loss": 0.3772, "num_tokens": 4933264441.0, "step": 11910 }, { "epoch": 4.363120047634315, "grad_norm": 0.15072408438705764, "learning_rate": 5.512470260820119e-06, "loss": 0.373, "num_tokens": 4934026159.0, "step": 11911 }, { "epoch": 4.363486465442221, "grad_norm": 0.15598745761935445, "learning_rate": 5.510757018833286e-06, "loss": 0.3939, "num_tokens": 4934836820.0, "step": 11912 }, { "epoch": 4.363852883250126, "grad_norm": 0.16119230564658574, "learning_rate": 5.509044705223705e-06, "loss": 0.36, "num_tokens": 4935499748.0, "step": 11913 }, { "epoch": 4.364219301058031, "grad_norm": 0.14580352097257374, "learning_rate": 5.507333320087784e-06, "loss": 0.3849, "num_tokens": 4936261936.0, "step": 11914 }, { "epoch": 4.3645857188659365, "grad_norm": 0.15663380298520768, "learning_rate": 5.505622863521878e-06, "loss": 0.3601, "num_tokens": 4937023814.0, "step": 11915 }, { "epoch": 4.364952136673843, "grad_norm": 0.15182983995256905, "learning_rate": 5.503913335622284e-06, "loss": 0.402, "num_tokens": 4937717673.0, "step": 11916 }, { "epoch": 4.365318554481748, "grad_norm": 0.14426297566541676, "learning_rate": 5.502204736485259e-06, "loss": 0.3953, "num_tokens": 4938472950.0, "step": 11917 }, { "epoch": 4.365684972289653, "grad_norm": 0.15028764975800055, "learning_rate": 5.500497066206996e-06, "loss": 0.3583, "num_tokens": 4939302937.0, "step": 11918 }, { "epoch": 4.366051390097558, "grad_norm": 0.14996257095024115, "learning_rate": 5.498790324883644e-06, "loss": 0.3733, "num_tokens": 4939977028.0, "step": 11919 }, { "epoch": 4.3664178079054645, "grad_norm": 0.1519099751150725, "learning_rate": 5.497084512611293e-06, "loss": 0.3829, "num_tokens": 4940754755.0, "step": 11920 }, { "epoch": 4.36678422571337, "grad_norm": 0.14450115166254732, "learning_rate": 5.495379629485984e-06, "loss": 0.3781, "num_tokens": 4941561066.0, "step": 11921 }, { "epoch": 4.367150643521275, "grad_norm": 0.1405397860440109, "learning_rate": 5.493675675603703e-06, "loss": 0.3915, "num_tokens": 4942427606.0, "step": 11922 }, { "epoch": 4.36751706132918, "grad_norm": 0.1708522390855641, "learning_rate": 5.491972651060392e-06, "loss": 0.3885, "num_tokens": 4943089129.0, "step": 11923 }, { "epoch": 4.367883479137086, "grad_norm": 0.15547443455994384, "learning_rate": 5.490270555951927e-06, "loss": 0.3735, "num_tokens": 4943816070.0, "step": 11924 }, { "epoch": 4.368249896944992, "grad_norm": 0.1529874001001192, "learning_rate": 5.488569390374147e-06, "loss": 0.3684, "num_tokens": 4944587534.0, "step": 11925 }, { "epoch": 4.368616314752897, "grad_norm": 0.1562453324933358, "learning_rate": 5.486869154422829e-06, "loss": 0.3799, "num_tokens": 4945306099.0, "step": 11926 }, { "epoch": 4.368982732560802, "grad_norm": 0.1543738772384548, "learning_rate": 5.485169848193695e-06, "loss": 0.3652, "num_tokens": 4946078504.0, "step": 11927 }, { "epoch": 4.369349150368708, "grad_norm": 0.14949016020424727, "learning_rate": 5.483471471782424e-06, "loss": 0.3508, "num_tokens": 4946835408.0, "step": 11928 }, { "epoch": 4.3697155681766136, "grad_norm": 0.14360291114808993, "learning_rate": 5.481774025284638e-06, "loss": 0.3671, "num_tokens": 4947633941.0, "step": 11929 }, { "epoch": 4.370081985984519, "grad_norm": 0.14047131551403202, "learning_rate": 5.480077508795906e-06, "loss": 0.3885, "num_tokens": 4948391424.0, "step": 11930 }, { "epoch": 4.370448403792424, "grad_norm": 0.14870936645985536, "learning_rate": 5.4783819224117395e-06, "loss": 0.3973, "num_tokens": 4949150232.0, "step": 11931 }, { "epoch": 4.37081482160033, "grad_norm": 0.1571019214415353, "learning_rate": 5.476687266227612e-06, "loss": 0.3673, "num_tokens": 4949971200.0, "step": 11932 }, { "epoch": 4.371181239408235, "grad_norm": 0.14417048066457486, "learning_rate": 5.474993540338931e-06, "loss": 0.3595, "num_tokens": 4950743171.0, "step": 11933 }, { "epoch": 4.371547657216141, "grad_norm": 0.15250831086490496, "learning_rate": 5.473300744841059e-06, "loss": 0.3483, "num_tokens": 4951460054.0, "step": 11934 }, { "epoch": 4.371914075024046, "grad_norm": 0.14008444396440567, "learning_rate": 5.471608879829305e-06, "loss": 0.3787, "num_tokens": 4952226310.0, "step": 11935 }, { "epoch": 4.372280492831951, "grad_norm": 0.16068095168661856, "learning_rate": 5.469917945398921e-06, "loss": 0.3479, "num_tokens": 4952907537.0, "step": 11936 }, { "epoch": 4.372646910639857, "grad_norm": 0.15514903178857287, "learning_rate": 5.46822794164511e-06, "loss": 0.3539, "num_tokens": 4953644272.0, "step": 11937 }, { "epoch": 4.373013328447763, "grad_norm": 0.1444583863025756, "learning_rate": 5.4665388686630236e-06, "loss": 0.3711, "num_tokens": 4954461751.0, "step": 11938 }, { "epoch": 4.373379746255668, "grad_norm": 0.16919566609784464, "learning_rate": 5.464850726547757e-06, "loss": 0.355, "num_tokens": 4955222001.0, "step": 11939 }, { "epoch": 4.373746164063573, "grad_norm": 0.14345917065371866, "learning_rate": 5.463163515394365e-06, "loss": 0.3625, "num_tokens": 4956008444.0, "step": 11940 }, { "epoch": 4.374112581871479, "grad_norm": 0.16013575134058144, "learning_rate": 5.461477235297834e-06, "loss": 0.3782, "num_tokens": 4956664443.0, "step": 11941 }, { "epoch": 4.3744789996793845, "grad_norm": 0.15104482673129907, "learning_rate": 5.459791886353105e-06, "loss": 0.3515, "num_tokens": 4957369251.0, "step": 11942 }, { "epoch": 4.37484541748729, "grad_norm": 0.15918640063951484, "learning_rate": 5.45810746865507e-06, "loss": 0.3853, "num_tokens": 4958069417.0, "step": 11943 }, { "epoch": 4.375211835295195, "grad_norm": 0.1486689832622607, "learning_rate": 5.456423982298556e-06, "loss": 0.3487, "num_tokens": 4958857827.0, "step": 11944 }, { "epoch": 4.375578253103101, "grad_norm": 0.14901388705526203, "learning_rate": 5.454741427378354e-06, "loss": 0.3637, "num_tokens": 4959601179.0, "step": 11945 }, { "epoch": 4.375944670911006, "grad_norm": 0.1603078949271934, "learning_rate": 5.4530598039892e-06, "loss": 0.3889, "num_tokens": 4960306207.0, "step": 11946 }, { "epoch": 4.376311088718912, "grad_norm": 0.14748168873175635, "learning_rate": 5.451379112225764e-06, "loss": 0.3781, "num_tokens": 4961209665.0, "step": 11947 }, { "epoch": 4.376677506526817, "grad_norm": 0.14996378585792428, "learning_rate": 5.449699352182678e-06, "loss": 0.3459, "num_tokens": 4961901960.0, "step": 11948 }, { "epoch": 4.377043924334723, "grad_norm": 0.14732720462814658, "learning_rate": 5.448020523954508e-06, "loss": 0.41, "num_tokens": 4962716931.0, "step": 11949 }, { "epoch": 4.377410342142628, "grad_norm": 0.15675442310642873, "learning_rate": 5.446342627635787e-06, "loss": 0.3743, "num_tokens": 4963549631.0, "step": 11950 }, { "epoch": 4.3777767599505335, "grad_norm": 0.1560034199888063, "learning_rate": 5.444665663320975e-06, "loss": 0.3593, "num_tokens": 4964246664.0, "step": 11951 }, { "epoch": 4.378143177758439, "grad_norm": 0.13933122307434417, "learning_rate": 5.442989631104487e-06, "loss": 0.3505, "num_tokens": 4965058144.0, "step": 11952 }, { "epoch": 4.378509595566345, "grad_norm": 0.1456890032760947, "learning_rate": 5.441314531080697e-06, "loss": 0.3832, "num_tokens": 4965891617.0, "step": 11953 }, { "epoch": 4.37887601337425, "grad_norm": 0.1610357365835179, "learning_rate": 5.4396403633439085e-06, "loss": 0.3851, "num_tokens": 4966561035.0, "step": 11954 }, { "epoch": 4.379242431182155, "grad_norm": 0.1738514737925121, "learning_rate": 5.437967127988381e-06, "loss": 0.3611, "num_tokens": 4967228639.0, "step": 11955 }, { "epoch": 4.379608848990061, "grad_norm": 0.14228091715975894, "learning_rate": 5.436294825108323e-06, "loss": 0.3518, "num_tokens": 4968108872.0, "step": 11956 }, { "epoch": 4.379975266797967, "grad_norm": 0.12898687198639874, "learning_rate": 5.434623454797888e-06, "loss": 0.3441, "num_tokens": 4968991064.0, "step": 11957 }, { "epoch": 4.380341684605872, "grad_norm": 0.1355088349446503, "learning_rate": 5.4329530171511815e-06, "loss": 0.3813, "num_tokens": 4969844165.0, "step": 11958 }, { "epoch": 4.380708102413777, "grad_norm": 0.14391242280449593, "learning_rate": 5.43128351226224e-06, "loss": 0.3649, "num_tokens": 4970663837.0, "step": 11959 }, { "epoch": 4.3810745202216825, "grad_norm": 0.14949748207841285, "learning_rate": 5.429614940225075e-06, "loss": 0.3773, "num_tokens": 4971468947.0, "step": 11960 }, { "epoch": 4.381440938029588, "grad_norm": 0.1363059882046985, "learning_rate": 5.427947301133618e-06, "loss": 0.325, "num_tokens": 4972294787.0, "step": 11961 }, { "epoch": 4.381807355837494, "grad_norm": 0.14263322678789336, "learning_rate": 5.426280595081772e-06, "loss": 0.4034, "num_tokens": 4973137874.0, "step": 11962 }, { "epoch": 4.382173773645399, "grad_norm": 0.14701910045738112, "learning_rate": 5.4246148221633676e-06, "loss": 0.3697, "num_tokens": 4973926186.0, "step": 11963 }, { "epoch": 4.382540191453304, "grad_norm": 0.15333564151158033, "learning_rate": 5.422949982472195e-06, "loss": 0.3731, "num_tokens": 4974615353.0, "step": 11964 }, { "epoch": 4.382906609261211, "grad_norm": 0.15046252179376227, "learning_rate": 5.421286076101983e-06, "loss": 0.3491, "num_tokens": 4975372376.0, "step": 11965 }, { "epoch": 4.383273027069116, "grad_norm": 0.15605282398971132, "learning_rate": 5.419623103146417e-06, "loss": 0.3563, "num_tokens": 4976185400.0, "step": 11966 }, { "epoch": 4.383639444877021, "grad_norm": 0.15259468948570454, "learning_rate": 5.417961063699126e-06, "loss": 0.3655, "num_tokens": 4976987194.0, "step": 11967 }, { "epoch": 4.384005862684926, "grad_norm": 0.14413896204443513, "learning_rate": 5.416299957853685e-06, "loss": 0.3531, "num_tokens": 4977775751.0, "step": 11968 }, { "epoch": 4.384372280492832, "grad_norm": 0.14945165233128288, "learning_rate": 5.414639785703621e-06, "loss": 0.3873, "num_tokens": 4978568196.0, "step": 11969 }, { "epoch": 4.384738698300738, "grad_norm": 0.16351677062382336, "learning_rate": 5.4129805473424e-06, "loss": 0.3889, "num_tokens": 4979272746.0, "step": 11970 }, { "epoch": 4.385105116108643, "grad_norm": 0.15217314622093403, "learning_rate": 5.411322242863443e-06, "loss": 0.3549, "num_tokens": 4979994202.0, "step": 11971 }, { "epoch": 4.385471533916548, "grad_norm": 0.15477181778162669, "learning_rate": 5.409664872360111e-06, "loss": 0.3569, "num_tokens": 4980691625.0, "step": 11972 }, { "epoch": 4.3858379517244535, "grad_norm": 0.15419536063271336, "learning_rate": 5.408008435925722e-06, "loss": 0.3868, "num_tokens": 4981492567.0, "step": 11973 }, { "epoch": 4.38620436953236, "grad_norm": 0.15122644318870881, "learning_rate": 5.406352933653539e-06, "loss": 0.3757, "num_tokens": 4982216142.0, "step": 11974 }, { "epoch": 4.386570787340265, "grad_norm": 0.15473070043762055, "learning_rate": 5.4046983656367655e-06, "loss": 0.3802, "num_tokens": 4982907896.0, "step": 11975 }, { "epoch": 4.38693720514817, "grad_norm": 0.16161302736125485, "learning_rate": 5.4030447319685605e-06, "loss": 0.394, "num_tokens": 4983596738.0, "step": 11976 }, { "epoch": 4.387303622956075, "grad_norm": 0.1523879954372189, "learning_rate": 5.401392032742021e-06, "loss": 0.3504, "num_tokens": 4984285617.0, "step": 11977 }, { "epoch": 4.3876700407639815, "grad_norm": 0.14670609367878001, "learning_rate": 5.399740268050206e-06, "loss": 0.3566, "num_tokens": 4985064751.0, "step": 11978 }, { "epoch": 4.388036458571887, "grad_norm": 0.14910700180630942, "learning_rate": 5.398089437986104e-06, "loss": 0.3474, "num_tokens": 4985825100.0, "step": 11979 }, { "epoch": 4.388402876379792, "grad_norm": 0.16311825021677578, "learning_rate": 5.396439542642666e-06, "loss": 0.3729, "num_tokens": 4986546052.0, "step": 11980 }, { "epoch": 4.388769294187697, "grad_norm": 0.13913170900543242, "learning_rate": 5.394790582112784e-06, "loss": 0.3469, "num_tokens": 4987265391.0, "step": 11981 }, { "epoch": 4.389135711995603, "grad_norm": 0.1509392286295555, "learning_rate": 5.393142556489297e-06, "loss": 0.3683, "num_tokens": 4988026511.0, "step": 11982 }, { "epoch": 4.389502129803509, "grad_norm": 0.15016564209354274, "learning_rate": 5.391495465864989e-06, "loss": 0.366, "num_tokens": 4988837011.0, "step": 11983 }, { "epoch": 4.389868547611414, "grad_norm": 0.1499226009595746, "learning_rate": 5.389849310332598e-06, "loss": 0.3817, "num_tokens": 4989610971.0, "step": 11984 }, { "epoch": 4.390234965419319, "grad_norm": 0.1410670591482071, "learning_rate": 5.388204089984807e-06, "loss": 0.3677, "num_tokens": 4990517470.0, "step": 11985 }, { "epoch": 4.390601383227224, "grad_norm": 0.13709236782607923, "learning_rate": 5.386559804914238e-06, "loss": 0.3602, "num_tokens": 4991374100.0, "step": 11986 }, { "epoch": 4.3909678010351305, "grad_norm": 0.15376934386387972, "learning_rate": 5.384916455213478e-06, "loss": 0.3572, "num_tokens": 4992084909.0, "step": 11987 }, { "epoch": 4.391334218843036, "grad_norm": 0.14804246840755222, "learning_rate": 5.3832740409750404e-06, "loss": 0.361, "num_tokens": 4992808673.0, "step": 11988 }, { "epoch": 4.391700636650941, "grad_norm": 0.14671209137908303, "learning_rate": 5.381632562291407e-06, "loss": 0.3635, "num_tokens": 4993616087.0, "step": 11989 }, { "epoch": 4.392067054458847, "grad_norm": 0.15342404011549535, "learning_rate": 5.379992019254989e-06, "loss": 0.369, "num_tokens": 4994381233.0, "step": 11990 }, { "epoch": 4.392433472266752, "grad_norm": 0.1398485649470857, "learning_rate": 5.3783524119581555e-06, "loss": 0.3618, "num_tokens": 4995142010.0, "step": 11991 }, { "epoch": 4.392799890074658, "grad_norm": 0.15087514412340905, "learning_rate": 5.376713740493215e-06, "loss": 0.3912, "num_tokens": 4995944892.0, "step": 11992 }, { "epoch": 4.393166307882563, "grad_norm": 0.15428005635870964, "learning_rate": 5.375076004952432e-06, "loss": 0.3585, "num_tokens": 4996781289.0, "step": 11993 }, { "epoch": 4.393532725690468, "grad_norm": 0.14688835650709087, "learning_rate": 5.3734392054280116e-06, "loss": 0.3905, "num_tokens": 4997540646.0, "step": 11994 }, { "epoch": 4.393899143498374, "grad_norm": 0.1494193206875118, "learning_rate": 5.371803342012116e-06, "loss": 0.3825, "num_tokens": 4998285907.0, "step": 11995 }, { "epoch": 4.3942655613062795, "grad_norm": 0.148177571386792, "learning_rate": 5.370168414796839e-06, "loss": 0.358, "num_tokens": 4999068857.0, "step": 11996 }, { "epoch": 4.394631979114185, "grad_norm": 0.14169198412634243, "learning_rate": 5.368534423874237e-06, "loss": 0.3436, "num_tokens": 4999817917.0, "step": 11997 }, { "epoch": 4.39499839692209, "grad_norm": 0.15562722110258706, "learning_rate": 5.366901369336301e-06, "loss": 0.384, "num_tokens": 5000493977.0, "step": 11998 }, { "epoch": 4.395364814729996, "grad_norm": 0.16263987944066474, "learning_rate": 5.365269251274976e-06, "loss": 0.3632, "num_tokens": 5001121937.0, "step": 11999 }, { "epoch": 4.395731232537901, "grad_norm": 0.1597700932547262, "learning_rate": 5.363638069782153e-06, "loss": 0.3501, "num_tokens": 5001888427.0, "step": 12000 }, { "epoch": 4.396097650345807, "grad_norm": 0.14105915176531683, "learning_rate": 5.362007824949679e-06, "loss": 0.3815, "num_tokens": 5002769071.0, "step": 12001 }, { "epoch": 4.396464068153712, "grad_norm": 0.17423933645383435, "learning_rate": 5.360378516869334e-06, "loss": 0.3888, "num_tokens": 5003423444.0, "step": 12002 }, { "epoch": 4.396830485961618, "grad_norm": 0.15686507800533134, "learning_rate": 5.3587501456328515e-06, "loss": 0.3485, "num_tokens": 5004127779.0, "step": 12003 }, { "epoch": 4.397196903769523, "grad_norm": 0.14447490159120793, "learning_rate": 5.3571227113319105e-06, "loss": 0.3803, "num_tokens": 5004919328.0, "step": 12004 }, { "epoch": 4.397563321577429, "grad_norm": 0.15342968113282368, "learning_rate": 5.355496214058138e-06, "loss": 0.3726, "num_tokens": 5005609514.0, "step": 12005 }, { "epoch": 4.397929739385334, "grad_norm": 0.14887352507706358, "learning_rate": 5.353870653903112e-06, "loss": 0.3579, "num_tokens": 5006415196.0, "step": 12006 }, { "epoch": 4.39829615719324, "grad_norm": 0.14878639781744, "learning_rate": 5.352246030958353e-06, "loss": 0.3797, "num_tokens": 5007183225.0, "step": 12007 }, { "epoch": 4.398662575001145, "grad_norm": 0.16444611307978266, "learning_rate": 5.350622345315336e-06, "loss": 0.3618, "num_tokens": 5007879087.0, "step": 12008 }, { "epoch": 4.3990289928090505, "grad_norm": 0.16205368355623154, "learning_rate": 5.3489995970654715e-06, "loss": 0.3788, "num_tokens": 5008534775.0, "step": 12009 }, { "epoch": 4.399395410616956, "grad_norm": 0.1524422470260613, "learning_rate": 5.347377786300123e-06, "loss": 0.352, "num_tokens": 5009299048.0, "step": 12010 }, { "epoch": 4.399761828424862, "grad_norm": 0.15387540721652715, "learning_rate": 5.345756913110606e-06, "loss": 0.3651, "num_tokens": 5009980692.0, "step": 12011 }, { "epoch": 4.400128246232767, "grad_norm": 0.15192385672364103, "learning_rate": 5.344136977588179e-06, "loss": 0.3771, "num_tokens": 5010673877.0, "step": 12012 }, { "epoch": 4.400494664040672, "grad_norm": 0.15252219151262797, "learning_rate": 5.342517979824042e-06, "loss": 0.3593, "num_tokens": 5011414195.0, "step": 12013 }, { "epoch": 4.400861081848578, "grad_norm": 0.15290270936486275, "learning_rate": 5.34089991990935e-06, "loss": 0.399, "num_tokens": 5012208707.0, "step": 12014 }, { "epoch": 4.401227499656484, "grad_norm": 0.15897893144856054, "learning_rate": 5.3392827979352066e-06, "loss": 0.3736, "num_tokens": 5013016780.0, "step": 12015 }, { "epoch": 4.401593917464389, "grad_norm": 0.1507792215686824, "learning_rate": 5.337666613992657e-06, "loss": 0.3791, "num_tokens": 5013692583.0, "step": 12016 }, { "epoch": 4.401960335272294, "grad_norm": 0.14510871588382013, "learning_rate": 5.336051368172693e-06, "loss": 0.3597, "num_tokens": 5014556514.0, "step": 12017 }, { "epoch": 4.4023267530801995, "grad_norm": 0.13777699490288825, "learning_rate": 5.334437060566262e-06, "loss": 0.3549, "num_tokens": 5015343187.0, "step": 12018 }, { "epoch": 4.402693170888105, "grad_norm": 0.15772357080436422, "learning_rate": 5.332823691264248e-06, "loss": 0.3841, "num_tokens": 5016141415.0, "step": 12019 }, { "epoch": 4.403059588696011, "grad_norm": 0.14656435021444184, "learning_rate": 5.331211260357485e-06, "loss": 0.3761, "num_tokens": 5016936606.0, "step": 12020 }, { "epoch": 4.403426006503916, "grad_norm": 0.15272958443546336, "learning_rate": 5.3295997679367594e-06, "loss": 0.37, "num_tokens": 5017761023.0, "step": 12021 }, { "epoch": 4.403792424311821, "grad_norm": 0.14278845908064103, "learning_rate": 5.327989214092805e-06, "loss": 0.3686, "num_tokens": 5018522618.0, "step": 12022 }, { "epoch": 4.404158842119727, "grad_norm": 0.1568712222971997, "learning_rate": 5.3263795989162926e-06, "loss": 0.3544, "num_tokens": 5019240756.0, "step": 12023 }, { "epoch": 4.404525259927633, "grad_norm": 0.1623308380192448, "learning_rate": 5.324770922497851e-06, "loss": 0.3849, "num_tokens": 5019908592.0, "step": 12024 }, { "epoch": 4.404891677735538, "grad_norm": 0.15925650127555377, "learning_rate": 5.32316318492805e-06, "loss": 0.367, "num_tokens": 5020607267.0, "step": 12025 }, { "epoch": 4.405258095543443, "grad_norm": 0.15580050118471386, "learning_rate": 5.321556386297411e-06, "loss": 0.373, "num_tokens": 5021327597.0, "step": 12026 }, { "epoch": 4.4056245133513485, "grad_norm": 0.16207323465468512, "learning_rate": 5.319950526696392e-06, "loss": 0.3924, "num_tokens": 5022047781.0, "step": 12027 }, { "epoch": 4.405990931159255, "grad_norm": 0.14343279084824484, "learning_rate": 5.318345606215412e-06, "loss": 0.3742, "num_tokens": 5022878824.0, "step": 12028 }, { "epoch": 4.40635734896716, "grad_norm": 0.16837665881364342, "learning_rate": 5.316741624944834e-06, "loss": 0.3631, "num_tokens": 5023532211.0, "step": 12029 }, { "epoch": 4.406723766775065, "grad_norm": 0.16441645984461678, "learning_rate": 5.3151385829749634e-06, "loss": 0.3874, "num_tokens": 5024292246.0, "step": 12030 }, { "epoch": 4.40709018458297, "grad_norm": 0.15812623386481825, "learning_rate": 5.313536480396054e-06, "loss": 0.3658, "num_tokens": 5025004357.0, "step": 12031 }, { "epoch": 4.4074566023908766, "grad_norm": 0.16190901339020697, "learning_rate": 5.311935317298305e-06, "loss": 0.3472, "num_tokens": 5025712682.0, "step": 12032 }, { "epoch": 4.407823020198782, "grad_norm": 0.14855804720783433, "learning_rate": 5.3103350937718655e-06, "loss": 0.3821, "num_tokens": 5026427454.0, "step": 12033 }, { "epoch": 4.408189438006687, "grad_norm": 0.15614075401458105, "learning_rate": 5.3087358099068355e-06, "loss": 0.3426, "num_tokens": 5027172951.0, "step": 12034 }, { "epoch": 4.408555855814592, "grad_norm": 0.16134662190678234, "learning_rate": 5.307137465793255e-06, "loss": 0.3976, "num_tokens": 5027858917.0, "step": 12035 }, { "epoch": 4.4089222736224984, "grad_norm": 0.1522075972193738, "learning_rate": 5.305540061521115e-06, "loss": 0.3474, "num_tokens": 5028570067.0, "step": 12036 }, { "epoch": 4.409288691430404, "grad_norm": 0.1692830849722525, "learning_rate": 5.303943597180352e-06, "loss": 0.3689, "num_tokens": 5029215651.0, "step": 12037 }, { "epoch": 4.409655109238309, "grad_norm": 0.16143299488445037, "learning_rate": 5.3023480728608454e-06, "loss": 0.3822, "num_tokens": 5029978846.0, "step": 12038 }, { "epoch": 4.410021527046214, "grad_norm": 0.14388347005475574, "learning_rate": 5.300753488652434e-06, "loss": 0.3737, "num_tokens": 5030787603.0, "step": 12039 }, { "epoch": 4.41038794485412, "grad_norm": 0.1512694645806537, "learning_rate": 5.299159844644894e-06, "loss": 0.3318, "num_tokens": 5031447346.0, "step": 12040 }, { "epoch": 4.410754362662026, "grad_norm": 0.1563688478436827, "learning_rate": 5.297567140927947e-06, "loss": 0.3769, "num_tokens": 5032204645.0, "step": 12041 }, { "epoch": 4.411120780469931, "grad_norm": 0.16687756311638977, "learning_rate": 5.29597537759127e-06, "loss": 0.3915, "num_tokens": 5032912624.0, "step": 12042 }, { "epoch": 4.411487198277836, "grad_norm": 0.15249498619624463, "learning_rate": 5.2943845547244835e-06, "loss": 0.3644, "num_tokens": 5033618014.0, "step": 12043 }, { "epoch": 4.411853616085741, "grad_norm": 0.15855143273460728, "learning_rate": 5.292794672417144e-06, "loss": 0.3628, "num_tokens": 5034381829.0, "step": 12044 }, { "epoch": 4.4122200338936475, "grad_norm": 0.1447514551863645, "learning_rate": 5.2912057307587786e-06, "loss": 0.3457, "num_tokens": 5035114659.0, "step": 12045 }, { "epoch": 4.412586451701553, "grad_norm": 0.16440425237305895, "learning_rate": 5.2896177298388375e-06, "loss": 0.3705, "num_tokens": 5035823163.0, "step": 12046 }, { "epoch": 4.412952869509458, "grad_norm": 0.15394211853263304, "learning_rate": 5.2880306697467334e-06, "loss": 0.3818, "num_tokens": 5036550405.0, "step": 12047 }, { "epoch": 4.413319287317363, "grad_norm": 0.15524823824022968, "learning_rate": 5.286444550571818e-06, "loss": 0.3506, "num_tokens": 5037273174.0, "step": 12048 }, { "epoch": 4.413685705125269, "grad_norm": 0.15490692640711218, "learning_rate": 5.284859372403395e-06, "loss": 0.3705, "num_tokens": 5037998862.0, "step": 12049 }, { "epoch": 4.414052122933175, "grad_norm": 0.14865251819564698, "learning_rate": 5.2832751353307155e-06, "loss": 0.3653, "num_tokens": 5038784673.0, "step": 12050 }, { "epoch": 4.41441854074108, "grad_norm": 0.15763501615091324, "learning_rate": 5.2816918394429715e-06, "loss": 0.3811, "num_tokens": 5039509248.0, "step": 12051 }, { "epoch": 4.414784958548985, "grad_norm": 0.14648908420077855, "learning_rate": 5.280109484829308e-06, "loss": 0.3593, "num_tokens": 5040235897.0, "step": 12052 }, { "epoch": 4.415151376356891, "grad_norm": 0.14562215784218493, "learning_rate": 5.2785280715788126e-06, "loss": 0.336, "num_tokens": 5040983914.0, "step": 12053 }, { "epoch": 4.4155177941647965, "grad_norm": 0.14705204403794678, "learning_rate": 5.2769475997805195e-06, "loss": 0.3686, "num_tokens": 5041707412.0, "step": 12054 }, { "epoch": 4.415884211972702, "grad_norm": 0.15205244556137154, "learning_rate": 5.275368069523417e-06, "loss": 0.3741, "num_tokens": 5042494598.0, "step": 12055 }, { "epoch": 4.416250629780607, "grad_norm": 0.14786483457445496, "learning_rate": 5.273789480896436e-06, "loss": 0.4021, "num_tokens": 5043389056.0, "step": 12056 }, { "epoch": 4.416617047588513, "grad_norm": 0.14737546484010874, "learning_rate": 5.272211833988453e-06, "loss": 0.3478, "num_tokens": 5044066826.0, "step": 12057 }, { "epoch": 4.416983465396418, "grad_norm": 0.1609267478881593, "learning_rate": 5.2706351288882914e-06, "loss": 0.3523, "num_tokens": 5044837186.0, "step": 12058 }, { "epoch": 4.417349883204324, "grad_norm": 0.15848351213859616, "learning_rate": 5.269059365684727e-06, "loss": 0.3772, "num_tokens": 5045496426.0, "step": 12059 }, { "epoch": 4.417716301012229, "grad_norm": 0.14815707939694234, "learning_rate": 5.26748454446647e-06, "loss": 0.3709, "num_tokens": 5046262958.0, "step": 12060 }, { "epoch": 4.418082718820135, "grad_norm": 0.16737348288846335, "learning_rate": 5.2659106653221934e-06, "loss": 0.3839, "num_tokens": 5046873348.0, "step": 12061 }, { "epoch": 4.41844913662804, "grad_norm": 0.15194434978620194, "learning_rate": 5.2643377283405075e-06, "loss": 0.3812, "num_tokens": 5047757957.0, "step": 12062 }, { "epoch": 4.4188155544359455, "grad_norm": 0.1474949940229456, "learning_rate": 5.262765733609972e-06, "loss": 0.3373, "num_tokens": 5048485446.0, "step": 12063 }, { "epoch": 4.419181972243851, "grad_norm": 0.15652146218671625, "learning_rate": 5.261194681219094e-06, "loss": 0.3507, "num_tokens": 5049240768.0, "step": 12064 }, { "epoch": 4.419548390051757, "grad_norm": 0.14071569076896706, "learning_rate": 5.2596245712563265e-06, "loss": 0.3636, "num_tokens": 5050052741.0, "step": 12065 }, { "epoch": 4.419914807859662, "grad_norm": 0.14023003185793448, "learning_rate": 5.2580554038100665e-06, "loss": 0.345, "num_tokens": 5050857391.0, "step": 12066 }, { "epoch": 4.420281225667567, "grad_norm": 0.1555289984298145, "learning_rate": 5.256487178968668e-06, "loss": 0.3753, "num_tokens": 5051590453.0, "step": 12067 }, { "epoch": 4.420647643475473, "grad_norm": 0.1598685705425766, "learning_rate": 5.25491989682042e-06, "loss": 0.3508, "num_tokens": 5052268024.0, "step": 12068 }, { "epoch": 4.421014061283378, "grad_norm": 0.15230929391067413, "learning_rate": 5.253353557453562e-06, "loss": 0.3628, "num_tokens": 5053033217.0, "step": 12069 }, { "epoch": 4.421380479091284, "grad_norm": 0.1566983020444452, "learning_rate": 5.251788160956288e-06, "loss": 0.3985, "num_tokens": 5053758102.0, "step": 12070 }, { "epoch": 4.421746896899189, "grad_norm": 0.14220814993619535, "learning_rate": 5.250223707416727e-06, "loss": 0.3729, "num_tokens": 5054581728.0, "step": 12071 }, { "epoch": 4.422113314707095, "grad_norm": 0.14432735734644841, "learning_rate": 5.248660196922968e-06, "loss": 0.3875, "num_tokens": 5055424732.0, "step": 12072 }, { "epoch": 4.422479732515, "grad_norm": 0.14329009441134025, "learning_rate": 5.2470976295630345e-06, "loss": 0.358, "num_tokens": 5056185815.0, "step": 12073 }, { "epoch": 4.422846150322906, "grad_norm": 0.1561734927684838, "learning_rate": 5.245536005424903e-06, "loss": 0.3584, "num_tokens": 5056948184.0, "step": 12074 }, { "epoch": 4.423212568130811, "grad_norm": 0.1476010820408294, "learning_rate": 5.243975324596494e-06, "loss": 0.3449, "num_tokens": 5057716718.0, "step": 12075 }, { "epoch": 4.4235789859387165, "grad_norm": 0.1384748636022403, "learning_rate": 5.24241558716568e-06, "loss": 0.3556, "num_tokens": 5058519222.0, "step": 12076 }, { "epoch": 4.423945403746622, "grad_norm": 0.1627219499297585, "learning_rate": 5.240856793220277e-06, "loss": 0.3932, "num_tokens": 5059220572.0, "step": 12077 }, { "epoch": 4.424311821554528, "grad_norm": 0.14507128731996688, "learning_rate": 5.239298942848048e-06, "loss": 0.338, "num_tokens": 5060026467.0, "step": 12078 }, { "epoch": 4.424678239362433, "grad_norm": 0.14204241484083516, "learning_rate": 5.237742036136703e-06, "loss": 0.3543, "num_tokens": 5060782004.0, "step": 12079 }, { "epoch": 4.425044657170338, "grad_norm": 0.1511295924140237, "learning_rate": 5.236186073173899e-06, "loss": 0.3765, "num_tokens": 5061538732.0, "step": 12080 }, { "epoch": 4.425411074978244, "grad_norm": 0.15099991044631117, "learning_rate": 5.234631054047241e-06, "loss": 0.3762, "num_tokens": 5062279310.0, "step": 12081 }, { "epoch": 4.42577749278615, "grad_norm": 0.14772341500017774, "learning_rate": 5.233076978844273e-06, "loss": 0.3779, "num_tokens": 5063057582.0, "step": 12082 }, { "epoch": 4.426143910594055, "grad_norm": 0.16271117635876095, "learning_rate": 5.2315238476524996e-06, "loss": 0.3506, "num_tokens": 5063663017.0, "step": 12083 }, { "epoch": 4.42651032840196, "grad_norm": 0.15393350298401753, "learning_rate": 5.229971660559366e-06, "loss": 0.3764, "num_tokens": 5064523699.0, "step": 12084 }, { "epoch": 4.4268767462098655, "grad_norm": 0.14212383503245357, "learning_rate": 5.228420417652262e-06, "loss": 0.3531, "num_tokens": 5065312110.0, "step": 12085 }, { "epoch": 4.427243164017772, "grad_norm": 0.15804326519013603, "learning_rate": 5.226870119018521e-06, "loss": 0.3681, "num_tokens": 5066033476.0, "step": 12086 }, { "epoch": 4.427609581825677, "grad_norm": 0.15627678871249548, "learning_rate": 5.225320764745433e-06, "loss": 0.3524, "num_tokens": 5066757331.0, "step": 12087 }, { "epoch": 4.427975999633582, "grad_norm": 0.14773023421252804, "learning_rate": 5.223772354920227e-06, "loss": 0.3437, "num_tokens": 5067514417.0, "step": 12088 }, { "epoch": 4.428342417441487, "grad_norm": 0.1353213411273403, "learning_rate": 5.222224889630081e-06, "loss": 0.391, "num_tokens": 5068359732.0, "step": 12089 }, { "epoch": 4.4287088352493935, "grad_norm": 0.16610184811370438, "learning_rate": 5.220678368962123e-06, "loss": 0.3502, "num_tokens": 5069097860.0, "step": 12090 }, { "epoch": 4.429075253057299, "grad_norm": 0.14347598809985515, "learning_rate": 5.219132793003428e-06, "loss": 0.3788, "num_tokens": 5069843238.0, "step": 12091 }, { "epoch": 4.429441670865204, "grad_norm": 0.16025924645732126, "learning_rate": 5.217588161841008e-06, "loss": 0.3738, "num_tokens": 5070570202.0, "step": 12092 }, { "epoch": 4.429808088673109, "grad_norm": 0.14166652643802832, "learning_rate": 5.216044475561831e-06, "loss": 0.3683, "num_tokens": 5071435838.0, "step": 12093 }, { "epoch": 4.4301745064810145, "grad_norm": 0.15367612192818528, "learning_rate": 5.214501734252813e-06, "loss": 0.3608, "num_tokens": 5072150475.0, "step": 12094 }, { "epoch": 4.430540924288921, "grad_norm": 0.14864391390077442, "learning_rate": 5.2129599380008115e-06, "loss": 0.3616, "num_tokens": 5072909470.0, "step": 12095 }, { "epoch": 4.430907342096826, "grad_norm": 0.145222777497686, "learning_rate": 5.211419086892629e-06, "loss": 0.3762, "num_tokens": 5073660732.0, "step": 12096 }, { "epoch": 4.431273759904731, "grad_norm": 0.15456869653379088, "learning_rate": 5.209879181015025e-06, "loss": 0.3819, "num_tokens": 5074381655.0, "step": 12097 }, { "epoch": 4.431640177712637, "grad_norm": 0.15188984919890128, "learning_rate": 5.208340220454695e-06, "loss": 0.3855, "num_tokens": 5075187166.0, "step": 12098 }, { "epoch": 4.4320065955205425, "grad_norm": 0.15625211674134923, "learning_rate": 5.2068022052982845e-06, "loss": 0.344, "num_tokens": 5075845758.0, "step": 12099 }, { "epoch": 4.432373013328448, "grad_norm": 0.15038223479791904, "learning_rate": 5.2052651356323925e-06, "loss": 0.3644, "num_tokens": 5076684813.0, "step": 12100 }, { "epoch": 4.432739431136353, "grad_norm": 0.15316478185463542, "learning_rate": 5.203729011543555e-06, "loss": 0.3902, "num_tokens": 5077455079.0, "step": 12101 }, { "epoch": 4.433105848944258, "grad_norm": 0.15199512860228195, "learning_rate": 5.20219383311826e-06, "loss": 0.3567, "num_tokens": 5078208320.0, "step": 12102 }, { "epoch": 4.433472266752164, "grad_norm": 0.1510658461624455, "learning_rate": 5.2006596004429356e-06, "loss": 0.3667, "num_tokens": 5079002497.0, "step": 12103 }, { "epoch": 4.43383868456007, "grad_norm": 0.14053541297155459, "learning_rate": 5.199126313603968e-06, "loss": 0.3388, "num_tokens": 5079715702.0, "step": 12104 }, { "epoch": 4.434205102367975, "grad_norm": 0.14596894686743844, "learning_rate": 5.197593972687688e-06, "loss": 0.3854, "num_tokens": 5080521796.0, "step": 12105 }, { "epoch": 4.43457152017588, "grad_norm": 0.15173421917209634, "learning_rate": 5.196062577780362e-06, "loss": 0.361, "num_tokens": 5081312295.0, "step": 12106 }, { "epoch": 4.434937937983786, "grad_norm": 0.14076540931097237, "learning_rate": 5.194532128968214e-06, "loss": 0.3696, "num_tokens": 5082126206.0, "step": 12107 }, { "epoch": 4.435304355791692, "grad_norm": 0.13893385641205158, "learning_rate": 5.193002626337412e-06, "loss": 0.3899, "num_tokens": 5082967473.0, "step": 12108 }, { "epoch": 4.435670773599597, "grad_norm": 0.15450292378068886, "learning_rate": 5.191474069974064e-06, "loss": 0.399, "num_tokens": 5083686388.0, "step": 12109 }, { "epoch": 4.436037191407502, "grad_norm": 0.1546395168951575, "learning_rate": 5.189946459964235e-06, "loss": 0.3701, "num_tokens": 5084503494.0, "step": 12110 }, { "epoch": 4.436403609215408, "grad_norm": 0.15044433115010777, "learning_rate": 5.1884197963939385e-06, "loss": 0.3773, "num_tokens": 5085246596.0, "step": 12111 }, { "epoch": 4.4367700270233135, "grad_norm": 0.1525136428138184, "learning_rate": 5.18689407934912e-06, "loss": 0.3831, "num_tokens": 5086021871.0, "step": 12112 }, { "epoch": 4.437136444831219, "grad_norm": 0.15530399939706388, "learning_rate": 5.185369308915686e-06, "loss": 0.3849, "num_tokens": 5086809384.0, "step": 12113 }, { "epoch": 4.437502862639124, "grad_norm": 0.16178652865408022, "learning_rate": 5.183845485179483e-06, "loss": 0.374, "num_tokens": 5087467048.0, "step": 12114 }, { "epoch": 4.43786928044703, "grad_norm": 0.1434362274500124, "learning_rate": 5.182322608226301e-06, "loss": 0.3563, "num_tokens": 5088361724.0, "step": 12115 }, { "epoch": 4.438235698254935, "grad_norm": 0.13455240688178813, "learning_rate": 5.180800678141883e-06, "loss": 0.3749, "num_tokens": 5089228446.0, "step": 12116 }, { "epoch": 4.438602116062841, "grad_norm": 0.15717688910649613, "learning_rate": 5.179279695011917e-06, "loss": 0.3745, "num_tokens": 5089917166.0, "step": 12117 }, { "epoch": 4.438968533870746, "grad_norm": 0.16611750344386483, "learning_rate": 5.17775965892204e-06, "loss": 0.3658, "num_tokens": 5090524443.0, "step": 12118 }, { "epoch": 4.439334951678652, "grad_norm": 0.15036201280593148, "learning_rate": 5.176240569957833e-06, "loss": 0.3778, "num_tokens": 5091381574.0, "step": 12119 }, { "epoch": 4.439701369486557, "grad_norm": 0.16009674583496442, "learning_rate": 5.174722428204821e-06, "loss": 0.3577, "num_tokens": 5092077610.0, "step": 12120 }, { "epoch": 4.4400677872944625, "grad_norm": 0.144033439044387, "learning_rate": 5.173205233748477e-06, "loss": 0.3974, "num_tokens": 5092902088.0, "step": 12121 }, { "epoch": 4.440434205102368, "grad_norm": 0.15144243793684967, "learning_rate": 5.171688986674227e-06, "loss": 0.3594, "num_tokens": 5093679703.0, "step": 12122 }, { "epoch": 4.440800622910274, "grad_norm": 0.153715648391559, "learning_rate": 5.170173687067436e-06, "loss": 0.3429, "num_tokens": 5094380935.0, "step": 12123 }, { "epoch": 4.441167040718179, "grad_norm": 0.15065439639763997, "learning_rate": 5.168659335013415e-06, "loss": 0.3533, "num_tokens": 5095191390.0, "step": 12124 }, { "epoch": 4.441533458526084, "grad_norm": 0.1444564831272916, "learning_rate": 5.167145930597432e-06, "loss": 0.3585, "num_tokens": 5096005176.0, "step": 12125 }, { "epoch": 4.44189987633399, "grad_norm": 0.1533815647716962, "learning_rate": 5.165633473904691e-06, "loss": 0.3593, "num_tokens": 5096706493.0, "step": 12126 }, { "epoch": 4.442266294141895, "grad_norm": 0.13628978735660122, "learning_rate": 5.164121965020343e-06, "loss": 0.3691, "num_tokens": 5097625945.0, "step": 12127 }, { "epoch": 4.442632711949801, "grad_norm": 0.15724698416794822, "learning_rate": 5.1626114040294986e-06, "loss": 0.3291, "num_tokens": 5098259392.0, "step": 12128 }, { "epoch": 4.442999129757706, "grad_norm": 0.1492826034078189, "learning_rate": 5.1611017910171955e-06, "loss": 0.3697, "num_tokens": 5098977032.0, "step": 12129 }, { "epoch": 4.4433655475656115, "grad_norm": 0.15045970645227985, "learning_rate": 5.15959312606843e-06, "loss": 0.3667, "num_tokens": 5099784209.0, "step": 12130 }, { "epoch": 4.443731965373517, "grad_norm": 0.14729716812290125, "learning_rate": 5.158085409268148e-06, "loss": 0.3966, "num_tokens": 5100610938.0, "step": 12131 }, { "epoch": 4.444098383181423, "grad_norm": 0.1442600937650826, "learning_rate": 5.15657864070123e-06, "loss": 0.3779, "num_tokens": 5101376001.0, "step": 12132 }, { "epoch": 4.444464800989328, "grad_norm": 0.15287409920798473, "learning_rate": 5.155072820452517e-06, "loss": 0.3729, "num_tokens": 5102079134.0, "step": 12133 }, { "epoch": 4.444831218797233, "grad_norm": 0.1443771963590353, "learning_rate": 5.153567948606787e-06, "loss": 0.3688, "num_tokens": 5102827117.0, "step": 12134 }, { "epoch": 4.445197636605139, "grad_norm": 0.15249919112032798, "learning_rate": 5.152064025248766e-06, "loss": 0.3484, "num_tokens": 5103511687.0, "step": 12135 }, { "epoch": 4.445564054413045, "grad_norm": 0.14208594517073525, "learning_rate": 5.150561050463129e-06, "loss": 0.3899, "num_tokens": 5104339196.0, "step": 12136 }, { "epoch": 4.44593047222095, "grad_norm": 0.15532099655468454, "learning_rate": 5.149059024334494e-06, "loss": 0.371, "num_tokens": 5105058755.0, "step": 12137 }, { "epoch": 4.446296890028855, "grad_norm": 0.1508813434279896, "learning_rate": 5.147557946947429e-06, "loss": 0.3652, "num_tokens": 5105796917.0, "step": 12138 }, { "epoch": 4.446663307836761, "grad_norm": 0.15946954710363148, "learning_rate": 5.146057818386451e-06, "loss": 0.3677, "num_tokens": 5106487333.0, "step": 12139 }, { "epoch": 4.447029725644667, "grad_norm": 0.1489366512201922, "learning_rate": 5.144558638736019e-06, "loss": 0.3366, "num_tokens": 5107243462.0, "step": 12140 }, { "epoch": 4.447396143452572, "grad_norm": 0.1776894489426517, "learning_rate": 5.143060408080538e-06, "loss": 0.4209, "num_tokens": 5107912793.0, "step": 12141 }, { "epoch": 4.447762561260477, "grad_norm": 0.15511459178175377, "learning_rate": 5.141563126504363e-06, "loss": 0.3418, "num_tokens": 5108594484.0, "step": 12142 }, { "epoch": 4.4481289790683824, "grad_norm": 0.143763818016891, "learning_rate": 5.14006679409179e-06, "loss": 0.3492, "num_tokens": 5109371095.0, "step": 12143 }, { "epoch": 4.448495396876289, "grad_norm": 0.14414947173360115, "learning_rate": 5.138571410927072e-06, "loss": 0.357, "num_tokens": 5110181559.0, "step": 12144 }, { "epoch": 4.448861814684194, "grad_norm": 0.1454809823783699, "learning_rate": 5.137076977094396e-06, "loss": 0.3992, "num_tokens": 5110941752.0, "step": 12145 }, { "epoch": 4.449228232492099, "grad_norm": 0.15290765005324528, "learning_rate": 5.1355834926779044e-06, "loss": 0.3584, "num_tokens": 5111666168.0, "step": 12146 }, { "epoch": 4.449594650300004, "grad_norm": 0.15170797287581028, "learning_rate": 5.134090957761686e-06, "loss": 0.3794, "num_tokens": 5112404050.0, "step": 12147 }, { "epoch": 4.4499610681079105, "grad_norm": 0.1545939263204368, "learning_rate": 5.132599372429771e-06, "loss": 0.3504, "num_tokens": 5113110994.0, "step": 12148 }, { "epoch": 4.450327485915816, "grad_norm": 0.1649080110938769, "learning_rate": 5.131108736766133e-06, "loss": 0.368, "num_tokens": 5113806836.0, "step": 12149 }, { "epoch": 4.450693903723721, "grad_norm": 0.14648843754159682, "learning_rate": 5.1296190508547065e-06, "loss": 0.372, "num_tokens": 5114622420.0, "step": 12150 }, { "epoch": 4.451060321531626, "grad_norm": 0.15141524363503764, "learning_rate": 5.128130314779359e-06, "loss": 0.3696, "num_tokens": 5115389562.0, "step": 12151 }, { "epoch": 4.4514267393395315, "grad_norm": 0.14345266372407775, "learning_rate": 5.126642528623914e-06, "loss": 0.3644, "num_tokens": 5116153947.0, "step": 12152 }, { "epoch": 4.451793157147438, "grad_norm": 0.15405262502694647, "learning_rate": 5.125155692472131e-06, "loss": 0.3698, "num_tokens": 5116907346.0, "step": 12153 }, { "epoch": 4.452159574955343, "grad_norm": 0.1468045417763537, "learning_rate": 5.123669806407724e-06, "loss": 0.371, "num_tokens": 5117678210.0, "step": 12154 }, { "epoch": 4.452525992763248, "grad_norm": 0.1490270189121234, "learning_rate": 5.122184870514352e-06, "loss": 0.368, "num_tokens": 5118507404.0, "step": 12155 }, { "epoch": 4.452892410571153, "grad_norm": 0.14804008391518417, "learning_rate": 5.120700884875621e-06, "loss": 0.3709, "num_tokens": 5119338783.0, "step": 12156 }, { "epoch": 4.4532588283790595, "grad_norm": 0.13839712073108557, "learning_rate": 5.119217849575081e-06, "loss": 0.3773, "num_tokens": 5120241287.0, "step": 12157 }, { "epoch": 4.453625246186965, "grad_norm": 0.13059677417099064, "learning_rate": 5.117735764696228e-06, "loss": 0.3543, "num_tokens": 5121088902.0, "step": 12158 }, { "epoch": 4.45399166399487, "grad_norm": 0.14713759296960224, "learning_rate": 5.116254630322509e-06, "loss": 0.3999, "num_tokens": 5121940241.0, "step": 12159 }, { "epoch": 4.454358081802775, "grad_norm": 0.15091543906240412, "learning_rate": 5.11477444653731e-06, "loss": 0.3788, "num_tokens": 5122624479.0, "step": 12160 }, { "epoch": 4.454724499610681, "grad_norm": 0.16474220750209306, "learning_rate": 5.1132952134239765e-06, "loss": 0.3516, "num_tokens": 5123303789.0, "step": 12161 }, { "epoch": 4.455090917418587, "grad_norm": 0.1444569885789931, "learning_rate": 5.1118169310657895e-06, "loss": 0.3734, "num_tokens": 5124131076.0, "step": 12162 }, { "epoch": 4.455457335226492, "grad_norm": 0.14535223886146262, "learning_rate": 5.110339599545977e-06, "loss": 0.3622, "num_tokens": 5124809788.0, "step": 12163 }, { "epoch": 4.455823753034397, "grad_norm": 0.15636933537195521, "learning_rate": 5.108863218947712e-06, "loss": 0.3864, "num_tokens": 5125605189.0, "step": 12164 }, { "epoch": 4.456190170842303, "grad_norm": 0.16402977632540672, "learning_rate": 5.1073877893541225e-06, "loss": 0.3937, "num_tokens": 5126252614.0, "step": 12165 }, { "epoch": 4.4565565886502085, "grad_norm": 0.14357513263596478, "learning_rate": 5.1059133108482805e-06, "loss": 0.3356, "num_tokens": 5127044716.0, "step": 12166 }, { "epoch": 4.456923006458114, "grad_norm": 0.1502196262761862, "learning_rate": 5.104439783513201e-06, "loss": 0.3836, "num_tokens": 5127810451.0, "step": 12167 }, { "epoch": 4.457289424266019, "grad_norm": 0.15213977426923203, "learning_rate": 5.102967207431844e-06, "loss": 0.383, "num_tokens": 5128714584.0, "step": 12168 }, { "epoch": 4.457655842073925, "grad_norm": 0.1603254992234727, "learning_rate": 5.101495582687117e-06, "loss": 0.3877, "num_tokens": 5129357631.0, "step": 12169 }, { "epoch": 4.45802225988183, "grad_norm": 0.1484256513391643, "learning_rate": 5.10002490936188e-06, "loss": 0.4037, "num_tokens": 5130122534.0, "step": 12170 }, { "epoch": 4.458388677689736, "grad_norm": 0.1528703715764029, "learning_rate": 5.09855518753893e-06, "loss": 0.3764, "num_tokens": 5130881678.0, "step": 12171 }, { "epoch": 4.458755095497641, "grad_norm": 0.14852762756083124, "learning_rate": 5.097086417301018e-06, "loss": 0.3754, "num_tokens": 5131699416.0, "step": 12172 }, { "epoch": 4.459121513305547, "grad_norm": 0.1390950170252801, "learning_rate": 5.095618598730839e-06, "loss": 0.3556, "num_tokens": 5132494907.0, "step": 12173 }, { "epoch": 4.459487931113452, "grad_norm": 0.15112073640337376, "learning_rate": 5.0941517319110345e-06, "loss": 0.3749, "num_tokens": 5133305466.0, "step": 12174 }, { "epoch": 4.459854348921358, "grad_norm": 0.156414304444364, "learning_rate": 5.092685816924193e-06, "loss": 0.3707, "num_tokens": 5134055924.0, "step": 12175 }, { "epoch": 4.460220766729263, "grad_norm": 0.16653072860707296, "learning_rate": 5.091220853852842e-06, "loss": 0.4005, "num_tokens": 5134735264.0, "step": 12176 }, { "epoch": 4.460587184537168, "grad_norm": 0.15000546452718183, "learning_rate": 5.089756842779471e-06, "loss": 0.3236, "num_tokens": 5135458034.0, "step": 12177 }, { "epoch": 4.460953602345074, "grad_norm": 0.15101129785424763, "learning_rate": 5.0882937837865e-06, "loss": 0.3725, "num_tokens": 5136260079.0, "step": 12178 }, { "epoch": 4.4613200201529795, "grad_norm": 0.15210492724322724, "learning_rate": 5.086831676956302e-06, "loss": 0.3615, "num_tokens": 5136980793.0, "step": 12179 }, { "epoch": 4.461686437960885, "grad_norm": 0.1423277461501058, "learning_rate": 5.085370522371204e-06, "loss": 0.3502, "num_tokens": 5137933702.0, "step": 12180 }, { "epoch": 4.46205285576879, "grad_norm": 0.14760004691002593, "learning_rate": 5.083910320113463e-06, "loss": 0.3468, "num_tokens": 5138695375.0, "step": 12181 }, { "epoch": 4.462419273576696, "grad_norm": 0.1498660286317699, "learning_rate": 5.082451070265294e-06, "loss": 0.347, "num_tokens": 5139511296.0, "step": 12182 }, { "epoch": 4.462785691384601, "grad_norm": 0.1621712941239631, "learning_rate": 5.0809927729088585e-06, "loss": 0.3994, "num_tokens": 5140207810.0, "step": 12183 }, { "epoch": 4.463152109192507, "grad_norm": 0.1582380673549093, "learning_rate": 5.079535428126261e-06, "loss": 0.3696, "num_tokens": 5140986971.0, "step": 12184 }, { "epoch": 4.463518527000412, "grad_norm": 0.14522469286617257, "learning_rate": 5.078079035999549e-06, "loss": 0.3589, "num_tokens": 5141815722.0, "step": 12185 }, { "epoch": 4.463884944808318, "grad_norm": 0.1500460309258153, "learning_rate": 5.076623596610725e-06, "loss": 0.3607, "num_tokens": 5142526190.0, "step": 12186 }, { "epoch": 4.464251362616223, "grad_norm": 0.15847372676187726, "learning_rate": 5.075169110041729e-06, "loss": 0.3885, "num_tokens": 5143255297.0, "step": 12187 }, { "epoch": 4.4646177804241285, "grad_norm": 0.15210141216546505, "learning_rate": 5.073715576374456e-06, "loss": 0.3382, "num_tokens": 5144014399.0, "step": 12188 }, { "epoch": 4.464984198232034, "grad_norm": 0.15860683491526908, "learning_rate": 5.0722629956907395e-06, "loss": 0.3507, "num_tokens": 5144749558.0, "step": 12189 }, { "epoch": 4.46535061603994, "grad_norm": 0.15403293649645783, "learning_rate": 5.070811368072367e-06, "loss": 0.3641, "num_tokens": 5145401916.0, "step": 12190 }, { "epoch": 4.465717033847845, "grad_norm": 0.1429565191394155, "learning_rate": 5.069360693601061e-06, "loss": 0.3821, "num_tokens": 5146214204.0, "step": 12191 }, { "epoch": 4.46608345165575, "grad_norm": 0.14468788514635486, "learning_rate": 5.0679109723585e-06, "loss": 0.3832, "num_tokens": 5146947243.0, "step": 12192 }, { "epoch": 4.466449869463656, "grad_norm": 0.16851085206138208, "learning_rate": 5.066462204426307e-06, "loss": 0.3685, "num_tokens": 5147650478.0, "step": 12193 }, { "epoch": 4.466816287271562, "grad_norm": 0.16372560847392092, "learning_rate": 5.065014389886052e-06, "loss": 0.3952, "num_tokens": 5148370347.0, "step": 12194 }, { "epoch": 4.467182705079467, "grad_norm": 0.15838576713991384, "learning_rate": 5.0635675288192495e-06, "loss": 0.3477, "num_tokens": 5149105184.0, "step": 12195 }, { "epoch": 4.467549122887372, "grad_norm": 0.15810702217911374, "learning_rate": 5.062121621307361e-06, "loss": 0.3793, "num_tokens": 5149941693.0, "step": 12196 }, { "epoch": 4.4679155406952775, "grad_norm": 0.150263539408184, "learning_rate": 5.060676667431792e-06, "loss": 0.3826, "num_tokens": 5150582742.0, "step": 12197 }, { "epoch": 4.468281958503184, "grad_norm": 0.16129131070198993, "learning_rate": 5.059232667273893e-06, "loss": 0.3668, "num_tokens": 5151303513.0, "step": 12198 }, { "epoch": 4.468648376311089, "grad_norm": 0.15475381525278248, "learning_rate": 5.057789620914973e-06, "loss": 0.3758, "num_tokens": 5152072384.0, "step": 12199 }, { "epoch": 4.469014794118994, "grad_norm": 0.1411023063336093, "learning_rate": 5.056347528436268e-06, "loss": 0.3752, "num_tokens": 5152836065.0, "step": 12200 }, { "epoch": 4.469381211926899, "grad_norm": 0.16434063539020538, "learning_rate": 5.054906389918981e-06, "loss": 0.3824, "num_tokens": 5153530939.0, "step": 12201 }, { "epoch": 4.469747629734805, "grad_norm": 0.15518157633852428, "learning_rate": 5.053466205444246e-06, "loss": 0.364, "num_tokens": 5154263361.0, "step": 12202 }, { "epoch": 4.470114047542711, "grad_norm": 0.14899634227763875, "learning_rate": 5.0520269750931476e-06, "loss": 0.3569, "num_tokens": 5155011264.0, "step": 12203 }, { "epoch": 4.470480465350616, "grad_norm": 0.15820993012785128, "learning_rate": 5.050588698946716e-06, "loss": 0.3753, "num_tokens": 5155776214.0, "step": 12204 }, { "epoch": 4.470846883158521, "grad_norm": 0.15345467461640827, "learning_rate": 5.049151377085935e-06, "loss": 0.3708, "num_tokens": 5156487418.0, "step": 12205 }, { "epoch": 4.471213300966427, "grad_norm": 0.1577353372859566, "learning_rate": 5.047715009591719e-06, "loss": 0.3733, "num_tokens": 5157227474.0, "step": 12206 }, { "epoch": 4.471579718774333, "grad_norm": 0.14744195914639419, "learning_rate": 5.046279596544949e-06, "loss": 0.3208, "num_tokens": 5157982514.0, "step": 12207 }, { "epoch": 4.471946136582238, "grad_norm": 0.14381215876449147, "learning_rate": 5.044845138026437e-06, "loss": 0.34, "num_tokens": 5158836407.0, "step": 12208 }, { "epoch": 4.472312554390143, "grad_norm": 0.1462887667565082, "learning_rate": 5.043411634116946e-06, "loss": 0.3504, "num_tokens": 5159553479.0, "step": 12209 }, { "epoch": 4.472678972198048, "grad_norm": 0.1545538757544974, "learning_rate": 5.041979084897181e-06, "loss": 0.3778, "num_tokens": 5160267442.0, "step": 12210 }, { "epoch": 4.473045390005955, "grad_norm": 0.13757045338634485, "learning_rate": 5.040547490447802e-06, "loss": 0.3837, "num_tokens": 5161125349.0, "step": 12211 }, { "epoch": 4.47341180781386, "grad_norm": 0.156067055207333, "learning_rate": 5.039116850849414e-06, "loss": 0.3836, "num_tokens": 5161807928.0, "step": 12212 }, { "epoch": 4.473778225621765, "grad_norm": 0.14950994349963126, "learning_rate": 5.037687166182555e-06, "loss": 0.344, "num_tokens": 5162588259.0, "step": 12213 }, { "epoch": 4.47414464342967, "grad_norm": 0.14072513584596882, "learning_rate": 5.036258436527727e-06, "loss": 0.3514, "num_tokens": 5163375317.0, "step": 12214 }, { "epoch": 4.4745110612375765, "grad_norm": 0.14935245174505313, "learning_rate": 5.0348306619653675e-06, "loss": 0.3301, "num_tokens": 5164159150.0, "step": 12215 }, { "epoch": 4.474877479045482, "grad_norm": 0.14618933603333445, "learning_rate": 5.033403842575866e-06, "loss": 0.3442, "num_tokens": 5165004264.0, "step": 12216 }, { "epoch": 4.475243896853387, "grad_norm": 0.13893654919093568, "learning_rate": 5.0319779784395515e-06, "loss": 0.3657, "num_tokens": 5165854137.0, "step": 12217 }, { "epoch": 4.475610314661292, "grad_norm": 0.15118767642795203, "learning_rate": 5.030553069636707e-06, "loss": 0.3789, "num_tokens": 5166600215.0, "step": 12218 }, { "epoch": 4.475976732469198, "grad_norm": 0.1510605901269523, "learning_rate": 5.029129116247549e-06, "loss": 0.373, "num_tokens": 5167400843.0, "step": 12219 }, { "epoch": 4.476343150277104, "grad_norm": 0.15377923470493945, "learning_rate": 5.027706118352259e-06, "loss": 0.3618, "num_tokens": 5168206564.0, "step": 12220 }, { "epoch": 4.476709568085009, "grad_norm": 0.15168409421561438, "learning_rate": 5.0262840760309505e-06, "loss": 0.3782, "num_tokens": 5168952856.0, "step": 12221 }, { "epoch": 4.477075985892914, "grad_norm": 0.14937798104870137, "learning_rate": 5.0248629893636866e-06, "loss": 0.3699, "num_tokens": 5169785807.0, "step": 12222 }, { "epoch": 4.47744240370082, "grad_norm": 0.1519116005051636, "learning_rate": 5.02344285843048e-06, "loss": 0.4088, "num_tokens": 5170491947.0, "step": 12223 }, { "epoch": 4.4778088215087255, "grad_norm": 0.15728351027895554, "learning_rate": 5.0220236833112854e-06, "loss": 0.3957, "num_tokens": 5171246172.0, "step": 12224 }, { "epoch": 4.478175239316631, "grad_norm": 0.15590728293476872, "learning_rate": 5.020605464086004e-06, "loss": 0.3265, "num_tokens": 5171960305.0, "step": 12225 }, { "epoch": 4.478541657124536, "grad_norm": 0.14363811863400577, "learning_rate": 5.019188200834483e-06, "loss": 0.3694, "num_tokens": 5172761850.0, "step": 12226 }, { "epoch": 4.478908074932441, "grad_norm": 0.14671333867102795, "learning_rate": 5.017771893636519e-06, "loss": 0.3402, "num_tokens": 5173591591.0, "step": 12227 }, { "epoch": 4.479274492740347, "grad_norm": 0.1374252165501192, "learning_rate": 5.0163565425718566e-06, "loss": 0.3679, "num_tokens": 5174447754.0, "step": 12228 }, { "epoch": 4.479640910548253, "grad_norm": 0.15129534220263818, "learning_rate": 5.0149421477201785e-06, "loss": 0.3415, "num_tokens": 5175208858.0, "step": 12229 }, { "epoch": 4.480007328356158, "grad_norm": 0.14429046292764458, "learning_rate": 5.0135287091611194e-06, "loss": 0.359, "num_tokens": 5175944142.0, "step": 12230 }, { "epoch": 4.480373746164064, "grad_norm": 0.14548756727802753, "learning_rate": 5.012116226974258e-06, "loss": 0.3423, "num_tokens": 5176780987.0, "step": 12231 }, { "epoch": 4.480740163971969, "grad_norm": 0.16614881475615684, "learning_rate": 5.010704701239119e-06, "loss": 0.3543, "num_tokens": 5177418334.0, "step": 12232 }, { "epoch": 4.4811065817798745, "grad_norm": 0.1516922034644572, "learning_rate": 5.009294132035176e-06, "loss": 0.3684, "num_tokens": 5178268763.0, "step": 12233 }, { "epoch": 4.48147299958778, "grad_norm": 0.1369540836042501, "learning_rate": 5.007884519441846e-06, "loss": 0.3434, "num_tokens": 5179099105.0, "step": 12234 }, { "epoch": 4.481839417395685, "grad_norm": 0.13956887402177465, "learning_rate": 5.006475863538495e-06, "loss": 0.3545, "num_tokens": 5179914486.0, "step": 12235 }, { "epoch": 4.482205835203591, "grad_norm": 0.15674559471165705, "learning_rate": 5.0050681644044285e-06, "loss": 0.3492, "num_tokens": 5180626703.0, "step": 12236 }, { "epoch": 4.482572253011496, "grad_norm": 0.16417598473967904, "learning_rate": 5.0036614221189066e-06, "loss": 0.3719, "num_tokens": 5181361127.0, "step": 12237 }, { "epoch": 4.482938670819402, "grad_norm": 0.1423916163482566, "learning_rate": 5.002255636761132e-06, "loss": 0.3768, "num_tokens": 5182253828.0, "step": 12238 }, { "epoch": 4.483305088627307, "grad_norm": 0.1450134990976379, "learning_rate": 5.000850808410251e-06, "loss": 0.3863, "num_tokens": 5183037019.0, "step": 12239 }, { "epoch": 4.483671506435213, "grad_norm": 0.15050713520627246, "learning_rate": 4.999446937145357e-06, "loss": 0.341, "num_tokens": 5183790763.0, "step": 12240 }, { "epoch": 4.484037924243118, "grad_norm": 0.1456286221958867, "learning_rate": 4.998044023045497e-06, "loss": 0.389, "num_tokens": 5184648790.0, "step": 12241 }, { "epoch": 4.484404342051024, "grad_norm": 0.14576665295320046, "learning_rate": 4.996642066189653e-06, "loss": 0.3638, "num_tokens": 5185489157.0, "step": 12242 }, { "epoch": 4.484770759858929, "grad_norm": 0.1486524731583165, "learning_rate": 4.995241066656754e-06, "loss": 0.375, "num_tokens": 5186211889.0, "step": 12243 }, { "epoch": 4.485137177666835, "grad_norm": 0.1610896121601736, "learning_rate": 4.993841024525689e-06, "loss": 0.3645, "num_tokens": 5186914549.0, "step": 12244 }, { "epoch": 4.48550359547474, "grad_norm": 0.15328489979719478, "learning_rate": 4.992441939875278e-06, "loss": 0.3838, "num_tokens": 5187643997.0, "step": 12245 }, { "epoch": 4.4858700132826455, "grad_norm": 0.1483545162359466, "learning_rate": 4.991043812784291e-06, "loss": 0.3629, "num_tokens": 5188370181.0, "step": 12246 }, { "epoch": 4.486236431090551, "grad_norm": 0.14988290172107252, "learning_rate": 4.9896466433314435e-06, "loss": 0.3888, "num_tokens": 5189111454.0, "step": 12247 }, { "epoch": 4.486602848898457, "grad_norm": 0.14800086779710778, "learning_rate": 4.988250431595404e-06, "loss": 0.3824, "num_tokens": 5189932074.0, "step": 12248 }, { "epoch": 4.486969266706362, "grad_norm": 0.15773295626111422, "learning_rate": 4.986855177654779e-06, "loss": 0.3731, "num_tokens": 5190646890.0, "step": 12249 }, { "epoch": 4.487335684514267, "grad_norm": 0.1421856107818128, "learning_rate": 4.985460881588129e-06, "loss": 0.3733, "num_tokens": 5191431410.0, "step": 12250 }, { "epoch": 4.487702102322173, "grad_norm": 0.14421428997583363, "learning_rate": 4.98406754347395e-06, "loss": 0.3528, "num_tokens": 5192233688.0, "step": 12251 }, { "epoch": 4.488068520130079, "grad_norm": 0.1416083820447134, "learning_rate": 4.982675163390691e-06, "loss": 0.3709, "num_tokens": 5193062234.0, "step": 12252 }, { "epoch": 4.488434937937984, "grad_norm": 0.13433620640586633, "learning_rate": 4.981283741416742e-06, "loss": 0.344, "num_tokens": 5193886882.0, "step": 12253 }, { "epoch": 4.488801355745889, "grad_norm": 0.15719513620683184, "learning_rate": 4.979893277630452e-06, "loss": 0.3626, "num_tokens": 5194519964.0, "step": 12254 }, { "epoch": 4.4891677735537945, "grad_norm": 0.1539131029431624, "learning_rate": 4.9785037721101e-06, "loss": 0.366, "num_tokens": 5195304581.0, "step": 12255 }, { "epoch": 4.489534191361701, "grad_norm": 0.15866063029550231, "learning_rate": 4.97711522493392e-06, "loss": 0.3641, "num_tokens": 5195943932.0, "step": 12256 }, { "epoch": 4.489900609169606, "grad_norm": 0.14852215701882926, "learning_rate": 4.975727636180092e-06, "loss": 0.3472, "num_tokens": 5196777432.0, "step": 12257 }, { "epoch": 4.490267026977511, "grad_norm": 0.1385635939141842, "learning_rate": 4.9743410059267366e-06, "loss": 0.3865, "num_tokens": 5197612888.0, "step": 12258 }, { "epoch": 4.490633444785416, "grad_norm": 0.1403257719527839, "learning_rate": 4.972955334251924e-06, "loss": 0.3513, "num_tokens": 5198472932.0, "step": 12259 }, { "epoch": 4.490999862593322, "grad_norm": 0.13893888607978375, "learning_rate": 4.971570621233671e-06, "loss": 0.3505, "num_tokens": 5199266548.0, "step": 12260 }, { "epoch": 4.491366280401228, "grad_norm": 0.1522366495975131, "learning_rate": 4.9701868669499405e-06, "loss": 0.3352, "num_tokens": 5199969649.0, "step": 12261 }, { "epoch": 4.491732698209133, "grad_norm": 0.14677106646932345, "learning_rate": 4.96880407147864e-06, "loss": 0.364, "num_tokens": 5200780474.0, "step": 12262 }, { "epoch": 4.492099116017038, "grad_norm": 0.14096754174262827, "learning_rate": 4.967422234897626e-06, "loss": 0.3546, "num_tokens": 5201638811.0, "step": 12263 }, { "epoch": 4.4924655338249435, "grad_norm": 0.14112083896651764, "learning_rate": 4.9660413572846975e-06, "loss": 0.3493, "num_tokens": 5202372561.0, "step": 12264 }, { "epoch": 4.49283195163285, "grad_norm": 0.15089102113269048, "learning_rate": 4.964661438717595e-06, "loss": 0.374, "num_tokens": 5203114402.0, "step": 12265 }, { "epoch": 4.493198369440755, "grad_norm": 0.15109518364299623, "learning_rate": 4.963282479274016e-06, "loss": 0.3502, "num_tokens": 5203907102.0, "step": 12266 }, { "epoch": 4.49356478724866, "grad_norm": 0.1438133250350715, "learning_rate": 4.961904479031601e-06, "loss": 0.3502, "num_tokens": 5204654974.0, "step": 12267 }, { "epoch": 4.493931205056565, "grad_norm": 0.14114239694037936, "learning_rate": 4.960527438067928e-06, "loss": 0.3706, "num_tokens": 5205488396.0, "step": 12268 }, { "epoch": 4.4942976228644715, "grad_norm": 0.15555216349199705, "learning_rate": 4.9591513564605335e-06, "loss": 0.3751, "num_tokens": 5206177263.0, "step": 12269 }, { "epoch": 4.494664040672377, "grad_norm": 0.13831971547673844, "learning_rate": 4.957776234286885e-06, "loss": 0.3526, "num_tokens": 5207040426.0, "step": 12270 }, { "epoch": 4.495030458480282, "grad_norm": 0.15117084736139946, "learning_rate": 4.956402071624416e-06, "loss": 0.3695, "num_tokens": 5207731643.0, "step": 12271 }, { "epoch": 4.495396876288187, "grad_norm": 0.1484917708207732, "learning_rate": 4.955028868550486e-06, "loss": 0.3686, "num_tokens": 5208461937.0, "step": 12272 }, { "epoch": 4.495763294096093, "grad_norm": 0.1466686351678472, "learning_rate": 4.953656625142412e-06, "loss": 0.3571, "num_tokens": 5209234636.0, "step": 12273 }, { "epoch": 4.496129711903999, "grad_norm": 0.1390422891785792, "learning_rate": 4.952285341477452e-06, "loss": 0.3701, "num_tokens": 5210004001.0, "step": 12274 }, { "epoch": 4.496496129711904, "grad_norm": 0.14673899871805351, "learning_rate": 4.950915017632817e-06, "loss": 0.3651, "num_tokens": 5210784945.0, "step": 12275 }, { "epoch": 4.496862547519809, "grad_norm": 0.1468537226799194, "learning_rate": 4.949545653685652e-06, "loss": 0.3629, "num_tokens": 5211583797.0, "step": 12276 }, { "epoch": 4.497228965327715, "grad_norm": 0.14705990249424014, "learning_rate": 4.9481772497130606e-06, "loss": 0.3681, "num_tokens": 5212397672.0, "step": 12277 }, { "epoch": 4.497595383135621, "grad_norm": 0.14604436652061029, "learning_rate": 4.9468098057920865e-06, "loss": 0.3766, "num_tokens": 5213170875.0, "step": 12278 }, { "epoch": 4.497961800943526, "grad_norm": 0.1344456357937783, "learning_rate": 4.945443321999717e-06, "loss": 0.35, "num_tokens": 5214040757.0, "step": 12279 }, { "epoch": 4.498328218751431, "grad_norm": 0.1573799126813251, "learning_rate": 4.9440777984128895e-06, "loss": 0.368, "num_tokens": 5214690466.0, "step": 12280 }, { "epoch": 4.498694636559337, "grad_norm": 0.1508780496640841, "learning_rate": 4.94271323510848e-06, "loss": 0.3557, "num_tokens": 5215427507.0, "step": 12281 }, { "epoch": 4.4990610543672425, "grad_norm": 0.1574702781025657, "learning_rate": 4.941349632163323e-06, "loss": 0.3656, "num_tokens": 5216132733.0, "step": 12282 }, { "epoch": 4.499427472175148, "grad_norm": 0.1547659140842388, "learning_rate": 4.939986989654191e-06, "loss": 0.3838, "num_tokens": 5216921713.0, "step": 12283 }, { "epoch": 4.499793889983053, "grad_norm": 0.1557879025579412, "learning_rate": 4.938625307657804e-06, "loss": 0.38, "num_tokens": 5217668761.0, "step": 12284 }, { "epoch": 4.500160307790958, "grad_norm": 0.14965873246327493, "learning_rate": 4.937264586250825e-06, "loss": 0.3665, "num_tokens": 5218359269.0, "step": 12285 }, { "epoch": 4.500526725598864, "grad_norm": 0.14003481101068493, "learning_rate": 4.935904825509866e-06, "loss": 0.3685, "num_tokens": 5219149994.0, "step": 12286 }, { "epoch": 4.50089314340677, "grad_norm": 0.15061745400299462, "learning_rate": 4.934546025511482e-06, "loss": 0.3664, "num_tokens": 5219912078.0, "step": 12287 }, { "epoch": 4.501259561214675, "grad_norm": 0.15491418628713427, "learning_rate": 4.9331881863321815e-06, "loss": 0.3862, "num_tokens": 5220717762.0, "step": 12288 }, { "epoch": 4.501625979022581, "grad_norm": 0.14850916829370342, "learning_rate": 4.9318313080484075e-06, "loss": 0.3619, "num_tokens": 5221479892.0, "step": 12289 }, { "epoch": 4.501992396830486, "grad_norm": 0.1517322886981021, "learning_rate": 4.930475390736561e-06, "loss": 0.3727, "num_tokens": 5222207868.0, "step": 12290 }, { "epoch": 4.5023588146383915, "grad_norm": 0.15163406822308148, "learning_rate": 4.92912043447298e-06, "loss": 0.3571, "num_tokens": 5222995288.0, "step": 12291 }, { "epoch": 4.502725232446297, "grad_norm": 0.14822138379365227, "learning_rate": 4.927766439333952e-06, "loss": 0.3828, "num_tokens": 5223801610.0, "step": 12292 }, { "epoch": 4.503091650254202, "grad_norm": 0.16379469791292928, "learning_rate": 4.926413405395704e-06, "loss": 0.3703, "num_tokens": 5224524481.0, "step": 12293 }, { "epoch": 4.503458068062108, "grad_norm": 0.16305324791211476, "learning_rate": 4.9250613327344246e-06, "loss": 0.3435, "num_tokens": 5225182942.0, "step": 12294 }, { "epoch": 4.503824485870013, "grad_norm": 0.16148033272683027, "learning_rate": 4.923710221426228e-06, "loss": 0.339, "num_tokens": 5225900535.0, "step": 12295 }, { "epoch": 4.504190903677919, "grad_norm": 0.1735346827999141, "learning_rate": 4.922360071547192e-06, "loss": 0.3681, "num_tokens": 5226573440.0, "step": 12296 }, { "epoch": 4.504557321485824, "grad_norm": 0.13962678240677107, "learning_rate": 4.9210108831733315e-06, "loss": 0.409, "num_tokens": 5227304418.0, "step": 12297 }, { "epoch": 4.50492373929373, "grad_norm": 0.17430743651570424, "learning_rate": 4.919662656380604e-06, "loss": 0.3975, "num_tokens": 5227996200.0, "step": 12298 }, { "epoch": 4.505290157101635, "grad_norm": 0.15161533207839978, "learning_rate": 4.918315391244924e-06, "loss": 0.3474, "num_tokens": 5228810504.0, "step": 12299 }, { "epoch": 4.5056565749095405, "grad_norm": 0.1396990274640213, "learning_rate": 4.916969087842142e-06, "loss": 0.3605, "num_tokens": 5229621603.0, "step": 12300 }, { "epoch": 4.506022992717446, "grad_norm": 0.14219348456320025, "learning_rate": 4.915623746248054e-06, "loss": 0.3487, "num_tokens": 5230433829.0, "step": 12301 }, { "epoch": 4.506389410525352, "grad_norm": 0.13508654886970278, "learning_rate": 4.9142793665384085e-06, "loss": 0.3681, "num_tokens": 5231247068.0, "step": 12302 }, { "epoch": 4.506755828333257, "grad_norm": 0.1552917928530319, "learning_rate": 4.9129359487889e-06, "loss": 0.3628, "num_tokens": 5232043901.0, "step": 12303 }, { "epoch": 4.507122246141162, "grad_norm": 0.14713359181039942, "learning_rate": 4.911593493075159e-06, "loss": 0.3801, "num_tokens": 5232835294.0, "step": 12304 }, { "epoch": 4.507488663949068, "grad_norm": 0.1530836140795398, "learning_rate": 4.910251999472773e-06, "loss": 0.399, "num_tokens": 5233619834.0, "step": 12305 }, { "epoch": 4.507855081756974, "grad_norm": 0.14128752110016068, "learning_rate": 4.9089114680572715e-06, "loss": 0.3519, "num_tokens": 5234430651.0, "step": 12306 }, { "epoch": 4.508221499564879, "grad_norm": 0.14269392021817376, "learning_rate": 4.907571898904127e-06, "loss": 0.3934, "num_tokens": 5235318060.0, "step": 12307 }, { "epoch": 4.508587917372784, "grad_norm": 0.1376106472569807, "learning_rate": 4.906233292088758e-06, "loss": 0.3575, "num_tokens": 5236128881.0, "step": 12308 }, { "epoch": 4.5089543351806896, "grad_norm": 0.14636887110297378, "learning_rate": 4.9048956476865365e-06, "loss": 0.3488, "num_tokens": 5236901096.0, "step": 12309 }, { "epoch": 4.509320752988595, "grad_norm": 0.1405510118605175, "learning_rate": 4.903558965772765e-06, "loss": 0.3828, "num_tokens": 5237692750.0, "step": 12310 }, { "epoch": 4.509687170796501, "grad_norm": 0.1403415879722269, "learning_rate": 4.902223246422713e-06, "loss": 0.3786, "num_tokens": 5238565525.0, "step": 12311 }, { "epoch": 4.510053588604406, "grad_norm": 0.13709994455753416, "learning_rate": 4.900888489711577e-06, "loss": 0.356, "num_tokens": 5239428676.0, "step": 12312 }, { "epoch": 4.5104200064123114, "grad_norm": 0.14215021033384576, "learning_rate": 4.899554695714508e-06, "loss": 0.3695, "num_tokens": 5240130604.0, "step": 12313 }, { "epoch": 4.510786424220218, "grad_norm": 0.15904231642691716, "learning_rate": 4.898221864506601e-06, "loss": 0.3505, "num_tokens": 5240901870.0, "step": 12314 }, { "epoch": 4.511152842028123, "grad_norm": 0.14427893917978515, "learning_rate": 4.8968899961628965e-06, "loss": 0.3607, "num_tokens": 5241651192.0, "step": 12315 }, { "epoch": 4.511519259836028, "grad_norm": 0.15480133479826638, "learning_rate": 4.895559090758381e-06, "loss": 0.3573, "num_tokens": 5242350665.0, "step": 12316 }, { "epoch": 4.511885677643933, "grad_norm": 0.13835346519951, "learning_rate": 4.894229148367991e-06, "loss": 0.3382, "num_tokens": 5243202752.0, "step": 12317 }, { "epoch": 4.512252095451839, "grad_norm": 0.1353894250858318, "learning_rate": 4.892900169066602e-06, "loss": 0.3452, "num_tokens": 5244080684.0, "step": 12318 }, { "epoch": 4.512618513259745, "grad_norm": 0.14793257742401844, "learning_rate": 4.891572152929037e-06, "loss": 0.3791, "num_tokens": 5244840692.0, "step": 12319 }, { "epoch": 4.51298493106765, "grad_norm": 0.14139014357391422, "learning_rate": 4.890245100030065e-06, "loss": 0.3821, "num_tokens": 5245788082.0, "step": 12320 }, { "epoch": 4.513351348875555, "grad_norm": 0.14994858274454398, "learning_rate": 4.888919010444409e-06, "loss": 0.362, "num_tokens": 5246495051.0, "step": 12321 }, { "epoch": 4.5137177666834605, "grad_norm": 0.14707485375620657, "learning_rate": 4.8875938842467245e-06, "loss": 0.3599, "num_tokens": 5247258148.0, "step": 12322 }, { "epoch": 4.514084184491367, "grad_norm": 0.1476479639424543, "learning_rate": 4.886269721511617e-06, "loss": 0.3655, "num_tokens": 5248066049.0, "step": 12323 }, { "epoch": 4.514450602299272, "grad_norm": 0.15217591194359636, "learning_rate": 4.884946522313644e-06, "loss": 0.3661, "num_tokens": 5248883580.0, "step": 12324 }, { "epoch": 4.514817020107177, "grad_norm": 0.1419914355819198, "learning_rate": 4.8836242867273035e-06, "loss": 0.3798, "num_tokens": 5249650441.0, "step": 12325 }, { "epoch": 4.515183437915082, "grad_norm": 0.14463121786529642, "learning_rate": 4.882303014827035e-06, "loss": 0.375, "num_tokens": 5250379495.0, "step": 12326 }, { "epoch": 4.5155498557229885, "grad_norm": 0.1543138079880821, "learning_rate": 4.880982706687237e-06, "loss": 0.3317, "num_tokens": 5251086808.0, "step": 12327 }, { "epoch": 4.515916273530894, "grad_norm": 0.14814332923467727, "learning_rate": 4.879663362382241e-06, "loss": 0.3445, "num_tokens": 5251836928.0, "step": 12328 }, { "epoch": 4.516282691338799, "grad_norm": 0.15395635147225192, "learning_rate": 4.878344981986325e-06, "loss": 0.3745, "num_tokens": 5252588567.0, "step": 12329 }, { "epoch": 4.516649109146704, "grad_norm": 0.14187336528101707, "learning_rate": 4.877027565573724e-06, "loss": 0.3704, "num_tokens": 5253326170.0, "step": 12330 }, { "epoch": 4.51701552695461, "grad_norm": 0.1568589663965852, "learning_rate": 4.875711113218605e-06, "loss": 0.3824, "num_tokens": 5254062377.0, "step": 12331 }, { "epoch": 4.517381944762516, "grad_norm": 0.1503179722556102, "learning_rate": 4.874395624995093e-06, "loss": 0.3395, "num_tokens": 5254768478.0, "step": 12332 }, { "epoch": 4.517748362570421, "grad_norm": 0.1480927953171904, "learning_rate": 4.873081100977246e-06, "loss": 0.3773, "num_tokens": 5255506518.0, "step": 12333 }, { "epoch": 4.518114780378326, "grad_norm": 0.14515561744040376, "learning_rate": 4.871767541239081e-06, "loss": 0.353, "num_tokens": 5256371879.0, "step": 12334 }, { "epoch": 4.518481198186231, "grad_norm": 0.14994614633010658, "learning_rate": 4.8704549458545475e-06, "loss": 0.3759, "num_tokens": 5257073899.0, "step": 12335 }, { "epoch": 4.5188476159941375, "grad_norm": 0.14986780463154803, "learning_rate": 4.86914331489755e-06, "loss": 0.3788, "num_tokens": 5257826321.0, "step": 12336 }, { "epoch": 4.519214033802043, "grad_norm": 0.15280361420573477, "learning_rate": 4.867832648441934e-06, "loss": 0.3746, "num_tokens": 5258722851.0, "step": 12337 }, { "epoch": 4.519580451609948, "grad_norm": 0.14208486317301466, "learning_rate": 4.866522946561499e-06, "loss": 0.3611, "num_tokens": 5259505016.0, "step": 12338 }, { "epoch": 4.519946869417854, "grad_norm": 0.13876972382288502, "learning_rate": 4.86521420932998e-06, "loss": 0.3619, "num_tokens": 5260302233.0, "step": 12339 }, { "epoch": 4.520313287225759, "grad_norm": 0.14204160127853874, "learning_rate": 4.863906436821059e-06, "loss": 0.3479, "num_tokens": 5261140799.0, "step": 12340 }, { "epoch": 4.520679705033665, "grad_norm": 0.1483370602623587, "learning_rate": 4.862599629108369e-06, "loss": 0.3895, "num_tokens": 5261913541.0, "step": 12341 }, { "epoch": 4.52104612284157, "grad_norm": 0.15061722601566754, "learning_rate": 4.861293786265483e-06, "loss": 0.3753, "num_tokens": 5262726214.0, "step": 12342 }, { "epoch": 4.521412540649475, "grad_norm": 0.14721510773096982, "learning_rate": 4.859988908365926e-06, "loss": 0.3765, "num_tokens": 5263482153.0, "step": 12343 }, { "epoch": 4.521778958457381, "grad_norm": 0.15056394990994698, "learning_rate": 4.858684995483164e-06, "loss": 0.3451, "num_tokens": 5264222471.0, "step": 12344 }, { "epoch": 4.522145376265287, "grad_norm": 0.1557902938492169, "learning_rate": 4.857382047690609e-06, "loss": 0.3673, "num_tokens": 5264949521.0, "step": 12345 }, { "epoch": 4.522511794073192, "grad_norm": 0.13589940634473596, "learning_rate": 4.856080065061623e-06, "loss": 0.3598, "num_tokens": 5265760615.0, "step": 12346 }, { "epoch": 4.522878211881097, "grad_norm": 0.1586577733334897, "learning_rate": 4.854779047669506e-06, "loss": 0.3845, "num_tokens": 5266553351.0, "step": 12347 }, { "epoch": 4.523244629689003, "grad_norm": 0.1374725864958148, "learning_rate": 4.853478995587509e-06, "loss": 0.3585, "num_tokens": 5267392767.0, "step": 12348 }, { "epoch": 4.5236110474969085, "grad_norm": 0.16674786147752865, "learning_rate": 4.852179908888829e-06, "loss": 0.3623, "num_tokens": 5268007441.0, "step": 12349 }, { "epoch": 4.523977465304814, "grad_norm": 0.14985323711426038, "learning_rate": 4.850881787646604e-06, "loss": 0.3967, "num_tokens": 5268820881.0, "step": 12350 }, { "epoch": 4.524343883112719, "grad_norm": 0.15330694548438417, "learning_rate": 4.849584631933925e-06, "loss": 0.3754, "num_tokens": 5269609071.0, "step": 12351 }, { "epoch": 4.524710300920625, "grad_norm": 0.1573926930138781, "learning_rate": 4.848288441823822e-06, "loss": 0.3848, "num_tokens": 5270297246.0, "step": 12352 }, { "epoch": 4.52507671872853, "grad_norm": 0.14940721876135576, "learning_rate": 4.846993217389275e-06, "loss": 0.3942, "num_tokens": 5271079201.0, "step": 12353 }, { "epoch": 4.525443136536436, "grad_norm": 0.15838188772977385, "learning_rate": 4.845698958703202e-06, "loss": 0.3724, "num_tokens": 5271755288.0, "step": 12354 }, { "epoch": 4.525809554344341, "grad_norm": 0.1536437384900548, "learning_rate": 4.8444056658384816e-06, "loss": 0.3717, "num_tokens": 5272554849.0, "step": 12355 }, { "epoch": 4.526175972152247, "grad_norm": 0.16351366464022932, "learning_rate": 4.843113338867923e-06, "loss": 0.3608, "num_tokens": 5273257336.0, "step": 12356 }, { "epoch": 4.526542389960152, "grad_norm": 0.1496254073059516, "learning_rate": 4.841821977864285e-06, "loss": 0.3821, "num_tokens": 5274029937.0, "step": 12357 }, { "epoch": 4.5269088077680575, "grad_norm": 0.15176283829049503, "learning_rate": 4.840531582900277e-06, "loss": 0.3454, "num_tokens": 5274774252.0, "step": 12358 }, { "epoch": 4.527275225575963, "grad_norm": 0.1533902170275421, "learning_rate": 4.839242154048548e-06, "loss": 0.3785, "num_tokens": 5275510491.0, "step": 12359 }, { "epoch": 4.527641643383868, "grad_norm": 0.14811201813910274, "learning_rate": 4.837953691381701e-06, "loss": 0.3776, "num_tokens": 5276261712.0, "step": 12360 }, { "epoch": 4.528008061191774, "grad_norm": 0.14433490097238855, "learning_rate": 4.836666194972275e-06, "loss": 0.373, "num_tokens": 5277105429.0, "step": 12361 }, { "epoch": 4.528374478999679, "grad_norm": 0.1469908644666898, "learning_rate": 4.835379664892759e-06, "loss": 0.3708, "num_tokens": 5277862970.0, "step": 12362 }, { "epoch": 4.528740896807585, "grad_norm": 0.14871689899250257, "learning_rate": 4.8340941012155865e-06, "loss": 0.3609, "num_tokens": 5278655159.0, "step": 12363 }, { "epoch": 4.529107314615491, "grad_norm": 0.14507256782836608, "learning_rate": 4.8328095040131395e-06, "loss": 0.3594, "num_tokens": 5279453824.0, "step": 12364 }, { "epoch": 4.529473732423396, "grad_norm": 0.1481432234536844, "learning_rate": 4.8315258733577405e-06, "loss": 0.3676, "num_tokens": 5280278390.0, "step": 12365 }, { "epoch": 4.529840150231301, "grad_norm": 0.14203812556314874, "learning_rate": 4.8302432093216644e-06, "loss": 0.3813, "num_tokens": 5281110577.0, "step": 12366 }, { "epoch": 4.5302065680392065, "grad_norm": 0.14002691659756872, "learning_rate": 4.828961511977125e-06, "loss": 0.3612, "num_tokens": 5281940720.0, "step": 12367 }, { "epoch": 4.530572985847112, "grad_norm": 0.14611246501193337, "learning_rate": 4.827680781396285e-06, "loss": 0.3705, "num_tokens": 5282717424.0, "step": 12368 }, { "epoch": 4.530939403655018, "grad_norm": 0.14526418100114755, "learning_rate": 4.82640101765125e-06, "loss": 0.3817, "num_tokens": 5283500299.0, "step": 12369 }, { "epoch": 4.531305821462923, "grad_norm": 0.1523870703442442, "learning_rate": 4.825122220814075e-06, "loss": 0.3542, "num_tokens": 5284253994.0, "step": 12370 }, { "epoch": 4.531672239270828, "grad_norm": 0.15848430100132666, "learning_rate": 4.823844390956758e-06, "loss": 0.3887, "num_tokens": 5284932839.0, "step": 12371 }, { "epoch": 4.532038657078734, "grad_norm": 0.1417309375260163, "learning_rate": 4.822567528151246e-06, "loss": 0.3529, "num_tokens": 5285788842.0, "step": 12372 }, { "epoch": 4.53240507488664, "grad_norm": 0.1463510725682107, "learning_rate": 4.821291632469429e-06, "loss": 0.3758, "num_tokens": 5286547650.0, "step": 12373 }, { "epoch": 4.532771492694545, "grad_norm": 0.1428459613705, "learning_rate": 4.82001670398314e-06, "loss": 0.36, "num_tokens": 5287389928.0, "step": 12374 }, { "epoch": 4.53313791050245, "grad_norm": 0.14901429925577533, "learning_rate": 4.8187427427641605e-06, "loss": 0.3657, "num_tokens": 5288172986.0, "step": 12375 }, { "epoch": 4.5335043283103555, "grad_norm": 0.14762574911211385, "learning_rate": 4.817469748884214e-06, "loss": 0.3732, "num_tokens": 5288970104.0, "step": 12376 }, { "epoch": 4.533870746118262, "grad_norm": 0.15059445649885014, "learning_rate": 4.816197722414979e-06, "loss": 0.3793, "num_tokens": 5289822834.0, "step": 12377 }, { "epoch": 4.534237163926167, "grad_norm": 0.14145988518350816, "learning_rate": 4.8149266634280685e-06, "loss": 0.3617, "num_tokens": 5290588999.0, "step": 12378 }, { "epoch": 4.534603581734072, "grad_norm": 0.15030360563677703, "learning_rate": 4.81365657199505e-06, "loss": 0.3785, "num_tokens": 5291383396.0, "step": 12379 }, { "epoch": 4.534969999541977, "grad_norm": 0.15430349107588828, "learning_rate": 4.812387448187428e-06, "loss": 0.3748, "num_tokens": 5292020008.0, "step": 12380 }, { "epoch": 4.535336417349884, "grad_norm": 0.15719915462089346, "learning_rate": 4.811119292076657e-06, "loss": 0.3614, "num_tokens": 5292796854.0, "step": 12381 }, { "epoch": 4.535702835157789, "grad_norm": 0.1461596680444519, "learning_rate": 4.809852103734137e-06, "loss": 0.3891, "num_tokens": 5293557781.0, "step": 12382 }, { "epoch": 4.536069252965694, "grad_norm": 0.2126530188680229, "learning_rate": 4.808585883231216e-06, "loss": 0.3614, "num_tokens": 5294374662.0, "step": 12383 }, { "epoch": 4.536435670773599, "grad_norm": 0.14437780822056392, "learning_rate": 4.807320630639181e-06, "loss": 0.3761, "num_tokens": 5295067746.0, "step": 12384 }, { "epoch": 4.536802088581505, "grad_norm": 0.15407024022001245, "learning_rate": 4.806056346029271e-06, "loss": 0.3702, "num_tokens": 5295771854.0, "step": 12385 }, { "epoch": 4.537168506389411, "grad_norm": 0.16235956375582747, "learning_rate": 4.804793029472668e-06, "loss": 0.3637, "num_tokens": 5296483821.0, "step": 12386 }, { "epoch": 4.537534924197316, "grad_norm": 0.16269058504868672, "learning_rate": 4.803530681040495e-06, "loss": 0.3887, "num_tokens": 5297205501.0, "step": 12387 }, { "epoch": 4.537901342005221, "grad_norm": 0.1429473663435013, "learning_rate": 4.80226930080383e-06, "loss": 0.3454, "num_tokens": 5297958595.0, "step": 12388 }, { "epoch": 4.538267759813127, "grad_norm": 0.1598400093476992, "learning_rate": 4.801008888833689e-06, "loss": 0.3723, "num_tokens": 5298643566.0, "step": 12389 }, { "epoch": 4.538634177621033, "grad_norm": 0.14888935179865853, "learning_rate": 4.799749445201035e-06, "loss": 0.3725, "num_tokens": 5299357692.0, "step": 12390 }, { "epoch": 4.539000595428938, "grad_norm": 0.1693394553105307, "learning_rate": 4.798490969976777e-06, "loss": 0.3679, "num_tokens": 5300043712.0, "step": 12391 }, { "epoch": 4.539367013236843, "grad_norm": 0.1492422288413398, "learning_rate": 4.797233463231769e-06, "loss": 0.3621, "num_tokens": 5300802414.0, "step": 12392 }, { "epoch": 4.539733431044748, "grad_norm": 0.15299796379190944, "learning_rate": 4.795976925036816e-06, "loss": 0.357, "num_tokens": 5301721293.0, "step": 12393 }, { "epoch": 4.5400998488526545, "grad_norm": 0.1438196200908549, "learning_rate": 4.794721355462659e-06, "loss": 0.3774, "num_tokens": 5302369653.0, "step": 12394 }, { "epoch": 4.54046626666056, "grad_norm": 0.16333114188528824, "learning_rate": 4.7934667545799925e-06, "loss": 0.3482, "num_tokens": 5303053350.0, "step": 12395 }, { "epoch": 4.540832684468465, "grad_norm": 0.14535462782565395, "learning_rate": 4.792213122459447e-06, "loss": 0.3767, "num_tokens": 5303896462.0, "step": 12396 }, { "epoch": 4.54119910227637, "grad_norm": 0.1474429870452419, "learning_rate": 4.790960459171612e-06, "loss": 0.3621, "num_tokens": 5304671520.0, "step": 12397 }, { "epoch": 4.541565520084276, "grad_norm": 0.14346610816974617, "learning_rate": 4.789708764787006e-06, "loss": 0.355, "num_tokens": 5305472437.0, "step": 12398 }, { "epoch": 4.541931937892182, "grad_norm": 0.15465113748530138, "learning_rate": 4.788458039376109e-06, "loss": 0.3759, "num_tokens": 5306190401.0, "step": 12399 }, { "epoch": 4.542298355700087, "grad_norm": 0.14724227444854665, "learning_rate": 4.78720828300934e-06, "loss": 0.3968, "num_tokens": 5306920914.0, "step": 12400 }, { "epoch": 4.542664773507992, "grad_norm": 0.1715845411793364, "learning_rate": 4.785959495757061e-06, "loss": 0.3694, "num_tokens": 5307569111.0, "step": 12401 }, { "epoch": 4.543031191315898, "grad_norm": 0.14124918936885938, "learning_rate": 4.784711677689577e-06, "loss": 0.3596, "num_tokens": 5308459585.0, "step": 12402 }, { "epoch": 4.5433976091238035, "grad_norm": 0.14581576328331722, "learning_rate": 4.783464828877146e-06, "loss": 0.3835, "num_tokens": 5309190627.0, "step": 12403 }, { "epoch": 4.543764026931709, "grad_norm": 0.14072813195175993, "learning_rate": 4.78221894938997e-06, "loss": 0.3673, "num_tokens": 5310033052.0, "step": 12404 }, { "epoch": 4.544130444739614, "grad_norm": 0.14567885281777357, "learning_rate": 4.78097403929819e-06, "loss": 0.346, "num_tokens": 5310786627.0, "step": 12405 }, { "epoch": 4.54449686254752, "grad_norm": 0.1457021085086592, "learning_rate": 4.779730098671904e-06, "loss": 0.3672, "num_tokens": 5311586715.0, "step": 12406 }, { "epoch": 4.544863280355425, "grad_norm": 0.14907843157106632, "learning_rate": 4.778487127581141e-06, "loss": 0.396, "num_tokens": 5312376665.0, "step": 12407 }, { "epoch": 4.545229698163331, "grad_norm": 0.1469407863332364, "learning_rate": 4.7772451260958886e-06, "loss": 0.405, "num_tokens": 5313132921.0, "step": 12408 }, { "epoch": 4.545596115971236, "grad_norm": 0.1537588797373505, "learning_rate": 4.776004094286066e-06, "loss": 0.3815, "num_tokens": 5313877256.0, "step": 12409 }, { "epoch": 4.545962533779141, "grad_norm": 0.14324035613964267, "learning_rate": 4.7747640322215546e-06, "loss": 0.368, "num_tokens": 5314776369.0, "step": 12410 }, { "epoch": 4.546328951587047, "grad_norm": 0.1335865338791845, "learning_rate": 4.773524939972169e-06, "loss": 0.3591, "num_tokens": 5315753475.0, "step": 12411 }, { "epoch": 4.546695369394953, "grad_norm": 0.14239303274764686, "learning_rate": 4.772286817607669e-06, "loss": 0.3693, "num_tokens": 5316597870.0, "step": 12412 }, { "epoch": 4.547061787202858, "grad_norm": 0.15996037229532495, "learning_rate": 4.77104966519777e-06, "loss": 0.4194, "num_tokens": 5317326212.0, "step": 12413 }, { "epoch": 4.547428205010764, "grad_norm": 0.14379940638342065, "learning_rate": 4.769813482812118e-06, "loss": 0.3598, "num_tokens": 5318069653.0, "step": 12414 }, { "epoch": 4.547794622818669, "grad_norm": 0.16709994497920966, "learning_rate": 4.76857827052032e-06, "loss": 0.3784, "num_tokens": 5318793690.0, "step": 12415 }, { "epoch": 4.5481610406265744, "grad_norm": 0.13640868866024194, "learning_rate": 4.76734402839192e-06, "loss": 0.3684, "num_tokens": 5319579639.0, "step": 12416 }, { "epoch": 4.54852745843448, "grad_norm": 0.15814770807149253, "learning_rate": 4.766110756496405e-06, "loss": 0.358, "num_tokens": 5320314851.0, "step": 12417 }, { "epoch": 4.548893876242385, "grad_norm": 0.1493181822616693, "learning_rate": 4.764878454903209e-06, "loss": 0.3768, "num_tokens": 5321056131.0, "step": 12418 }, { "epoch": 4.549260294050291, "grad_norm": 0.15018721217402742, "learning_rate": 4.76364712368172e-06, "loss": 0.3916, "num_tokens": 5321883957.0, "step": 12419 }, { "epoch": 4.549626711858196, "grad_norm": 0.1598382296640792, "learning_rate": 4.762416762901258e-06, "loss": 0.3617, "num_tokens": 5322617276.0, "step": 12420 }, { "epoch": 4.549993129666102, "grad_norm": 0.1565420866782614, "learning_rate": 4.761187372631099e-06, "loss": 0.3628, "num_tokens": 5323297594.0, "step": 12421 }, { "epoch": 4.550359547474008, "grad_norm": 0.14811203830409125, "learning_rate": 4.7599589529404585e-06, "loss": 0.3446, "num_tokens": 5324035359.0, "step": 12422 }, { "epoch": 4.550725965281913, "grad_norm": 0.1521879452345141, "learning_rate": 4.758731503898499e-06, "loss": 0.3712, "num_tokens": 5324789546.0, "step": 12423 }, { "epoch": 4.551092383089818, "grad_norm": 0.15595098366874985, "learning_rate": 4.757505025574328e-06, "loss": 0.3739, "num_tokens": 5325540527.0, "step": 12424 }, { "epoch": 4.5514588008977235, "grad_norm": 0.16548055042640697, "learning_rate": 4.756279518036996e-06, "loss": 0.4097, "num_tokens": 5326196257.0, "step": 12425 }, { "epoch": 4.551825218705629, "grad_norm": 0.14784057685174617, "learning_rate": 4.755054981355503e-06, "loss": 0.3359, "num_tokens": 5326934164.0, "step": 12426 }, { "epoch": 4.552191636513535, "grad_norm": 0.1481931693316405, "learning_rate": 4.753831415598799e-06, "loss": 0.3624, "num_tokens": 5327709240.0, "step": 12427 }, { "epoch": 4.55255805432144, "grad_norm": 0.14515583164890355, "learning_rate": 4.752608820835769e-06, "loss": 0.3259, "num_tokens": 5328487167.0, "step": 12428 }, { "epoch": 4.552924472129345, "grad_norm": 0.14477099982720476, "learning_rate": 4.751387197135245e-06, "loss": 0.3443, "num_tokens": 5329190636.0, "step": 12429 }, { "epoch": 4.553290889937251, "grad_norm": 0.16556761161970052, "learning_rate": 4.750166544566008e-06, "loss": 0.3605, "num_tokens": 5329890106.0, "step": 12430 }, { "epoch": 4.553657307745157, "grad_norm": 0.15788235357949973, "learning_rate": 4.748946863196784e-06, "loss": 0.3791, "num_tokens": 5330617584.0, "step": 12431 }, { "epoch": 4.554023725553062, "grad_norm": 0.1407179804582362, "learning_rate": 4.747728153096245e-06, "loss": 0.3466, "num_tokens": 5331428330.0, "step": 12432 }, { "epoch": 4.554390143360967, "grad_norm": 0.15591524161994444, "learning_rate": 4.7465104143330035e-06, "loss": 0.3756, "num_tokens": 5332059422.0, "step": 12433 }, { "epoch": 4.5547565611688725, "grad_norm": 0.16304447301540986, "learning_rate": 4.745293646975623e-06, "loss": 0.3529, "num_tokens": 5332824527.0, "step": 12434 }, { "epoch": 4.555122978976779, "grad_norm": 0.15090713481713716, "learning_rate": 4.7440778510926115e-06, "loss": 0.3578, "num_tokens": 5333572960.0, "step": 12435 }, { "epoch": 4.555489396784684, "grad_norm": 0.1587823152704333, "learning_rate": 4.742863026752415e-06, "loss": 0.3816, "num_tokens": 5334171805.0, "step": 12436 }, { "epoch": 4.555855814592589, "grad_norm": 0.1640623951783656, "learning_rate": 4.741649174023436e-06, "loss": 0.4146, "num_tokens": 5334902528.0, "step": 12437 }, { "epoch": 4.556222232400494, "grad_norm": 0.16234489959102502, "learning_rate": 4.740436292974015e-06, "loss": 0.3515, "num_tokens": 5335705068.0, "step": 12438 }, { "epoch": 4.5565886502084005, "grad_norm": 0.14150948875354682, "learning_rate": 4.739224383672436e-06, "loss": 0.3706, "num_tokens": 5336523097.0, "step": 12439 }, { "epoch": 4.556955068016306, "grad_norm": 0.15294743778724523, "learning_rate": 4.738013446186939e-06, "loss": 0.3581, "num_tokens": 5337241021.0, "step": 12440 }, { "epoch": 4.557321485824211, "grad_norm": 0.16442851701362318, "learning_rate": 4.736803480585699e-06, "loss": 0.4011, "num_tokens": 5337986844.0, "step": 12441 }, { "epoch": 4.557687903632116, "grad_norm": 0.14962314382027223, "learning_rate": 4.735594486936838e-06, "loss": 0.3919, "num_tokens": 5338736766.0, "step": 12442 }, { "epoch": 4.5580543214400215, "grad_norm": 0.16190452001151395, "learning_rate": 4.734386465308426e-06, "loss": 0.3478, "num_tokens": 5339426816.0, "step": 12443 }, { "epoch": 4.558420739247928, "grad_norm": 0.14704120177285965, "learning_rate": 4.7331794157684785e-06, "loss": 0.346, "num_tokens": 5340180082.0, "step": 12444 }, { "epoch": 4.558787157055833, "grad_norm": 0.16028290138463036, "learning_rate": 4.731973338384952e-06, "loss": 0.3502, "num_tokens": 5340785471.0, "step": 12445 }, { "epoch": 4.559153574863738, "grad_norm": 0.14912319042796335, "learning_rate": 4.730768233225751e-06, "loss": 0.3835, "num_tokens": 5341696197.0, "step": 12446 }, { "epoch": 4.559519992671644, "grad_norm": 0.14218048014260395, "learning_rate": 4.729564100358726e-06, "loss": 0.3704, "num_tokens": 5342582495.0, "step": 12447 }, { "epoch": 4.55988641047955, "grad_norm": 0.14465934701242802, "learning_rate": 4.728360939851674e-06, "loss": 0.3962, "num_tokens": 5343364017.0, "step": 12448 }, { "epoch": 4.560252828287455, "grad_norm": 0.1581664665587342, "learning_rate": 4.727158751772335e-06, "loss": 0.4038, "num_tokens": 5344095068.0, "step": 12449 }, { "epoch": 4.56061924609536, "grad_norm": 0.1337173892041479, "learning_rate": 4.725957536188391e-06, "loss": 0.3645, "num_tokens": 5344963160.0, "step": 12450 }, { "epoch": 4.560985663903265, "grad_norm": 0.15130067461311322, "learning_rate": 4.724757293167479e-06, "loss": 0.3771, "num_tokens": 5345766634.0, "step": 12451 }, { "epoch": 4.5613520817111715, "grad_norm": 0.15321221638655338, "learning_rate": 4.723558022777171e-06, "loss": 0.3905, "num_tokens": 5346496134.0, "step": 12452 }, { "epoch": 4.561718499519077, "grad_norm": 0.1448131175096081, "learning_rate": 4.722359725084984e-06, "loss": 0.3547, "num_tokens": 5347296934.0, "step": 12453 }, { "epoch": 4.562084917326982, "grad_norm": 0.14822694266161027, "learning_rate": 4.721162400158391e-06, "loss": 0.3979, "num_tokens": 5348064895.0, "step": 12454 }, { "epoch": 4.562451335134887, "grad_norm": 0.15210842224243776, "learning_rate": 4.719966048064806e-06, "loss": 0.358, "num_tokens": 5348897400.0, "step": 12455 }, { "epoch": 4.562817752942793, "grad_norm": 0.1506354802158186, "learning_rate": 4.7187706688715795e-06, "loss": 0.3863, "num_tokens": 5349687345.0, "step": 12456 }, { "epoch": 4.563184170750699, "grad_norm": 0.14443187848546407, "learning_rate": 4.717576262646017e-06, "loss": 0.3311, "num_tokens": 5350423524.0, "step": 12457 }, { "epoch": 4.563550588558604, "grad_norm": 0.15599816124331864, "learning_rate": 4.7163828294553655e-06, "loss": 0.3463, "num_tokens": 5351169287.0, "step": 12458 }, { "epoch": 4.563917006366509, "grad_norm": 0.15881879350321113, "learning_rate": 4.715190369366815e-06, "loss": 0.3763, "num_tokens": 5351956784.0, "step": 12459 }, { "epoch": 4.564283424174415, "grad_norm": 0.14914737513202492, "learning_rate": 4.713998882447507e-06, "loss": 0.3558, "num_tokens": 5352798405.0, "step": 12460 }, { "epoch": 4.5646498419823205, "grad_norm": 0.1421507647922344, "learning_rate": 4.712808368764525e-06, "loss": 0.3437, "num_tokens": 5353568786.0, "step": 12461 }, { "epoch": 4.565016259790226, "grad_norm": 0.16138957263309126, "learning_rate": 4.711618828384895e-06, "loss": 0.3678, "num_tokens": 5354279996.0, "step": 12462 }, { "epoch": 4.565382677598131, "grad_norm": 0.15889400476915647, "learning_rate": 4.7104302613755916e-06, "loss": 0.3919, "num_tokens": 5355058790.0, "step": 12463 }, { "epoch": 4.565749095406037, "grad_norm": 0.14298532473591885, "learning_rate": 4.709242667803531e-06, "loss": 0.3594, "num_tokens": 5355921332.0, "step": 12464 }, { "epoch": 4.566115513213942, "grad_norm": 0.14517708179084657, "learning_rate": 4.708056047735581e-06, "loss": 0.3535, "num_tokens": 5356674871.0, "step": 12465 }, { "epoch": 4.566481931021848, "grad_norm": 0.1760487505893514, "learning_rate": 4.706870401238549e-06, "loss": 0.3788, "num_tokens": 5357338854.0, "step": 12466 }, { "epoch": 4.566848348829753, "grad_norm": 0.1418825493681937, "learning_rate": 4.705685728379188e-06, "loss": 0.3569, "num_tokens": 5358151057.0, "step": 12467 }, { "epoch": 4.567214766637658, "grad_norm": 0.13799687422355064, "learning_rate": 4.7045020292242e-06, "loss": 0.3295, "num_tokens": 5358955938.0, "step": 12468 }, { "epoch": 4.567581184445564, "grad_norm": 0.13874047892861177, "learning_rate": 4.70331930384023e-06, "loss": 0.3517, "num_tokens": 5359773612.0, "step": 12469 }, { "epoch": 4.5679476022534695, "grad_norm": 0.13926052315889392, "learning_rate": 4.70213755229386e-06, "loss": 0.3671, "num_tokens": 5360615107.0, "step": 12470 }, { "epoch": 4.568314020061375, "grad_norm": 0.15985839032941157, "learning_rate": 4.700956774651636e-06, "loss": 0.388, "num_tokens": 5361362576.0, "step": 12471 }, { "epoch": 4.568680437869281, "grad_norm": 0.1516441849153984, "learning_rate": 4.699776970980034e-06, "loss": 0.3665, "num_tokens": 5362074819.0, "step": 12472 }, { "epoch": 4.569046855677186, "grad_norm": 0.15914664227933664, "learning_rate": 4.698598141345477e-06, "loss": 0.3469, "num_tokens": 5362873514.0, "step": 12473 }, { "epoch": 4.569413273485091, "grad_norm": 0.15919896500370195, "learning_rate": 4.697420285814334e-06, "loss": 0.3606, "num_tokens": 5363625972.0, "step": 12474 }, { "epoch": 4.569779691292997, "grad_norm": 0.13828924931080586, "learning_rate": 4.696243404452923e-06, "loss": 0.3681, "num_tokens": 5364472905.0, "step": 12475 }, { "epoch": 4.570146109100902, "grad_norm": 0.14698750375413486, "learning_rate": 4.695067497327508e-06, "loss": 0.3581, "num_tokens": 5365185254.0, "step": 12476 }, { "epoch": 4.570512526908808, "grad_norm": 0.1568424781968628, "learning_rate": 4.6938925645042935e-06, "loss": 0.3836, "num_tokens": 5365952739.0, "step": 12477 }, { "epoch": 4.570878944716713, "grad_norm": 0.1419315467592198, "learning_rate": 4.692718606049428e-06, "loss": 0.3694, "num_tokens": 5366793463.0, "step": 12478 }, { "epoch": 4.5712453625246185, "grad_norm": 0.1499399726123019, "learning_rate": 4.691545622029009e-06, "loss": 0.3741, "num_tokens": 5367485594.0, "step": 12479 }, { "epoch": 4.571611780332524, "grad_norm": 0.16031409198282368, "learning_rate": 4.690373612509075e-06, "loss": 0.3917, "num_tokens": 5368160193.0, "step": 12480 }, { "epoch": 4.57197819814043, "grad_norm": 0.1692354776408342, "learning_rate": 4.689202577555616e-06, "loss": 0.387, "num_tokens": 5368785847.0, "step": 12481 }, { "epoch": 4.572344615948335, "grad_norm": 0.16095405267152277, "learning_rate": 4.688032517234566e-06, "loss": 0.3585, "num_tokens": 5369541325.0, "step": 12482 }, { "epoch": 4.57271103375624, "grad_norm": 0.15009900140496288, "learning_rate": 4.686863431611797e-06, "loss": 0.3557, "num_tokens": 5370264791.0, "step": 12483 }, { "epoch": 4.573077451564146, "grad_norm": 0.14395922665256275, "learning_rate": 4.685695320753134e-06, "loss": 0.3565, "num_tokens": 5371049479.0, "step": 12484 }, { "epoch": 4.573443869372052, "grad_norm": 0.15221834310809862, "learning_rate": 4.684528184724341e-06, "loss": 0.374, "num_tokens": 5371735343.0, "step": 12485 }, { "epoch": 4.573810287179957, "grad_norm": 0.15314158228523933, "learning_rate": 4.68336202359113e-06, "loss": 0.3548, "num_tokens": 5372486713.0, "step": 12486 }, { "epoch": 4.574176704987862, "grad_norm": 0.15294559216239095, "learning_rate": 4.68219683741916e-06, "loss": 0.3579, "num_tokens": 5373154392.0, "step": 12487 }, { "epoch": 4.574543122795768, "grad_norm": 0.15671079191046033, "learning_rate": 4.681032626274034e-06, "loss": 0.3701, "num_tokens": 5373912274.0, "step": 12488 }, { "epoch": 4.574909540603674, "grad_norm": 0.1538911049412024, "learning_rate": 4.679869390221299e-06, "loss": 0.3771, "num_tokens": 5374653889.0, "step": 12489 }, { "epoch": 4.575275958411579, "grad_norm": 0.14730836602423183, "learning_rate": 4.678707129326446e-06, "loss": 0.3762, "num_tokens": 5375394727.0, "step": 12490 }, { "epoch": 4.575642376219484, "grad_norm": 0.14120042736314892, "learning_rate": 4.677545843654915e-06, "loss": 0.3509, "num_tokens": 5376220816.0, "step": 12491 }, { "epoch": 4.5760087940273895, "grad_norm": 0.14292191633542572, "learning_rate": 4.676385533272084e-06, "loss": 0.3516, "num_tokens": 5377015773.0, "step": 12492 }, { "epoch": 4.576375211835295, "grad_norm": 0.15666048588616405, "learning_rate": 4.675226198243286e-06, "loss": 0.3716, "num_tokens": 5377641154.0, "step": 12493 }, { "epoch": 4.576741629643201, "grad_norm": 0.16693834619901132, "learning_rate": 4.67406783863379e-06, "loss": 0.4016, "num_tokens": 5378321409.0, "step": 12494 }, { "epoch": 4.577108047451106, "grad_norm": 0.15574177596812264, "learning_rate": 4.672910454508819e-06, "loss": 0.3753, "num_tokens": 5379071581.0, "step": 12495 }, { "epoch": 4.577474465259011, "grad_norm": 0.14684922784336688, "learning_rate": 4.671754045933531e-06, "loss": 0.3781, "num_tokens": 5379927232.0, "step": 12496 }, { "epoch": 4.5778408830669175, "grad_norm": 0.1822682555365618, "learning_rate": 4.6705986129730345e-06, "loss": 0.3806, "num_tokens": 5380573772.0, "step": 12497 }, { "epoch": 4.578207300874823, "grad_norm": 0.15770227812416776, "learning_rate": 4.669444155692386e-06, "loss": 0.3563, "num_tokens": 5381269999.0, "step": 12498 }, { "epoch": 4.578573718682728, "grad_norm": 0.14860072647977618, "learning_rate": 4.668290674156582e-06, "loss": 0.3536, "num_tokens": 5382031338.0, "step": 12499 }, { "epoch": 4.578940136490633, "grad_norm": 0.15630432904579317, "learning_rate": 4.6671381684305665e-06, "loss": 0.3425, "num_tokens": 5382784026.0, "step": 12500 }, { "epoch": 4.5793065542985385, "grad_norm": 0.14603457922573357, "learning_rate": 4.665986638579226e-06, "loss": 0.3853, "num_tokens": 5383620779.0, "step": 12501 }, { "epoch": 4.579672972106445, "grad_norm": 0.14366893358074456, "learning_rate": 4.664836084667396e-06, "loss": 0.3707, "num_tokens": 5384387525.0, "step": 12502 }, { "epoch": 4.58003938991435, "grad_norm": 0.14644164790897196, "learning_rate": 4.663686506759851e-06, "loss": 0.344, "num_tokens": 5385171908.0, "step": 12503 }, { "epoch": 4.580405807722255, "grad_norm": 0.14071339602571414, "learning_rate": 4.6625379049213195e-06, "loss": 0.3863, "num_tokens": 5386026120.0, "step": 12504 }, { "epoch": 4.58077222553016, "grad_norm": 0.1421996987188161, "learning_rate": 4.6613902792164685e-06, "loss": 0.3749, "num_tokens": 5386827409.0, "step": 12505 }, { "epoch": 4.5811386433380665, "grad_norm": 0.1605650352638398, "learning_rate": 4.6602436297099115e-06, "loss": 0.3989, "num_tokens": 5387563450.0, "step": 12506 }, { "epoch": 4.581505061145972, "grad_norm": 0.14228648975149932, "learning_rate": 4.659097956466206e-06, "loss": 0.3601, "num_tokens": 5388352178.0, "step": 12507 }, { "epoch": 4.581871478953877, "grad_norm": 0.14008446349972814, "learning_rate": 4.657953259549857e-06, "loss": 0.3621, "num_tokens": 5389212467.0, "step": 12508 }, { "epoch": 4.582237896761782, "grad_norm": 0.14219688119383259, "learning_rate": 4.6568095390253085e-06, "loss": 0.3792, "num_tokens": 5390026634.0, "step": 12509 }, { "epoch": 4.582604314569688, "grad_norm": 0.1684802776148132, "learning_rate": 4.655666794956965e-06, "loss": 0.354, "num_tokens": 5390871487.0, "step": 12510 }, { "epoch": 4.582970732377594, "grad_norm": 0.15498933149273536, "learning_rate": 4.6545250274091545e-06, "loss": 0.3803, "num_tokens": 5391559172.0, "step": 12511 }, { "epoch": 4.583337150185499, "grad_norm": 0.16781914106046325, "learning_rate": 4.653384236446168e-06, "loss": 0.4128, "num_tokens": 5392297388.0, "step": 12512 }, { "epoch": 4.583703567993404, "grad_norm": 0.14297872740377554, "learning_rate": 4.652244422132232e-06, "loss": 0.3707, "num_tokens": 5393076532.0, "step": 12513 }, { "epoch": 4.58406998580131, "grad_norm": 0.1503733597976006, "learning_rate": 4.651105584531514e-06, "loss": 0.381, "num_tokens": 5393834601.0, "step": 12514 }, { "epoch": 4.584436403609216, "grad_norm": 0.16325582209424702, "learning_rate": 4.649967723708141e-06, "loss": 0.3409, "num_tokens": 5394479406.0, "step": 12515 }, { "epoch": 4.584802821417121, "grad_norm": 0.14374944477860307, "learning_rate": 4.6488308397261775e-06, "loss": 0.3393, "num_tokens": 5395189568.0, "step": 12516 }, { "epoch": 4.585169239225026, "grad_norm": 0.14898630159343998, "learning_rate": 4.647694932649625e-06, "loss": 0.3513, "num_tokens": 5395893954.0, "step": 12517 }, { "epoch": 4.585535657032931, "grad_norm": 0.16086807781505644, "learning_rate": 4.646560002542446e-06, "loss": 0.3844, "num_tokens": 5396557748.0, "step": 12518 }, { "epoch": 4.5859020748408375, "grad_norm": 0.17100922191618145, "learning_rate": 4.6454260494685324e-06, "loss": 0.3684, "num_tokens": 5397189106.0, "step": 12519 }, { "epoch": 4.586268492648743, "grad_norm": 0.15055994125264077, "learning_rate": 4.64429307349173e-06, "loss": 0.3839, "num_tokens": 5397986785.0, "step": 12520 }, { "epoch": 4.586634910456648, "grad_norm": 0.14946833200623633, "learning_rate": 4.643161074675828e-06, "loss": 0.346, "num_tokens": 5398791337.0, "step": 12521 }, { "epoch": 4.587001328264554, "grad_norm": 0.15079396699179978, "learning_rate": 4.642030053084559e-06, "loss": 0.3751, "num_tokens": 5399674380.0, "step": 12522 }, { "epoch": 4.587367746072459, "grad_norm": 0.14680504079216147, "learning_rate": 4.6409000087816055e-06, "loss": 0.3563, "num_tokens": 5400379497.0, "step": 12523 }, { "epoch": 4.587734163880365, "grad_norm": 0.15209797478698234, "learning_rate": 4.639770941830589e-06, "loss": 0.349, "num_tokens": 5401147505.0, "step": 12524 }, { "epoch": 4.58810058168827, "grad_norm": 0.1528802953278917, "learning_rate": 4.638642852295076e-06, "loss": 0.3978, "num_tokens": 5401962482.0, "step": 12525 }, { "epoch": 4.588466999496175, "grad_norm": 0.14367487798959516, "learning_rate": 4.637515740238583e-06, "loss": 0.3803, "num_tokens": 5402780937.0, "step": 12526 }, { "epoch": 4.588833417304081, "grad_norm": 0.15098383857154352, "learning_rate": 4.636389605724569e-06, "loss": 0.3592, "num_tokens": 5403430322.0, "step": 12527 }, { "epoch": 4.5891998351119865, "grad_norm": 0.1679164801412258, "learning_rate": 4.635264448816435e-06, "loss": 0.3478, "num_tokens": 5404140727.0, "step": 12528 }, { "epoch": 4.589566252919892, "grad_norm": 0.14992397740889352, "learning_rate": 4.6341402695775304e-06, "loss": 0.36, "num_tokens": 5404931431.0, "step": 12529 }, { "epoch": 4.589932670727798, "grad_norm": 0.15306741687269196, "learning_rate": 4.633017068071151e-06, "loss": 0.3752, "num_tokens": 5405603588.0, "step": 12530 }, { "epoch": 4.590299088535703, "grad_norm": 0.15270007754960485, "learning_rate": 4.631894844360532e-06, "loss": 0.3182, "num_tokens": 5406314321.0, "step": 12531 }, { "epoch": 4.590665506343608, "grad_norm": 0.15481358549084562, "learning_rate": 4.630773598508859e-06, "loss": 0.3235, "num_tokens": 5407070743.0, "step": 12532 }, { "epoch": 4.591031924151514, "grad_norm": 0.14393095053438448, "learning_rate": 4.629653330579261e-06, "loss": 0.3724, "num_tokens": 5407902911.0, "step": 12533 }, { "epoch": 4.591398341959419, "grad_norm": 0.15024873985754253, "learning_rate": 4.62853404063481e-06, "loss": 0.3647, "num_tokens": 5408698318.0, "step": 12534 }, { "epoch": 4.591764759767325, "grad_norm": 0.1439092329473395, "learning_rate": 4.627415728738523e-06, "loss": 0.3531, "num_tokens": 5409538713.0, "step": 12535 }, { "epoch": 4.59213117757523, "grad_norm": 0.14013028223097757, "learning_rate": 4.6262983949533635e-06, "loss": 0.3556, "num_tokens": 5410328285.0, "step": 12536 }, { "epoch": 4.5924975953831355, "grad_norm": 0.15593263624174594, "learning_rate": 4.625182039342243e-06, "loss": 0.3766, "num_tokens": 5411028770.0, "step": 12537 }, { "epoch": 4.592864013191041, "grad_norm": 0.16059200020469266, "learning_rate": 4.624066661968013e-06, "loss": 0.384, "num_tokens": 5411763063.0, "step": 12538 }, { "epoch": 4.593230430998947, "grad_norm": 0.16678088837525187, "learning_rate": 4.62295226289347e-06, "loss": 0.3915, "num_tokens": 5412393177.0, "step": 12539 }, { "epoch": 4.593596848806852, "grad_norm": 0.15178922837767486, "learning_rate": 4.621838842181359e-06, "loss": 0.3547, "num_tokens": 5413184132.0, "step": 12540 }, { "epoch": 4.593963266614757, "grad_norm": 0.15304143078896126, "learning_rate": 4.6207263998943645e-06, "loss": 0.3668, "num_tokens": 5413894812.0, "step": 12541 }, { "epoch": 4.594329684422663, "grad_norm": 0.15750073330407421, "learning_rate": 4.61961493609512e-06, "loss": 0.3724, "num_tokens": 5414591295.0, "step": 12542 }, { "epoch": 4.594696102230569, "grad_norm": 0.15879629896288783, "learning_rate": 4.6185044508462035e-06, "loss": 0.4088, "num_tokens": 5415408960.0, "step": 12543 }, { "epoch": 4.595062520038474, "grad_norm": 0.15784764341229363, "learning_rate": 4.617394944210142e-06, "loss": 0.3417, "num_tokens": 5416058026.0, "step": 12544 }, { "epoch": 4.595428937846379, "grad_norm": 0.162290586720894, "learning_rate": 4.6162864162493996e-06, "loss": 0.348, "num_tokens": 5416744067.0, "step": 12545 }, { "epoch": 4.5957953556542845, "grad_norm": 0.14483472785622087, "learning_rate": 4.615178867026385e-06, "loss": 0.3826, "num_tokens": 5417504423.0, "step": 12546 }, { "epoch": 4.596161773462191, "grad_norm": 0.16544864741404378, "learning_rate": 4.61407229660346e-06, "loss": 0.3659, "num_tokens": 5418187610.0, "step": 12547 }, { "epoch": 4.596528191270096, "grad_norm": 0.14852947908760666, "learning_rate": 4.612966705042926e-06, "loss": 0.3546, "num_tokens": 5418911901.0, "step": 12548 }, { "epoch": 4.596894609078001, "grad_norm": 0.1405029145947002, "learning_rate": 4.611862092407028e-06, "loss": 0.3633, "num_tokens": 5419712514.0, "step": 12549 }, { "epoch": 4.597261026885906, "grad_norm": 0.14257152461304265, "learning_rate": 4.610758458757959e-06, "loss": 0.3656, "num_tokens": 5420548848.0, "step": 12550 }, { "epoch": 4.597627444693812, "grad_norm": 0.1504541257126119, "learning_rate": 4.6096558041578584e-06, "loss": 0.3727, "num_tokens": 5421252578.0, "step": 12551 }, { "epoch": 4.597993862501718, "grad_norm": 0.14666362419387502, "learning_rate": 4.608554128668805e-06, "loss": 0.3663, "num_tokens": 5422066523.0, "step": 12552 }, { "epoch": 4.598360280309623, "grad_norm": 0.1574105217443659, "learning_rate": 4.607453432352826e-06, "loss": 0.3884, "num_tokens": 5422780625.0, "step": 12553 }, { "epoch": 4.598726698117528, "grad_norm": 0.18211958791668958, "learning_rate": 4.606353715271892e-06, "loss": 0.3653, "num_tokens": 5423516948.0, "step": 12554 }, { "epoch": 4.5990931159254345, "grad_norm": 0.14235652060724244, "learning_rate": 4.605254977487921e-06, "loss": 0.366, "num_tokens": 5424359549.0, "step": 12555 }, { "epoch": 4.59945953373334, "grad_norm": 0.15213720556988902, "learning_rate": 4.604157219062769e-06, "loss": 0.3635, "num_tokens": 5425059593.0, "step": 12556 }, { "epoch": 4.599825951541245, "grad_norm": 0.15114428487976697, "learning_rate": 4.60306044005825e-06, "loss": 0.3691, "num_tokens": 5425810154.0, "step": 12557 }, { "epoch": 4.60019236934915, "grad_norm": 0.15533971522750717, "learning_rate": 4.601964640536111e-06, "loss": 0.3557, "num_tokens": 5426480164.0, "step": 12558 }, { "epoch": 4.6005587871570555, "grad_norm": 0.15427690227366783, "learning_rate": 4.600869820558047e-06, "loss": 0.3638, "num_tokens": 5427266162.0, "step": 12559 }, { "epoch": 4.600925204964962, "grad_norm": 0.15454318670271056, "learning_rate": 4.599775980185699e-06, "loss": 0.3611, "num_tokens": 5427982080.0, "step": 12560 }, { "epoch": 4.601291622772867, "grad_norm": 0.1494861957054629, "learning_rate": 4.598683119480654e-06, "loss": 0.3835, "num_tokens": 5428780665.0, "step": 12561 }, { "epoch": 4.601658040580772, "grad_norm": 0.1608957121057371, "learning_rate": 4.59759123850444e-06, "loss": 0.372, "num_tokens": 5429501723.0, "step": 12562 }, { "epoch": 4.602024458388677, "grad_norm": 0.16361137809122192, "learning_rate": 4.596500337318528e-06, "loss": 0.3799, "num_tokens": 5430216865.0, "step": 12563 }, { "epoch": 4.6023908761965835, "grad_norm": 0.1539521254744033, "learning_rate": 4.5954104159843474e-06, "loss": 0.3706, "num_tokens": 5431035734.0, "step": 12564 }, { "epoch": 4.602757294004489, "grad_norm": 0.14415060600761803, "learning_rate": 4.594321474563258e-06, "loss": 0.347, "num_tokens": 5431800648.0, "step": 12565 }, { "epoch": 4.603123711812394, "grad_norm": 0.16119027862457527, "learning_rate": 4.59323351311657e-06, "loss": 0.3359, "num_tokens": 5432569185.0, "step": 12566 }, { "epoch": 4.603490129620299, "grad_norm": 0.1504208497079082, "learning_rate": 4.592146531705538e-06, "loss": 0.3561, "num_tokens": 5433366586.0, "step": 12567 }, { "epoch": 4.603856547428205, "grad_norm": 0.15133924909338717, "learning_rate": 4.591060530391361e-06, "loss": 0.3955, "num_tokens": 5434194625.0, "step": 12568 }, { "epoch": 4.604222965236111, "grad_norm": 0.14430051499449606, "learning_rate": 4.589975509235179e-06, "loss": 0.3683, "num_tokens": 5435006857.0, "step": 12569 }, { "epoch": 4.604589383044016, "grad_norm": 0.14442407985528688, "learning_rate": 4.588891468298086e-06, "loss": 0.3509, "num_tokens": 5435780451.0, "step": 12570 }, { "epoch": 4.604955800851921, "grad_norm": 0.15303081373392036, "learning_rate": 4.587808407641116e-06, "loss": 0.3653, "num_tokens": 5436529605.0, "step": 12571 }, { "epoch": 4.605322218659827, "grad_norm": 0.1551357277328221, "learning_rate": 4.5867263273252445e-06, "loss": 0.3385, "num_tokens": 5437262937.0, "step": 12572 }, { "epoch": 4.6056886364677325, "grad_norm": 0.15882181990154792, "learning_rate": 4.5856452274113984e-06, "loss": 0.3745, "num_tokens": 5438010339.0, "step": 12573 }, { "epoch": 4.606055054275638, "grad_norm": 0.15331900349745303, "learning_rate": 4.584565107960442e-06, "loss": 0.3337, "num_tokens": 5438696431.0, "step": 12574 }, { "epoch": 4.606421472083543, "grad_norm": 0.14584293390129346, "learning_rate": 4.583485969033188e-06, "loss": 0.3834, "num_tokens": 5439489758.0, "step": 12575 }, { "epoch": 4.606787889891448, "grad_norm": 0.14584672219838107, "learning_rate": 4.582407810690398e-06, "loss": 0.3709, "num_tokens": 5440276041.0, "step": 12576 }, { "epoch": 4.607154307699354, "grad_norm": 0.13883612346090923, "learning_rate": 4.581330632992768e-06, "loss": 0.3461, "num_tokens": 5441044999.0, "step": 12577 }, { "epoch": 4.60752072550726, "grad_norm": 0.16361243752771168, "learning_rate": 4.580254436000954e-06, "loss": 0.393, "num_tokens": 5441801456.0, "step": 12578 }, { "epoch": 4.607887143315165, "grad_norm": 0.16466105151635885, "learning_rate": 4.579179219775543e-06, "loss": 0.381, "num_tokens": 5442531716.0, "step": 12579 }, { "epoch": 4.608253561123071, "grad_norm": 0.15881679737239474, "learning_rate": 4.578104984377068e-06, "loss": 0.3948, "num_tokens": 5443176993.0, "step": 12580 }, { "epoch": 4.608619978930976, "grad_norm": 0.15039665456356285, "learning_rate": 4.577031729866018e-06, "loss": 0.3504, "num_tokens": 5443950179.0, "step": 12581 }, { "epoch": 4.6089863967388816, "grad_norm": 0.15742537256608038, "learning_rate": 4.575959456302818e-06, "loss": 0.3703, "num_tokens": 5444747854.0, "step": 12582 }, { "epoch": 4.609352814546787, "grad_norm": 0.1388859339415845, "learning_rate": 4.574888163747836e-06, "loss": 0.3555, "num_tokens": 5445548998.0, "step": 12583 }, { "epoch": 4.609719232354692, "grad_norm": 0.16734111899553117, "learning_rate": 4.573817852261386e-06, "loss": 0.3708, "num_tokens": 5446204947.0, "step": 12584 }, { "epoch": 4.610085650162598, "grad_norm": 0.1610017145471484, "learning_rate": 4.572748521903736e-06, "loss": 0.3219, "num_tokens": 5446940369.0, "step": 12585 }, { "epoch": 4.610452067970503, "grad_norm": 0.13981790361807034, "learning_rate": 4.571680172735085e-06, "loss": 0.3736, "num_tokens": 5447829586.0, "step": 12586 }, { "epoch": 4.610818485778409, "grad_norm": 0.13974766167306896, "learning_rate": 4.570612804815585e-06, "loss": 0.3724, "num_tokens": 5448751982.0, "step": 12587 }, { "epoch": 4.611184903586314, "grad_norm": 0.14284865239203182, "learning_rate": 4.569546418205334e-06, "loss": 0.3629, "num_tokens": 5449561688.0, "step": 12588 }, { "epoch": 4.61155132139422, "grad_norm": 0.14836959214699177, "learning_rate": 4.568481012964368e-06, "loss": 0.3804, "num_tokens": 5450364239.0, "step": 12589 }, { "epoch": 4.611917739202125, "grad_norm": 0.15406847829098488, "learning_rate": 4.567416589152672e-06, "loss": 0.3261, "num_tokens": 5451023971.0, "step": 12590 }, { "epoch": 4.612284157010031, "grad_norm": 0.16224987382419082, "learning_rate": 4.566353146830176e-06, "loss": 0.3771, "num_tokens": 5451704185.0, "step": 12591 }, { "epoch": 4.612650574817936, "grad_norm": 0.16101018439540826, "learning_rate": 4.565290686056754e-06, "loss": 0.3879, "num_tokens": 5452540782.0, "step": 12592 }, { "epoch": 4.613016992625842, "grad_norm": 0.14302463653982944, "learning_rate": 4.564229206892224e-06, "loss": 0.3763, "num_tokens": 5453342806.0, "step": 12593 }, { "epoch": 4.613383410433747, "grad_norm": 0.13969281958323182, "learning_rate": 4.563168709396351e-06, "loss": 0.3655, "num_tokens": 5454166421.0, "step": 12594 }, { "epoch": 4.6137498282416525, "grad_norm": 0.1514827079576258, "learning_rate": 4.562109193628842e-06, "loss": 0.3768, "num_tokens": 5454905185.0, "step": 12595 }, { "epoch": 4.614116246049558, "grad_norm": 0.13616854491386368, "learning_rate": 4.561050659649348e-06, "loss": 0.3328, "num_tokens": 5455694154.0, "step": 12596 }, { "epoch": 4.614482663857464, "grad_norm": 0.14817769583016874, "learning_rate": 4.559993107517469e-06, "loss": 0.3472, "num_tokens": 5456490396.0, "step": 12597 }, { "epoch": 4.614849081665369, "grad_norm": 0.15243099027261617, "learning_rate": 4.558936537292744e-06, "loss": 0.4034, "num_tokens": 5457227045.0, "step": 12598 }, { "epoch": 4.615215499473274, "grad_norm": 0.1446134563068519, "learning_rate": 4.557880949034667e-06, "loss": 0.3499, "num_tokens": 5457999020.0, "step": 12599 }, { "epoch": 4.61558191728118, "grad_norm": 0.14619664984075592, "learning_rate": 4.556826342802665e-06, "loss": 0.3745, "num_tokens": 5458751698.0, "step": 12600 }, { "epoch": 4.615948335089085, "grad_norm": 0.14694288980920786, "learning_rate": 4.5557727186561126e-06, "loss": 0.3491, "num_tokens": 5459565689.0, "step": 12601 }, { "epoch": 4.616314752896991, "grad_norm": 0.14848283079741542, "learning_rate": 4.554720076654334e-06, "loss": 0.3781, "num_tokens": 5460402178.0, "step": 12602 }, { "epoch": 4.616681170704896, "grad_norm": 0.15026524965207785, "learning_rate": 4.5536684168565925e-06, "loss": 0.3656, "num_tokens": 5461125408.0, "step": 12603 }, { "epoch": 4.6170475885128015, "grad_norm": 0.14957967598944175, "learning_rate": 4.552617739322101e-06, "loss": 0.3614, "num_tokens": 5461865086.0, "step": 12604 }, { "epoch": 4.617414006320708, "grad_norm": 0.1620156367551955, "learning_rate": 4.5515680441100155e-06, "loss": 0.3667, "num_tokens": 5462595855.0, "step": 12605 }, { "epoch": 4.617780424128613, "grad_norm": 0.1618272711545838, "learning_rate": 4.5505193312794334e-06, "loss": 0.3844, "num_tokens": 5463200037.0, "step": 12606 }, { "epoch": 4.618146841936518, "grad_norm": 0.14726127725727892, "learning_rate": 4.549471600889402e-06, "loss": 0.3446, "num_tokens": 5463997508.0, "step": 12607 }, { "epoch": 4.618513259744423, "grad_norm": 0.14938596270651935, "learning_rate": 4.548424852998905e-06, "loss": 0.3634, "num_tokens": 5464701286.0, "step": 12608 }, { "epoch": 4.618879677552329, "grad_norm": 0.151181205860188, "learning_rate": 4.547379087666884e-06, "loss": 0.3608, "num_tokens": 5465437231.0, "step": 12609 }, { "epoch": 4.619246095360235, "grad_norm": 0.16424035156632344, "learning_rate": 4.546334304952213e-06, "loss": 0.3845, "num_tokens": 5466187970.0, "step": 12610 }, { "epoch": 4.61961251316814, "grad_norm": 0.15737415971693536, "learning_rate": 4.545290504913716e-06, "loss": 0.3833, "num_tokens": 5466923897.0, "step": 12611 }, { "epoch": 4.619978930976045, "grad_norm": 0.1557587409192936, "learning_rate": 4.544247687610163e-06, "loss": 0.3463, "num_tokens": 5467658748.0, "step": 12612 }, { "epoch": 4.6203453487839505, "grad_norm": 0.15598586424147626, "learning_rate": 4.543205853100264e-06, "loss": 0.3653, "num_tokens": 5468342121.0, "step": 12613 }, { "epoch": 4.620711766591857, "grad_norm": 0.15190468407239952, "learning_rate": 4.542165001442677e-06, "loss": 0.3904, "num_tokens": 5469093576.0, "step": 12614 }, { "epoch": 4.621078184399762, "grad_norm": 0.15066134723054173, "learning_rate": 4.541125132696005e-06, "loss": 0.3298, "num_tokens": 5469847856.0, "step": 12615 }, { "epoch": 4.621444602207667, "grad_norm": 0.14086486177853844, "learning_rate": 4.5400862469187955e-06, "loss": 0.3335, "num_tokens": 5470586004.0, "step": 12616 }, { "epoch": 4.621811020015572, "grad_norm": 0.15711298465628096, "learning_rate": 4.539048344169538e-06, "loss": 0.3729, "num_tokens": 5471393295.0, "step": 12617 }, { "epoch": 4.622177437823479, "grad_norm": 0.14456858934913772, "learning_rate": 4.538011424506667e-06, "loss": 0.3724, "num_tokens": 5472158000.0, "step": 12618 }, { "epoch": 4.622543855631384, "grad_norm": 0.15526132060428674, "learning_rate": 4.536975487988565e-06, "loss": 0.3509, "num_tokens": 5472864297.0, "step": 12619 }, { "epoch": 4.622910273439289, "grad_norm": 0.14970009511994425, "learning_rate": 4.535940534673558e-06, "loss": 0.358, "num_tokens": 5473726290.0, "step": 12620 }, { "epoch": 4.623276691247194, "grad_norm": 0.15368587434613412, "learning_rate": 4.534906564619917e-06, "loss": 0.3783, "num_tokens": 5474470662.0, "step": 12621 }, { "epoch": 4.6236431090551005, "grad_norm": 0.1396603366703065, "learning_rate": 4.533873577885857e-06, "loss": 0.3642, "num_tokens": 5475312968.0, "step": 12622 }, { "epoch": 4.624009526863006, "grad_norm": 0.1354935573389604, "learning_rate": 4.5328415745295306e-06, "loss": 0.3631, "num_tokens": 5476168639.0, "step": 12623 }, { "epoch": 4.624375944670911, "grad_norm": 0.1440881909989787, "learning_rate": 4.531810554609048e-06, "loss": 0.3825, "num_tokens": 5476939601.0, "step": 12624 }, { "epoch": 4.624742362478816, "grad_norm": 0.1468929422257969, "learning_rate": 4.530780518182454e-06, "loss": 0.3545, "num_tokens": 5477762631.0, "step": 12625 }, { "epoch": 4.6251087802867215, "grad_norm": 0.15047411779464595, "learning_rate": 4.529751465307746e-06, "loss": 0.3761, "num_tokens": 5478538580.0, "step": 12626 }, { "epoch": 4.625475198094628, "grad_norm": 0.14809631176756566, "learning_rate": 4.52872339604286e-06, "loss": 0.3653, "num_tokens": 5479267854.0, "step": 12627 }, { "epoch": 4.625841615902533, "grad_norm": 0.1471877927039874, "learning_rate": 4.527696310445676e-06, "loss": 0.3654, "num_tokens": 5480113045.0, "step": 12628 }, { "epoch": 4.626208033710438, "grad_norm": 0.13684005131863247, "learning_rate": 4.5266702085740255e-06, "loss": 0.3652, "num_tokens": 5480933961.0, "step": 12629 }, { "epoch": 4.626574451518344, "grad_norm": 0.15085636033304584, "learning_rate": 4.5256450904856745e-06, "loss": 0.3471, "num_tokens": 5481676020.0, "step": 12630 }, { "epoch": 4.6269408693262495, "grad_norm": 0.14327374156195216, "learning_rate": 4.524620956238342e-06, "loss": 0.3753, "num_tokens": 5482551798.0, "step": 12631 }, { "epoch": 4.627307287134155, "grad_norm": 0.14031025809151815, "learning_rate": 4.523597805889688e-06, "loss": 0.3479, "num_tokens": 5483345366.0, "step": 12632 }, { "epoch": 4.62767370494206, "grad_norm": 0.13307233439689647, "learning_rate": 4.52257563949732e-06, "loss": 0.3507, "num_tokens": 5484266496.0, "step": 12633 }, { "epoch": 4.628040122749965, "grad_norm": 0.14834546852274103, "learning_rate": 4.521554457118788e-06, "loss": 0.3729, "num_tokens": 5485014121.0, "step": 12634 }, { "epoch": 4.628406540557871, "grad_norm": 0.1522630739138066, "learning_rate": 4.520534258811583e-06, "loss": 0.3636, "num_tokens": 5485771843.0, "step": 12635 }, { "epoch": 4.628772958365777, "grad_norm": 0.14637551943434055, "learning_rate": 4.519515044633146e-06, "loss": 0.3573, "num_tokens": 5486491796.0, "step": 12636 }, { "epoch": 4.629139376173682, "grad_norm": 0.1502118808485068, "learning_rate": 4.518496814640864e-06, "loss": 0.3816, "num_tokens": 5487345902.0, "step": 12637 }, { "epoch": 4.629505793981587, "grad_norm": 0.14434130067231996, "learning_rate": 4.51747956889206e-06, "loss": 0.381, "num_tokens": 5488163666.0, "step": 12638 }, { "epoch": 4.629872211789493, "grad_norm": 0.1490704691579506, "learning_rate": 4.5164633074440094e-06, "loss": 0.3504, "num_tokens": 5488837372.0, "step": 12639 }, { "epoch": 4.6302386295973985, "grad_norm": 0.1515600736218381, "learning_rate": 4.515448030353932e-06, "loss": 0.3545, "num_tokens": 5489604049.0, "step": 12640 }, { "epoch": 4.630605047405304, "grad_norm": 0.1462303856971775, "learning_rate": 4.514433737678986e-06, "loss": 0.358, "num_tokens": 5490342956.0, "step": 12641 }, { "epoch": 4.630971465213209, "grad_norm": 0.15151272068215488, "learning_rate": 4.513420429476281e-06, "loss": 0.3798, "num_tokens": 5491125314.0, "step": 12642 }, { "epoch": 4.631337883021115, "grad_norm": 0.15074826166779756, "learning_rate": 4.512408105802867e-06, "loss": 0.3816, "num_tokens": 5491920477.0, "step": 12643 }, { "epoch": 4.63170430082902, "grad_norm": 0.15108743684881948, "learning_rate": 4.511396766715741e-06, "loss": 0.3709, "num_tokens": 5492729801.0, "step": 12644 }, { "epoch": 4.632070718636926, "grad_norm": 0.14300675245408417, "learning_rate": 4.51038641227184e-06, "loss": 0.3596, "num_tokens": 5493630667.0, "step": 12645 }, { "epoch": 4.632437136444831, "grad_norm": 0.1560577411513651, "learning_rate": 4.509377042528055e-06, "loss": 0.3317, "num_tokens": 5494293923.0, "step": 12646 }, { "epoch": 4.632803554252737, "grad_norm": 0.14558265635960652, "learning_rate": 4.5083686575412075e-06, "loss": 0.3906, "num_tokens": 5495095865.0, "step": 12647 }, { "epoch": 4.633169972060642, "grad_norm": 0.1448939822214432, "learning_rate": 4.507361257368079e-06, "loss": 0.3582, "num_tokens": 5495888642.0, "step": 12648 }, { "epoch": 4.6335363898685475, "grad_norm": 0.16333151890908035, "learning_rate": 4.506354842065385e-06, "loss": 0.3584, "num_tokens": 5496518607.0, "step": 12649 }, { "epoch": 4.633902807676453, "grad_norm": 0.16971993116487508, "learning_rate": 4.505349411689789e-06, "loss": 0.3601, "num_tokens": 5497172649.0, "step": 12650 }, { "epoch": 4.634269225484358, "grad_norm": 0.14901988531218044, "learning_rate": 4.5043449662979e-06, "loss": 0.3588, "num_tokens": 5497954658.0, "step": 12651 }, { "epoch": 4.634635643292264, "grad_norm": 0.15740033291911223, "learning_rate": 4.503341505946267e-06, "loss": 0.3722, "num_tokens": 5498706395.0, "step": 12652 }, { "epoch": 4.635002061100169, "grad_norm": 0.14554851579673872, "learning_rate": 4.502339030691387e-06, "loss": 0.3815, "num_tokens": 5499521926.0, "step": 12653 }, { "epoch": 4.635368478908075, "grad_norm": 0.1509167935116071, "learning_rate": 4.501337540589704e-06, "loss": 0.3694, "num_tokens": 5500278960.0, "step": 12654 }, { "epoch": 4.635734896715981, "grad_norm": 0.1429900744228293, "learning_rate": 4.500337035697606e-06, "loss": 0.3647, "num_tokens": 5501019969.0, "step": 12655 }, { "epoch": 4.636101314523886, "grad_norm": 0.15496220700581317, "learning_rate": 4.499337516071417e-06, "loss": 0.3521, "num_tokens": 5501831661.0, "step": 12656 }, { "epoch": 4.636467732331791, "grad_norm": 0.1522057477485423, "learning_rate": 4.498338981767417e-06, "loss": 0.357, "num_tokens": 5502615000.0, "step": 12657 }, { "epoch": 4.636834150139697, "grad_norm": 0.14961814239703655, "learning_rate": 4.4973414328418225e-06, "loss": 0.3893, "num_tokens": 5503337430.0, "step": 12658 }, { "epoch": 4.637200567947602, "grad_norm": 0.1570568315054158, "learning_rate": 4.496344869350799e-06, "loss": 0.3586, "num_tokens": 5504048602.0, "step": 12659 }, { "epoch": 4.637566985755508, "grad_norm": 0.1514758507694647, "learning_rate": 4.495349291350456e-06, "loss": 0.3851, "num_tokens": 5504799292.0, "step": 12660 }, { "epoch": 4.637933403563413, "grad_norm": 0.15246051128891105, "learning_rate": 4.494354698896843e-06, "loss": 0.3927, "num_tokens": 5505574470.0, "step": 12661 }, { "epoch": 4.6382998213713185, "grad_norm": 0.163431558662771, "learning_rate": 4.493361092045963e-06, "loss": 0.3474, "num_tokens": 5506268880.0, "step": 12662 }, { "epoch": 4.638666239179225, "grad_norm": 0.13973732669722186, "learning_rate": 4.492368470853751e-06, "loss": 0.3661, "num_tokens": 5507052097.0, "step": 12663 }, { "epoch": 4.63903265698713, "grad_norm": 0.14724748740359747, "learning_rate": 4.491376835376101e-06, "loss": 0.3824, "num_tokens": 5507870942.0, "step": 12664 }, { "epoch": 4.639399074795035, "grad_norm": 0.14609914509776548, "learning_rate": 4.4903861856688405e-06, "loss": 0.3926, "num_tokens": 5508674900.0, "step": 12665 }, { "epoch": 4.63976549260294, "grad_norm": 0.16220920415757487, "learning_rate": 4.489396521787743e-06, "loss": 0.3636, "num_tokens": 5509285445.0, "step": 12666 }, { "epoch": 4.640131910410846, "grad_norm": 0.15431058789369737, "learning_rate": 4.488407843788532e-06, "loss": 0.3643, "num_tokens": 5509990665.0, "step": 12667 }, { "epoch": 4.640498328218752, "grad_norm": 0.16125935352398565, "learning_rate": 4.48742015172687e-06, "loss": 0.3747, "num_tokens": 5510658721.0, "step": 12668 }, { "epoch": 4.640864746026657, "grad_norm": 0.16183771688497883, "learning_rate": 4.4864334456583666e-06, "loss": 0.3849, "num_tokens": 5511351781.0, "step": 12669 }, { "epoch": 4.641231163834562, "grad_norm": 0.16350058732365974, "learning_rate": 4.485447725638576e-06, "loss": 0.379, "num_tokens": 5511962767.0, "step": 12670 }, { "epoch": 4.6415975816424675, "grad_norm": 0.1545260414005929, "learning_rate": 4.484462991722994e-06, "loss": 0.381, "num_tokens": 5512728731.0, "step": 12671 }, { "epoch": 4.641963999450374, "grad_norm": 0.1602823333738115, "learning_rate": 4.483479243967068e-06, "loss": 0.3896, "num_tokens": 5513470237.0, "step": 12672 }, { "epoch": 4.642330417258279, "grad_norm": 0.1509649319380957, "learning_rate": 4.482496482426178e-06, "loss": 0.3598, "num_tokens": 5514213373.0, "step": 12673 }, { "epoch": 4.642696835066184, "grad_norm": 0.1455650514668839, "learning_rate": 4.481514707155661e-06, "loss": 0.3366, "num_tokens": 5515052304.0, "step": 12674 }, { "epoch": 4.643063252874089, "grad_norm": 0.1432768624373399, "learning_rate": 4.4805339182107885e-06, "loss": 0.359, "num_tokens": 5515818072.0, "step": 12675 }, { "epoch": 4.6434296706819955, "grad_norm": 0.14559205589344706, "learning_rate": 4.479554115646788e-06, "loss": 0.3592, "num_tokens": 5516627650.0, "step": 12676 }, { "epoch": 4.643796088489901, "grad_norm": 0.14492803569838, "learning_rate": 4.478575299518816e-06, "loss": 0.3661, "num_tokens": 5517563448.0, "step": 12677 }, { "epoch": 4.644162506297806, "grad_norm": 0.14903116617347592, "learning_rate": 4.477597469881989e-06, "loss": 0.3823, "num_tokens": 5518372466.0, "step": 12678 }, { "epoch": 4.644528924105711, "grad_norm": 0.14641547998946766, "learning_rate": 4.476620626791353e-06, "loss": 0.3709, "num_tokens": 5519188624.0, "step": 12679 }, { "epoch": 4.644895341913617, "grad_norm": 0.15118707797470676, "learning_rate": 4.475644770301911e-06, "loss": 0.3637, "num_tokens": 5519963678.0, "step": 12680 }, { "epoch": 4.645261759721523, "grad_norm": 0.14243224560333365, "learning_rate": 4.474669900468608e-06, "loss": 0.36, "num_tokens": 5520744189.0, "step": 12681 }, { "epoch": 4.645628177529428, "grad_norm": 0.15234023098661964, "learning_rate": 4.4736960173463275e-06, "loss": 0.381, "num_tokens": 5521473424.0, "step": 12682 }, { "epoch": 4.645994595337333, "grad_norm": 0.14640945488023108, "learning_rate": 4.4727231209899e-06, "loss": 0.3474, "num_tokens": 5522317567.0, "step": 12683 }, { "epoch": 4.646361013145238, "grad_norm": 0.15129857003935446, "learning_rate": 4.4717512114541065e-06, "loss": 0.3613, "num_tokens": 5523131861.0, "step": 12684 }, { "epoch": 4.6467274309531446, "grad_norm": 0.16101135717597986, "learning_rate": 4.470780288793662e-06, "loss": 0.3643, "num_tokens": 5523801215.0, "step": 12685 }, { "epoch": 4.64709384876105, "grad_norm": 0.15248601109529214, "learning_rate": 4.469810353063233e-06, "loss": 0.3692, "num_tokens": 5524591558.0, "step": 12686 }, { "epoch": 4.647460266568955, "grad_norm": 0.15480295964688784, "learning_rate": 4.468841404317429e-06, "loss": 0.3773, "num_tokens": 5525418353.0, "step": 12687 }, { "epoch": 4.647826684376861, "grad_norm": 0.13962947939559378, "learning_rate": 4.4678734426108065e-06, "loss": 0.3656, "num_tokens": 5526210324.0, "step": 12688 }, { "epoch": 4.6481931021847664, "grad_norm": 0.14244154159142178, "learning_rate": 4.466906467997861e-06, "loss": 0.353, "num_tokens": 5527012348.0, "step": 12689 }, { "epoch": 4.648559519992672, "grad_norm": 0.14506418898563986, "learning_rate": 4.465940480533035e-06, "loss": 0.3334, "num_tokens": 5527738003.0, "step": 12690 }, { "epoch": 4.648925937800577, "grad_norm": 0.1498046093860331, "learning_rate": 4.464975480270715e-06, "loss": 0.3668, "num_tokens": 5528471879.0, "step": 12691 }, { "epoch": 4.649292355608482, "grad_norm": 0.15759316667665896, "learning_rate": 4.464011467265234e-06, "loss": 0.3603, "num_tokens": 5529235950.0, "step": 12692 }, { "epoch": 4.649658773416388, "grad_norm": 0.14507864280110247, "learning_rate": 4.463048441570868e-06, "loss": 0.3402, "num_tokens": 5530022898.0, "step": 12693 }, { "epoch": 4.650025191224294, "grad_norm": 0.13845829901290366, "learning_rate": 4.462086403241835e-06, "loss": 0.3915, "num_tokens": 5530858451.0, "step": 12694 }, { "epoch": 4.650391609032199, "grad_norm": 0.14726248093746278, "learning_rate": 4.461125352332304e-06, "loss": 0.3845, "num_tokens": 5531708309.0, "step": 12695 }, { "epoch": 4.650758026840104, "grad_norm": 0.16058622066588402, "learning_rate": 4.460165288896381e-06, "loss": 0.3767, "num_tokens": 5532386448.0, "step": 12696 }, { "epoch": 4.65112444464801, "grad_norm": 0.16516404488522346, "learning_rate": 4.459206212988115e-06, "loss": 0.3643, "num_tokens": 5533074908.0, "step": 12697 }, { "epoch": 4.6514908624559155, "grad_norm": 0.15128710818920976, "learning_rate": 4.458248124661513e-06, "loss": 0.3611, "num_tokens": 5533740301.0, "step": 12698 }, { "epoch": 4.651857280263821, "grad_norm": 0.14935343563892578, "learning_rate": 4.457291023970513e-06, "loss": 0.3632, "num_tokens": 5534537944.0, "step": 12699 }, { "epoch": 4.652223698071726, "grad_norm": 0.1455077803283232, "learning_rate": 4.456334910969e-06, "loss": 0.3605, "num_tokens": 5535312250.0, "step": 12700 }, { "epoch": 4.652590115879632, "grad_norm": 0.15279625614858852, "learning_rate": 4.455379785710808e-06, "loss": 0.3524, "num_tokens": 5536140358.0, "step": 12701 }, { "epoch": 4.652956533687537, "grad_norm": 0.1339483599026399, "learning_rate": 4.45442564824971e-06, "loss": 0.3716, "num_tokens": 5537001404.0, "step": 12702 }, { "epoch": 4.653322951495443, "grad_norm": 0.14804923993344374, "learning_rate": 4.4534724986394275e-06, "loss": 0.3712, "num_tokens": 5537798541.0, "step": 12703 }, { "epoch": 4.653689369303348, "grad_norm": 0.15580688044436186, "learning_rate": 4.4525203369336245e-06, "loss": 0.366, "num_tokens": 5538472664.0, "step": 12704 }, { "epoch": 4.654055787111254, "grad_norm": 0.1636491722690913, "learning_rate": 4.451569163185909e-06, "loss": 0.3704, "num_tokens": 5539120610.0, "step": 12705 }, { "epoch": 4.654422204919159, "grad_norm": 0.1561243588407084, "learning_rate": 4.450618977449836e-06, "loss": 0.3506, "num_tokens": 5539792651.0, "step": 12706 }, { "epoch": 4.6547886227270645, "grad_norm": 0.1463791968969207, "learning_rate": 4.4496697797789e-06, "loss": 0.375, "num_tokens": 5540608042.0, "step": 12707 }, { "epoch": 4.65515504053497, "grad_norm": 0.13676083920893695, "learning_rate": 4.448721570226541e-06, "loss": 0.3435, "num_tokens": 5541490061.0, "step": 12708 }, { "epoch": 4.655521458342875, "grad_norm": 0.14510053104122345, "learning_rate": 4.447774348846152e-06, "loss": 0.3665, "num_tokens": 5542313034.0, "step": 12709 }, { "epoch": 4.655887876150781, "grad_norm": 0.15486958174122434, "learning_rate": 4.446828115691058e-06, "loss": 0.366, "num_tokens": 5543020611.0, "step": 12710 }, { "epoch": 4.656254293958686, "grad_norm": 0.15415650446162504, "learning_rate": 4.4458828708145365e-06, "loss": 0.3751, "num_tokens": 5543738870.0, "step": 12711 }, { "epoch": 4.656620711766592, "grad_norm": 0.14791454067677787, "learning_rate": 4.444938614269803e-06, "loss": 0.365, "num_tokens": 5544586568.0, "step": 12712 }, { "epoch": 4.656987129574498, "grad_norm": 0.13403086269716544, "learning_rate": 4.4439953461100245e-06, "loss": 0.3378, "num_tokens": 5545397371.0, "step": 12713 }, { "epoch": 4.657353547382403, "grad_norm": 0.1521806751082999, "learning_rate": 4.443053066388308e-06, "loss": 0.3706, "num_tokens": 5546130864.0, "step": 12714 }, { "epoch": 4.657719965190308, "grad_norm": 0.14608088528631213, "learning_rate": 4.442111775157704e-06, "loss": 0.3615, "num_tokens": 5546949939.0, "step": 12715 }, { "epoch": 4.6580863829982135, "grad_norm": 0.13584863819148302, "learning_rate": 4.441171472471212e-06, "loss": 0.3501, "num_tokens": 5547740356.0, "step": 12716 }, { "epoch": 4.658452800806119, "grad_norm": 0.15606245494659238, "learning_rate": 4.440232158381772e-06, "loss": 0.3636, "num_tokens": 5548484892.0, "step": 12717 }, { "epoch": 4.658819218614025, "grad_norm": 0.14143150689835046, "learning_rate": 4.4392938329422704e-06, "loss": 0.3482, "num_tokens": 5549288440.0, "step": 12718 }, { "epoch": 4.65918563642193, "grad_norm": 0.14349792109872206, "learning_rate": 4.438356496205532e-06, "loss": 0.3628, "num_tokens": 5550061825.0, "step": 12719 }, { "epoch": 4.659552054229835, "grad_norm": 0.15317273790207087, "learning_rate": 4.437420148224335e-06, "loss": 0.3794, "num_tokens": 5550778837.0, "step": 12720 }, { "epoch": 4.659918472037741, "grad_norm": 0.1442607016095203, "learning_rate": 4.436484789051396e-06, "loss": 0.3716, "num_tokens": 5551541109.0, "step": 12721 }, { "epoch": 4.660284889845647, "grad_norm": 0.14056304471952213, "learning_rate": 4.435550418739379e-06, "loss": 0.3619, "num_tokens": 5552449829.0, "step": 12722 }, { "epoch": 4.660651307653552, "grad_norm": 0.13798617769659682, "learning_rate": 4.434617037340891e-06, "loss": 0.3946, "num_tokens": 5553260501.0, "step": 12723 }, { "epoch": 4.661017725461457, "grad_norm": 0.15042996547101317, "learning_rate": 4.4336846449084795e-06, "loss": 0.3858, "num_tokens": 5554126842.0, "step": 12724 }, { "epoch": 4.661384143269363, "grad_norm": 0.148890284000906, "learning_rate": 4.432753241494645e-06, "loss": 0.3744, "num_tokens": 5554926706.0, "step": 12725 }, { "epoch": 4.661750561077269, "grad_norm": 0.15062334314598866, "learning_rate": 4.431822827151824e-06, "loss": 0.3949, "num_tokens": 5555671367.0, "step": 12726 }, { "epoch": 4.662116978885174, "grad_norm": 0.14931560415011905, "learning_rate": 4.430893401932403e-06, "loss": 0.3578, "num_tokens": 5556394296.0, "step": 12727 }, { "epoch": 4.662483396693079, "grad_norm": 0.16221094033261732, "learning_rate": 4.429964965888708e-06, "loss": 0.3586, "num_tokens": 5557088878.0, "step": 12728 }, { "epoch": 4.6628498145009845, "grad_norm": 0.1472879913498325, "learning_rate": 4.429037519073016e-06, "loss": 0.35, "num_tokens": 5557813982.0, "step": 12729 }, { "epoch": 4.663216232308891, "grad_norm": 0.15656654786470778, "learning_rate": 4.4281110615375375e-06, "loss": 0.3765, "num_tokens": 5558529538.0, "step": 12730 }, { "epoch": 4.663582650116796, "grad_norm": 0.14800673139369558, "learning_rate": 4.427185593334441e-06, "loss": 0.3682, "num_tokens": 5559322378.0, "step": 12731 }, { "epoch": 4.663949067924701, "grad_norm": 0.14525522037152358, "learning_rate": 4.426261114515827e-06, "loss": 0.3699, "num_tokens": 5560095076.0, "step": 12732 }, { "epoch": 4.664315485732606, "grad_norm": 0.15886208651601094, "learning_rate": 4.42533762513375e-06, "loss": 0.3707, "num_tokens": 5560841080.0, "step": 12733 }, { "epoch": 4.664681903540512, "grad_norm": 0.15611443480575454, "learning_rate": 4.4244151252401974e-06, "loss": 0.3829, "num_tokens": 5561515942.0, "step": 12734 }, { "epoch": 4.665048321348418, "grad_norm": 0.14607335345643255, "learning_rate": 4.423493614887113e-06, "loss": 0.3783, "num_tokens": 5562246118.0, "step": 12735 }, { "epoch": 4.665414739156323, "grad_norm": 0.15810597885848435, "learning_rate": 4.42257309412638e-06, "loss": 0.3793, "num_tokens": 5562979915.0, "step": 12736 }, { "epoch": 4.665781156964228, "grad_norm": 0.1502099678326905, "learning_rate": 4.421653563009826e-06, "loss": 0.3529, "num_tokens": 5563813870.0, "step": 12737 }, { "epoch": 4.666147574772134, "grad_norm": 0.13731976636955145, "learning_rate": 4.42073502158922e-06, "loss": 0.3713, "num_tokens": 5564594248.0, "step": 12738 }, { "epoch": 4.66651399258004, "grad_norm": 0.15589128413297837, "learning_rate": 4.419817469916276e-06, "loss": 0.3911, "num_tokens": 5565318358.0, "step": 12739 }, { "epoch": 4.666880410387945, "grad_norm": 0.1510264298137289, "learning_rate": 4.4189009080426605e-06, "loss": 0.3691, "num_tokens": 5566056414.0, "step": 12740 }, { "epoch": 4.66724682819585, "grad_norm": 0.16265050756638982, "learning_rate": 4.417985336019969e-06, "loss": 0.363, "num_tokens": 5566797647.0, "step": 12741 }, { "epoch": 4.667613246003755, "grad_norm": 0.1431901530867825, "learning_rate": 4.417070753899756e-06, "loss": 0.3617, "num_tokens": 5567542911.0, "step": 12742 }, { "epoch": 4.6679796638116615, "grad_norm": 0.15695995755846584, "learning_rate": 4.416157161733515e-06, "loss": 0.4241, "num_tokens": 5568297858.0, "step": 12743 }, { "epoch": 4.668346081619567, "grad_norm": 0.15826331936782564, "learning_rate": 4.415244559572681e-06, "loss": 0.3887, "num_tokens": 5569039389.0, "step": 12744 }, { "epoch": 4.668712499427472, "grad_norm": 0.1460142102134367, "learning_rate": 4.414332947468634e-06, "loss": 0.3481, "num_tokens": 5569794939.0, "step": 12745 }, { "epoch": 4.669078917235377, "grad_norm": 0.1422141415164261, "learning_rate": 4.413422325472704e-06, "loss": 0.3564, "num_tokens": 5570646108.0, "step": 12746 }, { "epoch": 4.669445335043283, "grad_norm": 0.13862995940204895, "learning_rate": 4.412512693636154e-06, "loss": 0.3647, "num_tokens": 5571460236.0, "step": 12747 }, { "epoch": 4.669811752851189, "grad_norm": 0.16061414721883155, "learning_rate": 4.411604052010203e-06, "loss": 0.3835, "num_tokens": 5572184297.0, "step": 12748 }, { "epoch": 4.670178170659094, "grad_norm": 0.14850198502706644, "learning_rate": 4.410696400646006e-06, "loss": 0.3626, "num_tokens": 5572945506.0, "step": 12749 }, { "epoch": 4.670544588466999, "grad_norm": 0.15229996135279458, "learning_rate": 4.409789739594672e-06, "loss": 0.3696, "num_tokens": 5573659692.0, "step": 12750 }, { "epoch": 4.670911006274905, "grad_norm": 0.15171522888616532, "learning_rate": 4.408884068907242e-06, "loss": 0.3611, "num_tokens": 5574342121.0, "step": 12751 }, { "epoch": 4.6712774240828105, "grad_norm": 0.1567744371282184, "learning_rate": 4.407979388634709e-06, "loss": 0.3428, "num_tokens": 5575089639.0, "step": 12752 }, { "epoch": 4.671643841890716, "grad_norm": 0.158446531508956, "learning_rate": 4.407075698828007e-06, "loss": 0.3881, "num_tokens": 5575824365.0, "step": 12753 }, { "epoch": 4.672010259698621, "grad_norm": 0.14252000530496306, "learning_rate": 4.406172999538018e-06, "loss": 0.3993, "num_tokens": 5576615105.0, "step": 12754 }, { "epoch": 4.672376677506527, "grad_norm": 0.1425485601323159, "learning_rate": 4.405271290815562e-06, "loss": 0.3665, "num_tokens": 5577390705.0, "step": 12755 }, { "epoch": 4.672743095314432, "grad_norm": 0.14653994818056473, "learning_rate": 4.404370572711412e-06, "loss": 0.354, "num_tokens": 5578146628.0, "step": 12756 }, { "epoch": 4.673109513122338, "grad_norm": 0.15286819417953468, "learning_rate": 4.403470845276277e-06, "loss": 0.3763, "num_tokens": 5578954145.0, "step": 12757 }, { "epoch": 4.673475930930243, "grad_norm": 0.1439020801195004, "learning_rate": 4.402572108560812e-06, "loss": 0.3622, "num_tokens": 5579747091.0, "step": 12758 }, { "epoch": 4.673842348738148, "grad_norm": 0.13796628030553215, "learning_rate": 4.401674362615622e-06, "loss": 0.3489, "num_tokens": 5580507867.0, "step": 12759 }, { "epoch": 4.674208766546054, "grad_norm": 0.18216732084231207, "learning_rate": 4.400777607491249e-06, "loss": 0.3949, "num_tokens": 5581144839.0, "step": 12760 }, { "epoch": 4.67457518435396, "grad_norm": 0.16126969165100213, "learning_rate": 4.399881843238183e-06, "loss": 0.4014, "num_tokens": 5581891438.0, "step": 12761 }, { "epoch": 4.674941602161865, "grad_norm": 0.14926581091491892, "learning_rate": 4.3989870699068565e-06, "loss": 0.3611, "num_tokens": 5582689160.0, "step": 12762 }, { "epoch": 4.675308019969771, "grad_norm": 0.15908905892270894, "learning_rate": 4.398093287547644e-06, "loss": 0.3953, "num_tokens": 5583647289.0, "step": 12763 }, { "epoch": 4.675674437777676, "grad_norm": 0.152057710235327, "learning_rate": 4.397200496210875e-06, "loss": 0.4309, "num_tokens": 5584400591.0, "step": 12764 }, { "epoch": 4.6760408555855815, "grad_norm": 0.16250902809862916, "learning_rate": 4.396308695946811e-06, "loss": 0.3656, "num_tokens": 5585131010.0, "step": 12765 }, { "epoch": 4.676407273393487, "grad_norm": 0.15926048871401918, "learning_rate": 4.395417886805663e-06, "loss": 0.3599, "num_tokens": 5585818876.0, "step": 12766 }, { "epoch": 4.676773691201392, "grad_norm": 0.1499977230459967, "learning_rate": 4.394528068837583e-06, "loss": 0.3903, "num_tokens": 5586568929.0, "step": 12767 }, { "epoch": 4.677140109009298, "grad_norm": 0.1496088923360868, "learning_rate": 4.39363924209267e-06, "loss": 0.3574, "num_tokens": 5587300955.0, "step": 12768 }, { "epoch": 4.677506526817203, "grad_norm": 0.1523430795020997, "learning_rate": 4.392751406620969e-06, "loss": 0.3546, "num_tokens": 5588043784.0, "step": 12769 }, { "epoch": 4.677872944625109, "grad_norm": 0.1544814562140545, "learning_rate": 4.391864562472465e-06, "loss": 0.386, "num_tokens": 5588776288.0, "step": 12770 }, { "epoch": 4.678239362433014, "grad_norm": 0.15032913348023333, "learning_rate": 4.390978709697091e-06, "loss": 0.3641, "num_tokens": 5589589626.0, "step": 12771 }, { "epoch": 4.67860578024092, "grad_norm": 0.1413068511078632, "learning_rate": 4.390093848344723e-06, "loss": 0.3578, "num_tokens": 5590297189.0, "step": 12772 }, { "epoch": 4.678972198048825, "grad_norm": 0.147044174271323, "learning_rate": 4.389209978465177e-06, "loss": 0.3466, "num_tokens": 5591151142.0, "step": 12773 }, { "epoch": 4.6793386158567305, "grad_norm": 0.15048834195891594, "learning_rate": 4.3883271001082165e-06, "loss": 0.3537, "num_tokens": 5591904604.0, "step": 12774 }, { "epoch": 4.679705033664636, "grad_norm": 0.13832447791869806, "learning_rate": 4.3874452133235515e-06, "loss": 0.3577, "num_tokens": 5592761651.0, "step": 12775 }, { "epoch": 4.680071451472542, "grad_norm": 0.16446713854580144, "learning_rate": 4.386564318160834e-06, "loss": 0.3871, "num_tokens": 5593472490.0, "step": 12776 }, { "epoch": 4.680437869280447, "grad_norm": 0.14815611155675837, "learning_rate": 4.385684414669661e-06, "loss": 0.3496, "num_tokens": 5594260953.0, "step": 12777 }, { "epoch": 4.680804287088352, "grad_norm": 0.14093984273311097, "learning_rate": 4.384805502899572e-06, "loss": 0.3364, "num_tokens": 5595077725.0, "step": 12778 }, { "epoch": 4.681170704896258, "grad_norm": 0.14855841805802855, "learning_rate": 4.3839275829000496e-06, "loss": 0.3602, "num_tokens": 5595853744.0, "step": 12779 }, { "epoch": 4.681537122704164, "grad_norm": 0.15910282831847977, "learning_rate": 4.383050654720522e-06, "loss": 0.3755, "num_tokens": 5596523072.0, "step": 12780 }, { "epoch": 4.681903540512069, "grad_norm": 0.13877490133921772, "learning_rate": 4.382174718410367e-06, "loss": 0.3807, "num_tokens": 5597368222.0, "step": 12781 }, { "epoch": 4.682269958319974, "grad_norm": 0.1438577581402033, "learning_rate": 4.3812997740188966e-06, "loss": 0.3526, "num_tokens": 5598120275.0, "step": 12782 }, { "epoch": 4.6826363761278795, "grad_norm": 0.1403306520319454, "learning_rate": 4.380425821595373e-06, "loss": 0.3599, "num_tokens": 5598934285.0, "step": 12783 }, { "epoch": 4.683002793935785, "grad_norm": 0.15808253736373912, "learning_rate": 4.379552861189002e-06, "loss": 0.3793, "num_tokens": 5599584404.0, "step": 12784 }, { "epoch": 4.683369211743691, "grad_norm": 0.1765414360625936, "learning_rate": 4.378680892848934e-06, "loss": 0.3435, "num_tokens": 5600428870.0, "step": 12785 }, { "epoch": 4.683735629551596, "grad_norm": 0.1652465632065458, "learning_rate": 4.377809916624263e-06, "loss": 0.3751, "num_tokens": 5601207838.0, "step": 12786 }, { "epoch": 4.684102047359501, "grad_norm": 0.14548944023867014, "learning_rate": 4.376939932564025e-06, "loss": 0.3521, "num_tokens": 5601931091.0, "step": 12787 }, { "epoch": 4.684468465167408, "grad_norm": 0.15137747246570946, "learning_rate": 4.376070940717203e-06, "loss": 0.3819, "num_tokens": 5602753386.0, "step": 12788 }, { "epoch": 4.684834882975313, "grad_norm": 0.13682721231327433, "learning_rate": 4.37520294113272e-06, "loss": 0.3793, "num_tokens": 5603617778.0, "step": 12789 }, { "epoch": 4.685201300783218, "grad_norm": 0.14817574798146607, "learning_rate": 4.37433593385945e-06, "loss": 0.3737, "num_tokens": 5604438348.0, "step": 12790 }, { "epoch": 4.685567718591123, "grad_norm": 0.16326844872421925, "learning_rate": 4.373469918946204e-06, "loss": 0.3818, "num_tokens": 5605121617.0, "step": 12791 }, { "epoch": 4.685934136399029, "grad_norm": 0.14884320755504388, "learning_rate": 4.372604896441743e-06, "loss": 0.3409, "num_tokens": 5605897553.0, "step": 12792 }, { "epoch": 4.686300554206935, "grad_norm": 0.15517669957008695, "learning_rate": 4.37174086639477e-06, "loss": 0.342, "num_tokens": 5606565716.0, "step": 12793 }, { "epoch": 4.68666697201484, "grad_norm": 0.1373903698309957, "learning_rate": 4.370877828853929e-06, "loss": 0.3662, "num_tokens": 5607404875.0, "step": 12794 }, { "epoch": 4.687033389822745, "grad_norm": 0.1428910537958598, "learning_rate": 4.370015783867811e-06, "loss": 0.3863, "num_tokens": 5608326540.0, "step": 12795 }, { "epoch": 4.687399807630651, "grad_norm": 0.1470108140429183, "learning_rate": 4.369154731484951e-06, "loss": 0.3373, "num_tokens": 5609014635.0, "step": 12796 }, { "epoch": 4.687766225438557, "grad_norm": 0.15774744421259246, "learning_rate": 4.368294671753829e-06, "loss": 0.356, "num_tokens": 5609717920.0, "step": 12797 }, { "epoch": 4.688132643246462, "grad_norm": 0.15487123940108294, "learning_rate": 4.36743560472287e-06, "loss": 0.3791, "num_tokens": 5610388266.0, "step": 12798 }, { "epoch": 4.688499061054367, "grad_norm": 0.13746083861814448, "learning_rate": 4.366577530440437e-06, "loss": 0.3841, "num_tokens": 5611340932.0, "step": 12799 }, { "epoch": 4.688865478862272, "grad_norm": 0.14783901915222897, "learning_rate": 4.365720448954844e-06, "loss": 0.3427, "num_tokens": 5612083153.0, "step": 12800 }, { "epoch": 4.6892318966701785, "grad_norm": 0.1432795442170292, "learning_rate": 4.364864360314344e-06, "loss": 0.3514, "num_tokens": 5612832709.0, "step": 12801 }, { "epoch": 4.689598314478084, "grad_norm": 0.1547917818291569, "learning_rate": 4.364009264567138e-06, "loss": 0.376, "num_tokens": 5613576053.0, "step": 12802 }, { "epoch": 4.689964732285989, "grad_norm": 0.15790697038017065, "learning_rate": 4.363155161761372e-06, "loss": 0.3925, "num_tokens": 5614406143.0, "step": 12803 }, { "epoch": 4.690331150093894, "grad_norm": 0.13894285683046775, "learning_rate": 4.36230205194513e-06, "loss": 0.3672, "num_tokens": 5615171348.0, "step": 12804 }, { "epoch": 4.6906975679018, "grad_norm": 0.16894260416416282, "learning_rate": 4.361449935166444e-06, "loss": 0.3598, "num_tokens": 5615919822.0, "step": 12805 }, { "epoch": 4.691063985709706, "grad_norm": 0.15739073980073903, "learning_rate": 4.3605988114732935e-06, "loss": 0.4001, "num_tokens": 5616687179.0, "step": 12806 }, { "epoch": 4.691430403517611, "grad_norm": 0.14493161420662093, "learning_rate": 4.359748680913593e-06, "loss": 0.3474, "num_tokens": 5617418652.0, "step": 12807 }, { "epoch": 4.691796821325516, "grad_norm": 0.14263477473695094, "learning_rate": 4.358899543535212e-06, "loss": 0.3734, "num_tokens": 5618260070.0, "step": 12808 }, { "epoch": 4.692163239133422, "grad_norm": 0.1608560742785973, "learning_rate": 4.358051399385958e-06, "loss": 0.362, "num_tokens": 5618901861.0, "step": 12809 }, { "epoch": 4.6925296569413275, "grad_norm": 0.13254347206150177, "learning_rate": 4.357204248513576e-06, "loss": 0.3668, "num_tokens": 5619758829.0, "step": 12810 }, { "epoch": 4.692896074749233, "grad_norm": 0.15236306660600143, "learning_rate": 4.356358090965771e-06, "loss": 0.3773, "num_tokens": 5620503377.0, "step": 12811 }, { "epoch": 4.693262492557138, "grad_norm": 0.1555120092315621, "learning_rate": 4.35551292679018e-06, "loss": 0.3654, "num_tokens": 5621213288.0, "step": 12812 }, { "epoch": 4.693628910365044, "grad_norm": 0.13597374463216605, "learning_rate": 4.354668756034385e-06, "loss": 0.3496, "num_tokens": 5622084076.0, "step": 12813 }, { "epoch": 4.693995328172949, "grad_norm": 0.13726321701784996, "learning_rate": 4.353825578745919e-06, "loss": 0.3839, "num_tokens": 5622952393.0, "step": 12814 }, { "epoch": 4.694361745980855, "grad_norm": 0.15373028609268516, "learning_rate": 4.352983394972254e-06, "loss": 0.3739, "num_tokens": 5623669273.0, "step": 12815 }, { "epoch": 4.69472816378876, "grad_norm": 0.15366331460983693, "learning_rate": 4.352142204760803e-06, "loss": 0.3505, "num_tokens": 5624349977.0, "step": 12816 }, { "epoch": 4.695094581596665, "grad_norm": 0.1567315737789662, "learning_rate": 4.3513020081589295e-06, "loss": 0.364, "num_tokens": 5625031593.0, "step": 12817 }, { "epoch": 4.695460999404571, "grad_norm": 0.15122911740878633, "learning_rate": 4.350462805213935e-06, "loss": 0.3638, "num_tokens": 5625740796.0, "step": 12818 }, { "epoch": 4.6958274172124765, "grad_norm": 0.15920345753666154, "learning_rate": 4.349624595973073e-06, "loss": 0.3914, "num_tokens": 5626530396.0, "step": 12819 }, { "epoch": 4.696193835020382, "grad_norm": 0.15179692288218857, "learning_rate": 4.348787380483533e-06, "loss": 0.3611, "num_tokens": 5627293465.0, "step": 12820 }, { "epoch": 4.696560252828288, "grad_norm": 0.14748668331652023, "learning_rate": 4.347951158792456e-06, "loss": 0.3605, "num_tokens": 5628083614.0, "step": 12821 }, { "epoch": 4.696926670636193, "grad_norm": 0.15862682706524325, "learning_rate": 4.34711593094692e-06, "loss": 0.3782, "num_tokens": 5628858502.0, "step": 12822 }, { "epoch": 4.697293088444098, "grad_norm": 0.1671715637859686, "learning_rate": 4.3462816969939465e-06, "loss": 0.3802, "num_tokens": 5629505807.0, "step": 12823 }, { "epoch": 4.697659506252004, "grad_norm": 0.14736683270305337, "learning_rate": 4.34544845698051e-06, "loss": 0.3686, "num_tokens": 5630301567.0, "step": 12824 }, { "epoch": 4.698025924059909, "grad_norm": 0.15143350058074861, "learning_rate": 4.344616210953519e-06, "loss": 0.3621, "num_tokens": 5631038990.0, "step": 12825 }, { "epoch": 4.698392341867815, "grad_norm": 0.1439880411547939, "learning_rate": 4.343784958959837e-06, "loss": 0.3566, "num_tokens": 5631920482.0, "step": 12826 }, { "epoch": 4.69875875967572, "grad_norm": 0.13629562457407643, "learning_rate": 4.342954701046261e-06, "loss": 0.3897, "num_tokens": 5632866092.0, "step": 12827 }, { "epoch": 4.699125177483626, "grad_norm": 0.14094418927583666, "learning_rate": 4.342125437259536e-06, "loss": 0.3552, "num_tokens": 5633596734.0, "step": 12828 }, { "epoch": 4.699491595291531, "grad_norm": 0.16250698258609644, "learning_rate": 4.341297167646353e-06, "loss": 0.3409, "num_tokens": 5634234292.0, "step": 12829 }, { "epoch": 4.699858013099437, "grad_norm": 0.14967539374789826, "learning_rate": 4.3404698922533415e-06, "loss": 0.3542, "num_tokens": 5635052282.0, "step": 12830 }, { "epoch": 4.700224430907342, "grad_norm": 0.13770558275159323, "learning_rate": 4.339643611127081e-06, "loss": 0.3789, "num_tokens": 5635782138.0, "step": 12831 }, { "epoch": 4.7005908487152475, "grad_norm": 0.15745973617735248, "learning_rate": 4.338818324314094e-06, "loss": 0.3772, "num_tokens": 5636538608.0, "step": 12832 }, { "epoch": 4.700957266523153, "grad_norm": 0.14234827908551606, "learning_rate": 4.337994031860846e-06, "loss": 0.3722, "num_tokens": 5637404997.0, "step": 12833 }, { "epoch": 4.701323684331059, "grad_norm": 0.14725460455015554, "learning_rate": 4.337170733813745e-06, "loss": 0.3749, "num_tokens": 5638157814.0, "step": 12834 }, { "epoch": 4.701690102138964, "grad_norm": 0.14452725729451807, "learning_rate": 4.336348430219143e-06, "loss": 0.3852, "num_tokens": 5638961699.0, "step": 12835 }, { "epoch": 4.702056519946869, "grad_norm": 0.15954263747793238, "learning_rate": 4.33552712112334e-06, "loss": 0.3811, "num_tokens": 5639636626.0, "step": 12836 }, { "epoch": 4.702422937754775, "grad_norm": 0.1571211849727604, "learning_rate": 4.3347068065725745e-06, "loss": 0.3853, "num_tokens": 5640426318.0, "step": 12837 }, { "epoch": 4.702789355562681, "grad_norm": 0.1606372192393318, "learning_rate": 4.333887486613033e-06, "loss": 0.3704, "num_tokens": 5641116260.0, "step": 12838 }, { "epoch": 4.703155773370586, "grad_norm": 0.1330928626250722, "learning_rate": 4.3330691612908486e-06, "loss": 0.3665, "num_tokens": 5641982735.0, "step": 12839 }, { "epoch": 4.703522191178491, "grad_norm": 0.14117981334212865, "learning_rate": 4.332251830652089e-06, "loss": 0.3751, "num_tokens": 5642785857.0, "step": 12840 }, { "epoch": 4.7038886089863965, "grad_norm": 0.13868462860997144, "learning_rate": 4.331435494742774e-06, "loss": 0.3905, "num_tokens": 5643639850.0, "step": 12841 }, { "epoch": 4.704255026794302, "grad_norm": 0.15188811862786541, "learning_rate": 4.330620153608866e-06, "loss": 0.3722, "num_tokens": 5644421727.0, "step": 12842 }, { "epoch": 4.704621444602208, "grad_norm": 0.15325502087538345, "learning_rate": 4.329805807296269e-06, "loss": 0.3815, "num_tokens": 5645159837.0, "step": 12843 }, { "epoch": 4.704987862410113, "grad_norm": 0.15154161898396012, "learning_rate": 4.32899245585083e-06, "loss": 0.3712, "num_tokens": 5645896325.0, "step": 12844 }, { "epoch": 4.705354280218018, "grad_norm": 0.16600630755751736, "learning_rate": 4.328180099318347e-06, "loss": 0.381, "num_tokens": 5646635923.0, "step": 12845 }, { "epoch": 4.7057206980259245, "grad_norm": 0.14671813455936342, "learning_rate": 4.327368737744555e-06, "loss": 0.3509, "num_tokens": 5647455433.0, "step": 12846 }, { "epoch": 4.70608711583383, "grad_norm": 0.16323373856474188, "learning_rate": 4.326558371175137e-06, "loss": 0.363, "num_tokens": 5648139745.0, "step": 12847 }, { "epoch": 4.706453533641735, "grad_norm": 0.14325304879145262, "learning_rate": 4.325748999655715e-06, "loss": 0.4005, "num_tokens": 5649010756.0, "step": 12848 }, { "epoch": 4.70681995144964, "grad_norm": 0.141563698295925, "learning_rate": 4.3249406232318596e-06, "loss": 0.368, "num_tokens": 5649843144.0, "step": 12849 }, { "epoch": 4.7071863692575455, "grad_norm": 0.15918761058913536, "learning_rate": 4.324133241949085e-06, "loss": 0.3943, "num_tokens": 5650504994.0, "step": 12850 }, { "epoch": 4.707552787065452, "grad_norm": 0.1487651302114573, "learning_rate": 4.323326855852846e-06, "loss": 0.3668, "num_tokens": 5651361373.0, "step": 12851 }, { "epoch": 4.707919204873357, "grad_norm": 0.1511856687169729, "learning_rate": 4.3225214649885455e-06, "loss": 0.3515, "num_tokens": 5652166700.0, "step": 12852 }, { "epoch": 4.708285622681262, "grad_norm": 0.15995418531900027, "learning_rate": 4.32171706940153e-06, "loss": 0.3855, "num_tokens": 5652842104.0, "step": 12853 }, { "epoch": 4.708652040489167, "grad_norm": 0.16392705708126407, "learning_rate": 4.320913669137087e-06, "loss": 0.3817, "num_tokens": 5653459601.0, "step": 12854 }, { "epoch": 4.7090184582970736, "grad_norm": 0.16432384080973783, "learning_rate": 4.3201112642404495e-06, "loss": 0.3605, "num_tokens": 5654236931.0, "step": 12855 }, { "epoch": 4.709384876104979, "grad_norm": 0.14177850630382507, "learning_rate": 4.319309854756795e-06, "loss": 0.3515, "num_tokens": 5654965848.0, "step": 12856 }, { "epoch": 4.709751293912884, "grad_norm": 0.16221712280346862, "learning_rate": 4.318509440731242e-06, "loss": 0.3607, "num_tokens": 5655649970.0, "step": 12857 }, { "epoch": 4.710117711720789, "grad_norm": 0.1707111697074926, "learning_rate": 4.317710022208858e-06, "loss": 0.3799, "num_tokens": 5656224194.0, "step": 12858 }, { "epoch": 4.710484129528695, "grad_norm": 0.15443948464052892, "learning_rate": 4.316911599234651e-06, "loss": 0.3832, "num_tokens": 5657070566.0, "step": 12859 }, { "epoch": 4.710850547336601, "grad_norm": 0.1574066858028217, "learning_rate": 4.316114171853576e-06, "loss": 0.3875, "num_tokens": 5657786639.0, "step": 12860 }, { "epoch": 4.711216965144506, "grad_norm": 0.16332866652208722, "learning_rate": 4.315317740110528e-06, "loss": 0.3563, "num_tokens": 5658548938.0, "step": 12861 }, { "epoch": 4.711583382952411, "grad_norm": 0.14438926578583633, "learning_rate": 4.314522304050347e-06, "loss": 0.3639, "num_tokens": 5659382467.0, "step": 12862 }, { "epoch": 4.711949800760317, "grad_norm": 0.1645391615172017, "learning_rate": 4.313727863717816e-06, "loss": 0.3577, "num_tokens": 5660054578.0, "step": 12863 }, { "epoch": 4.712316218568223, "grad_norm": 0.1530548386717708, "learning_rate": 4.312934419157669e-06, "loss": 0.3606, "num_tokens": 5660857070.0, "step": 12864 }, { "epoch": 4.712682636376128, "grad_norm": 0.14661758114801382, "learning_rate": 4.312141970414573e-06, "loss": 0.4055, "num_tokens": 5661663854.0, "step": 12865 }, { "epoch": 4.713049054184033, "grad_norm": 0.1446869306137916, "learning_rate": 4.311350517533149e-06, "loss": 0.3701, "num_tokens": 5662445316.0, "step": 12866 }, { "epoch": 4.713415471991938, "grad_norm": 0.14798923955394988, "learning_rate": 4.3105600605579536e-06, "loss": 0.3466, "num_tokens": 5663273819.0, "step": 12867 }, { "epoch": 4.7137818897998445, "grad_norm": 0.1625372375093646, "learning_rate": 4.3097705995334906e-06, "loss": 0.3576, "num_tokens": 5663927964.0, "step": 12868 }, { "epoch": 4.71414830760775, "grad_norm": 0.13475252544372013, "learning_rate": 4.308982134504214e-06, "loss": 0.3638, "num_tokens": 5664777146.0, "step": 12869 }, { "epoch": 4.714514725415655, "grad_norm": 0.15209844088732424, "learning_rate": 4.30819466551451e-06, "loss": 0.3793, "num_tokens": 5665494273.0, "step": 12870 }, { "epoch": 4.714881143223561, "grad_norm": 0.17486574854270664, "learning_rate": 4.307408192608717e-06, "loss": 0.3615, "num_tokens": 5666151001.0, "step": 12871 }, { "epoch": 4.715247561031466, "grad_norm": 0.13990204129981326, "learning_rate": 4.3066227158311146e-06, "loss": 0.3596, "num_tokens": 5666980519.0, "step": 12872 }, { "epoch": 4.715613978839372, "grad_norm": 0.1510442863617424, "learning_rate": 4.3058382352259285e-06, "loss": 0.389, "num_tokens": 5667747868.0, "step": 12873 }, { "epoch": 4.715980396647277, "grad_norm": 0.1540169773997602, "learning_rate": 4.305054750837324e-06, "loss": 0.3473, "num_tokens": 5668449264.0, "step": 12874 }, { "epoch": 4.716346814455182, "grad_norm": 0.14162340500469753, "learning_rate": 4.304272262709414e-06, "loss": 0.3498, "num_tokens": 5669258941.0, "step": 12875 }, { "epoch": 4.716713232263088, "grad_norm": 0.14588552819081704, "learning_rate": 4.303490770886254e-06, "loss": 0.3575, "num_tokens": 5670070880.0, "step": 12876 }, { "epoch": 4.7170796500709935, "grad_norm": 0.15559883502080357, "learning_rate": 4.302710275411845e-06, "loss": 0.3592, "num_tokens": 5670745041.0, "step": 12877 }, { "epoch": 4.717446067878899, "grad_norm": 0.1513089070661022, "learning_rate": 4.301930776330127e-06, "loss": 0.3811, "num_tokens": 5671505839.0, "step": 12878 }, { "epoch": 4.717812485686804, "grad_norm": 0.15815683606941408, "learning_rate": 4.301152273684989e-06, "loss": 0.3831, "num_tokens": 5672276001.0, "step": 12879 }, { "epoch": 4.71817890349471, "grad_norm": 0.1734676670400442, "learning_rate": 4.300374767520262e-06, "loss": 0.3994, "num_tokens": 5672945816.0, "step": 12880 }, { "epoch": 4.718545321302615, "grad_norm": 0.149999463978712, "learning_rate": 4.299598257879723e-06, "loss": 0.3889, "num_tokens": 5673751516.0, "step": 12881 }, { "epoch": 4.718911739110521, "grad_norm": 0.1465419169371329, "learning_rate": 4.29882274480709e-06, "loss": 0.3702, "num_tokens": 5674458533.0, "step": 12882 }, { "epoch": 4.719278156918426, "grad_norm": 0.15539927625070435, "learning_rate": 4.298048228346025e-06, "loss": 0.3667, "num_tokens": 5675240011.0, "step": 12883 }, { "epoch": 4.719644574726332, "grad_norm": 0.13418200973769917, "learning_rate": 4.297274708540137e-06, "loss": 0.3694, "num_tokens": 5676021149.0, "step": 12884 }, { "epoch": 4.720010992534237, "grad_norm": 0.1525595031002921, "learning_rate": 4.296502185432973e-06, "loss": 0.3613, "num_tokens": 5676771265.0, "step": 12885 }, { "epoch": 4.7203774103421425, "grad_norm": 0.1671022770074282, "learning_rate": 4.29573065906803e-06, "loss": 0.3771, "num_tokens": 5677433649.0, "step": 12886 }, { "epoch": 4.720743828150048, "grad_norm": 0.1454598496683797, "learning_rate": 4.294960129488748e-06, "loss": 0.3607, "num_tokens": 5678251815.0, "step": 12887 }, { "epoch": 4.721110245957954, "grad_norm": 0.1546651101355353, "learning_rate": 4.294190596738508e-06, "loss": 0.3799, "num_tokens": 5679018294.0, "step": 12888 }, { "epoch": 4.721476663765859, "grad_norm": 0.14714702311005107, "learning_rate": 4.293422060860635e-06, "loss": 0.3721, "num_tokens": 5679765668.0, "step": 12889 }, { "epoch": 4.721843081573764, "grad_norm": 0.14923845801483734, "learning_rate": 4.292654521898399e-06, "loss": 0.3518, "num_tokens": 5680491080.0, "step": 12890 }, { "epoch": 4.72220949938167, "grad_norm": 0.15349330348862014, "learning_rate": 4.291887979895015e-06, "loss": 0.372, "num_tokens": 5681333823.0, "step": 12891 }, { "epoch": 4.722575917189575, "grad_norm": 0.14613626218343873, "learning_rate": 4.291122434893642e-06, "loss": 0.3611, "num_tokens": 5682097115.0, "step": 12892 }, { "epoch": 4.722942334997481, "grad_norm": 0.14166347187082562, "learning_rate": 4.2903578869373795e-06, "loss": 0.3414, "num_tokens": 5682898943.0, "step": 12893 }, { "epoch": 4.723308752805386, "grad_norm": 0.13788979504990334, "learning_rate": 4.289594336069275e-06, "loss": 0.3391, "num_tokens": 5683746850.0, "step": 12894 }, { "epoch": 4.723675170613292, "grad_norm": 0.14704141785345498, "learning_rate": 4.288831782332316e-06, "loss": 0.3544, "num_tokens": 5684535568.0, "step": 12895 }, { "epoch": 4.724041588421198, "grad_norm": 0.16020692310903492, "learning_rate": 4.288070225769436e-06, "loss": 0.3847, "num_tokens": 5685195233.0, "step": 12896 }, { "epoch": 4.724408006229103, "grad_norm": 0.15905927655276755, "learning_rate": 4.287309666423514e-06, "loss": 0.3532, "num_tokens": 5685878731.0, "step": 12897 }, { "epoch": 4.724774424037008, "grad_norm": 0.16307255744228363, "learning_rate": 4.2865501043373715e-06, "loss": 0.3666, "num_tokens": 5686608312.0, "step": 12898 }, { "epoch": 4.7251408418449135, "grad_norm": 0.15726591520459246, "learning_rate": 4.2857915395537686e-06, "loss": 0.3916, "num_tokens": 5687343583.0, "step": 12899 }, { "epoch": 4.725507259652819, "grad_norm": 0.14696179848738078, "learning_rate": 4.28503397211542e-06, "loss": 0.3797, "num_tokens": 5688061571.0, "step": 12900 }, { "epoch": 4.725873677460725, "grad_norm": 0.1457221639415406, "learning_rate": 4.284277402064974e-06, "loss": 0.3805, "num_tokens": 5688862369.0, "step": 12901 }, { "epoch": 4.72624009526863, "grad_norm": 0.1603878006878476, "learning_rate": 4.283521829445029e-06, "loss": 0.3685, "num_tokens": 5689529828.0, "step": 12902 }, { "epoch": 4.726606513076535, "grad_norm": 0.1469989675329997, "learning_rate": 4.282767254298125e-06, "loss": 0.366, "num_tokens": 5690302227.0, "step": 12903 }, { "epoch": 4.7269729308844415, "grad_norm": 0.1563584549810536, "learning_rate": 4.282013676666746e-06, "loss": 0.3662, "num_tokens": 5691067554.0, "step": 12904 }, { "epoch": 4.727339348692347, "grad_norm": 0.14085322860310573, "learning_rate": 4.281261096593319e-06, "loss": 0.3615, "num_tokens": 5691923631.0, "step": 12905 }, { "epoch": 4.727705766500252, "grad_norm": 0.14137213037609392, "learning_rate": 4.2805095141202155e-06, "loss": 0.3705, "num_tokens": 5692663010.0, "step": 12906 }, { "epoch": 4.728072184308157, "grad_norm": 0.16129253423403686, "learning_rate": 4.279758929289753e-06, "loss": 0.3891, "num_tokens": 5693367619.0, "step": 12907 }, { "epoch": 4.7284386021160625, "grad_norm": 0.1472606211365304, "learning_rate": 4.279009342144191e-06, "loss": 0.369, "num_tokens": 5694175351.0, "step": 12908 }, { "epoch": 4.728805019923969, "grad_norm": 0.16264405272471918, "learning_rate": 4.278260752725731e-06, "loss": 0.3839, "num_tokens": 5694831598.0, "step": 12909 }, { "epoch": 4.729171437731874, "grad_norm": 0.15268122350748978, "learning_rate": 4.277513161076521e-06, "loss": 0.3545, "num_tokens": 5695569531.0, "step": 12910 }, { "epoch": 4.729537855539779, "grad_norm": 0.1447523079458264, "learning_rate": 4.276766567238652e-06, "loss": 0.3774, "num_tokens": 5696435799.0, "step": 12911 }, { "epoch": 4.729904273347684, "grad_norm": 0.1451419274718664, "learning_rate": 4.276020971254159e-06, "loss": 0.3747, "num_tokens": 5697147785.0, "step": 12912 }, { "epoch": 4.7302706911555905, "grad_norm": 0.16909391187585684, "learning_rate": 4.275276373165017e-06, "loss": 0.3866, "num_tokens": 5697879159.0, "step": 12913 }, { "epoch": 4.730637108963496, "grad_norm": 0.134539138064709, "learning_rate": 4.274532773013153e-06, "loss": 0.3992, "num_tokens": 5698782912.0, "step": 12914 }, { "epoch": 4.731003526771401, "grad_norm": 0.14237818184987097, "learning_rate": 4.273790170840433e-06, "loss": 0.3912, "num_tokens": 5699636971.0, "step": 12915 }, { "epoch": 4.731369944579306, "grad_norm": 0.13438623134956562, "learning_rate": 4.273048566688665e-06, "loss": 0.3598, "num_tokens": 5700551398.0, "step": 12916 }, { "epoch": 4.731736362387212, "grad_norm": 0.14838529905715178, "learning_rate": 4.2723079605996034e-06, "loss": 0.3646, "num_tokens": 5701258632.0, "step": 12917 }, { "epoch": 4.732102780195118, "grad_norm": 0.16344677220297463, "learning_rate": 4.271568352614943e-06, "loss": 0.3669, "num_tokens": 5701968582.0, "step": 12918 }, { "epoch": 4.732469198003023, "grad_norm": 0.1464307918189834, "learning_rate": 4.27082974277633e-06, "loss": 0.3801, "num_tokens": 5702765953.0, "step": 12919 }, { "epoch": 4.732835615810928, "grad_norm": 0.1432404102344312, "learning_rate": 4.270092131125348e-06, "loss": 0.3447, "num_tokens": 5703509555.0, "step": 12920 }, { "epoch": 4.733202033618834, "grad_norm": 0.1535776677032609, "learning_rate": 4.269355517703525e-06, "loss": 0.3936, "num_tokens": 5704253754.0, "step": 12921 }, { "epoch": 4.7335684514267395, "grad_norm": 0.1531141373828561, "learning_rate": 4.268619902552334e-06, "loss": 0.3724, "num_tokens": 5704937653.0, "step": 12922 }, { "epoch": 4.733934869234645, "grad_norm": 0.14729753026760883, "learning_rate": 4.267885285713192e-06, "loss": 0.3425, "num_tokens": 5705747609.0, "step": 12923 }, { "epoch": 4.73430128704255, "grad_norm": 0.15122613264559143, "learning_rate": 4.2671516672274585e-06, "loss": 0.3575, "num_tokens": 5706408698.0, "step": 12924 }, { "epoch": 4.734667704850455, "grad_norm": 0.15308893092056483, "learning_rate": 4.2664190471364414e-06, "loss": 0.3738, "num_tokens": 5707250567.0, "step": 12925 }, { "epoch": 4.735034122658361, "grad_norm": 0.15416142150325798, "learning_rate": 4.265687425481384e-06, "loss": 0.3647, "num_tokens": 5707948108.0, "step": 12926 }, { "epoch": 4.735400540466267, "grad_norm": 0.14392119631129258, "learning_rate": 4.264956802303478e-06, "loss": 0.4085, "num_tokens": 5708740207.0, "step": 12927 }, { "epoch": 4.735766958274172, "grad_norm": 0.14294639893794459, "learning_rate": 4.264227177643862e-06, "loss": 0.3216, "num_tokens": 5709491818.0, "step": 12928 }, { "epoch": 4.736133376082078, "grad_norm": 0.1425723892596183, "learning_rate": 4.263498551543616e-06, "loss": 0.3633, "num_tokens": 5710316334.0, "step": 12929 }, { "epoch": 4.736499793889983, "grad_norm": 0.13715177551969393, "learning_rate": 4.262770924043759e-06, "loss": 0.3489, "num_tokens": 5711201085.0, "step": 12930 }, { "epoch": 4.736866211697889, "grad_norm": 0.14963900431767382, "learning_rate": 4.26204429518526e-06, "loss": 0.4094, "num_tokens": 5711973928.0, "step": 12931 }, { "epoch": 4.737232629505794, "grad_norm": 0.13895800393999566, "learning_rate": 4.261318665009032e-06, "loss": 0.3704, "num_tokens": 5712795447.0, "step": 12932 }, { "epoch": 4.737599047313699, "grad_norm": 0.14421671524182234, "learning_rate": 4.260594033555924e-06, "loss": 0.3693, "num_tokens": 5713562059.0, "step": 12933 }, { "epoch": 4.737965465121605, "grad_norm": 0.14768330228033588, "learning_rate": 4.25987040086674e-06, "loss": 0.3498, "num_tokens": 5714365223.0, "step": 12934 }, { "epoch": 4.7383318829295105, "grad_norm": 0.15243845238286347, "learning_rate": 4.259147766982217e-06, "loss": 0.3417, "num_tokens": 5715033746.0, "step": 12935 }, { "epoch": 4.738698300737416, "grad_norm": 0.15106817731058259, "learning_rate": 4.258426131943044e-06, "loss": 0.3585, "num_tokens": 5715723563.0, "step": 12936 }, { "epoch": 4.739064718545321, "grad_norm": 0.1535711307371459, "learning_rate": 4.2577054957898515e-06, "loss": 0.3738, "num_tokens": 5716445053.0, "step": 12937 }, { "epoch": 4.739431136353227, "grad_norm": 0.1473487619672275, "learning_rate": 4.256985858563208e-06, "loss": 0.3425, "num_tokens": 5717250764.0, "step": 12938 }, { "epoch": 4.739797554161132, "grad_norm": 0.1518864065799396, "learning_rate": 4.256267220303636e-06, "loss": 0.3788, "num_tokens": 5717943076.0, "step": 12939 }, { "epoch": 4.740163971969038, "grad_norm": 0.13709058142578703, "learning_rate": 4.25554958105159e-06, "loss": 0.3894, "num_tokens": 5718713293.0, "step": 12940 }, { "epoch": 4.740530389776943, "grad_norm": 0.15162996956300395, "learning_rate": 4.2548329408474784e-06, "loss": 0.3495, "num_tokens": 5719496950.0, "step": 12941 }, { "epoch": 4.740896807584849, "grad_norm": 0.1522915509962496, "learning_rate": 4.254117299731652e-06, "loss": 0.3698, "num_tokens": 5720279082.0, "step": 12942 }, { "epoch": 4.741263225392754, "grad_norm": 0.14230428561959943, "learning_rate": 4.2534026577444e-06, "loss": 0.3815, "num_tokens": 5721051822.0, "step": 12943 }, { "epoch": 4.7416296432006595, "grad_norm": 0.151406052292585, "learning_rate": 4.252689014925957e-06, "loss": 0.3875, "num_tokens": 5721833254.0, "step": 12944 }, { "epoch": 4.741996061008565, "grad_norm": 0.15019489999437105, "learning_rate": 4.251976371316503e-06, "loss": 0.3697, "num_tokens": 5722594082.0, "step": 12945 }, { "epoch": 4.742362478816471, "grad_norm": 0.150567673881868, "learning_rate": 4.2512647269561626e-06, "loss": 0.3502, "num_tokens": 5723423095.0, "step": 12946 }, { "epoch": 4.742728896624376, "grad_norm": 0.15094873471948156, "learning_rate": 4.250554081885001e-06, "loss": 0.3776, "num_tokens": 5724213246.0, "step": 12947 }, { "epoch": 4.743095314432281, "grad_norm": 0.1611049807439348, "learning_rate": 4.249844436143031e-06, "loss": 0.3651, "num_tokens": 5724830387.0, "step": 12948 }, { "epoch": 4.743461732240187, "grad_norm": 0.1522364190441854, "learning_rate": 4.249135789770208e-06, "loss": 0.3866, "num_tokens": 5725695806.0, "step": 12949 }, { "epoch": 4.743828150048092, "grad_norm": 0.1420372793412309, "learning_rate": 4.248428142806424e-06, "loss": 0.3672, "num_tokens": 5726499202.0, "step": 12950 }, { "epoch": 4.744194567855998, "grad_norm": 0.15605696287071577, "learning_rate": 4.247721495291527e-06, "loss": 0.3781, "num_tokens": 5727231261.0, "step": 12951 }, { "epoch": 4.744560985663903, "grad_norm": 0.15963411041672398, "learning_rate": 4.247015847265302e-06, "loss": 0.3857, "num_tokens": 5727905901.0, "step": 12952 }, { "epoch": 4.7449274034718085, "grad_norm": 0.13066565933225974, "learning_rate": 4.246311198767478e-06, "loss": 0.333, "num_tokens": 5728795204.0, "step": 12953 }, { "epoch": 4.745293821279715, "grad_norm": 0.14505620116848467, "learning_rate": 4.2456075498377256e-06, "loss": 0.3885, "num_tokens": 5729566614.0, "step": 12954 }, { "epoch": 4.74566023908762, "grad_norm": 0.15301121702707335, "learning_rate": 4.244904900515664e-06, "loss": 0.3495, "num_tokens": 5730325741.0, "step": 12955 }, { "epoch": 4.746026656895525, "grad_norm": 0.13498184190522622, "learning_rate": 4.244203250840854e-06, "loss": 0.386, "num_tokens": 5731227711.0, "step": 12956 }, { "epoch": 4.74639307470343, "grad_norm": 0.1670942633337103, "learning_rate": 4.243502600852798e-06, "loss": 0.3533, "num_tokens": 5731835215.0, "step": 12957 }, { "epoch": 4.746759492511336, "grad_norm": 0.15683137017378682, "learning_rate": 4.242802950590946e-06, "loss": 0.3918, "num_tokens": 5732606842.0, "step": 12958 }, { "epoch": 4.747125910319242, "grad_norm": 0.14349982827124236, "learning_rate": 4.2421043000946885e-06, "loss": 0.3859, "num_tokens": 5733422890.0, "step": 12959 }, { "epoch": 4.747492328127147, "grad_norm": 0.14607755354717106, "learning_rate": 4.241406649403361e-06, "loss": 0.3753, "num_tokens": 5734254086.0, "step": 12960 }, { "epoch": 4.747858745935052, "grad_norm": 0.15885862169033807, "learning_rate": 4.240709998556241e-06, "loss": 0.3875, "num_tokens": 5735001237.0, "step": 12961 }, { "epoch": 4.7482251637429576, "grad_norm": 0.14933031688100623, "learning_rate": 4.240014347592554e-06, "loss": 0.3936, "num_tokens": 5735759117.0, "step": 12962 }, { "epoch": 4.748591581550864, "grad_norm": 0.15753055346717762, "learning_rate": 4.239319696551467e-06, "loss": 0.4332, "num_tokens": 5736487320.0, "step": 12963 }, { "epoch": 4.748957999358769, "grad_norm": 0.15574919473129553, "learning_rate": 4.23862604547209e-06, "loss": 0.3833, "num_tokens": 5737239742.0, "step": 12964 }, { "epoch": 4.749324417166674, "grad_norm": 0.1426489642291618, "learning_rate": 4.237933394393475e-06, "loss": 0.3588, "num_tokens": 5738019431.0, "step": 12965 }, { "epoch": 4.7496908349745794, "grad_norm": 0.1416891193715196, "learning_rate": 4.237241743354619e-06, "loss": 0.3518, "num_tokens": 5738851083.0, "step": 12966 }, { "epoch": 4.750057252782486, "grad_norm": 0.13997344292010064, "learning_rate": 4.236551092394466e-06, "loss": 0.3809, "num_tokens": 5739701389.0, "step": 12967 }, { "epoch": 4.750423670590391, "grad_norm": 0.15662533639620393, "learning_rate": 4.235861441551899e-06, "loss": 0.3724, "num_tokens": 5740436587.0, "step": 12968 }, { "epoch": 4.750790088398296, "grad_norm": 0.1435249161109213, "learning_rate": 4.235172790865747e-06, "loss": 0.3661, "num_tokens": 5741234917.0, "step": 12969 }, { "epoch": 4.751156506206201, "grad_norm": 0.14491240941467695, "learning_rate": 4.2344851403747856e-06, "loss": 0.3917, "num_tokens": 5742010215.0, "step": 12970 }, { "epoch": 4.7515229240141075, "grad_norm": 0.15966881514029804, "learning_rate": 4.233798490117727e-06, "loss": 0.3407, "num_tokens": 5742738493.0, "step": 12971 }, { "epoch": 4.751889341822013, "grad_norm": 0.14388553837092782, "learning_rate": 4.233112840133232e-06, "loss": 0.3651, "num_tokens": 5743470527.0, "step": 12972 }, { "epoch": 4.752255759629918, "grad_norm": 0.1691766025966717, "learning_rate": 4.2324281904599045e-06, "loss": 0.3624, "num_tokens": 5744169505.0, "step": 12973 }, { "epoch": 4.752622177437823, "grad_norm": 0.14362157001348372, "learning_rate": 4.231744541136291e-06, "loss": 0.3713, "num_tokens": 5744969638.0, "step": 12974 }, { "epoch": 4.7529885952457285, "grad_norm": 0.16223388210134101, "learning_rate": 4.231061892200882e-06, "loss": 0.3788, "num_tokens": 5745668388.0, "step": 12975 }, { "epoch": 4.753355013053635, "grad_norm": 0.149102368950445, "learning_rate": 4.2303802436921155e-06, "loss": 0.3529, "num_tokens": 5746386196.0, "step": 12976 }, { "epoch": 4.75372143086154, "grad_norm": 0.15694403983357585, "learning_rate": 4.229699595648365e-06, "loss": 0.3708, "num_tokens": 5747184217.0, "step": 12977 }, { "epoch": 4.754087848669445, "grad_norm": 0.14445800600304137, "learning_rate": 4.229019948107956e-06, "loss": 0.3935, "num_tokens": 5748020776.0, "step": 12978 }, { "epoch": 4.754454266477351, "grad_norm": 0.13765487411259986, "learning_rate": 4.228341301109148e-06, "loss": 0.3498, "num_tokens": 5748872217.0, "step": 12979 }, { "epoch": 4.7548206842852565, "grad_norm": 0.1454693655902254, "learning_rate": 4.22766365469016e-06, "loss": 0.3631, "num_tokens": 5749693934.0, "step": 12980 }, { "epoch": 4.755187102093162, "grad_norm": 0.1603978636489666, "learning_rate": 4.226987008889135e-06, "loss": 0.3855, "num_tokens": 5750350506.0, "step": 12981 }, { "epoch": 4.755553519901067, "grad_norm": 0.1499149956876426, "learning_rate": 4.226311363744176e-06, "loss": 0.3742, "num_tokens": 5751087969.0, "step": 12982 }, { "epoch": 4.755919937708972, "grad_norm": 0.1390797166875715, "learning_rate": 4.22563671929332e-06, "loss": 0.3707, "num_tokens": 5751889762.0, "step": 12983 }, { "epoch": 4.756286355516878, "grad_norm": 0.1481992904528426, "learning_rate": 4.224963075574553e-06, "loss": 0.3397, "num_tokens": 5752640862.0, "step": 12984 }, { "epoch": 4.756652773324784, "grad_norm": 0.15102569291978338, "learning_rate": 4.2242904326258e-06, "loss": 0.3604, "num_tokens": 5753395481.0, "step": 12985 }, { "epoch": 4.757019191132689, "grad_norm": 0.15106144875009567, "learning_rate": 4.223618790484934e-06, "loss": 0.3404, "num_tokens": 5754125693.0, "step": 12986 }, { "epoch": 4.757385608940594, "grad_norm": 0.1643984090607443, "learning_rate": 4.222948149189771e-06, "loss": 0.3455, "num_tokens": 5754852824.0, "step": 12987 }, { "epoch": 4.7577520267485, "grad_norm": 0.1384209037081496, "learning_rate": 4.222278508778064e-06, "loss": 0.383, "num_tokens": 5755609538.0, "step": 12988 }, { "epoch": 4.7581184445564055, "grad_norm": 0.15368541307066969, "learning_rate": 4.221609869287523e-06, "loss": 0.3754, "num_tokens": 5756398304.0, "step": 12989 }, { "epoch": 4.758484862364311, "grad_norm": 0.15426037539967072, "learning_rate": 4.220942230755785e-06, "loss": 0.3563, "num_tokens": 5757109112.0, "step": 12990 }, { "epoch": 4.758851280172216, "grad_norm": 0.16309167012773168, "learning_rate": 4.220275593220448e-06, "loss": 0.3865, "num_tokens": 5757802571.0, "step": 12991 }, { "epoch": 4.759217697980122, "grad_norm": 0.14788320747642109, "learning_rate": 4.219609956719037e-06, "loss": 0.3677, "num_tokens": 5758607925.0, "step": 12992 }, { "epoch": 4.759584115788027, "grad_norm": 0.15340066825957774, "learning_rate": 4.218945321289037e-06, "loss": 0.4043, "num_tokens": 5759345615.0, "step": 12993 }, { "epoch": 4.759950533595933, "grad_norm": 0.1661143778934896, "learning_rate": 4.218281686967861e-06, "loss": 0.3323, "num_tokens": 5759929177.0, "step": 12994 }, { "epoch": 4.760316951403838, "grad_norm": 0.1596631657419143, "learning_rate": 4.217619053792877e-06, "loss": 0.3674, "num_tokens": 5760658709.0, "step": 12995 }, { "epoch": 4.760683369211744, "grad_norm": 0.1460031011045661, "learning_rate": 4.216957421801389e-06, "loss": 0.3842, "num_tokens": 5761553640.0, "step": 12996 }, { "epoch": 4.761049787019649, "grad_norm": 0.13516567148901323, "learning_rate": 4.216296791030652e-06, "loss": 0.3696, "num_tokens": 5762342146.0, "step": 12997 }, { "epoch": 4.761416204827555, "grad_norm": 0.1592504942278594, "learning_rate": 4.215637161517861e-06, "loss": 0.376, "num_tokens": 5763044913.0, "step": 12998 }, { "epoch": 4.76178262263546, "grad_norm": 0.14733090649725267, "learning_rate": 4.214978533300152e-06, "loss": 0.3342, "num_tokens": 5763841154.0, "step": 12999 }, { "epoch": 4.762149040443365, "grad_norm": 0.13556218479880422, "learning_rate": 4.214320906414607e-06, "loss": 0.3739, "num_tokens": 5764692863.0, "step": 13000 }, { "epoch": 4.762515458251271, "grad_norm": 0.15203655045011027, "learning_rate": 4.213664280898253e-06, "loss": 0.3981, "num_tokens": 5765407318.0, "step": 13001 }, { "epoch": 4.7628818760591765, "grad_norm": 0.1506087257597195, "learning_rate": 4.213008656788059e-06, "loss": 0.3902, "num_tokens": 5766259296.0, "step": 13002 }, { "epoch": 4.763248293867082, "grad_norm": 0.14883880272335837, "learning_rate": 4.212354034120938e-06, "loss": 0.3608, "num_tokens": 5767023026.0, "step": 13003 }, { "epoch": 4.763614711674988, "grad_norm": 0.158019826656906, "learning_rate": 4.211700412933748e-06, "loss": 0.3817, "num_tokens": 5767734632.0, "step": 13004 }, { "epoch": 4.763981129482893, "grad_norm": 0.13507221895701535, "learning_rate": 4.211047793263285e-06, "loss": 0.3627, "num_tokens": 5768477783.0, "step": 13005 }, { "epoch": 4.764347547290798, "grad_norm": 0.16496602926772044, "learning_rate": 4.210396175146298e-06, "loss": 0.3743, "num_tokens": 5769129049.0, "step": 13006 }, { "epoch": 4.764713965098704, "grad_norm": 0.1504872050178919, "learning_rate": 4.209745558619468e-06, "loss": 0.3516, "num_tokens": 5769834679.0, "step": 13007 }, { "epoch": 4.765080382906609, "grad_norm": 0.1368478400009644, "learning_rate": 4.209095943719434e-06, "loss": 0.3487, "num_tokens": 5770720925.0, "step": 13008 }, { "epoch": 4.765446800714515, "grad_norm": 0.13651398465177525, "learning_rate": 4.208447330482764e-06, "loss": 0.3727, "num_tokens": 5771619651.0, "step": 13009 }, { "epoch": 4.76581321852242, "grad_norm": 0.16205632660372085, "learning_rate": 4.207799718945981e-06, "loss": 0.3815, "num_tokens": 5772301813.0, "step": 13010 }, { "epoch": 4.7661796363303255, "grad_norm": 0.13840954259112626, "learning_rate": 4.207153109145542e-06, "loss": 0.394, "num_tokens": 5773125392.0, "step": 13011 }, { "epoch": 4.766546054138231, "grad_norm": 0.15342037340694506, "learning_rate": 4.206507501117856e-06, "loss": 0.3246, "num_tokens": 5773766071.0, "step": 13012 }, { "epoch": 4.766912471946137, "grad_norm": 0.15175675354120774, "learning_rate": 4.2058628948992704e-06, "loss": 0.3784, "num_tokens": 5774624148.0, "step": 13013 }, { "epoch": 4.767278889754042, "grad_norm": 0.140384401180117, "learning_rate": 4.205219290526081e-06, "loss": 0.3858, "num_tokens": 5775483787.0, "step": 13014 }, { "epoch": 4.767645307561947, "grad_norm": 0.1443238849261169, "learning_rate": 4.204576688034518e-06, "loss": 0.3763, "num_tokens": 5776230435.0, "step": 13015 }, { "epoch": 4.768011725369853, "grad_norm": 0.15128021749350426, "learning_rate": 4.203935087460767e-06, "loss": 0.3675, "num_tokens": 5776964264.0, "step": 13016 }, { "epoch": 4.768378143177759, "grad_norm": 0.14300705537454708, "learning_rate": 4.2032944888409466e-06, "loss": 0.3699, "num_tokens": 5777724675.0, "step": 13017 }, { "epoch": 4.768744560985664, "grad_norm": 0.1549390083013282, "learning_rate": 4.202654892211125e-06, "loss": 0.3833, "num_tokens": 5778518698.0, "step": 13018 }, { "epoch": 4.769110978793569, "grad_norm": 0.1442017588936158, "learning_rate": 4.202016297607318e-06, "loss": 0.3514, "num_tokens": 5779330783.0, "step": 13019 }, { "epoch": 4.7694773966014745, "grad_norm": 0.15571371825592553, "learning_rate": 4.201378705065473e-06, "loss": 0.3689, "num_tokens": 5780011233.0, "step": 13020 }, { "epoch": 4.769843814409381, "grad_norm": 0.1393569030829189, "learning_rate": 4.200742114621493e-06, "loss": 0.3513, "num_tokens": 5780847438.0, "step": 13021 }, { "epoch": 4.770210232217286, "grad_norm": 0.1404469848165378, "learning_rate": 4.200106526311216e-06, "loss": 0.3627, "num_tokens": 5781674669.0, "step": 13022 }, { "epoch": 4.770576650025191, "grad_norm": 0.1406823602672852, "learning_rate": 4.199471940170425e-06, "loss": 0.3763, "num_tokens": 5782555618.0, "step": 13023 }, { "epoch": 4.770943067833096, "grad_norm": 0.1401019078573822, "learning_rate": 4.198838356234852e-06, "loss": 0.3805, "num_tokens": 5783485640.0, "step": 13024 }, { "epoch": 4.771309485641002, "grad_norm": 0.14763477652681384, "learning_rate": 4.198205774540168e-06, "loss": 0.3718, "num_tokens": 5784270278.0, "step": 13025 }, { "epoch": 4.771675903448908, "grad_norm": 0.13926543923753068, "learning_rate": 4.19757419512199e-06, "loss": 0.3718, "num_tokens": 5785137892.0, "step": 13026 }, { "epoch": 4.772042321256813, "grad_norm": 0.13455523038777967, "learning_rate": 4.196943618015877e-06, "loss": 0.3666, "num_tokens": 5785932308.0, "step": 13027 }, { "epoch": 4.772408739064718, "grad_norm": 0.15977250444212765, "learning_rate": 4.196314043257329e-06, "loss": 0.3773, "num_tokens": 5786608556.0, "step": 13028 }, { "epoch": 4.772775156872624, "grad_norm": 0.16511810391738357, "learning_rate": 4.195685470881793e-06, "loss": 0.4005, "num_tokens": 5787333318.0, "step": 13029 }, { "epoch": 4.77314157468053, "grad_norm": 0.1512363830614065, "learning_rate": 4.19505790092466e-06, "loss": 0.3644, "num_tokens": 5788098594.0, "step": 13030 }, { "epoch": 4.773507992488435, "grad_norm": 0.15819852691666403, "learning_rate": 4.194431333421263e-06, "loss": 0.367, "num_tokens": 5788745180.0, "step": 13031 }, { "epoch": 4.77387441029634, "grad_norm": 0.16395853832040821, "learning_rate": 4.193805768406881e-06, "loss": 0.3784, "num_tokens": 5789484673.0, "step": 13032 }, { "epoch": 4.774240828104245, "grad_norm": 0.1498934160930947, "learning_rate": 4.193181205916731e-06, "loss": 0.3613, "num_tokens": 5790220695.0, "step": 13033 }, { "epoch": 4.774607245912152, "grad_norm": 0.13844339107513948, "learning_rate": 4.192557645985981e-06, "loss": 0.3632, "num_tokens": 5791109042.0, "step": 13034 }, { "epoch": 4.774973663720057, "grad_norm": 0.1386480573785981, "learning_rate": 4.191935088649737e-06, "loss": 0.3446, "num_tokens": 5791878237.0, "step": 13035 }, { "epoch": 4.775340081527962, "grad_norm": 0.15063890454824697, "learning_rate": 4.191313533943047e-06, "loss": 0.3685, "num_tokens": 5792731043.0, "step": 13036 }, { "epoch": 4.775706499335868, "grad_norm": 0.1491224384549889, "learning_rate": 4.190692981900909e-06, "loss": 0.3508, "num_tokens": 5793480768.0, "step": 13037 }, { "epoch": 4.7760729171437735, "grad_norm": 0.15158578246403026, "learning_rate": 4.1900734325582626e-06, "loss": 0.3941, "num_tokens": 5794291168.0, "step": 13038 }, { "epoch": 4.776439334951679, "grad_norm": 0.13835704569031276, "learning_rate": 4.189454885949987e-06, "loss": 0.3987, "num_tokens": 5795099716.0, "step": 13039 }, { "epoch": 4.776805752759584, "grad_norm": 0.1529702388153473, "learning_rate": 4.188837342110908e-06, "loss": 0.3842, "num_tokens": 5795853788.0, "step": 13040 }, { "epoch": 4.777172170567489, "grad_norm": 0.1523548236974511, "learning_rate": 4.188220801075796e-06, "loss": 0.3618, "num_tokens": 5796555338.0, "step": 13041 }, { "epoch": 4.777538588375395, "grad_norm": 0.1419547338846474, "learning_rate": 4.187605262879365e-06, "loss": 0.3812, "num_tokens": 5797278018.0, "step": 13042 }, { "epoch": 4.777905006183301, "grad_norm": 0.1454193654532154, "learning_rate": 4.186990727556266e-06, "loss": 0.3927, "num_tokens": 5798101901.0, "step": 13043 }, { "epoch": 4.778271423991206, "grad_norm": 0.16513333345198652, "learning_rate": 4.186377195141099e-06, "loss": 0.3713, "num_tokens": 5798803246.0, "step": 13044 }, { "epoch": 4.778637841799111, "grad_norm": 0.14743518538288677, "learning_rate": 4.185764665668412e-06, "loss": 0.3743, "num_tokens": 5799611355.0, "step": 13045 }, { "epoch": 4.779004259607017, "grad_norm": 0.16517489402562693, "learning_rate": 4.1851531391726885e-06, "loss": 0.3763, "num_tokens": 5800302814.0, "step": 13046 }, { "epoch": 4.7793706774149225, "grad_norm": 0.1490356930780788, "learning_rate": 4.18454261568836e-06, "loss": 0.3715, "num_tokens": 5801099692.0, "step": 13047 }, { "epoch": 4.779737095222828, "grad_norm": 0.14121849229320255, "learning_rate": 4.1839330952497995e-06, "loss": 0.3738, "num_tokens": 5801961127.0, "step": 13048 }, { "epoch": 4.780103513030733, "grad_norm": 0.15488415821289622, "learning_rate": 4.1833245778913225e-06, "loss": 0.3595, "num_tokens": 5802682651.0, "step": 13049 }, { "epoch": 4.780469930838639, "grad_norm": 0.1487390595039593, "learning_rate": 4.182717063647191e-06, "loss": 0.3692, "num_tokens": 5803430960.0, "step": 13050 }, { "epoch": 4.780836348646544, "grad_norm": 0.22545952674052588, "learning_rate": 4.182110552551609e-06, "loss": 0.3728, "num_tokens": 5804172270.0, "step": 13051 }, { "epoch": 4.78120276645445, "grad_norm": 0.13793022965837817, "learning_rate": 4.181505044638727e-06, "loss": 0.3424, "num_tokens": 5804947006.0, "step": 13052 }, { "epoch": 4.781569184262355, "grad_norm": 0.13101035606375486, "learning_rate": 4.180900539942634e-06, "loss": 0.3446, "num_tokens": 5805826711.0, "step": 13053 }, { "epoch": 4.781935602070261, "grad_norm": 0.14056271876067858, "learning_rate": 4.1802970384973634e-06, "loss": 0.366, "num_tokens": 5806651210.0, "step": 13054 }, { "epoch": 4.782302019878166, "grad_norm": 0.15155429129188067, "learning_rate": 4.179694540336897e-06, "loss": 0.3663, "num_tokens": 5807471498.0, "step": 13055 }, { "epoch": 4.7826684376860715, "grad_norm": 0.13897212492390895, "learning_rate": 4.179093045495153e-06, "loss": 0.3638, "num_tokens": 5808404039.0, "step": 13056 }, { "epoch": 4.783034855493977, "grad_norm": 0.13895777093288714, "learning_rate": 4.178492554005998e-06, "loss": 0.3534, "num_tokens": 5809245349.0, "step": 13057 }, { "epoch": 4.783401273301882, "grad_norm": 0.14678903963934653, "learning_rate": 4.177893065903242e-06, "loss": 0.3679, "num_tokens": 5810023373.0, "step": 13058 }, { "epoch": 4.783767691109788, "grad_norm": 0.14612329370190713, "learning_rate": 4.1772945812206355e-06, "loss": 0.3434, "num_tokens": 5810845134.0, "step": 13059 }, { "epoch": 4.784134108917693, "grad_norm": 0.13844364445365573, "learning_rate": 4.176697099991878e-06, "loss": 0.3558, "num_tokens": 5811649883.0, "step": 13060 }, { "epoch": 4.784500526725599, "grad_norm": 0.14951329124560278, "learning_rate": 4.176100622250606e-06, "loss": 0.379, "num_tokens": 5812382017.0, "step": 13061 }, { "epoch": 4.784866944533505, "grad_norm": 0.14882445479389816, "learning_rate": 4.1755051480304006e-06, "loss": 0.3826, "num_tokens": 5813200531.0, "step": 13062 }, { "epoch": 4.78523336234141, "grad_norm": 0.15382866009457957, "learning_rate": 4.1749106773647935e-06, "loss": 0.3855, "num_tokens": 5813978675.0, "step": 13063 }, { "epoch": 4.785599780149315, "grad_norm": 0.15886845553678466, "learning_rate": 4.174317210287249e-06, "loss": 0.3761, "num_tokens": 5814693913.0, "step": 13064 }, { "epoch": 4.785966197957221, "grad_norm": 0.14695339890635528, "learning_rate": 4.173724746831183e-06, "loss": 0.393, "num_tokens": 5815530954.0, "step": 13065 }, { "epoch": 4.786332615765126, "grad_norm": 0.1517231520303547, "learning_rate": 4.173133287029955e-06, "loss": 0.357, "num_tokens": 5816190938.0, "step": 13066 }, { "epoch": 4.786699033573032, "grad_norm": 0.1570229872460121, "learning_rate": 4.172542830916861e-06, "loss": 0.3679, "num_tokens": 5816804608.0, "step": 13067 }, { "epoch": 4.787065451380937, "grad_norm": 0.14832333963056615, "learning_rate": 4.171953378525148e-06, "loss": 0.3654, "num_tokens": 5817689700.0, "step": 13068 }, { "epoch": 4.7874318691888424, "grad_norm": 0.15086931700798192, "learning_rate": 4.171364929888e-06, "loss": 0.3554, "num_tokens": 5818396667.0, "step": 13069 }, { "epoch": 4.787798286996748, "grad_norm": 0.1511892267427422, "learning_rate": 4.170777485038551e-06, "loss": 0.3746, "num_tokens": 5819235981.0, "step": 13070 }, { "epoch": 4.788164704804654, "grad_norm": 0.15402327090208157, "learning_rate": 4.1701910440098735e-06, "loss": 0.3509, "num_tokens": 5819886251.0, "step": 13071 }, { "epoch": 4.788531122612559, "grad_norm": 0.14932352079839684, "learning_rate": 4.169605606834985e-06, "loss": 0.3641, "num_tokens": 5820643094.0, "step": 13072 }, { "epoch": 4.788897540420464, "grad_norm": 0.163706002746395, "learning_rate": 4.1690211735468486e-06, "loss": 0.3862, "num_tokens": 5821414909.0, "step": 13073 }, { "epoch": 4.78926395822837, "grad_norm": 0.14235060978476424, "learning_rate": 4.168437744178368e-06, "loss": 0.3568, "num_tokens": 5822202888.0, "step": 13074 }, { "epoch": 4.789630376036276, "grad_norm": 0.14482363486884017, "learning_rate": 4.167855318762392e-06, "loss": 0.3971, "num_tokens": 5823002651.0, "step": 13075 }, { "epoch": 4.789996793844181, "grad_norm": 0.14685015382031205, "learning_rate": 4.16727389733171e-06, "loss": 0.3632, "num_tokens": 5823842521.0, "step": 13076 }, { "epoch": 4.790363211652086, "grad_norm": 0.14382749321462157, "learning_rate": 4.166693479919062e-06, "loss": 0.3746, "num_tokens": 5824646066.0, "step": 13077 }, { "epoch": 4.7907296294599915, "grad_norm": 0.1468773254726675, "learning_rate": 4.166114066557121e-06, "loss": 0.3743, "num_tokens": 5825409959.0, "step": 13078 }, { "epoch": 4.791096047267898, "grad_norm": 0.14924681424582867, "learning_rate": 4.165535657278513e-06, "loss": 0.3927, "num_tokens": 5826181595.0, "step": 13079 }, { "epoch": 4.791462465075803, "grad_norm": 0.15518581914368493, "learning_rate": 4.164958252115803e-06, "loss": 0.3834, "num_tokens": 5826879242.0, "step": 13080 }, { "epoch": 4.791828882883708, "grad_norm": 0.15056558580965734, "learning_rate": 4.164381851101498e-06, "loss": 0.3576, "num_tokens": 5827656654.0, "step": 13081 }, { "epoch": 4.792195300691613, "grad_norm": 0.14150301730899295, "learning_rate": 4.163806454268054e-06, "loss": 0.382, "num_tokens": 5828411302.0, "step": 13082 }, { "epoch": 4.792561718499519, "grad_norm": 0.15999079566789387, "learning_rate": 4.163232061647865e-06, "loss": 0.3608, "num_tokens": 5829128309.0, "step": 13083 }, { "epoch": 4.792928136307425, "grad_norm": 0.14310819642228698, "learning_rate": 4.162658673273268e-06, "loss": 0.3909, "num_tokens": 5829992589.0, "step": 13084 }, { "epoch": 4.79329455411533, "grad_norm": 0.15462726100004945, "learning_rate": 4.1620862891765505e-06, "loss": 0.3853, "num_tokens": 5830738782.0, "step": 13085 }, { "epoch": 4.793660971923235, "grad_norm": 0.15921074650068753, "learning_rate": 4.161514909389936e-06, "loss": 0.3675, "num_tokens": 5831464395.0, "step": 13086 }, { "epoch": 4.794027389731141, "grad_norm": 0.15544584168720826, "learning_rate": 4.1609445339455945e-06, "loss": 0.3585, "num_tokens": 5832249027.0, "step": 13087 }, { "epoch": 4.794393807539047, "grad_norm": 0.15707128773021917, "learning_rate": 4.160375162875641e-06, "loss": 0.4012, "num_tokens": 5833021289.0, "step": 13088 }, { "epoch": 4.794760225346952, "grad_norm": 0.15012713451787826, "learning_rate": 4.159806796212131e-06, "loss": 0.3572, "num_tokens": 5833817626.0, "step": 13089 }, { "epoch": 4.795126643154857, "grad_norm": 0.13881639087605083, "learning_rate": 4.159239433987064e-06, "loss": 0.3529, "num_tokens": 5834591946.0, "step": 13090 }, { "epoch": 4.795493060962762, "grad_norm": 0.15490765496490377, "learning_rate": 4.158673076232386e-06, "loss": 0.3926, "num_tokens": 5835358836.0, "step": 13091 }, { "epoch": 4.7958594787706685, "grad_norm": 0.14957156469768737, "learning_rate": 4.158107722979979e-06, "loss": 0.3689, "num_tokens": 5836096820.0, "step": 13092 }, { "epoch": 4.796225896578574, "grad_norm": 0.15434824602550565, "learning_rate": 4.157543374261679e-06, "loss": 0.3899, "num_tokens": 5836886015.0, "step": 13093 }, { "epoch": 4.796592314386479, "grad_norm": 0.14394769521403772, "learning_rate": 4.156980030109259e-06, "loss": 0.3887, "num_tokens": 5837666470.0, "step": 13094 }, { "epoch": 4.796958732194384, "grad_norm": 0.14541603452943735, "learning_rate": 4.156417690554433e-06, "loss": 0.372, "num_tokens": 5838403722.0, "step": 13095 }, { "epoch": 4.79732515000229, "grad_norm": 0.15669854835188535, "learning_rate": 4.155856355628866e-06, "loss": 0.3678, "num_tokens": 5839185231.0, "step": 13096 }, { "epoch": 4.797691567810196, "grad_norm": 0.1528111792201308, "learning_rate": 4.155296025364161e-06, "loss": 0.3779, "num_tokens": 5839928459.0, "step": 13097 }, { "epoch": 4.798057985618101, "grad_norm": 0.14822976398566617, "learning_rate": 4.1547366997918645e-06, "loss": 0.3647, "num_tokens": 5840705839.0, "step": 13098 }, { "epoch": 4.798424403426006, "grad_norm": 0.14981622969147085, "learning_rate": 4.154178378943466e-06, "loss": 0.3834, "num_tokens": 5841409406.0, "step": 13099 }, { "epoch": 4.798790821233912, "grad_norm": 0.1685693740132503, "learning_rate": 4.153621062850406e-06, "loss": 0.3636, "num_tokens": 5842064801.0, "step": 13100 }, { "epoch": 4.799157239041818, "grad_norm": 0.1493464886744064, "learning_rate": 4.153064751544054e-06, "loss": 0.365, "num_tokens": 5842928197.0, "step": 13101 }, { "epoch": 4.799523656849723, "grad_norm": 0.14349867171418554, "learning_rate": 4.15250944505574e-06, "loss": 0.3619, "num_tokens": 5843671617.0, "step": 13102 }, { "epoch": 4.799890074657628, "grad_norm": 0.14151386546601658, "learning_rate": 4.151955143416726e-06, "loss": 0.3717, "num_tokens": 5844541606.0, "step": 13103 }, { "epoch": 4.800256492465534, "grad_norm": 0.14484437157907035, "learning_rate": 4.151401846658217e-06, "loss": 0.3864, "num_tokens": 5845363047.0, "step": 13104 }, { "epoch": 4.8006229102734395, "grad_norm": 0.16079948119740667, "learning_rate": 4.150849554811368e-06, "loss": 0.385, "num_tokens": 5846055174.0, "step": 13105 }, { "epoch": 4.800989328081345, "grad_norm": 0.1363873141960533, "learning_rate": 4.150298267907273e-06, "loss": 0.3472, "num_tokens": 5846949814.0, "step": 13106 }, { "epoch": 4.80135574588925, "grad_norm": 0.15376335803015037, "learning_rate": 4.1497479859769705e-06, "loss": 0.3667, "num_tokens": 5847706399.0, "step": 13107 }, { "epoch": 4.801722163697155, "grad_norm": 0.14587944124795424, "learning_rate": 4.149198709051443e-06, "loss": 0.3067, "num_tokens": 5848419928.0, "step": 13108 }, { "epoch": 4.802088581505061, "grad_norm": 0.14429758699107828, "learning_rate": 4.148650437161616e-06, "loss": 0.3754, "num_tokens": 5849279059.0, "step": 13109 }, { "epoch": 4.802454999312967, "grad_norm": 0.14019614698013644, "learning_rate": 4.14810317033836e-06, "loss": 0.3913, "num_tokens": 5850138857.0, "step": 13110 }, { "epoch": 4.802821417120872, "grad_norm": 0.16381988235797879, "learning_rate": 4.147556908612483e-06, "loss": 0.3872, "num_tokens": 5850824938.0, "step": 13111 }, { "epoch": 4.803187834928778, "grad_norm": 0.14721386629121083, "learning_rate": 4.1470116520147415e-06, "loss": 0.3942, "num_tokens": 5851696541.0, "step": 13112 }, { "epoch": 4.803554252736683, "grad_norm": 0.14757621264108772, "learning_rate": 4.146467400575837e-06, "loss": 0.3435, "num_tokens": 5852427366.0, "step": 13113 }, { "epoch": 4.8039206705445885, "grad_norm": 0.16396506499439256, "learning_rate": 4.145924154326412e-06, "loss": 0.3776, "num_tokens": 5853162731.0, "step": 13114 }, { "epoch": 4.804287088352494, "grad_norm": 0.15653856126035243, "learning_rate": 4.145381913297051e-06, "loss": 0.3634, "num_tokens": 5853866960.0, "step": 13115 }, { "epoch": 4.804653506160399, "grad_norm": 0.14503490255842516, "learning_rate": 4.1448406775182814e-06, "loss": 0.3431, "num_tokens": 5854635234.0, "step": 13116 }, { "epoch": 4.805019923968305, "grad_norm": 0.14566775689220718, "learning_rate": 4.144300447020579e-06, "loss": 0.3817, "num_tokens": 5855442799.0, "step": 13117 }, { "epoch": 4.80538634177621, "grad_norm": 0.1599377575966495, "learning_rate": 4.143761221834359e-06, "loss": 0.3547, "num_tokens": 5856121491.0, "step": 13118 }, { "epoch": 4.805752759584116, "grad_norm": 0.16103239210121842, "learning_rate": 4.14322300198998e-06, "loss": 0.3677, "num_tokens": 5856749956.0, "step": 13119 }, { "epoch": 4.806119177392021, "grad_norm": 0.16066999916491095, "learning_rate": 4.1426857875177476e-06, "loss": 0.3929, "num_tokens": 5857547720.0, "step": 13120 }, { "epoch": 4.806485595199927, "grad_norm": 0.1500629900164243, "learning_rate": 4.142149578447903e-06, "loss": 0.3674, "num_tokens": 5858315940.0, "step": 13121 }, { "epoch": 4.806852013007832, "grad_norm": 0.1423772968897778, "learning_rate": 4.141614374810641e-06, "loss": 0.3379, "num_tokens": 5859177921.0, "step": 13122 }, { "epoch": 4.8072184308157375, "grad_norm": 0.13690166251456656, "learning_rate": 4.141080176636089e-06, "loss": 0.3592, "num_tokens": 5859948894.0, "step": 13123 }, { "epoch": 4.807584848623643, "grad_norm": 0.1486823668393177, "learning_rate": 4.140546983954331e-06, "loss": 0.3752, "num_tokens": 5860726144.0, "step": 13124 }, { "epoch": 4.807951266431549, "grad_norm": 0.14463130222571222, "learning_rate": 4.140014796795379e-06, "loss": 0.3608, "num_tokens": 5861557264.0, "step": 13125 }, { "epoch": 4.808317684239454, "grad_norm": 0.15125189428958064, "learning_rate": 4.139483615189201e-06, "loss": 0.3858, "num_tokens": 5862233772.0, "step": 13126 }, { "epoch": 4.808684102047359, "grad_norm": 0.16484318767387732, "learning_rate": 4.138953439165704e-06, "loss": 0.3534, "num_tokens": 5862912765.0, "step": 13127 }, { "epoch": 4.809050519855265, "grad_norm": 0.14741008445121012, "learning_rate": 4.138424268754734e-06, "loss": 0.3778, "num_tokens": 5863770646.0, "step": 13128 }, { "epoch": 4.809416937663171, "grad_norm": 0.16006155106205, "learning_rate": 4.137896103986087e-06, "loss": 0.3896, "num_tokens": 5864462512.0, "step": 13129 }, { "epoch": 4.809783355471076, "grad_norm": 0.14969920558818983, "learning_rate": 4.1373689448895e-06, "loss": 0.3501, "num_tokens": 5865223090.0, "step": 13130 }, { "epoch": 4.810149773278981, "grad_norm": 0.13346111232123267, "learning_rate": 4.136842791494653e-06, "loss": 0.3792, "num_tokens": 5866189182.0, "step": 13131 }, { "epoch": 4.8105161910868866, "grad_norm": 0.15473351008172073, "learning_rate": 4.13631764383117e-06, "loss": 0.3482, "num_tokens": 5866921301.0, "step": 13132 }, { "epoch": 4.810882608894792, "grad_norm": 0.1538007299783136, "learning_rate": 4.1357935019286135e-06, "loss": 0.3918, "num_tokens": 5867647204.0, "step": 13133 }, { "epoch": 4.811249026702698, "grad_norm": 0.15124514803181643, "learning_rate": 4.135270365816498e-06, "loss": 0.3761, "num_tokens": 5868486627.0, "step": 13134 }, { "epoch": 4.811615444510603, "grad_norm": 0.13711747002887334, "learning_rate": 4.134748235524279e-06, "loss": 0.3597, "num_tokens": 5869306345.0, "step": 13135 }, { "epoch": 4.811981862318508, "grad_norm": 0.15239772314161745, "learning_rate": 4.134227111081348e-06, "loss": 0.3821, "num_tokens": 5870031156.0, "step": 13136 }, { "epoch": 4.812348280126415, "grad_norm": 0.15205175592864076, "learning_rate": 4.133706992517048e-06, "loss": 0.3749, "num_tokens": 5870784558.0, "step": 13137 }, { "epoch": 4.81271469793432, "grad_norm": 0.1458003083623299, "learning_rate": 4.133187879860664e-06, "loss": 0.35, "num_tokens": 5871592285.0, "step": 13138 }, { "epoch": 4.813081115742225, "grad_norm": 0.15021882658481328, "learning_rate": 4.13266977314142e-06, "loss": 0.3863, "num_tokens": 5872368815.0, "step": 13139 }, { "epoch": 4.81344753355013, "grad_norm": 0.14477413339149056, "learning_rate": 4.132152672388489e-06, "loss": 0.3826, "num_tokens": 5873146825.0, "step": 13140 }, { "epoch": 4.813813951358036, "grad_norm": 0.15539138630356747, "learning_rate": 4.131636577630985e-06, "loss": 0.3879, "num_tokens": 5873874990.0, "step": 13141 }, { "epoch": 4.814180369165942, "grad_norm": 0.148530655933549, "learning_rate": 4.131121488897965e-06, "loss": 0.3547, "num_tokens": 5874566198.0, "step": 13142 }, { "epoch": 4.814546786973847, "grad_norm": 0.1474500730777896, "learning_rate": 4.130607406218428e-06, "loss": 0.3846, "num_tokens": 5875380989.0, "step": 13143 }, { "epoch": 4.814913204781752, "grad_norm": 0.14598208294896192, "learning_rate": 4.130094329621317e-06, "loss": 0.3412, "num_tokens": 5876126103.0, "step": 13144 }, { "epoch": 4.8152796225896575, "grad_norm": 0.1541749911221196, "learning_rate": 4.1295822591355206e-06, "loss": 0.3404, "num_tokens": 5876853666.0, "step": 13145 }, { "epoch": 4.815646040397564, "grad_norm": 0.14913755389380096, "learning_rate": 4.129071194789872e-06, "loss": 0.3741, "num_tokens": 5877651812.0, "step": 13146 }, { "epoch": 4.816012458205469, "grad_norm": 0.1517503782381004, "learning_rate": 4.128561136613141e-06, "loss": 0.3862, "num_tokens": 5878412677.0, "step": 13147 }, { "epoch": 4.816378876013374, "grad_norm": 0.16294742385557948, "learning_rate": 4.128052084634048e-06, "loss": 0.3912, "num_tokens": 5879112722.0, "step": 13148 }, { "epoch": 4.816745293821279, "grad_norm": 0.15292911883221127, "learning_rate": 4.127544038881252e-06, "loss": 0.3771, "num_tokens": 5879816287.0, "step": 13149 }, { "epoch": 4.8171117116291855, "grad_norm": 0.14143136572406445, "learning_rate": 4.127036999383357e-06, "loss": 0.3635, "num_tokens": 5880650057.0, "step": 13150 }, { "epoch": 4.817478129437091, "grad_norm": 0.15073277009877517, "learning_rate": 4.126530966168909e-06, "loss": 0.331, "num_tokens": 5881389530.0, "step": 13151 }, { "epoch": 4.817844547244996, "grad_norm": 0.15917939326085262, "learning_rate": 4.126025939266401e-06, "loss": 0.3835, "num_tokens": 5882119542.0, "step": 13152 }, { "epoch": 4.818210965052901, "grad_norm": 0.15110892637361878, "learning_rate": 4.125521918704269e-06, "loss": 0.3843, "num_tokens": 5882924817.0, "step": 13153 }, { "epoch": 4.818577382860807, "grad_norm": 0.14012403817127342, "learning_rate": 4.125018904510885e-06, "loss": 0.3517, "num_tokens": 5883739511.0, "step": 13154 }, { "epoch": 4.818943800668713, "grad_norm": 0.13877962611638867, "learning_rate": 4.124516896714574e-06, "loss": 0.3826, "num_tokens": 5884634929.0, "step": 13155 }, { "epoch": 4.819310218476618, "grad_norm": 0.1318584139383242, "learning_rate": 4.124015895343597e-06, "loss": 0.3824, "num_tokens": 5885521133.0, "step": 13156 }, { "epoch": 4.819676636284523, "grad_norm": 0.15222017383798883, "learning_rate": 4.1235159004261635e-06, "loss": 0.3718, "num_tokens": 5886245451.0, "step": 13157 }, { "epoch": 4.820043054092429, "grad_norm": 0.1525022850090386, "learning_rate": 4.123016911990423e-06, "loss": 0.3379, "num_tokens": 5886895958.0, "step": 13158 }, { "epoch": 4.8204094719003345, "grad_norm": 0.14929158077519544, "learning_rate": 4.122518930064472e-06, "loss": 0.3686, "num_tokens": 5887734016.0, "step": 13159 }, { "epoch": 4.82077588970824, "grad_norm": 0.1441135319472602, "learning_rate": 4.122021954676343e-06, "loss": 0.362, "num_tokens": 5888445825.0, "step": 13160 }, { "epoch": 4.821142307516145, "grad_norm": 0.1489602335330792, "learning_rate": 4.121525985854022e-06, "loss": 0.3744, "num_tokens": 5889296001.0, "step": 13161 }, { "epoch": 4.821508725324051, "grad_norm": 0.14847739549195493, "learning_rate": 4.121031023625428e-06, "loss": 0.3709, "num_tokens": 5890068927.0, "step": 13162 }, { "epoch": 4.821875143131956, "grad_norm": 0.15783563833488878, "learning_rate": 4.120537068018433e-06, "loss": 0.3389, "num_tokens": 5890738414.0, "step": 13163 }, { "epoch": 4.822241560939862, "grad_norm": 0.14742400155436386, "learning_rate": 4.120044119060846e-06, "loss": 0.3507, "num_tokens": 5891556588.0, "step": 13164 }, { "epoch": 4.822607978747767, "grad_norm": 0.14652485066566484, "learning_rate": 4.11955217678042e-06, "loss": 0.3815, "num_tokens": 5892326070.0, "step": 13165 }, { "epoch": 4.822974396555672, "grad_norm": 0.1468678348519557, "learning_rate": 4.1190612412048545e-06, "loss": 0.3779, "num_tokens": 5893150263.0, "step": 13166 }, { "epoch": 4.823340814363578, "grad_norm": 0.14916164893455097, "learning_rate": 4.118571312361787e-06, "loss": 0.3458, "num_tokens": 5893861419.0, "step": 13167 }, { "epoch": 4.823707232171484, "grad_norm": 0.155288303452193, "learning_rate": 4.118082390278804e-06, "loss": 0.3745, "num_tokens": 5894580765.0, "step": 13168 }, { "epoch": 4.824073649979389, "grad_norm": 0.14785585566878182, "learning_rate": 4.117594474983431e-06, "loss": 0.3565, "num_tokens": 5895370307.0, "step": 13169 }, { "epoch": 4.824440067787295, "grad_norm": 0.14535350091124158, "learning_rate": 4.11710756650314e-06, "loss": 0.3751, "num_tokens": 5896122350.0, "step": 13170 }, { "epoch": 4.8248064855952, "grad_norm": 0.1417794156219288, "learning_rate": 4.1166216648653444e-06, "loss": 0.3477, "num_tokens": 5896926667.0, "step": 13171 }, { "epoch": 4.8251729034031055, "grad_norm": 0.141521748943844, "learning_rate": 4.1161367700974025e-06, "loss": 0.3788, "num_tokens": 5897813873.0, "step": 13172 }, { "epoch": 4.825539321211011, "grad_norm": 0.15302829485187303, "learning_rate": 4.115652882226612e-06, "loss": 0.3867, "num_tokens": 5898548267.0, "step": 13173 }, { "epoch": 4.825905739018916, "grad_norm": 0.15184824787651172, "learning_rate": 4.11517000128022e-06, "loss": 0.3912, "num_tokens": 5899268398.0, "step": 13174 }, { "epoch": 4.826272156826822, "grad_norm": 0.16364610499748947, "learning_rate": 4.114688127285413e-06, "loss": 0.4019, "num_tokens": 5900004841.0, "step": 13175 }, { "epoch": 4.826638574634727, "grad_norm": 0.14723639411046674, "learning_rate": 4.114207260269319e-06, "loss": 0.3683, "num_tokens": 5900900056.0, "step": 13176 }, { "epoch": 4.827004992442633, "grad_norm": 0.14040486477540554, "learning_rate": 4.1137274002590154e-06, "loss": 0.3929, "num_tokens": 5901703876.0, "step": 13177 }, { "epoch": 4.827371410250538, "grad_norm": 0.14765383405147262, "learning_rate": 4.1132485472815155e-06, "loss": 0.3696, "num_tokens": 5902506637.0, "step": 13178 }, { "epoch": 4.827737828058444, "grad_norm": 0.1681056753557098, "learning_rate": 4.112770701363783e-06, "loss": 0.3841, "num_tokens": 5903233005.0, "step": 13179 }, { "epoch": 4.828104245866349, "grad_norm": 0.14294840412627882, "learning_rate": 4.112293862532721e-06, "loss": 0.3819, "num_tokens": 5903963249.0, "step": 13180 }, { "epoch": 4.8284706636742545, "grad_norm": 0.14804861182471674, "learning_rate": 4.111818030815172e-06, "loss": 0.3456, "num_tokens": 5904773074.0, "step": 13181 }, { "epoch": 4.82883708148216, "grad_norm": 0.1438175881519513, "learning_rate": 4.111343206237933e-06, "loss": 0.3527, "num_tokens": 5905553822.0, "step": 13182 }, { "epoch": 4.829203499290066, "grad_norm": 0.14593611891133784, "learning_rate": 4.110869388827733e-06, "loss": 0.3744, "num_tokens": 5906316454.0, "step": 13183 }, { "epoch": 4.829569917097971, "grad_norm": 0.13956058803219046, "learning_rate": 4.11039657861125e-06, "loss": 0.3476, "num_tokens": 5907097444.0, "step": 13184 }, { "epoch": 4.829936334905876, "grad_norm": 0.14743091516887047, "learning_rate": 4.109924775615107e-06, "loss": 0.3586, "num_tokens": 5907871544.0, "step": 13185 }, { "epoch": 4.830302752713782, "grad_norm": 0.15129404395451973, "learning_rate": 4.109453979865863e-06, "loss": 0.3676, "num_tokens": 5908598694.0, "step": 13186 }, { "epoch": 4.830669170521688, "grad_norm": 0.1572129722281192, "learning_rate": 4.108984191390028e-06, "loss": 0.3909, "num_tokens": 5909299826.0, "step": 13187 }, { "epoch": 4.831035588329593, "grad_norm": 0.14503765123336293, "learning_rate": 4.108515410214049e-06, "loss": 0.3528, "num_tokens": 5910117903.0, "step": 13188 }, { "epoch": 4.831402006137498, "grad_norm": 0.1397290150802042, "learning_rate": 4.10804763636432e-06, "loss": 0.3517, "num_tokens": 5910949943.0, "step": 13189 }, { "epoch": 4.8317684239454035, "grad_norm": 0.15389488478648328, "learning_rate": 4.107580869867181e-06, "loss": 0.3755, "num_tokens": 5911683492.0, "step": 13190 }, { "epoch": 4.832134841753309, "grad_norm": 0.14184748329511485, "learning_rate": 4.107115110748908e-06, "loss": 0.3874, "num_tokens": 5912481708.0, "step": 13191 }, { "epoch": 4.832501259561215, "grad_norm": 0.16578723818033747, "learning_rate": 4.106650359035729e-06, "loss": 0.4093, "num_tokens": 5913131402.0, "step": 13192 }, { "epoch": 4.83286767736912, "grad_norm": 0.15454285881276236, "learning_rate": 4.106186614753803e-06, "loss": 0.3688, "num_tokens": 5913933782.0, "step": 13193 }, { "epoch": 4.833234095177025, "grad_norm": 0.12658991526251823, "learning_rate": 4.105723877929246e-06, "loss": 0.3848, "num_tokens": 5914752497.0, "step": 13194 }, { "epoch": 4.8336005129849315, "grad_norm": 0.15120279433107572, "learning_rate": 4.105262148588108e-06, "loss": 0.3628, "num_tokens": 5915568969.0, "step": 13195 }, { "epoch": 4.833966930792837, "grad_norm": 0.15048506544000317, "learning_rate": 4.104801426756387e-06, "loss": 0.3508, "num_tokens": 5916305043.0, "step": 13196 }, { "epoch": 4.834333348600742, "grad_norm": 0.14691599727813204, "learning_rate": 4.1043417124600206e-06, "loss": 0.4058, "num_tokens": 5917164328.0, "step": 13197 }, { "epoch": 4.834699766408647, "grad_norm": 0.15656722692192163, "learning_rate": 4.103883005724892e-06, "loss": 0.3631, "num_tokens": 5917831232.0, "step": 13198 }, { "epoch": 4.8350661842165525, "grad_norm": 0.16005636295237405, "learning_rate": 4.103425306576831e-06, "loss": 0.3668, "num_tokens": 5918502485.0, "step": 13199 }, { "epoch": 4.835432602024459, "grad_norm": 0.15944233113770215, "learning_rate": 4.102968615041602e-06, "loss": 0.3761, "num_tokens": 5919201418.0, "step": 13200 }, { "epoch": 4.835799019832364, "grad_norm": 0.15802193248386365, "learning_rate": 4.102512931144919e-06, "loss": 0.3679, "num_tokens": 5919912178.0, "step": 13201 }, { "epoch": 4.836165437640269, "grad_norm": 0.1519665037998956, "learning_rate": 4.10205825491244e-06, "loss": 0.368, "num_tokens": 5920691417.0, "step": 13202 }, { "epoch": 4.836531855448174, "grad_norm": 0.16402332173046233, "learning_rate": 4.101604586369763e-06, "loss": 0.3913, "num_tokens": 5921320034.0, "step": 13203 }, { "epoch": 4.836898273256081, "grad_norm": 0.15003518567037324, "learning_rate": 4.1011519255424295e-06, "loss": 0.3769, "num_tokens": 5922180424.0, "step": 13204 }, { "epoch": 4.837264691063986, "grad_norm": 0.14905424602533143, "learning_rate": 4.100700272455926e-06, "loss": 0.3806, "num_tokens": 5922834479.0, "step": 13205 }, { "epoch": 4.837631108871891, "grad_norm": 0.15329948966423732, "learning_rate": 4.100249627135681e-06, "loss": 0.3865, "num_tokens": 5923657276.0, "step": 13206 }, { "epoch": 4.837997526679796, "grad_norm": 0.140384180385219, "learning_rate": 4.099799989607066e-06, "loss": 0.3807, "num_tokens": 5924434032.0, "step": 13207 }, { "epoch": 4.8383639444877025, "grad_norm": 0.1644039376399244, "learning_rate": 4.099351359895401e-06, "loss": 0.3679, "num_tokens": 5925068350.0, "step": 13208 }, { "epoch": 4.838730362295608, "grad_norm": 0.15052110641863103, "learning_rate": 4.0989037380259375e-06, "loss": 0.3664, "num_tokens": 5925841676.0, "step": 13209 }, { "epoch": 4.839096780103513, "grad_norm": 0.14636031829781873, "learning_rate": 4.098457124023883e-06, "loss": 0.3651, "num_tokens": 5926618697.0, "step": 13210 }, { "epoch": 4.839463197911418, "grad_norm": 0.1597745260789582, "learning_rate": 4.098011517914381e-06, "loss": 0.3779, "num_tokens": 5927391956.0, "step": 13211 }, { "epoch": 4.839829615719324, "grad_norm": 0.14134314234592416, "learning_rate": 4.097566919722521e-06, "loss": 0.3494, "num_tokens": 5928237327.0, "step": 13212 }, { "epoch": 4.84019603352723, "grad_norm": 0.1457116950377216, "learning_rate": 4.0971233294733325e-06, "loss": 0.401, "num_tokens": 5928964599.0, "step": 13213 }, { "epoch": 4.840562451335135, "grad_norm": 0.14482706189954653, "learning_rate": 4.096680747191793e-06, "loss": 0.3567, "num_tokens": 5929746816.0, "step": 13214 }, { "epoch": 4.84092886914304, "grad_norm": 0.16131929773194953, "learning_rate": 4.096239172902819e-06, "loss": 0.3716, "num_tokens": 5930444060.0, "step": 13215 }, { "epoch": 4.841295286950945, "grad_norm": 0.13916138577080406, "learning_rate": 4.0957986066312725e-06, "loss": 0.3701, "num_tokens": 5931289060.0, "step": 13216 }, { "epoch": 4.8416617047588515, "grad_norm": 0.1459962911027388, "learning_rate": 4.095359048401957e-06, "loss": 0.3782, "num_tokens": 5932058810.0, "step": 13217 }, { "epoch": 4.842028122566757, "grad_norm": 0.14981897315610118, "learning_rate": 4.094920498239625e-06, "loss": 0.348, "num_tokens": 5932771546.0, "step": 13218 }, { "epoch": 4.842394540374662, "grad_norm": 0.14561133268345922, "learning_rate": 4.0944829561689635e-06, "loss": 0.364, "num_tokens": 5933454958.0, "step": 13219 }, { "epoch": 4.842760958182568, "grad_norm": 0.15523786020909758, "learning_rate": 4.094046422214607e-06, "loss": 0.395, "num_tokens": 5934300637.0, "step": 13220 }, { "epoch": 4.843127375990473, "grad_norm": 0.16254266627855216, "learning_rate": 4.093610896401136e-06, "loss": 0.3632, "num_tokens": 5934958557.0, "step": 13221 }, { "epoch": 4.843493793798379, "grad_norm": 0.14048441107225376, "learning_rate": 4.09317637875307e-06, "loss": 0.3482, "num_tokens": 5935805926.0, "step": 13222 }, { "epoch": 4.843860211606284, "grad_norm": 0.14982108553862686, "learning_rate": 4.09274286929487e-06, "loss": 0.3878, "num_tokens": 5936529828.0, "step": 13223 }, { "epoch": 4.844226629414189, "grad_norm": 0.14565518673244382, "learning_rate": 4.09231036805095e-06, "loss": 0.3756, "num_tokens": 5937320414.0, "step": 13224 }, { "epoch": 4.844593047222095, "grad_norm": 0.15549306705232516, "learning_rate": 4.091878875045655e-06, "loss": 0.3701, "num_tokens": 5938105661.0, "step": 13225 }, { "epoch": 4.8449594650300005, "grad_norm": 0.1497160407467582, "learning_rate": 4.091448390303282e-06, "loss": 0.3644, "num_tokens": 5938798506.0, "step": 13226 }, { "epoch": 4.845325882837906, "grad_norm": 0.1548104777505068, "learning_rate": 4.091018913848069e-06, "loss": 0.3765, "num_tokens": 5939587767.0, "step": 13227 }, { "epoch": 4.845692300645811, "grad_norm": 0.15223841217756415, "learning_rate": 4.090590445704192e-06, "loss": 0.3836, "num_tokens": 5940373915.0, "step": 13228 }, { "epoch": 4.846058718453717, "grad_norm": 0.14699777051488605, "learning_rate": 4.090162985895778e-06, "loss": 0.3532, "num_tokens": 5941069345.0, "step": 13229 }, { "epoch": 4.846425136261622, "grad_norm": 0.1566497988409513, "learning_rate": 4.089736534446893e-06, "loss": 0.3719, "num_tokens": 5941857814.0, "step": 13230 }, { "epoch": 4.846791554069528, "grad_norm": 0.1414537786364773, "learning_rate": 4.089311091381547e-06, "loss": 0.3923, "num_tokens": 5942706796.0, "step": 13231 }, { "epoch": 4.847157971877433, "grad_norm": 0.15885528741497074, "learning_rate": 4.0888866567236954e-06, "loss": 0.4102, "num_tokens": 5943412417.0, "step": 13232 }, { "epoch": 4.847524389685339, "grad_norm": 0.14081540278552784, "learning_rate": 4.08846323049723e-06, "loss": 0.3842, "num_tokens": 5944353019.0, "step": 13233 }, { "epoch": 4.847890807493244, "grad_norm": 0.14159626918960969, "learning_rate": 4.088040812725994e-06, "loss": 0.4025, "num_tokens": 5945157431.0, "step": 13234 }, { "epoch": 4.8482572253011496, "grad_norm": 0.14840617861016978, "learning_rate": 4.087619403433772e-06, "loss": 0.3657, "num_tokens": 5945891458.0, "step": 13235 }, { "epoch": 4.848623643109055, "grad_norm": 0.14584735289929562, "learning_rate": 4.087199002644285e-06, "loss": 0.3717, "num_tokens": 5946675782.0, "step": 13236 }, { "epoch": 4.848990060916961, "grad_norm": 0.1456985426626925, "learning_rate": 4.086779610381206e-06, "loss": 0.3643, "num_tokens": 5947486996.0, "step": 13237 }, { "epoch": 4.849356478724866, "grad_norm": 0.14988420646045694, "learning_rate": 4.086361226668148e-06, "loss": 0.3624, "num_tokens": 5948198243.0, "step": 13238 }, { "epoch": 4.849722896532771, "grad_norm": 0.16557974722593627, "learning_rate": 4.085943851528664e-06, "loss": 0.3772, "num_tokens": 5948824422.0, "step": 13239 }, { "epoch": 4.850089314340677, "grad_norm": 0.13682907849811363, "learning_rate": 4.085527484986256e-06, "loss": 0.3527, "num_tokens": 5949806254.0, "step": 13240 }, { "epoch": 4.850455732148582, "grad_norm": 0.1434226892255403, "learning_rate": 4.085112127064365e-06, "loss": 0.3725, "num_tokens": 5950612689.0, "step": 13241 }, { "epoch": 4.850822149956488, "grad_norm": 0.14213501688580044, "learning_rate": 4.084697777786377e-06, "loss": 0.3651, "num_tokens": 5951437049.0, "step": 13242 }, { "epoch": 4.851188567764393, "grad_norm": 0.15310212401147527, "learning_rate": 4.08428443717562e-06, "loss": 0.3733, "num_tokens": 5952147642.0, "step": 13243 }, { "epoch": 4.851554985572299, "grad_norm": 0.1491372329366972, "learning_rate": 4.083872105255367e-06, "loss": 0.3566, "num_tokens": 5952908666.0, "step": 13244 }, { "epoch": 4.851921403380205, "grad_norm": 0.15307206369915105, "learning_rate": 4.083460782048829e-06, "loss": 0.3439, "num_tokens": 5953588758.0, "step": 13245 }, { "epoch": 4.85228782118811, "grad_norm": 0.15736358909721954, "learning_rate": 4.0830504675791706e-06, "loss": 0.3969, "num_tokens": 5954282205.0, "step": 13246 }, { "epoch": 4.852654238996015, "grad_norm": 0.15990179000400373, "learning_rate": 4.082641161869489e-06, "loss": 0.3925, "num_tokens": 5954949931.0, "step": 13247 }, { "epoch": 4.8530206568039205, "grad_norm": 0.1547980883843572, "learning_rate": 4.082232864942831e-06, "loss": 0.3449, "num_tokens": 5955632224.0, "step": 13248 }, { "epoch": 4.853387074611826, "grad_norm": 0.15687813427454167, "learning_rate": 4.081825576822181e-06, "loss": 0.3923, "num_tokens": 5956398486.0, "step": 13249 }, { "epoch": 4.853753492419732, "grad_norm": 0.1501556183059388, "learning_rate": 4.081419297530474e-06, "loss": 0.3497, "num_tokens": 5957196962.0, "step": 13250 }, { "epoch": 4.854119910227637, "grad_norm": 0.13805803162619948, "learning_rate": 4.081014027090585e-06, "loss": 0.3611, "num_tokens": 5957929318.0, "step": 13251 }, { "epoch": 4.854486328035542, "grad_norm": 0.14297391364560283, "learning_rate": 4.08060976552533e-06, "loss": 0.3649, "num_tokens": 5958758332.0, "step": 13252 }, { "epoch": 4.854852745843448, "grad_norm": 0.1551686991186381, "learning_rate": 4.080206512857468e-06, "loss": 0.3564, "num_tokens": 5959460255.0, "step": 13253 }, { "epoch": 4.855219163651354, "grad_norm": 0.14942376939698715, "learning_rate": 4.079804269109703e-06, "loss": 0.3579, "num_tokens": 5960338035.0, "step": 13254 }, { "epoch": 4.855585581459259, "grad_norm": 0.14000589978613567, "learning_rate": 4.079403034304686e-06, "loss": 0.3535, "num_tokens": 5961169567.0, "step": 13255 }, { "epoch": 4.855951999267164, "grad_norm": 0.14534838924835092, "learning_rate": 4.079002808465003e-06, "loss": 0.3542, "num_tokens": 5961907630.0, "step": 13256 }, { "epoch": 4.8563184170750695, "grad_norm": 0.1437710656022081, "learning_rate": 4.07860359161319e-06, "loss": 0.3662, "num_tokens": 5962777480.0, "step": 13257 }, { "epoch": 4.856684834882976, "grad_norm": 0.15291405173077946, "learning_rate": 4.078205383771724e-06, "loss": 0.3915, "num_tokens": 5963602418.0, "step": 13258 }, { "epoch": 4.857051252690881, "grad_norm": 0.1454000592142913, "learning_rate": 4.077808184963024e-06, "loss": 0.3495, "num_tokens": 5964382943.0, "step": 13259 }, { "epoch": 4.857417670498786, "grad_norm": 0.1373174967131451, "learning_rate": 4.0774119952094515e-06, "loss": 0.3931, "num_tokens": 5965173412.0, "step": 13260 }, { "epoch": 4.857784088306691, "grad_norm": 0.15644784659242086, "learning_rate": 4.077016814533315e-06, "loss": 0.3458, "num_tokens": 5965869214.0, "step": 13261 }, { "epoch": 4.8581505061145975, "grad_norm": 0.1597066629555436, "learning_rate": 4.0766226429568625e-06, "loss": 0.3559, "num_tokens": 5966611573.0, "step": 13262 }, { "epoch": 4.858516923922503, "grad_norm": 0.1464603266884476, "learning_rate": 4.076229480502289e-06, "loss": 0.3587, "num_tokens": 5967386575.0, "step": 13263 }, { "epoch": 4.858883341730408, "grad_norm": 0.15432977151848393, "learning_rate": 4.075837327191727e-06, "loss": 0.3801, "num_tokens": 5968162233.0, "step": 13264 }, { "epoch": 4.859249759538313, "grad_norm": 0.13593781028674073, "learning_rate": 4.075446183047259e-06, "loss": 0.3998, "num_tokens": 5968973895.0, "step": 13265 }, { "epoch": 4.8596161773462185, "grad_norm": 0.15356885169506446, "learning_rate": 4.075056048090904e-06, "loss": 0.3783, "num_tokens": 5969789006.0, "step": 13266 }, { "epoch": 4.859982595154125, "grad_norm": 0.14346194609852345, "learning_rate": 4.074666922344629e-06, "loss": 0.3806, "num_tokens": 5970599879.0, "step": 13267 }, { "epoch": 4.86034901296203, "grad_norm": 0.1444898960074322, "learning_rate": 4.074278805830342e-06, "loss": 0.3603, "num_tokens": 5971340278.0, "step": 13268 }, { "epoch": 4.860715430769935, "grad_norm": 0.15110649374950977, "learning_rate": 4.0738916985698976e-06, "loss": 0.3405, "num_tokens": 5972113664.0, "step": 13269 }, { "epoch": 4.861081848577841, "grad_norm": 0.13928738037741092, "learning_rate": 4.073505600585086e-06, "loss": 0.3785, "num_tokens": 5972940941.0, "step": 13270 }, { "epoch": 4.861448266385747, "grad_norm": 0.14167590934216714, "learning_rate": 4.073120511897648e-06, "loss": 0.3608, "num_tokens": 5973744263.0, "step": 13271 }, { "epoch": 4.861814684193652, "grad_norm": 0.14777104438330632, "learning_rate": 4.072736432529264e-06, "loss": 0.3732, "num_tokens": 5974545933.0, "step": 13272 }, { "epoch": 4.862181102001557, "grad_norm": 0.15076929970507125, "learning_rate": 4.07235336250156e-06, "loss": 0.3726, "num_tokens": 5975298038.0, "step": 13273 }, { "epoch": 4.862547519809462, "grad_norm": 0.14950616270279646, "learning_rate": 4.071971301836101e-06, "loss": 0.3631, "num_tokens": 5976031122.0, "step": 13274 }, { "epoch": 4.8629139376173685, "grad_norm": 0.14905310191222093, "learning_rate": 4.071590250554399e-06, "loss": 0.3644, "num_tokens": 5976797152.0, "step": 13275 }, { "epoch": 4.863280355425274, "grad_norm": 0.14667725908316523, "learning_rate": 4.071210208677911e-06, "loss": 0.345, "num_tokens": 5977485829.0, "step": 13276 }, { "epoch": 4.863646773233179, "grad_norm": 0.16178516689513664, "learning_rate": 4.070831176228028e-06, "loss": 0.3489, "num_tokens": 5978193470.0, "step": 13277 }, { "epoch": 4.864013191041085, "grad_norm": 0.15892082945618496, "learning_rate": 4.070453153226093e-06, "loss": 0.3667, "num_tokens": 5978841073.0, "step": 13278 }, { "epoch": 4.86437960884899, "grad_norm": 0.15224238036191143, "learning_rate": 4.0700761396933915e-06, "loss": 0.3748, "num_tokens": 5979627475.0, "step": 13279 }, { "epoch": 4.864746026656896, "grad_norm": 0.15290425287879164, "learning_rate": 4.06970013565115e-06, "loss": 0.4055, "num_tokens": 5980455571.0, "step": 13280 }, { "epoch": 4.865112444464801, "grad_norm": 0.1456786611376691, "learning_rate": 4.069325141120534e-06, "loss": 0.3888, "num_tokens": 5981255681.0, "step": 13281 }, { "epoch": 4.865478862272706, "grad_norm": 0.15620406734816325, "learning_rate": 4.068951156122663e-06, "loss": 0.3424, "num_tokens": 5981895963.0, "step": 13282 }, { "epoch": 4.865845280080612, "grad_norm": 0.1427046699394924, "learning_rate": 4.068578180678586e-06, "loss": 0.3456, "num_tokens": 5982769513.0, "step": 13283 }, { "epoch": 4.8662116978885175, "grad_norm": 0.15758513703762345, "learning_rate": 4.068206214809307e-06, "loss": 0.3747, "num_tokens": 5983490585.0, "step": 13284 }, { "epoch": 4.866578115696423, "grad_norm": 0.15135668009271264, "learning_rate": 4.067835258535766e-06, "loss": 0.3792, "num_tokens": 5984283523.0, "step": 13285 }, { "epoch": 4.866944533504328, "grad_norm": 0.15327568767814292, "learning_rate": 4.067465311878851e-06, "loss": 0.3477, "num_tokens": 5985007973.0, "step": 13286 }, { "epoch": 4.867310951312234, "grad_norm": 0.15609116387314773, "learning_rate": 4.067096374859389e-06, "loss": 0.3498, "num_tokens": 5985820979.0, "step": 13287 }, { "epoch": 4.867677369120139, "grad_norm": 0.15447744585095477, "learning_rate": 4.066728447498153e-06, "loss": 0.367, "num_tokens": 5986522161.0, "step": 13288 }, { "epoch": 4.868043786928045, "grad_norm": 0.15784815829592008, "learning_rate": 4.066361529815856e-06, "loss": 0.3886, "num_tokens": 5987200768.0, "step": 13289 }, { "epoch": 4.86841020473595, "grad_norm": 0.1530199634114614, "learning_rate": 4.065995621833158e-06, "loss": 0.3915, "num_tokens": 5987984597.0, "step": 13290 }, { "epoch": 4.868776622543856, "grad_norm": 0.17166944631075606, "learning_rate": 4.065630723570661e-06, "loss": 0.3621, "num_tokens": 5988693194.0, "step": 13291 }, { "epoch": 4.869143040351761, "grad_norm": 0.14784334656414397, "learning_rate": 4.065266835048907e-06, "loss": 0.3804, "num_tokens": 5989536361.0, "step": 13292 }, { "epoch": 4.8695094581596665, "grad_norm": 0.1515228330326825, "learning_rate": 4.064903956288385e-06, "loss": 0.377, "num_tokens": 5990295218.0, "step": 13293 }, { "epoch": 4.869875875967572, "grad_norm": 0.14403586305976468, "learning_rate": 4.064542087309527e-06, "loss": 0.3677, "num_tokens": 5991094891.0, "step": 13294 }, { "epoch": 4.870242293775478, "grad_norm": 0.14437726239877807, "learning_rate": 4.064181228132705e-06, "loss": 0.3471, "num_tokens": 5991854193.0, "step": 13295 }, { "epoch": 4.870608711583383, "grad_norm": 0.15091696302815785, "learning_rate": 4.063821378778236e-06, "loss": 0.3837, "num_tokens": 5992653645.0, "step": 13296 }, { "epoch": 4.870975129391288, "grad_norm": 0.15185919409499574, "learning_rate": 4.063462539266382e-06, "loss": 0.353, "num_tokens": 5993343704.0, "step": 13297 }, { "epoch": 4.871341547199194, "grad_norm": 0.15259632724705105, "learning_rate": 4.063104709617344e-06, "loss": 0.3714, "num_tokens": 5994119051.0, "step": 13298 }, { "epoch": 4.871707965007099, "grad_norm": 0.16328390748083957, "learning_rate": 4.062747889851272e-06, "loss": 0.3846, "num_tokens": 5994823495.0, "step": 13299 }, { "epoch": 4.872074382815005, "grad_norm": 0.1448419034559548, "learning_rate": 4.062392079988253e-06, "loss": 0.3697, "num_tokens": 5995640223.0, "step": 13300 }, { "epoch": 4.87244080062291, "grad_norm": 0.15393625144241027, "learning_rate": 4.0620372800483215e-06, "loss": 0.3914, "num_tokens": 5996279550.0, "step": 13301 }, { "epoch": 4.8728072184308155, "grad_norm": 0.15258265086569847, "learning_rate": 4.061683490051451e-06, "loss": 0.3666, "num_tokens": 5997063734.0, "step": 13302 }, { "epoch": 4.873173636238722, "grad_norm": 0.15084654825353563, "learning_rate": 4.0613307100175634e-06, "loss": 0.3862, "num_tokens": 5997865466.0, "step": 13303 }, { "epoch": 4.873540054046627, "grad_norm": 0.154693477839853, "learning_rate": 4.060978939966518e-06, "loss": 0.3601, "num_tokens": 5998626000.0, "step": 13304 }, { "epoch": 4.873906471854532, "grad_norm": 0.14901426310175267, "learning_rate": 4.060628179918123e-06, "loss": 0.356, "num_tokens": 5999393119.0, "step": 13305 }, { "epoch": 4.874272889662437, "grad_norm": 0.15941085257912363, "learning_rate": 4.060278429892124e-06, "loss": 0.3619, "num_tokens": 6000023443.0, "step": 13306 }, { "epoch": 4.874639307470343, "grad_norm": 0.1503296356140383, "learning_rate": 4.059929689908215e-06, "loss": 0.3744, "num_tokens": 6000927317.0, "step": 13307 }, { "epoch": 4.875005725278249, "grad_norm": 0.16527256602504528, "learning_rate": 4.059581959986029e-06, "loss": 0.3681, "num_tokens": 6001558735.0, "step": 13308 }, { "epoch": 4.875372143086154, "grad_norm": 0.14856166691761075, "learning_rate": 4.059235240145147e-06, "loss": 0.3629, "num_tokens": 6002384475.0, "step": 13309 }, { "epoch": 4.875738560894059, "grad_norm": 0.257796708943628, "learning_rate": 4.058889530405086e-06, "loss": 0.3943, "num_tokens": 6003219643.0, "step": 13310 }, { "epoch": 4.876104978701965, "grad_norm": 0.15035223584533472, "learning_rate": 4.058544830785312e-06, "loss": 0.399, "num_tokens": 6004106076.0, "step": 13311 }, { "epoch": 4.876471396509871, "grad_norm": 0.14994741691179098, "learning_rate": 4.058201141305233e-06, "loss": 0.3772, "num_tokens": 6004902672.0, "step": 13312 }, { "epoch": 4.876837814317776, "grad_norm": 0.14421672448695275, "learning_rate": 4.0578584619841995e-06, "loss": 0.3703, "num_tokens": 6005792645.0, "step": 13313 }, { "epoch": 4.877204232125681, "grad_norm": 0.1379953140895969, "learning_rate": 4.057516792841502e-06, "loss": 0.401, "num_tokens": 6006650643.0, "step": 13314 }, { "epoch": 4.8775706499335865, "grad_norm": 0.14495258017762125, "learning_rate": 4.05717613389638e-06, "loss": 0.3769, "num_tokens": 6007463320.0, "step": 13315 }, { "epoch": 4.877937067741493, "grad_norm": 0.1478166441204221, "learning_rate": 4.056836485168014e-06, "loss": 0.3757, "num_tokens": 6008194448.0, "step": 13316 }, { "epoch": 4.878303485549398, "grad_norm": 0.1560121204564534, "learning_rate": 4.056497846675524e-06, "loss": 0.3944, "num_tokens": 6008901979.0, "step": 13317 }, { "epoch": 4.878669903357303, "grad_norm": 0.14237583614737392, "learning_rate": 4.056160218437978e-06, "loss": 0.3659, "num_tokens": 6009781502.0, "step": 13318 }, { "epoch": 4.879036321165208, "grad_norm": 0.14168374582021073, "learning_rate": 4.055823600474385e-06, "loss": 0.3519, "num_tokens": 6010569118.0, "step": 13319 }, { "epoch": 4.8794027389731145, "grad_norm": 0.14351128717829292, "learning_rate": 4.055487992803697e-06, "loss": 0.3833, "num_tokens": 6011428871.0, "step": 13320 }, { "epoch": 4.87976915678102, "grad_norm": 0.1559612059203042, "learning_rate": 4.055153395444809e-06, "loss": 0.3868, "num_tokens": 6012127567.0, "step": 13321 }, { "epoch": 4.880135574588925, "grad_norm": 0.14681844304107045, "learning_rate": 4.054819808416561e-06, "loss": 0.3815, "num_tokens": 6012956746.0, "step": 13322 }, { "epoch": 4.88050199239683, "grad_norm": 0.15082939002272008, "learning_rate": 4.0544872317377325e-06, "loss": 0.3812, "num_tokens": 6013729874.0, "step": 13323 }, { "epoch": 4.8808684102047355, "grad_norm": 0.14745493162387052, "learning_rate": 4.054155665427049e-06, "loss": 0.359, "num_tokens": 6014524699.0, "step": 13324 }, { "epoch": 4.881234828012642, "grad_norm": 0.1481892494249281, "learning_rate": 4.053825109503178e-06, "loss": 0.3879, "num_tokens": 6015432362.0, "step": 13325 }, { "epoch": 4.881601245820547, "grad_norm": 0.1336077093310332, "learning_rate": 4.053495563984732e-06, "loss": 0.3656, "num_tokens": 6016349182.0, "step": 13326 }, { "epoch": 4.881967663628452, "grad_norm": 0.14195697824053286, "learning_rate": 4.053167028890263e-06, "loss": 0.3896, "num_tokens": 6017085051.0, "step": 13327 }, { "epoch": 4.882334081436358, "grad_norm": 0.16821879355020042, "learning_rate": 4.052839504238269e-06, "loss": 0.3692, "num_tokens": 6017719159.0, "step": 13328 }, { "epoch": 4.8827004992442635, "grad_norm": 0.1480686349191599, "learning_rate": 4.052512990047193e-06, "loss": 0.3693, "num_tokens": 6018551484.0, "step": 13329 }, { "epoch": 4.883066917052169, "grad_norm": 0.14680579947088776, "learning_rate": 4.0521874863354145e-06, "loss": 0.3653, "num_tokens": 6019316037.0, "step": 13330 }, { "epoch": 4.883433334860074, "grad_norm": 0.14872447694709295, "learning_rate": 4.051862993121261e-06, "loss": 0.3663, "num_tokens": 6020111102.0, "step": 13331 }, { "epoch": 4.883799752667979, "grad_norm": 0.1458401636092696, "learning_rate": 4.051539510423004e-06, "loss": 0.3583, "num_tokens": 6020883224.0, "step": 13332 }, { "epoch": 4.884166170475885, "grad_norm": 0.1604806819491807, "learning_rate": 4.051217038258852e-06, "loss": 0.3773, "num_tokens": 6021562431.0, "step": 13333 }, { "epoch": 4.884532588283791, "grad_norm": 0.16436131077600225, "learning_rate": 4.050895576646964e-06, "loss": 0.3752, "num_tokens": 6022347262.0, "step": 13334 }, { "epoch": 4.884899006091696, "grad_norm": 0.15082417309136834, "learning_rate": 4.050575125605442e-06, "loss": 0.3581, "num_tokens": 6023060950.0, "step": 13335 }, { "epoch": 4.885265423899601, "grad_norm": 0.14918678121824142, "learning_rate": 4.050255685152321e-06, "loss": 0.3642, "num_tokens": 6023819931.0, "step": 13336 }, { "epoch": 4.885631841707507, "grad_norm": 0.15645912885965976, "learning_rate": 4.049937255305592e-06, "loss": 0.3556, "num_tokens": 6024654391.0, "step": 13337 }, { "epoch": 4.885998259515413, "grad_norm": 0.14575144076685292, "learning_rate": 4.049619836083179e-06, "loss": 0.3754, "num_tokens": 6025446923.0, "step": 13338 }, { "epoch": 4.886364677323318, "grad_norm": 0.14368767264887616, "learning_rate": 4.049303427502957e-06, "loss": 0.3652, "num_tokens": 6026310698.0, "step": 13339 }, { "epoch": 4.886731095131223, "grad_norm": 0.1505924264543169, "learning_rate": 4.048988029582737e-06, "loss": 0.3584, "num_tokens": 6027171772.0, "step": 13340 }, { "epoch": 4.887097512939129, "grad_norm": 0.14588196004175563, "learning_rate": 4.048673642340279e-06, "loss": 0.3357, "num_tokens": 6027852658.0, "step": 13341 }, { "epoch": 4.8874639307470344, "grad_norm": 0.14780426949117667, "learning_rate": 4.048360265793283e-06, "loss": 0.3653, "num_tokens": 6028601168.0, "step": 13342 }, { "epoch": 4.88783034855494, "grad_norm": 0.1452639129030332, "learning_rate": 4.048047899959394e-06, "loss": 0.3795, "num_tokens": 6029387959.0, "step": 13343 }, { "epoch": 4.888196766362845, "grad_norm": 0.15259775870034267, "learning_rate": 4.047736544856194e-06, "loss": 0.3814, "num_tokens": 6030176278.0, "step": 13344 }, { "epoch": 4.888563184170751, "grad_norm": 0.14851333868550443, "learning_rate": 4.04742620050122e-06, "loss": 0.3896, "num_tokens": 6030956190.0, "step": 13345 }, { "epoch": 4.888929601978656, "grad_norm": 0.14968276981935566, "learning_rate": 4.047116866911939e-06, "loss": 0.3489, "num_tokens": 6031739060.0, "step": 13346 }, { "epoch": 4.889296019786562, "grad_norm": 0.16820840602849466, "learning_rate": 4.04680854410577e-06, "loss": 0.3722, "num_tokens": 6032463194.0, "step": 13347 }, { "epoch": 4.889662437594467, "grad_norm": 0.16207916927840657, "learning_rate": 4.046501232100071e-06, "loss": 0.354, "num_tokens": 6033084844.0, "step": 13348 }, { "epoch": 4.890028855402372, "grad_norm": 0.14870027202448993, "learning_rate": 4.046194930912146e-06, "loss": 0.3555, "num_tokens": 6033893634.0, "step": 13349 }, { "epoch": 4.890395273210278, "grad_norm": 0.14609741601564627, "learning_rate": 4.0458896405592385e-06, "loss": 0.3561, "num_tokens": 6034608423.0, "step": 13350 }, { "epoch": 4.8907616910181835, "grad_norm": 0.1507372091856275, "learning_rate": 4.045585361058538e-06, "loss": 0.3716, "num_tokens": 6035453574.0, "step": 13351 }, { "epoch": 4.891128108826089, "grad_norm": 0.13372104780124186, "learning_rate": 4.0452820924271755e-06, "loss": 0.3791, "num_tokens": 6036296253.0, "step": 13352 }, { "epoch": 4.891494526633995, "grad_norm": 0.1505792738726043, "learning_rate": 4.044979834682226e-06, "loss": 0.4276, "num_tokens": 6037093553.0, "step": 13353 }, { "epoch": 4.8918609444419, "grad_norm": 0.1440935813435679, "learning_rate": 4.0446785878407055e-06, "loss": 0.3602, "num_tokens": 6037803675.0, "step": 13354 }, { "epoch": 4.892227362249805, "grad_norm": 0.15430404152124413, "learning_rate": 4.044378351919579e-06, "loss": 0.3675, "num_tokens": 6038566722.0, "step": 13355 }, { "epoch": 4.892593780057711, "grad_norm": 0.14943782989078103, "learning_rate": 4.044079126935747e-06, "loss": 0.3614, "num_tokens": 6039328778.0, "step": 13356 }, { "epoch": 4.892960197865616, "grad_norm": 0.14756380004996236, "learning_rate": 4.043780912906056e-06, "loss": 0.3864, "num_tokens": 6040151801.0, "step": 13357 }, { "epoch": 4.893326615673522, "grad_norm": 0.14517895372462714, "learning_rate": 4.043483709847299e-06, "loss": 0.3641, "num_tokens": 6040877153.0, "step": 13358 }, { "epoch": 4.893693033481427, "grad_norm": 0.15611116447276988, "learning_rate": 4.043187517776207e-06, "loss": 0.3897, "num_tokens": 6041638345.0, "step": 13359 }, { "epoch": 4.8940594512893325, "grad_norm": 0.1461411125727327, "learning_rate": 4.042892336709456e-06, "loss": 0.3677, "num_tokens": 6042392999.0, "step": 13360 }, { "epoch": 4.894425869097238, "grad_norm": 0.13321070195598245, "learning_rate": 4.042598166663665e-06, "loss": 0.3562, "num_tokens": 6043199448.0, "step": 13361 }, { "epoch": 4.894792286905144, "grad_norm": 0.15594560754796175, "learning_rate": 4.042305007655398e-06, "loss": 0.3415, "num_tokens": 6043910726.0, "step": 13362 }, { "epoch": 4.895158704713049, "grad_norm": 0.14979874965414977, "learning_rate": 4.042012859701162e-06, "loss": 0.3633, "num_tokens": 6044760088.0, "step": 13363 }, { "epoch": 4.895525122520954, "grad_norm": 0.13825096303476964, "learning_rate": 4.041721722817401e-06, "loss": 0.367, "num_tokens": 6045540483.0, "step": 13364 }, { "epoch": 4.89589154032886, "grad_norm": 0.1471742535723577, "learning_rate": 4.0414315970205085e-06, "loss": 0.3658, "num_tokens": 6046342569.0, "step": 13365 }, { "epoch": 4.896257958136766, "grad_norm": 0.1458962729389532, "learning_rate": 4.04114248232682e-06, "loss": 0.3745, "num_tokens": 6047110754.0, "step": 13366 }, { "epoch": 4.896624375944671, "grad_norm": 0.16023217152306654, "learning_rate": 4.0408543787526125e-06, "loss": 0.3528, "num_tokens": 6047829483.0, "step": 13367 }, { "epoch": 4.896990793752576, "grad_norm": 0.1541145883758918, "learning_rate": 4.040567286314107e-06, "loss": 0.3707, "num_tokens": 6048625559.0, "step": 13368 }, { "epoch": 4.8973572115604815, "grad_norm": 0.1496475151329412, "learning_rate": 4.040281205027469e-06, "loss": 0.367, "num_tokens": 6049367724.0, "step": 13369 }, { "epoch": 4.897723629368388, "grad_norm": 0.14871186751106755, "learning_rate": 4.039996134908802e-06, "loss": 0.388, "num_tokens": 6050100412.0, "step": 13370 }, { "epoch": 4.898090047176293, "grad_norm": 0.15122717101925762, "learning_rate": 4.039712075974158e-06, "loss": 0.3593, "num_tokens": 6050961726.0, "step": 13371 }, { "epoch": 4.898456464984198, "grad_norm": 0.15103963213211533, "learning_rate": 4.039429028239529e-06, "loss": 0.3745, "num_tokens": 6051768031.0, "step": 13372 }, { "epoch": 4.898822882792103, "grad_norm": 0.15072565760479953, "learning_rate": 4.039146991720854e-06, "loss": 0.3761, "num_tokens": 6052511570.0, "step": 13373 }, { "epoch": 4.899189300600009, "grad_norm": 0.1426591490629935, "learning_rate": 4.038865966434008e-06, "loss": 0.3394, "num_tokens": 6053297748.0, "step": 13374 }, { "epoch": 4.899555718407915, "grad_norm": 0.15580186996182274, "learning_rate": 4.038585952394817e-06, "loss": 0.381, "num_tokens": 6054081489.0, "step": 13375 }, { "epoch": 4.89992213621582, "grad_norm": 0.15350269191215854, "learning_rate": 4.038306949619046e-06, "loss": 0.3406, "num_tokens": 6054886004.0, "step": 13376 }, { "epoch": 4.900288554023725, "grad_norm": 0.13460959518075494, "learning_rate": 4.038028958122402e-06, "loss": 0.359, "num_tokens": 6055718129.0, "step": 13377 }, { "epoch": 4.9006549718316315, "grad_norm": 0.1433400609175183, "learning_rate": 4.037751977920533e-06, "loss": 0.3486, "num_tokens": 6056479387.0, "step": 13378 }, { "epoch": 4.901021389639537, "grad_norm": 0.1477374433841158, "learning_rate": 4.037476009029043e-06, "loss": 0.3584, "num_tokens": 6057269257.0, "step": 13379 }, { "epoch": 4.901387807447442, "grad_norm": 0.14715506410490636, "learning_rate": 4.037201051463459e-06, "loss": 0.403, "num_tokens": 6058037445.0, "step": 13380 }, { "epoch": 4.901754225255347, "grad_norm": 0.16460939582526196, "learning_rate": 4.03692710523927e-06, "loss": 0.3522, "num_tokens": 6058742633.0, "step": 13381 }, { "epoch": 4.9021206430632525, "grad_norm": 0.1533535994295791, "learning_rate": 4.036654170371895e-06, "loss": 0.3552, "num_tokens": 6059436291.0, "step": 13382 }, { "epoch": 4.902487060871159, "grad_norm": 0.15223717789311536, "learning_rate": 4.036382246876701e-06, "loss": 0.349, "num_tokens": 6060154783.0, "step": 13383 }, { "epoch": 4.902853478679064, "grad_norm": 0.14527293598624935, "learning_rate": 4.036111334769001e-06, "loss": 0.3678, "num_tokens": 6060986115.0, "step": 13384 }, { "epoch": 4.903219896486969, "grad_norm": 0.14489681247689346, "learning_rate": 4.035841434064045e-06, "loss": 0.3549, "num_tokens": 6061753322.0, "step": 13385 }, { "epoch": 4.903586314294874, "grad_norm": 0.1400474469398833, "learning_rate": 4.0355725447770285e-06, "loss": 0.343, "num_tokens": 6062564013.0, "step": 13386 }, { "epoch": 4.9039527321027805, "grad_norm": 0.15139909610079327, "learning_rate": 4.035304666923094e-06, "loss": 0.3759, "num_tokens": 6063297671.0, "step": 13387 }, { "epoch": 4.904319149910686, "grad_norm": 0.13600769965457934, "learning_rate": 4.0350378005173205e-06, "loss": 0.3658, "num_tokens": 6064213216.0, "step": 13388 }, { "epoch": 4.904685567718591, "grad_norm": 0.14388878231296418, "learning_rate": 4.034771945574734e-06, "loss": 0.3906, "num_tokens": 6064926492.0, "step": 13389 }, { "epoch": 4.905051985526496, "grad_norm": 0.15747913841215763, "learning_rate": 4.0345071021103004e-06, "loss": 0.3892, "num_tokens": 6065732920.0, "step": 13390 }, { "epoch": 4.905418403334402, "grad_norm": 0.16325363001499854, "learning_rate": 4.034243270138937e-06, "loss": 0.3656, "num_tokens": 6066487590.0, "step": 13391 }, { "epoch": 4.905784821142308, "grad_norm": 0.14854404556935405, "learning_rate": 4.033980449675492e-06, "loss": 0.3978, "num_tokens": 6067269026.0, "step": 13392 }, { "epoch": 4.906151238950213, "grad_norm": 0.14578543976340574, "learning_rate": 4.033718640734765e-06, "loss": 0.3607, "num_tokens": 6068122485.0, "step": 13393 }, { "epoch": 4.906517656758118, "grad_norm": 0.1431778315476769, "learning_rate": 4.033457843331497e-06, "loss": 0.3663, "num_tokens": 6068899165.0, "step": 13394 }, { "epoch": 4.906884074566024, "grad_norm": 0.16890226417661106, "learning_rate": 4.033198057480369e-06, "loss": 0.3618, "num_tokens": 6069562617.0, "step": 13395 }, { "epoch": 4.9072504923739295, "grad_norm": 0.14220361308899643, "learning_rate": 4.0329392831960105e-06, "loss": 0.3542, "num_tokens": 6070371929.0, "step": 13396 }, { "epoch": 4.907616910181835, "grad_norm": 0.14846477773693584, "learning_rate": 4.032681520492988e-06, "loss": 0.3434, "num_tokens": 6071121658.0, "step": 13397 }, { "epoch": 4.90798332798974, "grad_norm": 0.1542508429508431, "learning_rate": 4.032424769385818e-06, "loss": 0.3907, "num_tokens": 6071883718.0, "step": 13398 }, { "epoch": 4.908349745797645, "grad_norm": 0.14561925615815116, "learning_rate": 4.032169029888952e-06, "loss": 0.3712, "num_tokens": 6072696193.0, "step": 13399 }, { "epoch": 4.908716163605551, "grad_norm": 0.16020959729069595, "learning_rate": 4.031914302016793e-06, "loss": 0.3604, "num_tokens": 6073372627.0, "step": 13400 }, { "epoch": 4.909082581413457, "grad_norm": 0.16520168007621694, "learning_rate": 4.031660585783675e-06, "loss": 0.3718, "num_tokens": 6074118435.0, "step": 13401 }, { "epoch": 4.909448999221362, "grad_norm": 0.14908372370292763, "learning_rate": 4.031407881203894e-06, "loss": 0.3714, "num_tokens": 6074885711.0, "step": 13402 }, { "epoch": 4.909815417029268, "grad_norm": 0.1548056096556223, "learning_rate": 4.031156188291668e-06, "loss": 0.3985, "num_tokens": 6075609831.0, "step": 13403 }, { "epoch": 4.910181834837173, "grad_norm": 0.1464935587201269, "learning_rate": 4.030905507061172e-06, "loss": 0.3613, "num_tokens": 6076477898.0, "step": 13404 }, { "epoch": 4.9105482526450785, "grad_norm": 0.14642929558228698, "learning_rate": 4.03065583752652e-06, "loss": 0.384, "num_tokens": 6077146188.0, "step": 13405 }, { "epoch": 4.910914670452984, "grad_norm": 0.15134008094648396, "learning_rate": 4.030407179701767e-06, "loss": 0.3409, "num_tokens": 6077951930.0, "step": 13406 }, { "epoch": 4.911281088260889, "grad_norm": 0.14904871917768653, "learning_rate": 4.0301595336009144e-06, "loss": 0.4048, "num_tokens": 6078703304.0, "step": 13407 }, { "epoch": 4.911647506068795, "grad_norm": 0.1539414438283116, "learning_rate": 4.029912899237907e-06, "loss": 0.357, "num_tokens": 6079458006.0, "step": 13408 }, { "epoch": 4.9120139238767, "grad_norm": 0.13930406581163832, "learning_rate": 4.0296672766266265e-06, "loss": 0.3654, "num_tokens": 6080242825.0, "step": 13409 }, { "epoch": 4.912380341684606, "grad_norm": 0.14348202200878465, "learning_rate": 4.029422665780903e-06, "loss": 0.3433, "num_tokens": 6081048093.0, "step": 13410 }, { "epoch": 4.912746759492512, "grad_norm": 0.15338094991045786, "learning_rate": 4.029179066714513e-06, "loss": 0.3836, "num_tokens": 6081777944.0, "step": 13411 }, { "epoch": 4.913113177300417, "grad_norm": 0.14306163696861632, "learning_rate": 4.028936479441166e-06, "loss": 0.3508, "num_tokens": 6082561902.0, "step": 13412 }, { "epoch": 4.913479595108322, "grad_norm": 0.14313030861779164, "learning_rate": 4.028694903974523e-06, "loss": 0.3644, "num_tokens": 6083348871.0, "step": 13413 }, { "epoch": 4.913846012916228, "grad_norm": 0.16602088352576697, "learning_rate": 4.028454340328186e-06, "loss": 0.3693, "num_tokens": 6084097160.0, "step": 13414 }, { "epoch": 4.914212430724133, "grad_norm": 0.15142002300940677, "learning_rate": 4.028214788515695e-06, "loss": 0.3684, "num_tokens": 6084828265.0, "step": 13415 }, { "epoch": 4.914578848532039, "grad_norm": 0.1572902598294076, "learning_rate": 4.027976248550543e-06, "loss": 0.3609, "num_tokens": 6085492184.0, "step": 13416 }, { "epoch": 4.914945266339944, "grad_norm": 0.15232123689983912, "learning_rate": 4.027738720446155e-06, "loss": 0.3687, "num_tokens": 6086271541.0, "step": 13417 }, { "epoch": 4.9153116841478495, "grad_norm": 0.14790623820337334, "learning_rate": 4.027502204215909e-06, "loss": 0.3663, "num_tokens": 6087018303.0, "step": 13418 }, { "epoch": 4.915678101955755, "grad_norm": 0.15575005864178235, "learning_rate": 4.027266699873117e-06, "loss": 0.3882, "num_tokens": 6087781086.0, "step": 13419 }, { "epoch": 4.916044519763661, "grad_norm": 0.15860407616716693, "learning_rate": 4.027032207431042e-06, "loss": 0.3737, "num_tokens": 6088558887.0, "step": 13420 }, { "epoch": 4.916410937571566, "grad_norm": 0.15374570776352936, "learning_rate": 4.0267987269028846e-06, "loss": 0.3555, "num_tokens": 6089247320.0, "step": 13421 }, { "epoch": 4.916777355379471, "grad_norm": 0.14316596817329205, "learning_rate": 4.026566258301789e-06, "loss": 0.3907, "num_tokens": 6090090081.0, "step": 13422 }, { "epoch": 4.917143773187377, "grad_norm": 0.1446222062974652, "learning_rate": 4.026334801640846e-06, "loss": 0.3972, "num_tokens": 6090896844.0, "step": 13423 }, { "epoch": 4.917510190995283, "grad_norm": 0.15452489664588165, "learning_rate": 4.026104356933086e-06, "loss": 0.3617, "num_tokens": 6091613751.0, "step": 13424 }, { "epoch": 4.917876608803188, "grad_norm": 0.1515732269387278, "learning_rate": 4.025874924191483e-06, "loss": 0.3969, "num_tokens": 6092423654.0, "step": 13425 }, { "epoch": 4.918243026611093, "grad_norm": 0.14625539880369948, "learning_rate": 4.025646503428956e-06, "loss": 0.3611, "num_tokens": 6093158080.0, "step": 13426 }, { "epoch": 4.9186094444189985, "grad_norm": 0.13945124064409892, "learning_rate": 4.025419094658365e-06, "loss": 0.3664, "num_tokens": 6093974087.0, "step": 13427 }, { "epoch": 4.918975862226905, "grad_norm": 0.1617465603721532, "learning_rate": 4.025192697892513e-06, "loss": 0.3923, "num_tokens": 6094676655.0, "step": 13428 }, { "epoch": 4.91934228003481, "grad_norm": 0.14487759717216006, "learning_rate": 4.024967313144146e-06, "loss": 0.3567, "num_tokens": 6095533456.0, "step": 13429 }, { "epoch": 4.919708697842715, "grad_norm": 0.1312134765992732, "learning_rate": 4.024742940425955e-06, "loss": 0.3484, "num_tokens": 6096433592.0, "step": 13430 }, { "epoch": 4.92007511565062, "grad_norm": 0.1376161216839178, "learning_rate": 4.024519579750572e-06, "loss": 0.355, "num_tokens": 6097239362.0, "step": 13431 }, { "epoch": 4.920441533458526, "grad_norm": 0.15303174481695114, "learning_rate": 4.024297231130572e-06, "loss": 0.335, "num_tokens": 6097907006.0, "step": 13432 }, { "epoch": 4.920807951266432, "grad_norm": 0.14498501934012362, "learning_rate": 4.024075894578476e-06, "loss": 0.3784, "num_tokens": 6098703023.0, "step": 13433 }, { "epoch": 4.921174369074337, "grad_norm": 0.1508428234574927, "learning_rate": 4.023855570106742e-06, "loss": 0.3476, "num_tokens": 6099534107.0, "step": 13434 }, { "epoch": 4.921540786882242, "grad_norm": 0.15766997807535577, "learning_rate": 4.02363625772778e-06, "loss": 0.3625, "num_tokens": 6100182051.0, "step": 13435 }, { "epoch": 4.921907204690148, "grad_norm": 0.14937329590733087, "learning_rate": 4.023417957453932e-06, "loss": 0.4014, "num_tokens": 6101011353.0, "step": 13436 }, { "epoch": 4.922273622498054, "grad_norm": 0.15088628749869112, "learning_rate": 4.023200669297492e-06, "loss": 0.3848, "num_tokens": 6101865350.0, "step": 13437 }, { "epoch": 4.922640040305959, "grad_norm": 0.17382327199285882, "learning_rate": 4.022984393270692e-06, "loss": 0.3771, "num_tokens": 6102480897.0, "step": 13438 }, { "epoch": 4.923006458113864, "grad_norm": 0.16751165383335562, "learning_rate": 4.022769129385712e-06, "loss": 0.3832, "num_tokens": 6103167675.0, "step": 13439 }, { "epoch": 4.923372875921769, "grad_norm": 0.16142663692788092, "learning_rate": 4.022554877654668e-06, "loss": 0.3884, "num_tokens": 6103789285.0, "step": 13440 }, { "epoch": 4.923739293729676, "grad_norm": 0.16684541229394972, "learning_rate": 4.0223416380896255e-06, "loss": 0.3808, "num_tokens": 6104521290.0, "step": 13441 }, { "epoch": 4.924105711537581, "grad_norm": 0.13947069001746415, "learning_rate": 4.0221294107025865e-06, "loss": 0.3664, "num_tokens": 6105328468.0, "step": 13442 }, { "epoch": 4.924472129345486, "grad_norm": 0.13916186725860336, "learning_rate": 4.021918195505505e-06, "loss": 0.3988, "num_tokens": 6106230980.0, "step": 13443 }, { "epoch": 4.924838547153391, "grad_norm": 0.1529377346471616, "learning_rate": 4.021707992510269e-06, "loss": 0.3898, "num_tokens": 6106953731.0, "step": 13444 }, { "epoch": 4.9252049649612974, "grad_norm": 0.15541818262250187, "learning_rate": 4.021498801728716e-06, "loss": 0.355, "num_tokens": 6107752038.0, "step": 13445 }, { "epoch": 4.925571382769203, "grad_norm": 0.14465802718166443, "learning_rate": 4.021290623172621e-06, "loss": 0.405, "num_tokens": 6108652879.0, "step": 13446 }, { "epoch": 4.925937800577108, "grad_norm": 0.13793431473741893, "learning_rate": 4.021083456853708e-06, "loss": 0.3562, "num_tokens": 6109432392.0, "step": 13447 }, { "epoch": 4.926304218385013, "grad_norm": 0.16331847711870304, "learning_rate": 4.020877302783639e-06, "loss": 0.3829, "num_tokens": 6110151573.0, "step": 13448 }, { "epoch": 4.926670636192919, "grad_norm": 0.15015736163990362, "learning_rate": 4.020672160974019e-06, "loss": 0.396, "num_tokens": 6110918371.0, "step": 13449 }, { "epoch": 4.927037054000825, "grad_norm": 0.14798394180460206, "learning_rate": 4.020468031436403e-06, "loss": 0.376, "num_tokens": 6111770647.0, "step": 13450 }, { "epoch": 4.92740347180873, "grad_norm": 0.15472553600475564, "learning_rate": 4.0202649141822805e-06, "loss": 0.3973, "num_tokens": 6112538976.0, "step": 13451 }, { "epoch": 4.927769889616635, "grad_norm": 0.15234020394397635, "learning_rate": 4.020062809223088e-06, "loss": 0.3679, "num_tokens": 6113317591.0, "step": 13452 }, { "epoch": 4.928136307424541, "grad_norm": 0.15745065703266187, "learning_rate": 4.019861716570202e-06, "loss": 0.3638, "num_tokens": 6114002123.0, "step": 13453 }, { "epoch": 4.9285027252324465, "grad_norm": 0.1460084620438379, "learning_rate": 4.019661636234949e-06, "loss": 0.3537, "num_tokens": 6114814716.0, "step": 13454 }, { "epoch": 4.928869143040352, "grad_norm": 0.14223826420273905, "learning_rate": 4.019462568228592e-06, "loss": 0.3617, "num_tokens": 6115592826.0, "step": 13455 }, { "epoch": 4.929235560848257, "grad_norm": 0.15127257278214434, "learning_rate": 4.0192645125623365e-06, "loss": 0.3766, "num_tokens": 6116432332.0, "step": 13456 }, { "epoch": 4.929601978656162, "grad_norm": 0.1502622431173045, "learning_rate": 4.019067469247336e-06, "loss": 0.3307, "num_tokens": 6117185635.0, "step": 13457 }, { "epoch": 4.929968396464068, "grad_norm": 0.1477078885982505, "learning_rate": 4.018871438294686e-06, "loss": 0.3739, "num_tokens": 6117990669.0, "step": 13458 }, { "epoch": 4.930334814271974, "grad_norm": 0.14054589670449433, "learning_rate": 4.018676419715418e-06, "loss": 0.3935, "num_tokens": 6118883519.0, "step": 13459 }, { "epoch": 4.930701232079879, "grad_norm": 0.14435608811024986, "learning_rate": 4.018482413520517e-06, "loss": 0.3703, "num_tokens": 6119588506.0, "step": 13460 }, { "epoch": 4.931067649887785, "grad_norm": 0.15915876637586665, "learning_rate": 4.0182894197209055e-06, "loss": 0.3791, "num_tokens": 6120286345.0, "step": 13461 }, { "epoch": 4.93143406769569, "grad_norm": 0.1506847009207326, "learning_rate": 4.018097438327448e-06, "loss": 0.3795, "num_tokens": 6121073336.0, "step": 13462 }, { "epoch": 4.9318004855035955, "grad_norm": 0.15083422821132153, "learning_rate": 4.017906469350951e-06, "loss": 0.3788, "num_tokens": 6121854540.0, "step": 13463 }, { "epoch": 4.932166903311501, "grad_norm": 0.14805219391812738, "learning_rate": 4.01771651280217e-06, "loss": 0.3839, "num_tokens": 6122731965.0, "step": 13464 }, { "epoch": 4.932533321119406, "grad_norm": 0.15087495764209208, "learning_rate": 4.017527568691801e-06, "loss": 0.3681, "num_tokens": 6123490146.0, "step": 13465 }, { "epoch": 4.932899738927312, "grad_norm": 0.14887893954929343, "learning_rate": 4.017339637030478e-06, "loss": 0.3679, "num_tokens": 6124215055.0, "step": 13466 }, { "epoch": 4.933266156735217, "grad_norm": 0.1432652236901256, "learning_rate": 4.0171527178287825e-06, "loss": 0.3617, "num_tokens": 6124991682.0, "step": 13467 }, { "epoch": 4.933632574543123, "grad_norm": 0.14944123709721904, "learning_rate": 4.016966811097242e-06, "loss": 0.3523, "num_tokens": 6125680009.0, "step": 13468 }, { "epoch": 4.933998992351028, "grad_norm": 0.15585514242158763, "learning_rate": 4.01678191684632e-06, "loss": 0.3554, "num_tokens": 6126406487.0, "step": 13469 }, { "epoch": 4.934365410158934, "grad_norm": 0.15009253965976385, "learning_rate": 4.01659803508643e-06, "loss": 0.3845, "num_tokens": 6127233178.0, "step": 13470 }, { "epoch": 4.934731827966839, "grad_norm": 0.14205571246752352, "learning_rate": 4.01641516582792e-06, "loss": 0.4056, "num_tokens": 6127995793.0, "step": 13471 }, { "epoch": 4.9350982457747445, "grad_norm": 0.15520571631729518, "learning_rate": 4.0162333090810875e-06, "loss": 0.3732, "num_tokens": 6128771776.0, "step": 13472 }, { "epoch": 4.93546466358265, "grad_norm": 0.15108455477096755, "learning_rate": 4.016052464856175e-06, "loss": 0.405, "num_tokens": 6129512442.0, "step": 13473 }, { "epoch": 4.935831081390556, "grad_norm": 0.14953680220827137, "learning_rate": 4.015872633163362e-06, "loss": 0.3985, "num_tokens": 6130273225.0, "step": 13474 }, { "epoch": 4.936197499198461, "grad_norm": 0.15181007558833634, "learning_rate": 4.015693814012771e-06, "loss": 0.3952, "num_tokens": 6131054739.0, "step": 13475 }, { "epoch": 4.936563917006366, "grad_norm": 0.1578446831617065, "learning_rate": 4.015516007414472e-06, "loss": 0.3722, "num_tokens": 6131780876.0, "step": 13476 }, { "epoch": 4.936930334814272, "grad_norm": 0.14412881500051938, "learning_rate": 4.015339213378475e-06, "loss": 0.3626, "num_tokens": 6132631997.0, "step": 13477 }, { "epoch": 4.937296752622178, "grad_norm": 0.14876089662484773, "learning_rate": 4.015163431914736e-06, "loss": 0.379, "num_tokens": 6133439425.0, "step": 13478 }, { "epoch": 4.937663170430083, "grad_norm": 0.14953337555691412, "learning_rate": 4.014988663033152e-06, "loss": 0.3595, "num_tokens": 6134156484.0, "step": 13479 }, { "epoch": 4.938029588237988, "grad_norm": 0.15619492053795653, "learning_rate": 4.014814906743559e-06, "loss": 0.3564, "num_tokens": 6134891920.0, "step": 13480 }, { "epoch": 4.938396006045894, "grad_norm": 0.15014259719940387, "learning_rate": 4.0146421630557435e-06, "loss": 0.3393, "num_tokens": 6135618428.0, "step": 13481 }, { "epoch": 4.938762423853799, "grad_norm": 0.14279719168770377, "learning_rate": 4.0144704319794295e-06, "loss": 0.4058, "num_tokens": 6136423754.0, "step": 13482 }, { "epoch": 4.939128841661705, "grad_norm": 0.15085230172234987, "learning_rate": 4.014299713524287e-06, "loss": 0.3819, "num_tokens": 6137151460.0, "step": 13483 }, { "epoch": 4.93949525946961, "grad_norm": 0.1582454635331426, "learning_rate": 4.014130007699926e-06, "loss": 0.3783, "num_tokens": 6137788229.0, "step": 13484 }, { "epoch": 4.9398616772775155, "grad_norm": 0.15784785860195638, "learning_rate": 4.0139613145159024e-06, "loss": 0.3793, "num_tokens": 6138544875.0, "step": 13485 }, { "epoch": 4.940228095085422, "grad_norm": 0.1441762444039258, "learning_rate": 4.013793633981714e-06, "loss": 0.3719, "num_tokens": 6139313371.0, "step": 13486 }, { "epoch": 4.940594512893327, "grad_norm": 0.14879397820117743, "learning_rate": 4.013626966106803e-06, "loss": 0.3834, "num_tokens": 6140060905.0, "step": 13487 }, { "epoch": 4.940960930701232, "grad_norm": 0.15459285193864183, "learning_rate": 4.013461310900549e-06, "loss": 0.3739, "num_tokens": 6140933652.0, "step": 13488 }, { "epoch": 4.941327348509137, "grad_norm": 0.13981583760528202, "learning_rate": 4.013296668372284e-06, "loss": 0.376, "num_tokens": 6141787422.0, "step": 13489 }, { "epoch": 4.941693766317043, "grad_norm": 0.1672518820838655, "learning_rate": 4.013133038531273e-06, "loss": 0.3839, "num_tokens": 6142471741.0, "step": 13490 }, { "epoch": 4.942060184124949, "grad_norm": 0.14458004267798957, "learning_rate": 4.012970421386731e-06, "loss": 0.3822, "num_tokens": 6143265586.0, "step": 13491 }, { "epoch": 4.942426601932854, "grad_norm": 0.1638318605497296, "learning_rate": 4.012808816947814e-06, "loss": 0.3855, "num_tokens": 6144024032.0, "step": 13492 }, { "epoch": 4.942793019740759, "grad_norm": 0.14611284192888496, "learning_rate": 4.0126482252236195e-06, "loss": 0.3823, "num_tokens": 6144888619.0, "step": 13493 }, { "epoch": 4.9431594375486645, "grad_norm": 0.16557291784129674, "learning_rate": 4.01248864622319e-06, "loss": 0.3821, "num_tokens": 6145556490.0, "step": 13494 }, { "epoch": 4.943525855356571, "grad_norm": 0.14643024688217982, "learning_rate": 4.01233007995551e-06, "loss": 0.3663, "num_tokens": 6146277419.0, "step": 13495 }, { "epoch": 4.943892273164476, "grad_norm": 0.14913387369847594, "learning_rate": 4.012172526429506e-06, "loss": 0.364, "num_tokens": 6147093306.0, "step": 13496 }, { "epoch": 4.944258690972381, "grad_norm": 0.14358741732907757, "learning_rate": 4.01201598565405e-06, "loss": 0.3761, "num_tokens": 6147875857.0, "step": 13497 }, { "epoch": 4.944625108780286, "grad_norm": 0.14627113100389222, "learning_rate": 4.0118604576379536e-06, "loss": 0.3775, "num_tokens": 6148763943.0, "step": 13498 }, { "epoch": 4.9449915265881925, "grad_norm": 0.13994055822070442, "learning_rate": 4.011705942389977e-06, "loss": 0.3722, "num_tokens": 6149616957.0, "step": 13499 }, { "epoch": 4.945357944396098, "grad_norm": 0.15674784752932946, "learning_rate": 4.011552439918817e-06, "loss": 0.3622, "num_tokens": 6150379403.0, "step": 13500 }, { "epoch": 4.945724362204003, "grad_norm": 0.1414968320649883, "learning_rate": 4.011399950233117e-06, "loss": 0.3509, "num_tokens": 6151251469.0, "step": 13501 }, { "epoch": 4.946090780011908, "grad_norm": 0.13498682755676686, "learning_rate": 4.011248473341461e-06, "loss": 0.3734, "num_tokens": 6152136579.0, "step": 13502 }, { "epoch": 4.946457197819814, "grad_norm": 0.16222421004977405, "learning_rate": 4.011098009252376e-06, "loss": 0.3578, "num_tokens": 6152866163.0, "step": 13503 }, { "epoch": 4.94682361562772, "grad_norm": 0.15873182546035577, "learning_rate": 4.010948557974339e-06, "loss": 0.3507, "num_tokens": 6153537989.0, "step": 13504 }, { "epoch": 4.947190033435625, "grad_norm": 0.15413994251057989, "learning_rate": 4.0108001195157625e-06, "loss": 0.363, "num_tokens": 6154321887.0, "step": 13505 }, { "epoch": 4.94755645124353, "grad_norm": 0.14782046088107925, "learning_rate": 4.010652693884999e-06, "loss": 0.34, "num_tokens": 6155093452.0, "step": 13506 }, { "epoch": 4.947922869051435, "grad_norm": 0.15767793625893023, "learning_rate": 4.010506281090355e-06, "loss": 0.3648, "num_tokens": 6155792937.0, "step": 13507 }, { "epoch": 4.9482892868593416, "grad_norm": 0.1584889372601078, "learning_rate": 4.010360881140071e-06, "loss": 0.3841, "num_tokens": 6156459732.0, "step": 13508 }, { "epoch": 4.948655704667247, "grad_norm": 0.14303890560129254, "learning_rate": 4.010216494042336e-06, "loss": 0.3764, "num_tokens": 6157373155.0, "step": 13509 }, { "epoch": 4.949022122475152, "grad_norm": 0.14136884951441644, "learning_rate": 4.010073119805273e-06, "loss": 0.3685, "num_tokens": 6158231655.0, "step": 13510 }, { "epoch": 4.949388540283058, "grad_norm": 0.1396809566292234, "learning_rate": 4.009930758436961e-06, "loss": 0.359, "num_tokens": 6159087637.0, "step": 13511 }, { "epoch": 4.949754958090963, "grad_norm": 0.13673020119194473, "learning_rate": 4.009789409945409e-06, "loss": 0.366, "num_tokens": 6160039362.0, "step": 13512 }, { "epoch": 4.950121375898869, "grad_norm": 0.13203688402260344, "learning_rate": 4.009649074338582e-06, "loss": 0.3592, "num_tokens": 6160871024.0, "step": 13513 }, { "epoch": 4.950487793706774, "grad_norm": 0.15539314766143583, "learning_rate": 4.009509751624377e-06, "loss": 0.3775, "num_tokens": 6161677099.0, "step": 13514 }, { "epoch": 4.950854211514679, "grad_norm": 0.15123628976146639, "learning_rate": 4.009371441810639e-06, "loss": 0.3455, "num_tokens": 6162370976.0, "step": 13515 }, { "epoch": 4.951220629322585, "grad_norm": 0.15727836712946652, "learning_rate": 4.0092341449051545e-06, "loss": 0.3478, "num_tokens": 6163152986.0, "step": 13516 }, { "epoch": 4.951587047130491, "grad_norm": 0.14141138457542765, "learning_rate": 4.009097860915654e-06, "loss": 0.3708, "num_tokens": 6164015548.0, "step": 13517 }, { "epoch": 4.951953464938396, "grad_norm": 0.14775489611840859, "learning_rate": 4.008962589849812e-06, "loss": 0.3605, "num_tokens": 6164777586.0, "step": 13518 }, { "epoch": 4.952319882746301, "grad_norm": 0.16037281067884127, "learning_rate": 4.008828331715242e-06, "loss": 0.3538, "num_tokens": 6165414571.0, "step": 13519 }, { "epoch": 4.952686300554207, "grad_norm": 0.1419771329642578, "learning_rate": 4.008695086519505e-06, "loss": 0.3478, "num_tokens": 6166314658.0, "step": 13520 }, { "epoch": 4.9530527183621125, "grad_norm": 0.1423548749195315, "learning_rate": 4.008562854270103e-06, "loss": 0.3745, "num_tokens": 6167111654.0, "step": 13521 }, { "epoch": 4.953419136170018, "grad_norm": 0.15122169144501146, "learning_rate": 4.00843163497448e-06, "loss": 0.3842, "num_tokens": 6167848725.0, "step": 13522 }, { "epoch": 4.953785553977923, "grad_norm": 0.15331590233343062, "learning_rate": 4.008301428640023e-06, "loss": 0.3909, "num_tokens": 6168524307.0, "step": 13523 }, { "epoch": 4.954151971785829, "grad_norm": 0.1525838573662336, "learning_rate": 4.008172235274064e-06, "loss": 0.3893, "num_tokens": 6169286295.0, "step": 13524 }, { "epoch": 4.954518389593734, "grad_norm": 0.1572157747497865, "learning_rate": 4.008044054883876e-06, "loss": 0.3885, "num_tokens": 6170090319.0, "step": 13525 }, { "epoch": 4.95488480740164, "grad_norm": 0.14606835380512667, "learning_rate": 4.00791688747668e-06, "loss": 0.3679, "num_tokens": 6170821107.0, "step": 13526 }, { "epoch": 4.955251225209545, "grad_norm": 0.1651347713818162, "learning_rate": 4.007790733059629e-06, "loss": 0.3847, "num_tokens": 6171542507.0, "step": 13527 }, { "epoch": 4.955617643017451, "grad_norm": 0.16365348258769932, "learning_rate": 4.00766559163983e-06, "loss": 0.3817, "num_tokens": 6172308346.0, "step": 13528 }, { "epoch": 4.955984060825356, "grad_norm": 0.16264205131820803, "learning_rate": 4.007541463224328e-06, "loss": 0.3773, "num_tokens": 6172943526.0, "step": 13529 }, { "epoch": 4.9563504786332615, "grad_norm": 0.16124110593826174, "learning_rate": 4.007418347820112e-06, "loss": 0.3826, "num_tokens": 6173672902.0, "step": 13530 }, { "epoch": 4.956716896441167, "grad_norm": 0.15527348438936425, "learning_rate": 4.007296245434112e-06, "loss": 0.3435, "num_tokens": 6174571194.0, "step": 13531 }, { "epoch": 4.957083314249073, "grad_norm": 0.14189660396574105, "learning_rate": 4.007175156073206e-06, "loss": 0.3735, "num_tokens": 6175346173.0, "step": 13532 }, { "epoch": 4.957449732056978, "grad_norm": 0.1533223094351268, "learning_rate": 4.007055079744208e-06, "loss": 0.3985, "num_tokens": 6176084579.0, "step": 13533 }, { "epoch": 4.957816149864883, "grad_norm": 0.1512846608407943, "learning_rate": 4.006936016453879e-06, "loss": 0.3575, "num_tokens": 6176796838.0, "step": 13534 }, { "epoch": 4.958182567672789, "grad_norm": 0.15513877330669537, "learning_rate": 4.006817966208925e-06, "loss": 0.3678, "num_tokens": 6177522877.0, "step": 13535 }, { "epoch": 4.958548985480695, "grad_norm": 0.1563768341217297, "learning_rate": 4.006700929015991e-06, "loss": 0.355, "num_tokens": 6178264828.0, "step": 13536 }, { "epoch": 4.9589154032886, "grad_norm": 0.1777361902183246, "learning_rate": 4.006584904881665e-06, "loss": 0.3975, "num_tokens": 6178953012.0, "step": 13537 }, { "epoch": 4.959281821096505, "grad_norm": 0.14803930114200722, "learning_rate": 4.00646989381248e-06, "loss": 0.3766, "num_tokens": 6179761667.0, "step": 13538 }, { "epoch": 4.9596482389044105, "grad_norm": 0.1590699347371414, "learning_rate": 4.006355895814914e-06, "loss": 0.3847, "num_tokens": 6180483956.0, "step": 13539 }, { "epoch": 4.960014656712316, "grad_norm": 0.162678945063197, "learning_rate": 4.006242910895382e-06, "loss": 0.3894, "num_tokens": 6181203488.0, "step": 13540 }, { "epoch": 4.960381074520222, "grad_norm": 0.14969655209592034, "learning_rate": 4.006130939060248e-06, "loss": 0.3661, "num_tokens": 6181967630.0, "step": 13541 }, { "epoch": 4.960747492328127, "grad_norm": 0.15774738354127227, "learning_rate": 4.006019980315814e-06, "loss": 0.3547, "num_tokens": 6182762224.0, "step": 13542 }, { "epoch": 4.961113910136032, "grad_norm": 0.15564144544592695, "learning_rate": 4.0059100346683274e-06, "loss": 0.3908, "num_tokens": 6183467636.0, "step": 13543 }, { "epoch": 4.961480327943939, "grad_norm": 0.16114154102424036, "learning_rate": 4.005801102123981e-06, "loss": 0.3613, "num_tokens": 6184188510.0, "step": 13544 }, { "epoch": 4.961846745751844, "grad_norm": 0.1478532173060523, "learning_rate": 4.005693182688904e-06, "loss": 0.3608, "num_tokens": 6184896594.0, "step": 13545 }, { "epoch": 4.962213163559749, "grad_norm": 0.14816466177633264, "learning_rate": 4.005586276369176e-06, "loss": 0.3862, "num_tokens": 6185693833.0, "step": 13546 }, { "epoch": 4.962579581367654, "grad_norm": 0.14891659561737056, "learning_rate": 4.005480383170813e-06, "loss": 0.369, "num_tokens": 6186573703.0, "step": 13547 }, { "epoch": 4.96294599917556, "grad_norm": 0.15169487652573832, "learning_rate": 4.00537550309978e-06, "loss": 0.3786, "num_tokens": 6187340704.0, "step": 13548 }, { "epoch": 4.963312416983466, "grad_norm": 0.14733220534340816, "learning_rate": 4.005271636161979e-06, "loss": 0.3591, "num_tokens": 6188173680.0, "step": 13549 }, { "epoch": 4.963678834791371, "grad_norm": 0.14886896391436952, "learning_rate": 4.005168782363259e-06, "loss": 0.3614, "num_tokens": 6188880265.0, "step": 13550 }, { "epoch": 4.964045252599276, "grad_norm": 0.1544419863463061, "learning_rate": 4.0050669417094115e-06, "loss": 0.3616, "num_tokens": 6189605172.0, "step": 13551 }, { "epoch": 4.9644116704071815, "grad_norm": 0.16531348446260682, "learning_rate": 4.00496611420617e-06, "loss": 0.3634, "num_tokens": 6190297304.0, "step": 13552 }, { "epoch": 4.964778088215088, "grad_norm": 0.15940995079205875, "learning_rate": 4.004866299859213e-06, "loss": 0.3643, "num_tokens": 6190942110.0, "step": 13553 }, { "epoch": 4.965144506022993, "grad_norm": 0.1587506677082731, "learning_rate": 4.0047674986741576e-06, "loss": 0.4012, "num_tokens": 6191747959.0, "step": 13554 }, { "epoch": 4.965510923830898, "grad_norm": 0.1477417953502127, "learning_rate": 4.004669710656566e-06, "loss": 0.3275, "num_tokens": 6192536484.0, "step": 13555 }, { "epoch": 4.965877341638803, "grad_norm": 0.15555511178678488, "learning_rate": 4.0045729358119475e-06, "loss": 0.3991, "num_tokens": 6193211288.0, "step": 13556 }, { "epoch": 4.9662437594467095, "grad_norm": 0.15836805272444285, "learning_rate": 4.004477174145747e-06, "loss": 0.3452, "num_tokens": 6193822483.0, "step": 13557 }, { "epoch": 4.966610177254615, "grad_norm": 0.15400386299782282, "learning_rate": 4.004382425663359e-06, "loss": 0.3838, "num_tokens": 6194513861.0, "step": 13558 }, { "epoch": 4.96697659506252, "grad_norm": 0.1532658704899484, "learning_rate": 4.004288690370116e-06, "loss": 0.3512, "num_tokens": 6195384371.0, "step": 13559 }, { "epoch": 4.967343012870425, "grad_norm": 0.14801369176515822, "learning_rate": 4.004195968271295e-06, "loss": 0.3638, "num_tokens": 6196183450.0, "step": 13560 }, { "epoch": 4.967709430678331, "grad_norm": 0.13901543035840908, "learning_rate": 4.004104259372118e-06, "loss": 0.3728, "num_tokens": 6196983912.0, "step": 13561 }, { "epoch": 4.968075848486237, "grad_norm": 0.13764293654954776, "learning_rate": 4.00401356367775e-06, "loss": 0.3679, "num_tokens": 6197869795.0, "step": 13562 }, { "epoch": 4.968442266294142, "grad_norm": 0.15646196377756114, "learning_rate": 4.0039238811932925e-06, "loss": 0.3692, "num_tokens": 6198556591.0, "step": 13563 }, { "epoch": 4.968808684102047, "grad_norm": 0.14646483139633992, "learning_rate": 4.0038352119238e-06, "loss": 0.3592, "num_tokens": 6199448501.0, "step": 13564 }, { "epoch": 4.969175101909952, "grad_norm": 0.13322234817674383, "learning_rate": 4.003747555874261e-06, "loss": 0.3855, "num_tokens": 6200233952.0, "step": 13565 }, { "epoch": 4.9695415197178585, "grad_norm": 0.14218410909657728, "learning_rate": 4.003660913049613e-06, "loss": 0.3584, "num_tokens": 6201065744.0, "step": 13566 }, { "epoch": 4.969907937525764, "grad_norm": 0.14118512929652233, "learning_rate": 4.003575283454732e-06, "loss": 0.3865, "num_tokens": 6201932464.0, "step": 13567 }, { "epoch": 4.970274355333669, "grad_norm": 0.1324415735415332, "learning_rate": 4.003490667094442e-06, "loss": 0.3807, "num_tokens": 6202840062.0, "step": 13568 }, { "epoch": 4.970640773141575, "grad_norm": 0.139616153387517, "learning_rate": 4.003407063973505e-06, "loss": 0.3753, "num_tokens": 6203638066.0, "step": 13569 }, { "epoch": 4.97100719094948, "grad_norm": 0.1627401114444021, "learning_rate": 4.003324474096628e-06, "loss": 0.3509, "num_tokens": 6204265351.0, "step": 13570 }, { "epoch": 4.971373608757386, "grad_norm": 0.13822830021195803, "learning_rate": 4.003242897468461e-06, "loss": 0.3362, "num_tokens": 6205166792.0, "step": 13571 }, { "epoch": 4.971740026565291, "grad_norm": 0.14222631323568402, "learning_rate": 4.003162334093598e-06, "loss": 0.3779, "num_tokens": 6205992893.0, "step": 13572 }, { "epoch": 4.972106444373196, "grad_norm": 0.1426098123180772, "learning_rate": 4.003082783976574e-06, "loss": 0.3617, "num_tokens": 6206807172.0, "step": 13573 }, { "epoch": 4.972472862181102, "grad_norm": 0.1697305960963592, "learning_rate": 4.003004247121868e-06, "loss": 0.395, "num_tokens": 6207427779.0, "step": 13574 }, { "epoch": 4.9728392799890075, "grad_norm": 0.14877855684585256, "learning_rate": 4.002926723533902e-06, "loss": 0.3732, "num_tokens": 6208225086.0, "step": 13575 }, { "epoch": 4.973205697796913, "grad_norm": 0.16099327970968538, "learning_rate": 4.0028502132170394e-06, "loss": 0.3379, "num_tokens": 6208928440.0, "step": 13576 }, { "epoch": 4.973572115604818, "grad_norm": 0.1342543975560039, "learning_rate": 4.002774716175591e-06, "loss": 0.3658, "num_tokens": 6209793336.0, "step": 13577 }, { "epoch": 4.973938533412724, "grad_norm": 0.1454656840715245, "learning_rate": 4.002700232413804e-06, "loss": 0.367, "num_tokens": 6210569662.0, "step": 13578 }, { "epoch": 4.974304951220629, "grad_norm": 0.14095697687874473, "learning_rate": 4.002626761935875e-06, "loss": 0.3743, "num_tokens": 6211361200.0, "step": 13579 }, { "epoch": 4.974671369028535, "grad_norm": 0.15864946092455115, "learning_rate": 4.002554304745937e-06, "loss": 0.3288, "num_tokens": 6212025434.0, "step": 13580 }, { "epoch": 4.97503778683644, "grad_norm": 0.14490240247477282, "learning_rate": 4.002482860848073e-06, "loss": 0.3652, "num_tokens": 6212774272.0, "step": 13581 }, { "epoch": 4.975404204644346, "grad_norm": 0.14737694998589998, "learning_rate": 4.002412430246305e-06, "loss": 0.3627, "num_tokens": 6213534408.0, "step": 13582 }, { "epoch": 4.975770622452251, "grad_norm": 0.1668701298759686, "learning_rate": 4.0023430129445954e-06, "loss": 0.3816, "num_tokens": 6214254168.0, "step": 13583 }, { "epoch": 4.976137040260157, "grad_norm": 0.1525502260132069, "learning_rate": 4.002274608946856e-06, "loss": 0.377, "num_tokens": 6214981522.0, "step": 13584 }, { "epoch": 4.976503458068062, "grad_norm": 0.14793484529864065, "learning_rate": 4.002207218256935e-06, "loss": 0.3781, "num_tokens": 6215730253.0, "step": 13585 }, { "epoch": 4.976869875875968, "grad_norm": 0.14154018334469598, "learning_rate": 4.002140840878627e-06, "loss": 0.3718, "num_tokens": 6216521031.0, "step": 13586 }, { "epoch": 4.977236293683873, "grad_norm": 0.161341324523949, "learning_rate": 4.002075476815673e-06, "loss": 0.3705, "num_tokens": 6217215275.0, "step": 13587 }, { "epoch": 4.9776027114917785, "grad_norm": 0.16482563246502652, "learning_rate": 4.0020111260717495e-06, "loss": 0.3925, "num_tokens": 6217929316.0, "step": 13588 }, { "epoch": 4.977969129299684, "grad_norm": 0.1434963994799858, "learning_rate": 4.00194778865048e-06, "loss": 0.395, "num_tokens": 6218728767.0, "step": 13589 }, { "epoch": 4.978335547107589, "grad_norm": 0.15259485260201364, "learning_rate": 4.001885464555433e-06, "loss": 0.3724, "num_tokens": 6219484059.0, "step": 13590 }, { "epoch": 4.978701964915495, "grad_norm": 0.1586643017377937, "learning_rate": 4.001824153790114e-06, "loss": 0.4212, "num_tokens": 6220177619.0, "step": 13591 }, { "epoch": 4.9790683827234, "grad_norm": 0.14825974803907, "learning_rate": 4.001763856357977e-06, "loss": 0.3521, "num_tokens": 6220928770.0, "step": 13592 }, { "epoch": 4.979434800531306, "grad_norm": 0.15060652529306354, "learning_rate": 4.001704572262414e-06, "loss": 0.3807, "num_tokens": 6221697976.0, "step": 13593 }, { "epoch": 4.979801218339212, "grad_norm": 0.15530684046866924, "learning_rate": 4.001646301506767e-06, "loss": 0.3749, "num_tokens": 6222401465.0, "step": 13594 }, { "epoch": 4.980167636147117, "grad_norm": 0.15362788223828966, "learning_rate": 4.001589044094316e-06, "loss": 0.3622, "num_tokens": 6223234458.0, "step": 13595 }, { "epoch": 4.980534053955022, "grad_norm": 0.13158093193634537, "learning_rate": 4.001532800028282e-06, "loss": 0.3573, "num_tokens": 6224112315.0, "step": 13596 }, { "epoch": 4.9809004717629275, "grad_norm": 0.14861214351011315, "learning_rate": 4.0014775693118335e-06, "loss": 0.3633, "num_tokens": 6224793821.0, "step": 13597 }, { "epoch": 4.981266889570833, "grad_norm": 0.18825062898652714, "learning_rate": 4.0014233519480815e-06, "loss": 0.4139, "num_tokens": 6225431580.0, "step": 13598 }, { "epoch": 4.981633307378739, "grad_norm": 0.17041279901539969, "learning_rate": 4.001370147940076e-06, "loss": 0.3719, "num_tokens": 6226104273.0, "step": 13599 }, { "epoch": 4.981999725186644, "grad_norm": 0.1546657260215188, "learning_rate": 4.001317957290811e-06, "loss": 0.3555, "num_tokens": 6226796650.0, "step": 13600 }, { "epoch": 4.982366142994549, "grad_norm": 0.15440630594754465, "learning_rate": 4.001266780003229e-06, "loss": 0.3759, "num_tokens": 6227527591.0, "step": 13601 }, { "epoch": 4.982732560802455, "grad_norm": 0.13520041613400308, "learning_rate": 4.001216616080212e-06, "loss": 0.3512, "num_tokens": 6228284495.0, "step": 13602 }, { "epoch": 4.983098978610361, "grad_norm": 0.15259021794324093, "learning_rate": 4.0011674655245785e-06, "loss": 0.3545, "num_tokens": 6229042799.0, "step": 13603 }, { "epoch": 4.983465396418266, "grad_norm": 0.15354030779586617, "learning_rate": 4.001119328339101e-06, "loss": 0.3824, "num_tokens": 6229811771.0, "step": 13604 }, { "epoch": 4.983831814226171, "grad_norm": 0.1527009681296017, "learning_rate": 4.001072204526489e-06, "loss": 0.3766, "num_tokens": 6230635843.0, "step": 13605 }, { "epoch": 4.9841982320340765, "grad_norm": 0.14205305081471414, "learning_rate": 4.0010260940893945e-06, "loss": 0.338, "num_tokens": 6231422649.0, "step": 13606 }, { "epoch": 4.984564649841983, "grad_norm": 0.1493941923268778, "learning_rate": 4.000980997030412e-06, "loss": 0.37, "num_tokens": 6232264416.0, "step": 13607 }, { "epoch": 4.984931067649888, "grad_norm": 0.14831282253965863, "learning_rate": 4.000936913352083e-06, "loss": 0.384, "num_tokens": 6233010495.0, "step": 13608 }, { "epoch": 4.985297485457793, "grad_norm": 0.1447204631307194, "learning_rate": 4.000893843056889e-06, "loss": 0.3553, "num_tokens": 6233838523.0, "step": 13609 }, { "epoch": 4.985663903265698, "grad_norm": 0.1720998565221271, "learning_rate": 4.000851786147256e-06, "loss": 0.4193, "num_tokens": 6234524459.0, "step": 13610 }, { "epoch": 4.9860303210736046, "grad_norm": 0.15464586768630284, "learning_rate": 4.000810742625549e-06, "loss": 0.3589, "num_tokens": 6235349611.0, "step": 13611 }, { "epoch": 4.98639673888151, "grad_norm": 0.14161730122627075, "learning_rate": 4.0007707124940815e-06, "loss": 0.3616, "num_tokens": 6236096229.0, "step": 13612 }, { "epoch": 4.986763156689415, "grad_norm": 0.1700250800063046, "learning_rate": 4.000731695755105e-06, "loss": 0.3572, "num_tokens": 6236784680.0, "step": 13613 }, { "epoch": 4.98712957449732, "grad_norm": 0.1660914353882789, "learning_rate": 4.00069369241082e-06, "loss": 0.4049, "num_tokens": 6237450374.0, "step": 13614 }, { "epoch": 4.987495992305226, "grad_norm": 0.1543659509465734, "learning_rate": 4.000656702463361e-06, "loss": 0.3738, "num_tokens": 6238208891.0, "step": 13615 }, { "epoch": 4.987862410113132, "grad_norm": 0.1499462297454099, "learning_rate": 4.000620725914815e-06, "loss": 0.3766, "num_tokens": 6239037171.0, "step": 13616 }, { "epoch": 4.988228827921037, "grad_norm": 0.16086286882037792, "learning_rate": 4.000585762767205e-06, "loss": 0.3679, "num_tokens": 6239675483.0, "step": 13617 }, { "epoch": 4.988595245728942, "grad_norm": 0.14913967417851967, "learning_rate": 4.0005518130225005e-06, "loss": 0.3837, "num_tokens": 6240438268.0, "step": 13618 }, { "epoch": 4.988961663536848, "grad_norm": 0.16368394665914815, "learning_rate": 4.000518876682613e-06, "loss": 0.3751, "num_tokens": 6241146590.0, "step": 13619 }, { "epoch": 4.989328081344754, "grad_norm": 0.15684050059455107, "learning_rate": 4.000486953749398e-06, "loss": 0.3809, "num_tokens": 6241858247.0, "step": 13620 }, { "epoch": 4.989694499152659, "grad_norm": 0.15812739337702383, "learning_rate": 4.000456044224648e-06, "loss": 0.3843, "num_tokens": 6242578472.0, "step": 13621 }, { "epoch": 4.990060916960564, "grad_norm": 0.1578585455116544, "learning_rate": 4.00042614811011e-06, "loss": 0.3709, "num_tokens": 6243253982.0, "step": 13622 }, { "epoch": 4.990427334768469, "grad_norm": 0.13719249626424956, "learning_rate": 4.000397265407463e-06, "loss": 0.3763, "num_tokens": 6244154351.0, "step": 13623 }, { "epoch": 4.9907937525763755, "grad_norm": 0.15421313546653181, "learning_rate": 4.000369396118334e-06, "loss": 0.3842, "num_tokens": 6244864843.0, "step": 13624 }, { "epoch": 4.991160170384281, "grad_norm": 0.1610408291157719, "learning_rate": 4.000342540244292e-06, "loss": 0.3803, "num_tokens": 6245592707.0, "step": 13625 }, { "epoch": 4.991526588192186, "grad_norm": 0.15743069482911762, "learning_rate": 4.00031669778685e-06, "loss": 0.3817, "num_tokens": 6246317476.0, "step": 13626 }, { "epoch": 4.991893006000091, "grad_norm": 0.15794385958515514, "learning_rate": 4.000291868747461e-06, "loss": 0.3575, "num_tokens": 6247000954.0, "step": 13627 }, { "epoch": 4.992259423807997, "grad_norm": 0.1526137794607319, "learning_rate": 4.000268053127525e-06, "loss": 0.3733, "num_tokens": 6247738804.0, "step": 13628 }, { "epoch": 4.992625841615903, "grad_norm": 0.15967179019599198, "learning_rate": 4.000245250928383e-06, "loss": 0.3763, "num_tokens": 6248522577.0, "step": 13629 }, { "epoch": 4.992992259423808, "grad_norm": 0.14965095172669154, "learning_rate": 4.000223462151316e-06, "loss": 0.3979, "num_tokens": 6249298980.0, "step": 13630 }, { "epoch": 4.993358677231713, "grad_norm": 0.16494222979058698, "learning_rate": 4.000202686797553e-06, "loss": 0.3616, "num_tokens": 6249897013.0, "step": 13631 }, { "epoch": 4.993725095039619, "grad_norm": 0.14270834136516902, "learning_rate": 4.0001829248682636e-06, "loss": 0.3942, "num_tokens": 6250713624.0, "step": 13632 }, { "epoch": 4.9940915128475245, "grad_norm": 0.149918398953175, "learning_rate": 4.0001641763645595e-06, "loss": 0.3437, "num_tokens": 6251477765.0, "step": 13633 }, { "epoch": 4.99445793065543, "grad_norm": 0.15869949699555452, "learning_rate": 4.000146441287497e-06, "loss": 0.38, "num_tokens": 6252257665.0, "step": 13634 }, { "epoch": 4.994824348463335, "grad_norm": 0.16182184320291487, "learning_rate": 4.000129719638076e-06, "loss": 0.366, "num_tokens": 6252985788.0, "step": 13635 }, { "epoch": 4.995190766271241, "grad_norm": 0.13211560342114162, "learning_rate": 4.000114011417235e-06, "loss": 0.3427, "num_tokens": 6253844097.0, "step": 13636 }, { "epoch": 4.995557184079146, "grad_norm": 0.1512322210023458, "learning_rate": 4.0000993166258605e-06, "loss": 0.3632, "num_tokens": 6254520636.0, "step": 13637 }, { "epoch": 4.995923601887052, "grad_norm": 0.15516261807000623, "learning_rate": 4.00008563526478e-06, "loss": 0.3547, "num_tokens": 6255267710.0, "step": 13638 }, { "epoch": 4.996290019694957, "grad_norm": 0.15324706555174078, "learning_rate": 4.000072967334761e-06, "loss": 0.3825, "num_tokens": 6255962581.0, "step": 13639 }, { "epoch": 4.996656437502862, "grad_norm": 0.16834174466762544, "learning_rate": 4.000061312836521e-06, "loss": 0.3863, "num_tokens": 6256604686.0, "step": 13640 }, { "epoch": 4.997022855310768, "grad_norm": 0.14492319979435858, "learning_rate": 4.000050671770712e-06, "loss": 0.3596, "num_tokens": 6257464385.0, "step": 13641 }, { "epoch": 4.9973892731186735, "grad_norm": 0.1507947909058768, "learning_rate": 4.000041044137935e-06, "loss": 0.3732, "num_tokens": 6258190489.0, "step": 13642 }, { "epoch": 4.997755690926579, "grad_norm": 0.14983507985422698, "learning_rate": 4.000032429938732e-06, "loss": 0.3807, "num_tokens": 6258938144.0, "step": 13643 }, { "epoch": 4.998122108734485, "grad_norm": 0.15245773171135515, "learning_rate": 4.00002482917359e-06, "loss": 0.3923, "num_tokens": 6259695245.0, "step": 13644 }, { "epoch": 4.99848852654239, "grad_norm": 0.15599551933604647, "learning_rate": 4.000018241842934e-06, "loss": 0.383, "num_tokens": 6260420682.0, "step": 13645 }, { "epoch": 4.998854944350295, "grad_norm": 0.14829290103415727, "learning_rate": 4.0000126679471354e-06, "loss": 0.3539, "num_tokens": 6261213762.0, "step": 13646 }, { "epoch": 4.999221362158201, "grad_norm": 0.14540572995634, "learning_rate": 4.000008107486509e-06, "loss": 0.3546, "num_tokens": 6262019941.0, "step": 13647 }, { "epoch": 4.999587779966106, "grad_norm": 0.15848499992678242, "learning_rate": 4.000004560461311e-06, "loss": 0.3766, "num_tokens": 6262757820.0, "step": 13648 }, { "epoch": 4.999954197774012, "grad_norm": 0.16208783393805687, "learning_rate": 4.000002026871741e-06, "loss": 0.3546, "num_tokens": 6263402811.0, "step": 13649 }, { "epoch": 5.0, "grad_norm": 0.16208783393805687, "learning_rate": 4.000000506717942e-06, "loss": 0.499, "num_tokens": 6263446556.0, "step": 13650 }, { "epoch": 5.0, "step": 13650, "total_flos": 4.477080668367277e+19, "train_loss": 0.24621310420307047, "train_runtime": 332347.1323, "train_samples_per_second": 5.255, "train_steps_per_second": 0.041 } ], "logging_steps": 1, "max_steps": 13650, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.477080668367277e+19, "train_batch_size": 2, "trial_name": null, "trial_params": null }