{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 11220, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00017825311942959, "grad_norm": 2.46875, "learning_rate": 0.0, "loss": 1.4755, "num_tokens": 6275289.0, "step": 1 }, { "epoch": 0.00035650623885918, "grad_norm": 2.515625, "learning_rate": 5.934718100890208e-08, "loss": 1.4983, "num_tokens": 12559168.0, "step": 2 }, { "epoch": 0.0005347593582887701, "grad_norm": 2.5, "learning_rate": 1.1869436201780416e-07, "loss": 1.5087, "num_tokens": 18815689.0, "step": 3 }, { "epoch": 0.00071301247771836, "grad_norm": 2.484375, "learning_rate": 1.7804154302670624e-07, "loss": 1.4887, "num_tokens": 25097878.0, "step": 4 }, { "epoch": 0.00089126559714795, "grad_norm": 2.46875, "learning_rate": 2.3738872403560833e-07, "loss": 1.5134, "num_tokens": 31381201.0, "step": 5 }, { "epoch": 0.0010695187165775401, "grad_norm": 2.5, "learning_rate": 2.9673590504451043e-07, "loss": 1.5087, "num_tokens": 37663506.0, "step": 6 }, { "epoch": 0.0012477718360071302, "grad_norm": 2.5, "learning_rate": 3.560830860534125e-07, "loss": 1.5037, "num_tokens": 43947517.0, "step": 7 }, { "epoch": 0.00142602495543672, "grad_norm": 2.46875, "learning_rate": 4.1543026706231454e-07, "loss": 1.4959, "num_tokens": 50219852.0, "step": 8 }, { "epoch": 0.0016042780748663102, "grad_norm": 2.453125, "learning_rate": 4.7477744807121665e-07, "loss": 1.4897, "num_tokens": 56432659.0, "step": 9 }, { "epoch": 0.0017825311942959, "grad_norm": 2.453125, "learning_rate": 5.341246290801187e-07, "loss": 1.494, "num_tokens": 62677046.0, "step": 10 }, { "epoch": 0.00196078431372549, "grad_norm": 2.453125, "learning_rate": 5.934718100890209e-07, "loss": 1.5178, "num_tokens": 68894225.0, "step": 11 }, { "epoch": 0.0021390374331550803, "grad_norm": 2.453125, "learning_rate": 6.528189910979229e-07, "loss": 1.4739, "num_tokens": 75147491.0, "step": 12 }, { "epoch": 0.0023172905525846704, "grad_norm": 2.484375, "learning_rate": 7.12166172106825e-07, "loss": 1.5257, "num_tokens": 81405973.0, "step": 13 }, { "epoch": 0.0024955436720142605, "grad_norm": 2.4375, "learning_rate": 7.71513353115727e-07, "loss": 1.5007, "num_tokens": 87658665.0, "step": 14 }, { "epoch": 0.00267379679144385, "grad_norm": 2.4375, "learning_rate": 8.308605341246291e-07, "loss": 1.5038, "num_tokens": 93942755.0, "step": 15 }, { "epoch": 0.00285204991087344, "grad_norm": 2.390625, "learning_rate": 8.902077151335312e-07, "loss": 1.4861, "num_tokens": 100214773.0, "step": 16 }, { "epoch": 0.0030303030303030303, "grad_norm": 2.421875, "learning_rate": 9.495548961424333e-07, "loss": 1.4897, "num_tokens": 106491402.0, "step": 17 }, { "epoch": 0.0032085561497326204, "grad_norm": 2.40625, "learning_rate": 1.0089020771513354e-06, "loss": 1.4979, "num_tokens": 112747701.0, "step": 18 }, { "epoch": 0.0033868092691622105, "grad_norm": 2.359375, "learning_rate": 1.0682492581602374e-06, "loss": 1.4881, "num_tokens": 119007789.0, "step": 19 }, { "epoch": 0.0035650623885918, "grad_norm": 2.359375, "learning_rate": 1.1275964391691395e-06, "loss": 1.4958, "num_tokens": 125283500.0, "step": 20 }, { "epoch": 0.0037433155080213902, "grad_norm": 2.328125, "learning_rate": 1.1869436201780417e-06, "loss": 1.4819, "num_tokens": 131568670.0, "step": 21 }, { "epoch": 0.00392156862745098, "grad_norm": 2.28125, "learning_rate": 1.2462908011869438e-06, "loss": 1.5101, "num_tokens": 137834036.0, "step": 22 }, { "epoch": 0.00409982174688057, "grad_norm": 2.265625, "learning_rate": 1.3056379821958458e-06, "loss": 1.5096, "num_tokens": 144116141.0, "step": 23 }, { "epoch": 0.0042780748663101605, "grad_norm": 2.234375, "learning_rate": 1.364985163204748e-06, "loss": 1.4966, "num_tokens": 150400377.0, "step": 24 }, { "epoch": 0.004456327985739751, "grad_norm": 2.25, "learning_rate": 1.42433234421365e-06, "loss": 1.5166, "num_tokens": 156676877.0, "step": 25 }, { "epoch": 0.004634581105169341, "grad_norm": 2.1875, "learning_rate": 1.483679525222552e-06, "loss": 1.4677, "num_tokens": 162960410.0, "step": 26 }, { "epoch": 0.004812834224598931, "grad_norm": 2.1875, "learning_rate": 1.543026706231454e-06, "loss": 1.4986, "num_tokens": 169220839.0, "step": 27 }, { "epoch": 0.004991087344028521, "grad_norm": 2.140625, "learning_rate": 1.6023738872403563e-06, "loss": 1.494, "num_tokens": 175499905.0, "step": 28 }, { "epoch": 0.00516934046345811, "grad_norm": 2.109375, "learning_rate": 1.6617210682492582e-06, "loss": 1.4774, "num_tokens": 181784506.0, "step": 29 }, { "epoch": 0.0053475935828877, "grad_norm": 2.09375, "learning_rate": 1.7210682492581604e-06, "loss": 1.4927, "num_tokens": 188067708.0, "step": 30 }, { "epoch": 0.00552584670231729, "grad_norm": 2.0625, "learning_rate": 1.7804154302670625e-06, "loss": 1.4955, "num_tokens": 194339056.0, "step": 31 }, { "epoch": 0.00570409982174688, "grad_norm": 2.03125, "learning_rate": 1.8397626112759646e-06, "loss": 1.5125, "num_tokens": 200623526.0, "step": 32 }, { "epoch": 0.0058823529411764705, "grad_norm": 2.0, "learning_rate": 1.8991097922848666e-06, "loss": 1.4832, "num_tokens": 206907806.0, "step": 33 }, { "epoch": 0.006060606060606061, "grad_norm": 1.984375, "learning_rate": 1.9584569732937684e-06, "loss": 1.4874, "num_tokens": 213166951.0, "step": 34 }, { "epoch": 0.006238859180035651, "grad_norm": 1.921875, "learning_rate": 2.0178041543026707e-06, "loss": 1.4919, "num_tokens": 219450003.0, "step": 35 }, { "epoch": 0.006417112299465241, "grad_norm": 1.8984375, "learning_rate": 2.077151335311573e-06, "loss": 1.4829, "num_tokens": 225735101.0, "step": 36 }, { "epoch": 0.006595365418894831, "grad_norm": 1.8359375, "learning_rate": 2.136498516320475e-06, "loss": 1.5171, "num_tokens": 231936535.0, "step": 37 }, { "epoch": 0.006773618538324421, "grad_norm": 1.7265625, "learning_rate": 2.195845697329377e-06, "loss": 1.4722, "num_tokens": 238221216.0, "step": 38 }, { "epoch": 0.006951871657754011, "grad_norm": 1.6875, "learning_rate": 2.255192878338279e-06, "loss": 1.4905, "num_tokens": 244484001.0, "step": 39 }, { "epoch": 0.0071301247771836, "grad_norm": 1.625, "learning_rate": 2.314540059347181e-06, "loss": 1.4539, "num_tokens": 250767574.0, "step": 40 }, { "epoch": 0.00730837789661319, "grad_norm": 1.5703125, "learning_rate": 2.3738872403560835e-06, "loss": 1.4775, "num_tokens": 257052413.0, "step": 41 }, { "epoch": 0.0074866310160427805, "grad_norm": 1.5078125, "learning_rate": 2.4332344213649853e-06, "loss": 1.4682, "num_tokens": 263337990.0, "step": 42 }, { "epoch": 0.007664884135472371, "grad_norm": 1.4765625, "learning_rate": 2.4925816023738876e-06, "loss": 1.4625, "num_tokens": 269609086.0, "step": 43 }, { "epoch": 0.00784313725490196, "grad_norm": 1.40625, "learning_rate": 2.5519287833827894e-06, "loss": 1.4581, "num_tokens": 275835527.0, "step": 44 }, { "epoch": 0.008021390374331552, "grad_norm": 1.3515625, "learning_rate": 2.6112759643916917e-06, "loss": 1.4587, "num_tokens": 282088993.0, "step": 45 }, { "epoch": 0.00819964349376114, "grad_norm": 1.3046875, "learning_rate": 2.670623145400594e-06, "loss": 1.4514, "num_tokens": 288346544.0, "step": 46 }, { "epoch": 0.00837789661319073, "grad_norm": 1.2734375, "learning_rate": 2.729970326409496e-06, "loss": 1.4489, "num_tokens": 294623675.0, "step": 47 }, { "epoch": 0.008556149732620321, "grad_norm": 1.2109375, "learning_rate": 2.789317507418398e-06, "loss": 1.4292, "num_tokens": 300906709.0, "step": 48 }, { "epoch": 0.00873440285204991, "grad_norm": 1.1875, "learning_rate": 2.8486646884273e-06, "loss": 1.481, "num_tokens": 307190616.0, "step": 49 }, { "epoch": 0.008912655971479501, "grad_norm": 1.1796875, "learning_rate": 2.9080118694362018e-06, "loss": 1.4141, "num_tokens": 313473221.0, "step": 50 }, { "epoch": 0.00909090909090909, "grad_norm": 1.1328125, "learning_rate": 2.967359050445104e-06, "loss": 1.4272, "num_tokens": 319757273.0, "step": 51 }, { "epoch": 0.009269162210338681, "grad_norm": 1.0703125, "learning_rate": 3.0267062314540063e-06, "loss": 1.4588, "num_tokens": 325987809.0, "step": 52 }, { "epoch": 0.00944741532976827, "grad_norm": 1.046875, "learning_rate": 3.086053412462908e-06, "loss": 1.4548, "num_tokens": 332238039.0, "step": 53 }, { "epoch": 0.009625668449197862, "grad_norm": 1.03125, "learning_rate": 3.14540059347181e-06, "loss": 1.4377, "num_tokens": 338490797.0, "step": 54 }, { "epoch": 0.00980392156862745, "grad_norm": 1.0, "learning_rate": 3.2047477744807127e-06, "loss": 1.4327, "num_tokens": 344774920.0, "step": 55 }, { "epoch": 0.009982174688057042, "grad_norm": 1.0390625, "learning_rate": 3.2640949554896145e-06, "loss": 1.4162, "num_tokens": 351057230.0, "step": 56 }, { "epoch": 0.010160427807486631, "grad_norm": 0.99609375, "learning_rate": 3.3234421364985163e-06, "loss": 1.4117, "num_tokens": 357340180.0, "step": 57 }, { "epoch": 0.01033868092691622, "grad_norm": 0.92578125, "learning_rate": 3.382789317507419e-06, "loss": 1.4444, "num_tokens": 363610239.0, "step": 58 }, { "epoch": 0.010516934046345811, "grad_norm": 0.90625, "learning_rate": 3.442136498516321e-06, "loss": 1.4294, "num_tokens": 369876610.0, "step": 59 }, { "epoch": 0.0106951871657754, "grad_norm": 0.96875, "learning_rate": 3.5014836795252227e-06, "loss": 1.3927, "num_tokens": 376161770.0, "step": 60 }, { "epoch": 0.010873440285204991, "grad_norm": 0.87890625, "learning_rate": 3.560830860534125e-06, "loss": 1.417, "num_tokens": 382419382.0, "step": 61 }, { "epoch": 0.01105169340463458, "grad_norm": 0.9296875, "learning_rate": 3.6201780415430273e-06, "loss": 1.4198, "num_tokens": 388674195.0, "step": 62 }, { "epoch": 0.011229946524064172, "grad_norm": 0.703125, "learning_rate": 3.679525222551929e-06, "loss": 1.4131, "num_tokens": 394949113.0, "step": 63 }, { "epoch": 0.01140819964349376, "grad_norm": 0.91796875, "learning_rate": 3.738872403560831e-06, "loss": 1.4061, "num_tokens": 401217660.0, "step": 64 }, { "epoch": 0.011586452762923352, "grad_norm": 0.75390625, "learning_rate": 3.7982195845697332e-06, "loss": 1.4084, "num_tokens": 407501289.0, "step": 65 }, { "epoch": 0.011764705882352941, "grad_norm": 0.94921875, "learning_rate": 3.857566765578635e-06, "loss": 1.4277, "num_tokens": 413775442.0, "step": 66 }, { "epoch": 0.011942959001782532, "grad_norm": 1.171875, "learning_rate": 3.916913946587537e-06, "loss": 1.3775, "num_tokens": 420048268.0, "step": 67 }, { "epoch": 0.012121212121212121, "grad_norm": 0.703125, "learning_rate": 3.97626112759644e-06, "loss": 1.381, "num_tokens": 426327760.0, "step": 68 }, { "epoch": 0.01229946524064171, "grad_norm": 0.734375, "learning_rate": 4.0356083086053414e-06, "loss": 1.3744, "num_tokens": 432605598.0, "step": 69 }, { "epoch": 0.012477718360071301, "grad_norm": 0.5390625, "learning_rate": 4.094955489614243e-06, "loss": 1.3792, "num_tokens": 438889548.0, "step": 70 }, { "epoch": 0.01265597147950089, "grad_norm": 0.625, "learning_rate": 4.154302670623146e-06, "loss": 1.3551, "num_tokens": 445171859.0, "step": 71 }, { "epoch": 0.012834224598930482, "grad_norm": 0.5859375, "learning_rate": 4.213649851632048e-06, "loss": 1.3959, "num_tokens": 451438688.0, "step": 72 }, { "epoch": 0.01301247771836007, "grad_norm": 0.515625, "learning_rate": 4.27299703264095e-06, "loss": 1.3709, "num_tokens": 457722264.0, "step": 73 }, { "epoch": 0.013190730837789662, "grad_norm": 0.466796875, "learning_rate": 4.332344213649852e-06, "loss": 1.354, "num_tokens": 463981553.0, "step": 74 }, { "epoch": 0.013368983957219251, "grad_norm": 0.4765625, "learning_rate": 4.391691394658754e-06, "loss": 1.374, "num_tokens": 470247196.0, "step": 75 }, { "epoch": 0.013547237076648842, "grad_norm": 0.458984375, "learning_rate": 4.451038575667656e-06, "loss": 1.4006, "num_tokens": 476512013.0, "step": 76 }, { "epoch": 0.013725490196078431, "grad_norm": 0.435546875, "learning_rate": 4.510385756676558e-06, "loss": 1.3845, "num_tokens": 482783801.0, "step": 77 }, { "epoch": 0.013903743315508022, "grad_norm": 0.435546875, "learning_rate": 4.5697329376854606e-06, "loss": 1.3828, "num_tokens": 489064608.0, "step": 78 }, { "epoch": 0.014081996434937611, "grad_norm": 0.412109375, "learning_rate": 4.629080118694362e-06, "loss": 1.352, "num_tokens": 495309798.0, "step": 79 }, { "epoch": 0.0142602495543672, "grad_norm": 0.404296875, "learning_rate": 4.688427299703264e-06, "loss": 1.3423, "num_tokens": 501592130.0, "step": 80 }, { "epoch": 0.014438502673796792, "grad_norm": 0.3984375, "learning_rate": 4.747774480712167e-06, "loss": 1.3449, "num_tokens": 507837782.0, "step": 81 }, { "epoch": 0.01461675579322638, "grad_norm": 0.380859375, "learning_rate": 4.807121661721069e-06, "loss": 1.3237, "num_tokens": 514092903.0, "step": 82 }, { "epoch": 0.014795008912655972, "grad_norm": 0.36328125, "learning_rate": 4.866468842729971e-06, "loss": 1.345, "num_tokens": 520377444.0, "step": 83 }, { "epoch": 0.014973262032085561, "grad_norm": 0.35546875, "learning_rate": 4.925816023738873e-06, "loss": 1.347, "num_tokens": 526660198.0, "step": 84 }, { "epoch": 0.015151515151515152, "grad_norm": 0.3515625, "learning_rate": 4.985163204747775e-06, "loss": 1.3308, "num_tokens": 532944955.0, "step": 85 }, { "epoch": 0.015329768270944741, "grad_norm": 0.34375, "learning_rate": 5.044510385756677e-06, "loss": 1.3318, "num_tokens": 539226511.0, "step": 86 }, { "epoch": 0.015508021390374332, "grad_norm": 0.32421875, "learning_rate": 5.103857566765579e-06, "loss": 1.3312, "num_tokens": 545488319.0, "step": 87 }, { "epoch": 0.01568627450980392, "grad_norm": 0.328125, "learning_rate": 5.163204747774481e-06, "loss": 1.328, "num_tokens": 551734258.0, "step": 88 }, { "epoch": 0.015864527629233512, "grad_norm": 0.314453125, "learning_rate": 5.222551928783383e-06, "loss": 1.3043, "num_tokens": 558021245.0, "step": 89 }, { "epoch": 0.016042780748663103, "grad_norm": 0.3125, "learning_rate": 5.281899109792285e-06, "loss": 1.3605, "num_tokens": 564306064.0, "step": 90 }, { "epoch": 0.01622103386809269, "grad_norm": 0.302734375, "learning_rate": 5.341246290801188e-06, "loss": 1.2963, "num_tokens": 570565345.0, "step": 91 }, { "epoch": 0.01639928698752228, "grad_norm": 0.30078125, "learning_rate": 5.40059347181009e-06, "loss": 1.3156, "num_tokens": 576826780.0, "step": 92 }, { "epoch": 0.016577540106951873, "grad_norm": 0.298828125, "learning_rate": 5.459940652818992e-06, "loss": 1.3473, "num_tokens": 583091254.0, "step": 93 }, { "epoch": 0.01675579322638146, "grad_norm": 0.2890625, "learning_rate": 5.5192878338278934e-06, "loss": 1.2864, "num_tokens": 589342905.0, "step": 94 }, { "epoch": 0.01693404634581105, "grad_norm": 0.29296875, "learning_rate": 5.578635014836796e-06, "loss": 1.3124, "num_tokens": 595604120.0, "step": 95 }, { "epoch": 0.017112299465240642, "grad_norm": 0.2890625, "learning_rate": 5.637982195845698e-06, "loss": 1.3122, "num_tokens": 601840868.0, "step": 96 }, { "epoch": 0.017290552584670233, "grad_norm": 0.28515625, "learning_rate": 5.6973293768546e-06, "loss": 1.295, "num_tokens": 608076268.0, "step": 97 }, { "epoch": 0.01746880570409982, "grad_norm": 0.2890625, "learning_rate": 5.756676557863502e-06, "loss": 1.3147, "num_tokens": 614328405.0, "step": 98 }, { "epoch": 0.01764705882352941, "grad_norm": 0.28125, "learning_rate": 5.8160237388724035e-06, "loss": 1.2973, "num_tokens": 620612378.0, "step": 99 }, { "epoch": 0.017825311942959002, "grad_norm": 0.271484375, "learning_rate": 5.875370919881306e-06, "loss": 1.3065, "num_tokens": 626896780.0, "step": 100 }, { "epoch": 0.018003565062388593, "grad_norm": 0.279296875, "learning_rate": 5.934718100890208e-06, "loss": 1.3189, "num_tokens": 633162562.0, "step": 101 }, { "epoch": 0.01818181818181818, "grad_norm": 0.27734375, "learning_rate": 5.994065281899111e-06, "loss": 1.3345, "num_tokens": 639420440.0, "step": 102 }, { "epoch": 0.018360071301247772, "grad_norm": 0.2890625, "learning_rate": 6.0534124629080126e-06, "loss": 1.3004, "num_tokens": 645704535.0, "step": 103 }, { "epoch": 0.018538324420677363, "grad_norm": 0.28125, "learning_rate": 6.112759643916914e-06, "loss": 1.2937, "num_tokens": 651956873.0, "step": 104 }, { "epoch": 0.01871657754010695, "grad_norm": 0.28125, "learning_rate": 6.172106824925816e-06, "loss": 1.2712, "num_tokens": 658232607.0, "step": 105 }, { "epoch": 0.01889483065953654, "grad_norm": 0.28125, "learning_rate": 6.231454005934718e-06, "loss": 1.2851, "num_tokens": 664512641.0, "step": 106 }, { "epoch": 0.019073083778966132, "grad_norm": 0.267578125, "learning_rate": 6.29080118694362e-06, "loss": 1.282, "num_tokens": 670762498.0, "step": 107 }, { "epoch": 0.019251336898395723, "grad_norm": 0.287109375, "learning_rate": 6.3501483679525235e-06, "loss": 1.2923, "num_tokens": 677033564.0, "step": 108 }, { "epoch": 0.01942959001782531, "grad_norm": 0.267578125, "learning_rate": 6.409495548961425e-06, "loss": 1.2734, "num_tokens": 683317512.0, "step": 109 }, { "epoch": 0.0196078431372549, "grad_norm": 0.283203125, "learning_rate": 6.468842729970327e-06, "loss": 1.2953, "num_tokens": 689590318.0, "step": 110 }, { "epoch": 0.019786096256684493, "grad_norm": 0.271484375, "learning_rate": 6.528189910979229e-06, "loss": 1.2636, "num_tokens": 695872633.0, "step": 111 }, { "epoch": 0.019964349376114084, "grad_norm": 0.26171875, "learning_rate": 6.587537091988131e-06, "loss": 1.2719, "num_tokens": 702156753.0, "step": 112 }, { "epoch": 0.02014260249554367, "grad_norm": 0.26171875, "learning_rate": 6.646884272997033e-06, "loss": 1.2542, "num_tokens": 708424593.0, "step": 113 }, { "epoch": 0.020320855614973262, "grad_norm": 0.2734375, "learning_rate": 6.7062314540059345e-06, "loss": 1.2736, "num_tokens": 714709562.0, "step": 114 }, { "epoch": 0.020499108734402853, "grad_norm": 0.2578125, "learning_rate": 6.765578635014838e-06, "loss": 1.2772, "num_tokens": 720959686.0, "step": 115 }, { "epoch": 0.02067736185383244, "grad_norm": 0.26953125, "learning_rate": 6.82492581602374e-06, "loss": 1.3203, "num_tokens": 727242042.0, "step": 116 }, { "epoch": 0.02085561497326203, "grad_norm": 0.279296875, "learning_rate": 6.884272997032642e-06, "loss": 1.2976, "num_tokens": 733502410.0, "step": 117 }, { "epoch": 0.021033868092691622, "grad_norm": 0.26953125, "learning_rate": 6.943620178041544e-06, "loss": 1.245, "num_tokens": 739778789.0, "step": 118 }, { "epoch": 0.021212121212121213, "grad_norm": 0.259765625, "learning_rate": 7.0029673590504455e-06, "loss": 1.2683, "num_tokens": 746045104.0, "step": 119 }, { "epoch": 0.0213903743315508, "grad_norm": 0.2578125, "learning_rate": 7.062314540059347e-06, "loss": 1.2737, "num_tokens": 752329559.0, "step": 120 }, { "epoch": 0.021568627450980392, "grad_norm": 0.23828125, "learning_rate": 7.12166172106825e-06, "loss": 1.2902, "num_tokens": 758588529.0, "step": 121 }, { "epoch": 0.021746880570409983, "grad_norm": 0.25, "learning_rate": 7.181008902077153e-06, "loss": 1.2985, "num_tokens": 764869908.0, "step": 122 }, { "epoch": 0.021925133689839574, "grad_norm": 0.2373046875, "learning_rate": 7.2403560830860545e-06, "loss": 1.2798, "num_tokens": 771153258.0, "step": 123 }, { "epoch": 0.02210338680926916, "grad_norm": 0.2373046875, "learning_rate": 7.299703264094956e-06, "loss": 1.2728, "num_tokens": 777436569.0, "step": 124 }, { "epoch": 0.022281639928698752, "grad_norm": 0.26171875, "learning_rate": 7.359050445103858e-06, "loss": 1.2678, "num_tokens": 783722033.0, "step": 125 }, { "epoch": 0.022459893048128343, "grad_norm": 0.2255859375, "learning_rate": 7.41839762611276e-06, "loss": 1.2661, "num_tokens": 789993471.0, "step": 126 }, { "epoch": 0.02263814616755793, "grad_norm": 0.2392578125, "learning_rate": 7.477744807121662e-06, "loss": 1.2711, "num_tokens": 796245332.0, "step": 127 }, { "epoch": 0.02281639928698752, "grad_norm": 0.228515625, "learning_rate": 7.537091988130565e-06, "loss": 1.2419, "num_tokens": 802528556.0, "step": 128 }, { "epoch": 0.022994652406417113, "grad_norm": 0.2236328125, "learning_rate": 7.5964391691394664e-06, "loss": 1.247, "num_tokens": 808804998.0, "step": 129 }, { "epoch": 0.023172905525846704, "grad_norm": 0.2431640625, "learning_rate": 7.655786350148369e-06, "loss": 1.2692, "num_tokens": 815084224.0, "step": 130 }, { "epoch": 0.02335115864527629, "grad_norm": 0.220703125, "learning_rate": 7.71513353115727e-06, "loss": 1.2296, "num_tokens": 821350965.0, "step": 131 }, { "epoch": 0.023529411764705882, "grad_norm": 0.236328125, "learning_rate": 7.774480712166173e-06, "loss": 1.2454, "num_tokens": 827634226.0, "step": 132 }, { "epoch": 0.023707664884135473, "grad_norm": 0.244140625, "learning_rate": 7.833827893175074e-06, "loss": 1.2402, "num_tokens": 833916737.0, "step": 133 }, { "epoch": 0.023885918003565064, "grad_norm": 0.24609375, "learning_rate": 7.893175074183978e-06, "loss": 1.2846, "num_tokens": 840199801.0, "step": 134 }, { "epoch": 0.02406417112299465, "grad_norm": 0.228515625, "learning_rate": 7.95252225519288e-06, "loss": 1.2284, "num_tokens": 846470588.0, "step": 135 }, { "epoch": 0.024242424242424242, "grad_norm": 0.2578125, "learning_rate": 8.011869436201782e-06, "loss": 1.2386, "num_tokens": 852754119.0, "step": 136 }, { "epoch": 0.024420677361853833, "grad_norm": 0.255859375, "learning_rate": 8.071216617210683e-06, "loss": 1.2262, "num_tokens": 859031551.0, "step": 137 }, { "epoch": 0.02459893048128342, "grad_norm": 0.24609375, "learning_rate": 8.130563798219586e-06, "loss": 1.2388, "num_tokens": 865257907.0, "step": 138 }, { "epoch": 0.024777183600713012, "grad_norm": 0.271484375, "learning_rate": 8.189910979228487e-06, "loss": 1.2368, "num_tokens": 871538489.0, "step": 139 }, { "epoch": 0.024955436720142603, "grad_norm": 0.2451171875, "learning_rate": 8.24925816023739e-06, "loss": 1.25, "num_tokens": 877804319.0, "step": 140 }, { "epoch": 0.025133689839572194, "grad_norm": 0.265625, "learning_rate": 8.308605341246292e-06, "loss": 1.2441, "num_tokens": 884085892.0, "step": 141 }, { "epoch": 0.02531194295900178, "grad_norm": 0.23046875, "learning_rate": 8.367952522255195e-06, "loss": 1.252, "num_tokens": 890369768.0, "step": 142 }, { "epoch": 0.025490196078431372, "grad_norm": 0.265625, "learning_rate": 8.427299703264096e-06, "loss": 1.2516, "num_tokens": 896646634.0, "step": 143 }, { "epoch": 0.025668449197860963, "grad_norm": 0.2265625, "learning_rate": 8.486646884272998e-06, "loss": 1.2434, "num_tokens": 902873495.0, "step": 144 }, { "epoch": 0.025846702317290554, "grad_norm": 0.25390625, "learning_rate": 8.5459940652819e-06, "loss": 1.274, "num_tokens": 909128789.0, "step": 145 }, { "epoch": 0.02602495543672014, "grad_norm": 0.2451171875, "learning_rate": 8.605341246290802e-06, "loss": 1.2415, "num_tokens": 915400001.0, "step": 146 }, { "epoch": 0.026203208556149733, "grad_norm": 0.244140625, "learning_rate": 8.664688427299705e-06, "loss": 1.21, "num_tokens": 921652059.0, "step": 147 }, { "epoch": 0.026381461675579324, "grad_norm": 0.255859375, "learning_rate": 8.724035608308606e-06, "loss": 1.2361, "num_tokens": 927933306.0, "step": 148 }, { "epoch": 0.02655971479500891, "grad_norm": 0.2314453125, "learning_rate": 8.783382789317508e-06, "loss": 1.212, "num_tokens": 934217304.0, "step": 149 }, { "epoch": 0.026737967914438502, "grad_norm": 0.26953125, "learning_rate": 8.842729970326411e-06, "loss": 1.2338, "num_tokens": 940474646.0, "step": 150 }, { "epoch": 0.026916221033868093, "grad_norm": 0.2412109375, "learning_rate": 8.902077151335312e-06, "loss": 1.1815, "num_tokens": 946758434.0, "step": 151 }, { "epoch": 0.027094474153297684, "grad_norm": 0.33203125, "learning_rate": 8.961424332344215e-06, "loss": 1.2458, "num_tokens": 953027120.0, "step": 152 }, { "epoch": 0.02727272727272727, "grad_norm": 0.2265625, "learning_rate": 9.020771513353116e-06, "loss": 1.1931, "num_tokens": 959310349.0, "step": 153 }, { "epoch": 0.027450980392156862, "grad_norm": 0.2734375, "learning_rate": 9.080118694362018e-06, "loss": 1.243, "num_tokens": 965593416.0, "step": 154 }, { "epoch": 0.027629233511586453, "grad_norm": 0.2431640625, "learning_rate": 9.139465875370921e-06, "loss": 1.209, "num_tokens": 971847629.0, "step": 155 }, { "epoch": 0.027807486631016044, "grad_norm": 0.2890625, "learning_rate": 9.198813056379822e-06, "loss": 1.2495, "num_tokens": 978078292.0, "step": 156 }, { "epoch": 0.027985739750445632, "grad_norm": 0.291015625, "learning_rate": 9.258160237388725e-06, "loss": 1.2468, "num_tokens": 984330725.0, "step": 157 }, { "epoch": 0.028163992869875223, "grad_norm": 0.271484375, "learning_rate": 9.317507418397626e-06, "loss": 1.2221, "num_tokens": 990594553.0, "step": 158 }, { "epoch": 0.028342245989304814, "grad_norm": 0.255859375, "learning_rate": 9.376854599406528e-06, "loss": 1.2592, "num_tokens": 996857310.0, "step": 159 }, { "epoch": 0.0285204991087344, "grad_norm": 0.2060546875, "learning_rate": 9.436201780415431e-06, "loss": 1.2184, "num_tokens": 1003126839.0, "step": 160 }, { "epoch": 0.028698752228163992, "grad_norm": 0.265625, "learning_rate": 9.495548961424334e-06, "loss": 1.2335, "num_tokens": 1009408553.0, "step": 161 }, { "epoch": 0.028877005347593583, "grad_norm": 0.2333984375, "learning_rate": 9.554896142433235e-06, "loss": 1.1848, "num_tokens": 1015649909.0, "step": 162 }, { "epoch": 0.029055258467023174, "grad_norm": 0.283203125, "learning_rate": 9.614243323442138e-06, "loss": 1.2112, "num_tokens": 1021932320.0, "step": 163 }, { "epoch": 0.02923351158645276, "grad_norm": 0.236328125, "learning_rate": 9.673590504451039e-06, "loss": 1.2094, "num_tokens": 1028216582.0, "step": 164 }, { "epoch": 0.029411764705882353, "grad_norm": 0.27734375, "learning_rate": 9.732937685459941e-06, "loss": 1.2205, "num_tokens": 1034500037.0, "step": 165 }, { "epoch": 0.029590017825311943, "grad_norm": 0.251953125, "learning_rate": 9.792284866468842e-06, "loss": 1.2296, "num_tokens": 1040730823.0, "step": 166 }, { "epoch": 0.029768270944741534, "grad_norm": 0.30859375, "learning_rate": 9.851632047477747e-06, "loss": 1.2126, "num_tokens": 1046997112.0, "step": 167 }, { "epoch": 0.029946524064171122, "grad_norm": 0.271484375, "learning_rate": 9.910979228486648e-06, "loss": 1.2102, "num_tokens": 1053269558.0, "step": 168 }, { "epoch": 0.030124777183600713, "grad_norm": 0.271484375, "learning_rate": 9.97032640949555e-06, "loss": 1.2047, "num_tokens": 1059516923.0, "step": 169 }, { "epoch": 0.030303030303030304, "grad_norm": 0.287109375, "learning_rate": 1.0029673590504451e-05, "loss": 1.232, "num_tokens": 1065797537.0, "step": 170 }, { "epoch": 0.03048128342245989, "grad_norm": 0.265625, "learning_rate": 1.0089020771513354e-05, "loss": 1.2142, "num_tokens": 1072050269.0, "step": 171 }, { "epoch": 0.030659536541889482, "grad_norm": 0.29296875, "learning_rate": 1.0148367952522255e-05, "loss": 1.2145, "num_tokens": 1078334293.0, "step": 172 }, { "epoch": 0.030837789661319073, "grad_norm": 0.26171875, "learning_rate": 1.0207715133531158e-05, "loss": 1.2215, "num_tokens": 1084617862.0, "step": 173 }, { "epoch": 0.031016042780748664, "grad_norm": 0.267578125, "learning_rate": 1.0267062314540059e-05, "loss": 1.1934, "num_tokens": 1090843379.0, "step": 174 }, { "epoch": 0.031194295900178252, "grad_norm": 0.306640625, "learning_rate": 1.0326409495548961e-05, "loss": 1.1949, "num_tokens": 1097128491.0, "step": 175 }, { "epoch": 0.03137254901960784, "grad_norm": 0.2470703125, "learning_rate": 1.0385756676557866e-05, "loss": 1.2118, "num_tokens": 1103413530.0, "step": 176 }, { "epoch": 0.03155080213903743, "grad_norm": 0.337890625, "learning_rate": 1.0445103857566767e-05, "loss": 1.2229, "num_tokens": 1109677553.0, "step": 177 }, { "epoch": 0.031729055258467025, "grad_norm": 0.30078125, "learning_rate": 1.050445103857567e-05, "loss": 1.2071, "num_tokens": 1115949650.0, "step": 178 }, { "epoch": 0.03190730837789661, "grad_norm": 0.3046875, "learning_rate": 1.056379821958457e-05, "loss": 1.1943, "num_tokens": 1122219618.0, "step": 179 }, { "epoch": 0.03208556149732621, "grad_norm": 0.294921875, "learning_rate": 1.0623145400593473e-05, "loss": 1.2258, "num_tokens": 1128490252.0, "step": 180 }, { "epoch": 0.032263814616755794, "grad_norm": 0.333984375, "learning_rate": 1.0682492581602376e-05, "loss": 1.2103, "num_tokens": 1134748412.0, "step": 181 }, { "epoch": 0.03244206773618538, "grad_norm": 0.2578125, "learning_rate": 1.0741839762611277e-05, "loss": 1.2103, "num_tokens": 1141031919.0, "step": 182 }, { "epoch": 0.032620320855614976, "grad_norm": 0.263671875, "learning_rate": 1.080118694362018e-05, "loss": 1.1969, "num_tokens": 1147303434.0, "step": 183 }, { "epoch": 0.03279857397504456, "grad_norm": 0.2734375, "learning_rate": 1.086053412462908e-05, "loss": 1.193, "num_tokens": 1153588288.0, "step": 184 }, { "epoch": 0.03297682709447415, "grad_norm": 0.259765625, "learning_rate": 1.0919881305637983e-05, "loss": 1.1859, "num_tokens": 1159849567.0, "step": 185 }, { "epoch": 0.033155080213903745, "grad_norm": 0.255859375, "learning_rate": 1.0979228486646884e-05, "loss": 1.2104, "num_tokens": 1166133503.0, "step": 186 }, { "epoch": 0.03333333333333333, "grad_norm": 0.2333984375, "learning_rate": 1.1038575667655787e-05, "loss": 1.1773, "num_tokens": 1172418063.0, "step": 187 }, { "epoch": 0.03351158645276292, "grad_norm": 0.26171875, "learning_rate": 1.1097922848664688e-05, "loss": 1.2074, "num_tokens": 1178676037.0, "step": 188 }, { "epoch": 0.033689839572192515, "grad_norm": 0.3203125, "learning_rate": 1.1157270029673592e-05, "loss": 1.1809, "num_tokens": 1184958634.0, "step": 189 }, { "epoch": 0.0338680926916221, "grad_norm": 0.26171875, "learning_rate": 1.1216617210682495e-05, "loss": 1.2041, "num_tokens": 1191212043.0, "step": 190 }, { "epoch": 0.0340463458110517, "grad_norm": 0.263671875, "learning_rate": 1.1275964391691396e-05, "loss": 1.2076, "num_tokens": 1197497327.0, "step": 191 }, { "epoch": 0.034224598930481284, "grad_norm": 0.27734375, "learning_rate": 1.1335311572700299e-05, "loss": 1.2092, "num_tokens": 1203767799.0, "step": 192 }, { "epoch": 0.03440285204991087, "grad_norm": 0.279296875, "learning_rate": 1.13946587537092e-05, "loss": 1.1959, "num_tokens": 1210050774.0, "step": 193 }, { "epoch": 0.034581105169340466, "grad_norm": 0.251953125, "learning_rate": 1.1454005934718102e-05, "loss": 1.2276, "num_tokens": 1216317849.0, "step": 194 }, { "epoch": 0.034759358288770054, "grad_norm": 0.23828125, "learning_rate": 1.1513353115727003e-05, "loss": 1.2179, "num_tokens": 1222601116.0, "step": 195 }, { "epoch": 0.03493761140819964, "grad_norm": 0.255859375, "learning_rate": 1.1572700296735906e-05, "loss": 1.1898, "num_tokens": 1228862316.0, "step": 196 }, { "epoch": 0.035115864527629236, "grad_norm": 0.232421875, "learning_rate": 1.1632047477744807e-05, "loss": 1.2258, "num_tokens": 1235137578.0, "step": 197 }, { "epoch": 0.03529411764705882, "grad_norm": 0.25, "learning_rate": 1.169139465875371e-05, "loss": 1.1942, "num_tokens": 1241419025.0, "step": 198 }, { "epoch": 0.03547237076648841, "grad_norm": 0.2490234375, "learning_rate": 1.1750741839762612e-05, "loss": 1.1869, "num_tokens": 1247694155.0, "step": 199 }, { "epoch": 0.035650623885918005, "grad_norm": 0.2314453125, "learning_rate": 1.1810089020771513e-05, "loss": 1.1705, "num_tokens": 1253977821.0, "step": 200 }, { "epoch": 0.03582887700534759, "grad_norm": 0.248046875, "learning_rate": 1.1869436201780416e-05, "loss": 1.1762, "num_tokens": 1260245192.0, "step": 201 }, { "epoch": 0.03600713012477719, "grad_norm": 0.2255859375, "learning_rate": 1.1928783382789319e-05, "loss": 1.2219, "num_tokens": 1266528913.0, "step": 202 }, { "epoch": 0.036185383244206774, "grad_norm": 0.2314453125, "learning_rate": 1.1988130563798221e-05, "loss": 1.1902, "num_tokens": 1272813977.0, "step": 203 }, { "epoch": 0.03636363636363636, "grad_norm": 0.2431640625, "learning_rate": 1.2047477744807124e-05, "loss": 1.1822, "num_tokens": 1279099030.0, "step": 204 }, { "epoch": 0.036541889483065956, "grad_norm": 0.2333984375, "learning_rate": 1.2106824925816025e-05, "loss": 1.191, "num_tokens": 1285377012.0, "step": 205 }, { "epoch": 0.036720142602495544, "grad_norm": 0.236328125, "learning_rate": 1.2166172106824928e-05, "loss": 1.2133, "num_tokens": 1291663020.0, "step": 206 }, { "epoch": 0.03689839572192513, "grad_norm": 0.220703125, "learning_rate": 1.2225519287833829e-05, "loss": 1.1722, "num_tokens": 1297914019.0, "step": 207 }, { "epoch": 0.037076648841354726, "grad_norm": 0.265625, "learning_rate": 1.2284866468842732e-05, "loss": 1.1926, "num_tokens": 1304199296.0, "step": 208 }, { "epoch": 0.03725490196078431, "grad_norm": 0.2255859375, "learning_rate": 1.2344213649851633e-05, "loss": 1.2051, "num_tokens": 1310483359.0, "step": 209 }, { "epoch": 0.0374331550802139, "grad_norm": 0.224609375, "learning_rate": 1.2403560830860535e-05, "loss": 1.2026, "num_tokens": 1316764962.0, "step": 210 }, { "epoch": 0.037611408199643495, "grad_norm": 0.2451171875, "learning_rate": 1.2462908011869436e-05, "loss": 1.1794, "num_tokens": 1323029548.0, "step": 211 }, { "epoch": 0.03778966131907308, "grad_norm": 0.265625, "learning_rate": 1.2522255192878339e-05, "loss": 1.2532, "num_tokens": 1329286282.0, "step": 212 }, { "epoch": 0.03796791443850268, "grad_norm": 0.26171875, "learning_rate": 1.258160237388724e-05, "loss": 1.1966, "num_tokens": 1335562160.0, "step": 213 }, { "epoch": 0.038146167557932265, "grad_norm": 0.25390625, "learning_rate": 1.2640949554896143e-05, "loss": 1.2165, "num_tokens": 1341847298.0, "step": 214 }, { "epoch": 0.03832442067736185, "grad_norm": 0.240234375, "learning_rate": 1.2700296735905047e-05, "loss": 1.1955, "num_tokens": 1348123558.0, "step": 215 }, { "epoch": 0.038502673796791446, "grad_norm": 0.2255859375, "learning_rate": 1.2759643916913948e-05, "loss": 1.2031, "num_tokens": 1354388534.0, "step": 216 }, { "epoch": 0.038680926916221034, "grad_norm": 0.263671875, "learning_rate": 1.281899109792285e-05, "loss": 1.1872, "num_tokens": 1360578631.0, "step": 217 }, { "epoch": 0.03885918003565062, "grad_norm": 0.25, "learning_rate": 1.2878338278931752e-05, "loss": 1.208, "num_tokens": 1366861946.0, "step": 218 }, { "epoch": 0.039037433155080216, "grad_norm": 0.25390625, "learning_rate": 1.2937685459940654e-05, "loss": 1.2024, "num_tokens": 1373123122.0, "step": 219 }, { "epoch": 0.0392156862745098, "grad_norm": 0.283203125, "learning_rate": 1.2997032640949557e-05, "loss": 1.1788, "num_tokens": 1379377438.0, "step": 220 }, { "epoch": 0.03939393939393939, "grad_norm": 0.265625, "learning_rate": 1.3056379821958458e-05, "loss": 1.1989, "num_tokens": 1385661999.0, "step": 221 }, { "epoch": 0.039572192513368985, "grad_norm": 0.294921875, "learning_rate": 1.311572700296736e-05, "loss": 1.1865, "num_tokens": 1391917533.0, "step": 222 }, { "epoch": 0.03975044563279857, "grad_norm": 0.30859375, "learning_rate": 1.3175074183976262e-05, "loss": 1.1542, "num_tokens": 1398176255.0, "step": 223 }, { "epoch": 0.03992869875222817, "grad_norm": 0.259765625, "learning_rate": 1.3234421364985164e-05, "loss": 1.1745, "num_tokens": 1404462448.0, "step": 224 }, { "epoch": 0.040106951871657755, "grad_norm": 0.28515625, "learning_rate": 1.3293768545994065e-05, "loss": 1.1927, "num_tokens": 1410726170.0, "step": 225 }, { "epoch": 0.04028520499108734, "grad_norm": 0.24609375, "learning_rate": 1.3353115727002968e-05, "loss": 1.1936, "num_tokens": 1416992995.0, "step": 226 }, { "epoch": 0.04046345811051694, "grad_norm": 0.30078125, "learning_rate": 1.3412462908011869e-05, "loss": 1.1654, "num_tokens": 1423254684.0, "step": 227 }, { "epoch": 0.040641711229946524, "grad_norm": 0.2353515625, "learning_rate": 1.3471810089020773e-05, "loss": 1.1966, "num_tokens": 1429465053.0, "step": 228 }, { "epoch": 0.04081996434937611, "grad_norm": 0.306640625, "learning_rate": 1.3531157270029676e-05, "loss": 1.196, "num_tokens": 1435748425.0, "step": 229 }, { "epoch": 0.040998217468805706, "grad_norm": 0.25390625, "learning_rate": 1.3590504451038577e-05, "loss": 1.1736, "num_tokens": 1442001975.0, "step": 230 }, { "epoch": 0.041176470588235294, "grad_norm": 0.298828125, "learning_rate": 1.364985163204748e-05, "loss": 1.1702, "num_tokens": 1448276271.0, "step": 231 }, { "epoch": 0.04135472370766488, "grad_norm": 0.2734375, "learning_rate": 1.370919881305638e-05, "loss": 1.1451, "num_tokens": 1454560071.0, "step": 232 }, { "epoch": 0.041532976827094475, "grad_norm": 0.271484375, "learning_rate": 1.3768545994065284e-05, "loss": 1.1672, "num_tokens": 1460843653.0, "step": 233 }, { "epoch": 0.04171122994652406, "grad_norm": 0.291015625, "learning_rate": 1.3827893175074185e-05, "loss": 1.1651, "num_tokens": 1467108116.0, "step": 234 }, { "epoch": 0.04188948306595366, "grad_norm": 0.287109375, "learning_rate": 1.3887240356083087e-05, "loss": 1.1546, "num_tokens": 1473369101.0, "step": 235 }, { "epoch": 0.042067736185383245, "grad_norm": 0.251953125, "learning_rate": 1.3946587537091988e-05, "loss": 1.163, "num_tokens": 1479647024.0, "step": 236 }, { "epoch": 0.04224598930481283, "grad_norm": 0.287109375, "learning_rate": 1.4005934718100891e-05, "loss": 1.1699, "num_tokens": 1485911901.0, "step": 237 }, { "epoch": 0.04242424242424243, "grad_norm": 0.2734375, "learning_rate": 1.4065281899109794e-05, "loss": 1.17, "num_tokens": 1492162990.0, "step": 238 }, { "epoch": 0.042602495543672014, "grad_norm": 0.271484375, "learning_rate": 1.4124629080118695e-05, "loss": 1.1678, "num_tokens": 1498447345.0, "step": 239 }, { "epoch": 0.0427807486631016, "grad_norm": 0.318359375, "learning_rate": 1.4183976261127597e-05, "loss": 1.1538, "num_tokens": 1504730797.0, "step": 240 }, { "epoch": 0.042959001782531196, "grad_norm": 0.24609375, "learning_rate": 1.42433234421365e-05, "loss": 1.1859, "num_tokens": 1511002005.0, "step": 241 }, { "epoch": 0.043137254901960784, "grad_norm": 0.28515625, "learning_rate": 1.4302670623145403e-05, "loss": 1.1848, "num_tokens": 1517284727.0, "step": 242 }, { "epoch": 0.04331550802139037, "grad_norm": 0.259765625, "learning_rate": 1.4362017804154305e-05, "loss": 1.1752, "num_tokens": 1523568572.0, "step": 243 }, { "epoch": 0.043493761140819966, "grad_norm": 0.30859375, "learning_rate": 1.4421364985163206e-05, "loss": 1.1753, "num_tokens": 1529853032.0, "step": 244 }, { "epoch": 0.04367201426024955, "grad_norm": 0.26953125, "learning_rate": 1.4480712166172109e-05, "loss": 1.1725, "num_tokens": 1536106540.0, "step": 245 }, { "epoch": 0.04385026737967915, "grad_norm": 0.306640625, "learning_rate": 1.454005934718101e-05, "loss": 1.2188, "num_tokens": 1542377248.0, "step": 246 }, { "epoch": 0.044028520499108735, "grad_norm": 0.267578125, "learning_rate": 1.4599406528189913e-05, "loss": 1.1749, "num_tokens": 1548639616.0, "step": 247 }, { "epoch": 0.04420677361853832, "grad_norm": 0.357421875, "learning_rate": 1.4658753709198814e-05, "loss": 1.1918, "num_tokens": 1554921375.0, "step": 248 }, { "epoch": 0.04438502673796792, "grad_norm": 0.27734375, "learning_rate": 1.4718100890207716e-05, "loss": 1.167, "num_tokens": 1561203397.0, "step": 249 }, { "epoch": 0.044563279857397504, "grad_norm": 0.359375, "learning_rate": 1.4777448071216617e-05, "loss": 1.1932, "num_tokens": 1567487915.0, "step": 250 }, { "epoch": 0.04474153297682709, "grad_norm": 0.3203125, "learning_rate": 1.483679525222552e-05, "loss": 1.1769, "num_tokens": 1573735767.0, "step": 251 }, { "epoch": 0.044919786096256686, "grad_norm": 0.30859375, "learning_rate": 1.4896142433234421e-05, "loss": 1.1814, "num_tokens": 1580019878.0, "step": 252 }, { "epoch": 0.045098039215686274, "grad_norm": 0.28125, "learning_rate": 1.4955489614243324e-05, "loss": 1.1869, "num_tokens": 1586303320.0, "step": 253 }, { "epoch": 0.04527629233511586, "grad_norm": 0.330078125, "learning_rate": 1.5014836795252228e-05, "loss": 1.1642, "num_tokens": 1592587815.0, "step": 254 }, { "epoch": 0.045454545454545456, "grad_norm": 0.291015625, "learning_rate": 1.507418397626113e-05, "loss": 1.1693, "num_tokens": 1598872099.0, "step": 255 }, { "epoch": 0.04563279857397504, "grad_norm": 0.35546875, "learning_rate": 1.5133531157270032e-05, "loss": 1.1821, "num_tokens": 1605103650.0, "step": 256 }, { "epoch": 0.04581105169340464, "grad_norm": 0.29296875, "learning_rate": 1.5192878338278933e-05, "loss": 1.1536, "num_tokens": 1611386479.0, "step": 257 }, { "epoch": 0.045989304812834225, "grad_norm": 0.341796875, "learning_rate": 1.5252225519287836e-05, "loss": 1.1516, "num_tokens": 1617646500.0, "step": 258 }, { "epoch": 0.04616755793226381, "grad_norm": 0.3046875, "learning_rate": 1.5311572700296738e-05, "loss": 1.1952, "num_tokens": 1623881910.0, "step": 259 }, { "epoch": 0.04634581105169341, "grad_norm": 0.36328125, "learning_rate": 1.537091988130564e-05, "loss": 1.1531, "num_tokens": 1630165141.0, "step": 260 }, { "epoch": 0.046524064171122995, "grad_norm": 0.34765625, "learning_rate": 1.543026706231454e-05, "loss": 1.1837, "num_tokens": 1636415802.0, "step": 261 }, { "epoch": 0.04670231729055258, "grad_norm": 0.33203125, "learning_rate": 1.5489614243323445e-05, "loss": 1.1793, "num_tokens": 1642700445.0, "step": 262 }, { "epoch": 0.04688057040998218, "grad_norm": 0.314453125, "learning_rate": 1.5548961424332346e-05, "loss": 1.1943, "num_tokens": 1648973392.0, "step": 263 }, { "epoch": 0.047058823529411764, "grad_norm": 0.291015625, "learning_rate": 1.5608308605341247e-05, "loss": 1.1379, "num_tokens": 1655233666.0, "step": 264 }, { "epoch": 0.04723707664884135, "grad_norm": 0.283203125, "learning_rate": 1.5667655786350148e-05, "loss": 1.1701, "num_tokens": 1661485584.0, "step": 265 }, { "epoch": 0.047415329768270946, "grad_norm": 0.30859375, "learning_rate": 1.5727002967359052e-05, "loss": 1.1815, "num_tokens": 1667747292.0, "step": 266 }, { "epoch": 0.04759358288770053, "grad_norm": 0.283203125, "learning_rate": 1.5786350148367956e-05, "loss": 1.1593, "num_tokens": 1674031638.0, "step": 267 }, { "epoch": 0.04777183600713013, "grad_norm": 0.296875, "learning_rate": 1.5845697329376857e-05, "loss": 1.2004, "num_tokens": 1680313733.0, "step": 268 }, { "epoch": 0.047950089126559715, "grad_norm": 0.306640625, "learning_rate": 1.590504451038576e-05, "loss": 1.1455, "num_tokens": 1686581316.0, "step": 269 }, { "epoch": 0.0481283422459893, "grad_norm": 0.345703125, "learning_rate": 1.596439169139466e-05, "loss": 1.1824, "num_tokens": 1692863435.0, "step": 270 }, { "epoch": 0.0483065953654189, "grad_norm": 0.287109375, "learning_rate": 1.6023738872403564e-05, "loss": 1.1618, "num_tokens": 1699146843.0, "step": 271 }, { "epoch": 0.048484848484848485, "grad_norm": 0.373046875, "learning_rate": 1.6083086053412465e-05, "loss": 1.1411, "num_tokens": 1705350829.0, "step": 272 }, { "epoch": 0.04866310160427807, "grad_norm": 0.306640625, "learning_rate": 1.6142433234421366e-05, "loss": 1.1681, "num_tokens": 1711616389.0, "step": 273 }, { "epoch": 0.04884135472370767, "grad_norm": 0.416015625, "learning_rate": 1.6201780415430267e-05, "loss": 1.1799, "num_tokens": 1717899405.0, "step": 274 }, { "epoch": 0.049019607843137254, "grad_norm": 0.3203125, "learning_rate": 1.626112759643917e-05, "loss": 1.1668, "num_tokens": 1724184522.0, "step": 275 }, { "epoch": 0.04919786096256684, "grad_norm": 0.369140625, "learning_rate": 1.6320474777448072e-05, "loss": 1.1251, "num_tokens": 1730443453.0, "step": 276 }, { "epoch": 0.049376114081996436, "grad_norm": 0.330078125, "learning_rate": 1.6379821958456973e-05, "loss": 1.1617, "num_tokens": 1736717854.0, "step": 277 }, { "epoch": 0.049554367201426024, "grad_norm": 0.369140625, "learning_rate": 1.6439169139465877e-05, "loss": 1.1597, "num_tokens": 1742967407.0, "step": 278 }, { "epoch": 0.04973262032085562, "grad_norm": 0.353515625, "learning_rate": 1.649851632047478e-05, "loss": 1.1695, "num_tokens": 1749240204.0, "step": 279 }, { "epoch": 0.049910873440285206, "grad_norm": 0.357421875, "learning_rate": 1.6557863501483683e-05, "loss": 1.1719, "num_tokens": 1755482469.0, "step": 280 }, { "epoch": 0.05008912655971479, "grad_norm": 0.353515625, "learning_rate": 1.6617210682492584e-05, "loss": 1.1614, "num_tokens": 1761750370.0, "step": 281 }, { "epoch": 0.05026737967914439, "grad_norm": 0.271484375, "learning_rate": 1.6676557863501485e-05, "loss": 1.1955, "num_tokens": 1768014156.0, "step": 282 }, { "epoch": 0.050445632798573975, "grad_norm": 0.314453125, "learning_rate": 1.673590504451039e-05, "loss": 1.1671, "num_tokens": 1774266092.0, "step": 283 }, { "epoch": 0.05062388591800356, "grad_norm": 0.25, "learning_rate": 1.679525222551929e-05, "loss": 1.1377, "num_tokens": 1780545776.0, "step": 284 }, { "epoch": 0.05080213903743316, "grad_norm": 0.345703125, "learning_rate": 1.685459940652819e-05, "loss": 1.174, "num_tokens": 1786784469.0, "step": 285 }, { "epoch": 0.050980392156862744, "grad_norm": 0.271484375, "learning_rate": 1.6913946587537092e-05, "loss": 1.1304, "num_tokens": 1793050606.0, "step": 286 }, { "epoch": 0.05115864527629233, "grad_norm": 0.28515625, "learning_rate": 1.6973293768545997e-05, "loss": 1.1414, "num_tokens": 1799311937.0, "step": 287 }, { "epoch": 0.051336898395721926, "grad_norm": 0.3203125, "learning_rate": 1.7032640949554898e-05, "loss": 1.1718, "num_tokens": 1805595940.0, "step": 288 }, { "epoch": 0.051515151515151514, "grad_norm": 0.2578125, "learning_rate": 1.70919881305638e-05, "loss": 1.182, "num_tokens": 1811849093.0, "step": 289 }, { "epoch": 0.05169340463458111, "grad_norm": 0.3359375, "learning_rate": 1.71513353115727e-05, "loss": 1.131, "num_tokens": 1818099011.0, "step": 290 }, { "epoch": 0.051871657754010696, "grad_norm": 0.26171875, "learning_rate": 1.7210682492581604e-05, "loss": 1.1929, "num_tokens": 1824381417.0, "step": 291 }, { "epoch": 0.05204991087344028, "grad_norm": 0.359375, "learning_rate": 1.7270029673590505e-05, "loss": 1.1791, "num_tokens": 1830658300.0, "step": 292 }, { "epoch": 0.05222816399286988, "grad_norm": 0.3203125, "learning_rate": 1.732937685459941e-05, "loss": 1.1377, "num_tokens": 1836888639.0, "step": 293 }, { "epoch": 0.052406417112299465, "grad_norm": 0.29296875, "learning_rate": 1.738872403560831e-05, "loss": 1.1649, "num_tokens": 1843173582.0, "step": 294 }, { "epoch": 0.05258467023172905, "grad_norm": 0.298828125, "learning_rate": 1.744807121661721e-05, "loss": 1.1659, "num_tokens": 1849425823.0, "step": 295 }, { "epoch": 0.05276292335115865, "grad_norm": 0.326171875, "learning_rate": 1.7507418397626116e-05, "loss": 1.1559, "num_tokens": 1855684265.0, "step": 296 }, { "epoch": 0.052941176470588235, "grad_norm": 0.267578125, "learning_rate": 1.7566765578635017e-05, "loss": 1.1722, "num_tokens": 1861967209.0, "step": 297 }, { "epoch": 0.05311942959001782, "grad_norm": 0.443359375, "learning_rate": 1.7626112759643918e-05, "loss": 1.1601, "num_tokens": 1868252186.0, "step": 298 }, { "epoch": 0.053297682709447416, "grad_norm": 0.322265625, "learning_rate": 1.7685459940652822e-05, "loss": 1.1511, "num_tokens": 1874530682.0, "step": 299 }, { "epoch": 0.053475935828877004, "grad_norm": 0.470703125, "learning_rate": 1.7744807121661723e-05, "loss": 1.137, "num_tokens": 1880814283.0, "step": 300 }, { "epoch": 0.0536541889483066, "grad_norm": 0.322265625, "learning_rate": 1.7804154302670624e-05, "loss": 1.1688, "num_tokens": 1887049217.0, "step": 301 }, { "epoch": 0.053832442067736186, "grad_norm": 0.55078125, "learning_rate": 1.7863501483679525e-05, "loss": 1.1367, "num_tokens": 1893304283.0, "step": 302 }, { "epoch": 0.05401069518716577, "grad_norm": 0.365234375, "learning_rate": 1.792284866468843e-05, "loss": 1.1935, "num_tokens": 1899588660.0, "step": 303 }, { "epoch": 0.05418894830659537, "grad_norm": 0.5078125, "learning_rate": 1.798219584569733e-05, "loss": 1.139, "num_tokens": 1905873776.0, "step": 304 }, { "epoch": 0.054367201426024955, "grad_norm": 0.46875, "learning_rate": 1.804154302670623e-05, "loss": 1.1483, "num_tokens": 1912159050.0, "step": 305 }, { "epoch": 0.05454545454545454, "grad_norm": 0.41796875, "learning_rate": 1.8100890207715136e-05, "loss": 1.1583, "num_tokens": 1918428604.0, "step": 306 }, { "epoch": 0.05472370766488414, "grad_norm": 0.408203125, "learning_rate": 1.8160237388724037e-05, "loss": 1.1384, "num_tokens": 1924672406.0, "step": 307 }, { "epoch": 0.054901960784313725, "grad_norm": 0.34765625, "learning_rate": 1.821958456973294e-05, "loss": 1.1406, "num_tokens": 1930950071.0, "step": 308 }, { "epoch": 0.05508021390374331, "grad_norm": 0.2890625, "learning_rate": 1.8278931750741842e-05, "loss": 1.1375, "num_tokens": 1937232489.0, "step": 309 }, { "epoch": 0.05525846702317291, "grad_norm": 0.412109375, "learning_rate": 1.8338278931750743e-05, "loss": 1.1518, "num_tokens": 1943513527.0, "step": 310 }, { "epoch": 0.055436720142602494, "grad_norm": 0.267578125, "learning_rate": 1.8397626112759644e-05, "loss": 1.1201, "num_tokens": 1949796330.0, "step": 311 }, { "epoch": 0.05561497326203209, "grad_norm": 0.4453125, "learning_rate": 1.845697329376855e-05, "loss": 1.1604, "num_tokens": 1956063672.0, "step": 312 }, { "epoch": 0.055793226381461676, "grad_norm": 0.279296875, "learning_rate": 1.851632047477745e-05, "loss": 1.1451, "num_tokens": 1962347028.0, "step": 313 }, { "epoch": 0.055971479500891264, "grad_norm": 0.490234375, "learning_rate": 1.857566765578635e-05, "loss": 1.1757, "num_tokens": 1968591216.0, "step": 314 }, { "epoch": 0.05614973262032086, "grad_norm": 0.3359375, "learning_rate": 1.863501483679525e-05, "loss": 1.1551, "num_tokens": 1974867692.0, "step": 315 }, { "epoch": 0.056327985739750445, "grad_norm": 0.5859375, "learning_rate": 1.8694362017804156e-05, "loss": 1.1252, "num_tokens": 1981151630.0, "step": 316 }, { "epoch": 0.05650623885918003, "grad_norm": 0.51171875, "learning_rate": 1.8753709198813057e-05, "loss": 1.155, "num_tokens": 1987424391.0, "step": 317 }, { "epoch": 0.05668449197860963, "grad_norm": 0.48046875, "learning_rate": 1.8813056379821958e-05, "loss": 1.1441, "num_tokens": 1993707079.0, "step": 318 }, { "epoch": 0.056862745098039215, "grad_norm": 0.462890625, "learning_rate": 1.8872403560830862e-05, "loss": 1.1672, "num_tokens": 1999958233.0, "step": 319 }, { "epoch": 0.0570409982174688, "grad_norm": 0.384765625, "learning_rate": 1.8931750741839763e-05, "loss": 1.1648, "num_tokens": 2006208307.0, "step": 320 }, { "epoch": 0.0572192513368984, "grad_norm": 0.365234375, "learning_rate": 1.8991097922848668e-05, "loss": 1.1441, "num_tokens": 2012462423.0, "step": 321 }, { "epoch": 0.057397504456327984, "grad_norm": 0.384765625, "learning_rate": 1.905044510385757e-05, "loss": 1.1403, "num_tokens": 2018695709.0, "step": 322 }, { "epoch": 0.05757575757575758, "grad_norm": 0.318359375, "learning_rate": 1.910979228486647e-05, "loss": 1.1716, "num_tokens": 2024978800.0, "step": 323 }, { "epoch": 0.057754010695187166, "grad_norm": 0.451171875, "learning_rate": 1.9169139465875374e-05, "loss": 1.1403, "num_tokens": 2031232945.0, "step": 324 }, { "epoch": 0.057932263814616754, "grad_norm": 0.375, "learning_rate": 1.9228486646884275e-05, "loss": 1.1613, "num_tokens": 2037500811.0, "step": 325 }, { "epoch": 0.05811051693404635, "grad_norm": 0.4921875, "learning_rate": 1.9287833827893176e-05, "loss": 1.154, "num_tokens": 2043749798.0, "step": 326 }, { "epoch": 0.058288770053475936, "grad_norm": 0.435546875, "learning_rate": 1.9347181008902077e-05, "loss": 1.1575, "num_tokens": 2050034224.0, "step": 327 }, { "epoch": 0.05846702317290552, "grad_norm": 0.419921875, "learning_rate": 1.940652818991098e-05, "loss": 1.1524, "num_tokens": 2056317717.0, "step": 328 }, { "epoch": 0.05864527629233512, "grad_norm": 0.416015625, "learning_rate": 1.9465875370919883e-05, "loss": 1.1322, "num_tokens": 2062600343.0, "step": 329 }, { "epoch": 0.058823529411764705, "grad_norm": 0.380859375, "learning_rate": 1.9525222551928784e-05, "loss": 1.1601, "num_tokens": 2068884172.0, "step": 330 }, { "epoch": 0.05900178253119429, "grad_norm": 0.3828125, "learning_rate": 1.9584569732937684e-05, "loss": 1.1582, "num_tokens": 2075155327.0, "step": 331 }, { "epoch": 0.05918003565062389, "grad_norm": 0.41015625, "learning_rate": 1.964391691394659e-05, "loss": 1.1456, "num_tokens": 2081367043.0, "step": 332 }, { "epoch": 0.059358288770053474, "grad_norm": 0.361328125, "learning_rate": 1.9703264094955493e-05, "loss": 1.1571, "num_tokens": 2087647662.0, "step": 333 }, { "epoch": 0.05953654188948307, "grad_norm": 0.48046875, "learning_rate": 1.9762611275964394e-05, "loss": 1.1526, "num_tokens": 2093922816.0, "step": 334 }, { "epoch": 0.059714795008912656, "grad_norm": 0.443359375, "learning_rate": 1.9821958456973295e-05, "loss": 1.1341, "num_tokens": 2100207685.0, "step": 335 }, { "epoch": 0.059893048128342244, "grad_norm": 0.4375, "learning_rate": 1.9881305637982196e-05, "loss": 1.1438, "num_tokens": 2106492570.0, "step": 336 }, { "epoch": 0.06007130124777184, "grad_norm": 0.474609375, "learning_rate": 1.99406528189911e-05, "loss": 1.1607, "num_tokens": 2112731491.0, "step": 337 }, { "epoch": 0.060249554367201426, "grad_norm": 0.34375, "learning_rate": 2e-05, "loss": 1.1487, "num_tokens": 2119014342.0, "step": 338 }, { "epoch": 0.06042780748663101, "grad_norm": 0.328125, "learning_rate": 1.999999962501405e-05, "loss": 1.1508, "num_tokens": 2125297601.0, "step": 339 }, { "epoch": 0.06060606060606061, "grad_norm": 0.419921875, "learning_rate": 1.9999998500056227e-05, "loss": 1.1602, "num_tokens": 2131581995.0, "step": 340 }, { "epoch": 0.060784313725490195, "grad_norm": 0.318359375, "learning_rate": 1.9999996625126626e-05, "loss": 1.1614, "num_tokens": 2137835705.0, "step": 341 }, { "epoch": 0.06096256684491978, "grad_norm": 0.515625, "learning_rate": 1.99999940002254e-05, "loss": 1.0994, "num_tokens": 2144103410.0, "step": 342 }, { "epoch": 0.06114081996434938, "grad_norm": 0.419921875, "learning_rate": 1.999999062535277e-05, "loss": 1.142, "num_tokens": 2150387635.0, "step": 343 }, { "epoch": 0.061319073083778965, "grad_norm": 0.515625, "learning_rate": 1.9999986500509024e-05, "loss": 1.1384, "num_tokens": 2156670693.0, "step": 344 }, { "epoch": 0.06149732620320856, "grad_norm": 0.5078125, "learning_rate": 1.9999981625694493e-05, "loss": 1.1354, "num_tokens": 2162929199.0, "step": 345 }, { "epoch": 0.06167557932263815, "grad_norm": 0.33984375, "learning_rate": 1.9999976000909594e-05, "loss": 1.1512, "num_tokens": 2169205513.0, "step": 346 }, { "epoch": 0.061853832442067734, "grad_norm": 0.388671875, "learning_rate": 1.999996962615479e-05, "loss": 1.1331, "num_tokens": 2175489799.0, "step": 347 }, { "epoch": 0.06203208556149733, "grad_norm": 0.34765625, "learning_rate": 1.999996250143061e-05, "loss": 1.1738, "num_tokens": 2181773029.0, "step": 348 }, { "epoch": 0.062210338680926916, "grad_norm": 0.3046875, "learning_rate": 1.9999954626737658e-05, "loss": 1.1443, "num_tokens": 2188058113.0, "step": 349 }, { "epoch": 0.062388591800356503, "grad_norm": 0.46484375, "learning_rate": 1.999994600207658e-05, "loss": 1.1502, "num_tokens": 2194306175.0, "step": 350 }, { "epoch": 0.06256684491978609, "grad_norm": 0.33984375, "learning_rate": 1.99999366274481e-05, "loss": 1.1251, "num_tokens": 2200564871.0, "step": 351 }, { "epoch": 0.06274509803921569, "grad_norm": 0.60546875, "learning_rate": 1.9999926502852997e-05, "loss": 1.1497, "num_tokens": 2206813445.0, "step": 352 }, { "epoch": 0.06292335115864528, "grad_norm": 0.54296875, "learning_rate": 1.9999915628292112e-05, "loss": 1.13, "num_tokens": 2213098657.0, "step": 353 }, { "epoch": 0.06310160427807486, "grad_norm": 0.330078125, "learning_rate": 1.9999904003766358e-05, "loss": 1.1421, "num_tokens": 2219370391.0, "step": 354 }, { "epoch": 0.06327985739750445, "grad_norm": 0.443359375, "learning_rate": 1.9999891629276697e-05, "loss": 1.1515, "num_tokens": 2225631083.0, "step": 355 }, { "epoch": 0.06345811051693405, "grad_norm": 0.279296875, "learning_rate": 1.9999878504824167e-05, "loss": 1.1256, "num_tokens": 2231888521.0, "step": 356 }, { "epoch": 0.06363636363636363, "grad_norm": 0.37890625, "learning_rate": 1.9999864630409854e-05, "loss": 1.1273, "num_tokens": 2238152609.0, "step": 357 }, { "epoch": 0.06381461675579322, "grad_norm": 0.267578125, "learning_rate": 1.999985000603492e-05, "loss": 1.1624, "num_tokens": 2244431291.0, "step": 358 }, { "epoch": 0.06399286987522282, "grad_norm": 0.369140625, "learning_rate": 1.9999834631700585e-05, "loss": 1.1326, "num_tokens": 2250715595.0, "step": 359 }, { "epoch": 0.06417112299465241, "grad_norm": 0.310546875, "learning_rate": 1.999981850740812e-05, "loss": 1.1581, "num_tokens": 2256979125.0, "step": 360 }, { "epoch": 0.064349376114082, "grad_norm": 0.3203125, "learning_rate": 1.999980163315888e-05, "loss": 1.1176, "num_tokens": 2263225161.0, "step": 361 }, { "epoch": 0.06452762923351159, "grad_norm": 0.306640625, "learning_rate": 1.9999784008954267e-05, "loss": 1.1755, "num_tokens": 2269493834.0, "step": 362 }, { "epoch": 0.06470588235294118, "grad_norm": 0.28515625, "learning_rate": 1.9999765634795746e-05, "loss": 1.1359, "num_tokens": 2275778456.0, "step": 363 }, { "epoch": 0.06488413547237076, "grad_norm": 0.341796875, "learning_rate": 1.9999746510684855e-05, "loss": 1.1481, "num_tokens": 2282057937.0, "step": 364 }, { "epoch": 0.06506238859180036, "grad_norm": 0.26953125, "learning_rate": 1.9999726636623182e-05, "loss": 1.1012, "num_tokens": 2288339242.0, "step": 365 }, { "epoch": 0.06524064171122995, "grad_norm": 0.37109375, "learning_rate": 1.9999706012612386e-05, "loss": 1.1469, "num_tokens": 2294595412.0, "step": 366 }, { "epoch": 0.06541889483065953, "grad_norm": 0.24609375, "learning_rate": 1.9999684638654183e-05, "loss": 1.1049, "num_tokens": 2300855985.0, "step": 367 }, { "epoch": 0.06559714795008913, "grad_norm": 0.3828125, "learning_rate": 1.9999662514750355e-05, "loss": 1.1063, "num_tokens": 2307138341.0, "step": 368 }, { "epoch": 0.06577540106951872, "grad_norm": 0.28125, "learning_rate": 1.9999639640902747e-05, "loss": 1.1647, "num_tokens": 2313422083.0, "step": 369 }, { "epoch": 0.0659536541889483, "grad_norm": 0.419921875, "learning_rate": 1.9999616017113265e-05, "loss": 1.1498, "num_tokens": 2319697107.0, "step": 370 }, { "epoch": 0.0661319073083779, "grad_norm": 0.328125, "learning_rate": 1.999959164338388e-05, "loss": 1.1481, "num_tokens": 2325973301.0, "step": 371 }, { "epoch": 0.06631016042780749, "grad_norm": 0.447265625, "learning_rate": 1.999956651971662e-05, "loss": 1.1431, "num_tokens": 2332224770.0, "step": 372 }, { "epoch": 0.06648841354723707, "grad_norm": 0.353515625, "learning_rate": 1.9999540646113574e-05, "loss": 1.1333, "num_tokens": 2338509848.0, "step": 373 }, { "epoch": 0.06666666666666667, "grad_norm": 0.392578125, "learning_rate": 1.9999514022576906e-05, "loss": 1.1318, "num_tokens": 2344794726.0, "step": 374 }, { "epoch": 0.06684491978609626, "grad_norm": 0.328125, "learning_rate": 1.999948664910883e-05, "loss": 1.1307, "num_tokens": 2351079378.0, "step": 375 }, { "epoch": 0.06702317290552584, "grad_norm": 0.416015625, "learning_rate": 1.9999458525711624e-05, "loss": 1.1476, "num_tokens": 2357363089.0, "step": 376 }, { "epoch": 0.06720142602495544, "grad_norm": 0.333984375, "learning_rate": 1.9999429652387642e-05, "loss": 1.1644, "num_tokens": 2363647477.0, "step": 377 }, { "epoch": 0.06737967914438503, "grad_norm": 0.447265625, "learning_rate": 1.9999400029139282e-05, "loss": 1.1217, "num_tokens": 2369902541.0, "step": 378 }, { "epoch": 0.06755793226381461, "grad_norm": 0.392578125, "learning_rate": 1.9999369655969015e-05, "loss": 1.1675, "num_tokens": 2376185469.0, "step": 379 }, { "epoch": 0.0677361853832442, "grad_norm": 0.41796875, "learning_rate": 1.9999338532879375e-05, "loss": 1.1167, "num_tokens": 2382459692.0, "step": 380 }, { "epoch": 0.0679144385026738, "grad_norm": 0.40625, "learning_rate": 1.999930665987294e-05, "loss": 1.1274, "num_tokens": 2388742351.0, "step": 381 }, { "epoch": 0.0680926916221034, "grad_norm": 0.36328125, "learning_rate": 1.999927403695239e-05, "loss": 1.1373, "num_tokens": 2395026022.0, "step": 382 }, { "epoch": 0.06827094474153297, "grad_norm": 0.361328125, "learning_rate": 1.9999240664120424e-05, "loss": 1.1521, "num_tokens": 2401309168.0, "step": 383 }, { "epoch": 0.06844919786096257, "grad_norm": 0.306640625, "learning_rate": 1.9999206541379834e-05, "loss": 1.1063, "num_tokens": 2407594452.0, "step": 384 }, { "epoch": 0.06862745098039216, "grad_norm": 0.353515625, "learning_rate": 1.9999171668733458e-05, "loss": 1.127, "num_tokens": 2413878883.0, "step": 385 }, { "epoch": 0.06880570409982174, "grad_norm": 0.314453125, "learning_rate": 1.9999136046184205e-05, "loss": 1.1419, "num_tokens": 2420165727.0, "step": 386 }, { "epoch": 0.06898395721925134, "grad_norm": 0.298828125, "learning_rate": 1.999909967373504e-05, "loss": 1.1485, "num_tokens": 2426448359.0, "step": 387 }, { "epoch": 0.06916221033868093, "grad_norm": 0.349609375, "learning_rate": 1.9999062551388998e-05, "loss": 1.1378, "num_tokens": 2432733828.0, "step": 388 }, { "epoch": 0.06934046345811051, "grad_norm": 0.2734375, "learning_rate": 1.999902467914917e-05, "loss": 1.114, "num_tokens": 2439018244.0, "step": 389 }, { "epoch": 0.06951871657754011, "grad_norm": 0.376953125, "learning_rate": 1.9998986057018712e-05, "loss": 1.1296, "num_tokens": 2445302845.0, "step": 390 }, { "epoch": 0.0696969696969697, "grad_norm": 0.3046875, "learning_rate": 1.9998946685000845e-05, "loss": 1.109, "num_tokens": 2451576548.0, "step": 391 }, { "epoch": 0.06987522281639928, "grad_norm": 0.421875, "learning_rate": 1.9998906563098842e-05, "loss": 1.1482, "num_tokens": 2457784963.0, "step": 392 }, { "epoch": 0.07005347593582888, "grad_norm": 0.3671875, "learning_rate": 1.9998865691316057e-05, "loss": 1.1399, "num_tokens": 2464042828.0, "step": 393 }, { "epoch": 0.07023172905525847, "grad_norm": 0.427734375, "learning_rate": 1.9998824069655888e-05, "loss": 1.1321, "num_tokens": 2470325441.0, "step": 394 }, { "epoch": 0.07040998217468805, "grad_norm": 0.419921875, "learning_rate": 1.999878169812181e-05, "loss": 1.1562, "num_tokens": 2476592302.0, "step": 395 }, { "epoch": 0.07058823529411765, "grad_norm": 0.353515625, "learning_rate": 1.9998738576717344e-05, "loss": 1.1192, "num_tokens": 2482843318.0, "step": 396 }, { "epoch": 0.07076648841354724, "grad_norm": 0.369140625, "learning_rate": 1.9998694705446094e-05, "loss": 1.1363, "num_tokens": 2489111088.0, "step": 397 }, { "epoch": 0.07094474153297682, "grad_norm": 0.33984375, "learning_rate": 1.999865008431171e-05, "loss": 1.1342, "num_tokens": 2495390660.0, "step": 398 }, { "epoch": 0.07112299465240642, "grad_norm": 0.337890625, "learning_rate": 1.999860471331791e-05, "loss": 1.1415, "num_tokens": 2501645896.0, "step": 399 }, { "epoch": 0.07130124777183601, "grad_norm": 0.306640625, "learning_rate": 1.9998558592468478e-05, "loss": 1.1191, "num_tokens": 2507902463.0, "step": 400 }, { "epoch": 0.07147950089126559, "grad_norm": 0.3125, "learning_rate": 1.9998511721767256e-05, "loss": 1.1428, "num_tokens": 2514144635.0, "step": 401 }, { "epoch": 0.07165775401069518, "grad_norm": 0.291015625, "learning_rate": 1.9998464101218147e-05, "loss": 1.1378, "num_tokens": 2520429784.0, "step": 402 }, { "epoch": 0.07183600713012478, "grad_norm": 0.32421875, "learning_rate": 1.9998415730825122e-05, "loss": 1.1375, "num_tokens": 2526689800.0, "step": 403 }, { "epoch": 0.07201426024955437, "grad_norm": 0.2890625, "learning_rate": 1.9998366610592213e-05, "loss": 1.1076, "num_tokens": 2532957337.0, "step": 404 }, { "epoch": 0.07219251336898395, "grad_norm": 0.404296875, "learning_rate": 1.9998316740523506e-05, "loss": 1.1423, "num_tokens": 2539223282.0, "step": 405 }, { "epoch": 0.07237076648841355, "grad_norm": 0.28125, "learning_rate": 1.9998266120623165e-05, "loss": 1.1502, "num_tokens": 2545486574.0, "step": 406 }, { "epoch": 0.07254901960784314, "grad_norm": 0.439453125, "learning_rate": 1.999821475089541e-05, "loss": 1.1605, "num_tokens": 2551770190.0, "step": 407 }, { "epoch": 0.07272727272727272, "grad_norm": 0.3515625, "learning_rate": 1.9998162631344512e-05, "loss": 1.1281, "num_tokens": 2557951259.0, "step": 408 }, { "epoch": 0.07290552584670232, "grad_norm": 0.40625, "learning_rate": 1.999810976197482e-05, "loss": 1.1246, "num_tokens": 2564235799.0, "step": 409 }, { "epoch": 0.07308377896613191, "grad_norm": 0.36328125, "learning_rate": 1.9998056142790734e-05, "loss": 1.1053, "num_tokens": 2570498805.0, "step": 410 }, { "epoch": 0.0732620320855615, "grad_norm": 0.421875, "learning_rate": 1.9998001773796734e-05, "loss": 1.1292, "num_tokens": 2576783621.0, "step": 411 }, { "epoch": 0.07344028520499109, "grad_norm": 0.37109375, "learning_rate": 1.9997946654997344e-05, "loss": 1.1285, "num_tokens": 2583069599.0, "step": 412 }, { "epoch": 0.07361853832442068, "grad_norm": 0.421875, "learning_rate": 1.9997890786397148e-05, "loss": 1.1022, "num_tokens": 2589297819.0, "step": 413 }, { "epoch": 0.07379679144385026, "grad_norm": 0.357421875, "learning_rate": 1.9997834168000816e-05, "loss": 1.148, "num_tokens": 2595580785.0, "step": 414 }, { "epoch": 0.07397504456327986, "grad_norm": 0.412109375, "learning_rate": 1.9997776799813057e-05, "loss": 1.1227, "num_tokens": 2601846523.0, "step": 415 }, { "epoch": 0.07415329768270945, "grad_norm": 0.34765625, "learning_rate": 1.9997718681838655e-05, "loss": 1.1345, "num_tokens": 2608130223.0, "step": 416 }, { "epoch": 0.07433155080213903, "grad_norm": 0.404296875, "learning_rate": 1.9997659814082452e-05, "loss": 1.1313, "num_tokens": 2614395090.0, "step": 417 }, { "epoch": 0.07450980392156863, "grad_norm": 0.359375, "learning_rate": 1.9997600196549357e-05, "loss": 1.1083, "num_tokens": 2620660465.0, "step": 418 }, { "epoch": 0.07468805704099822, "grad_norm": 0.4765625, "learning_rate": 1.9997539829244333e-05, "loss": 1.1155, "num_tokens": 2626944927.0, "step": 419 }, { "epoch": 0.0748663101604278, "grad_norm": 0.416015625, "learning_rate": 1.9997478712172413e-05, "loss": 1.1372, "num_tokens": 2633231007.0, "step": 420 }, { "epoch": 0.0750445632798574, "grad_norm": 0.443359375, "learning_rate": 1.9997416845338688e-05, "loss": 1.1256, "num_tokens": 2639487923.0, "step": 421 }, { "epoch": 0.07522281639928699, "grad_norm": 0.43359375, "learning_rate": 1.9997354228748312e-05, "loss": 1.1396, "num_tokens": 2645743282.0, "step": 422 }, { "epoch": 0.07540106951871657, "grad_norm": 0.384765625, "learning_rate": 1.999729086240651e-05, "loss": 1.1395, "num_tokens": 2652027405.0, "step": 423 }, { "epoch": 0.07557932263814617, "grad_norm": 0.38671875, "learning_rate": 1.999722674631855e-05, "loss": 1.1232, "num_tokens": 2658292348.0, "step": 424 }, { "epoch": 0.07575757575757576, "grad_norm": 0.345703125, "learning_rate": 1.9997161880489792e-05, "loss": 1.1412, "num_tokens": 2664547965.0, "step": 425 }, { "epoch": 0.07593582887700535, "grad_norm": 0.345703125, "learning_rate": 1.9997096264925625e-05, "loss": 1.1285, "num_tokens": 2670803855.0, "step": 426 }, { "epoch": 0.07611408199643493, "grad_norm": 0.392578125, "learning_rate": 1.999702989963153e-05, "loss": 1.1288, "num_tokens": 2677071068.0, "step": 427 }, { "epoch": 0.07629233511586453, "grad_norm": 0.287109375, "learning_rate": 1.9996962784613024e-05, "loss": 1.1405, "num_tokens": 2683355261.0, "step": 428 }, { "epoch": 0.07647058823529412, "grad_norm": 0.484375, "learning_rate": 1.999689491987571e-05, "loss": 1.1547, "num_tokens": 2689637717.0, "step": 429 }, { "epoch": 0.0766488413547237, "grad_norm": 0.37890625, "learning_rate": 1.999682630542524e-05, "loss": 1.1284, "num_tokens": 2695920561.0, "step": 430 }, { "epoch": 0.0768270944741533, "grad_norm": 0.546875, "learning_rate": 1.999675694126733e-05, "loss": 1.1189, "num_tokens": 2702203820.0, "step": 431 }, { "epoch": 0.07700534759358289, "grad_norm": 0.486328125, "learning_rate": 1.999668682740776e-05, "loss": 1.1542, "num_tokens": 2708469419.0, "step": 432 }, { "epoch": 0.07718360071301247, "grad_norm": 0.30078125, "learning_rate": 1.9996615963852376e-05, "loss": 1.1167, "num_tokens": 2714740306.0, "step": 433 }, { "epoch": 0.07736185383244207, "grad_norm": 0.447265625, "learning_rate": 1.999654435060708e-05, "loss": 1.1263, "num_tokens": 2721024108.0, "step": 434 }, { "epoch": 0.07754010695187166, "grad_norm": 0.3046875, "learning_rate": 1.999647198767784e-05, "loss": 1.124, "num_tokens": 2727293536.0, "step": 435 }, { "epoch": 0.07771836007130124, "grad_norm": 0.443359375, "learning_rate": 1.9996398875070688e-05, "loss": 1.1239, "num_tokens": 2733561364.0, "step": 436 }, { "epoch": 0.07789661319073084, "grad_norm": 0.33984375, "learning_rate": 1.9996325012791714e-05, "loss": 1.1227, "num_tokens": 2739844260.0, "step": 437 }, { "epoch": 0.07807486631016043, "grad_norm": 0.5, "learning_rate": 1.9996250400847077e-05, "loss": 1.1171, "num_tokens": 2746128330.0, "step": 438 }, { "epoch": 0.07825311942959001, "grad_norm": 0.40625, "learning_rate": 1.9996175039242987e-05, "loss": 1.1144, "num_tokens": 2752409030.0, "step": 439 }, { "epoch": 0.0784313725490196, "grad_norm": 0.4609375, "learning_rate": 1.9996098927985734e-05, "loss": 1.1469, "num_tokens": 2758635247.0, "step": 440 }, { "epoch": 0.0786096256684492, "grad_norm": 0.4453125, "learning_rate": 1.9996022067081648e-05, "loss": 1.1231, "num_tokens": 2764909519.0, "step": 441 }, { "epoch": 0.07878787878787878, "grad_norm": 0.404296875, "learning_rate": 1.9995944456537145e-05, "loss": 1.1047, "num_tokens": 2771195102.0, "step": 442 }, { "epoch": 0.07896613190730838, "grad_norm": 0.40625, "learning_rate": 1.9995866096358688e-05, "loss": 1.1546, "num_tokens": 2777478325.0, "step": 443 }, { "epoch": 0.07914438502673797, "grad_norm": 0.373046875, "learning_rate": 1.9995786986552807e-05, "loss": 1.0981, "num_tokens": 2783737425.0, "step": 444 }, { "epoch": 0.07932263814616755, "grad_norm": 0.380859375, "learning_rate": 1.9995707127126094e-05, "loss": 1.1685, "num_tokens": 2790020562.0, "step": 445 }, { "epoch": 0.07950089126559715, "grad_norm": 0.369140625, "learning_rate": 1.99956265180852e-05, "loss": 1.1222, "num_tokens": 2796303252.0, "step": 446 }, { "epoch": 0.07967914438502674, "grad_norm": 0.349609375, "learning_rate": 1.999554515943685e-05, "loss": 1.1532, "num_tokens": 2802519438.0, "step": 447 }, { "epoch": 0.07985739750445633, "grad_norm": 0.404296875, "learning_rate": 1.9995463051187818e-05, "loss": 1.1326, "num_tokens": 2808791482.0, "step": 448 }, { "epoch": 0.08003565062388592, "grad_norm": 0.361328125, "learning_rate": 1.9995380193344947e-05, "loss": 1.134, "num_tokens": 2815076289.0, "step": 449 }, { "epoch": 0.08021390374331551, "grad_norm": 0.400390625, "learning_rate": 1.9995296585915143e-05, "loss": 1.1011, "num_tokens": 2821359818.0, "step": 450 }, { "epoch": 0.0803921568627451, "grad_norm": 0.345703125, "learning_rate": 1.9995212228905373e-05, "loss": 1.097, "num_tokens": 2827644567.0, "step": 451 }, { "epoch": 0.08057040998217468, "grad_norm": 0.4140625, "learning_rate": 1.9995127122322665e-05, "loss": 1.1369, "num_tokens": 2833928311.0, "step": 452 }, { "epoch": 0.08074866310160428, "grad_norm": 0.392578125, "learning_rate": 1.999504126617411e-05, "loss": 1.1233, "num_tokens": 2840185548.0, "step": 453 }, { "epoch": 0.08092691622103387, "grad_norm": 0.3671875, "learning_rate": 1.9994954660466864e-05, "loss": 1.1204, "num_tokens": 2846469958.0, "step": 454 }, { "epoch": 0.08110516934046345, "grad_norm": 0.330078125, "learning_rate": 1.9994867305208143e-05, "loss": 1.1109, "num_tokens": 2852748180.0, "step": 455 }, { "epoch": 0.08128342245989305, "grad_norm": 0.375, "learning_rate": 1.999477920040523e-05, "loss": 1.1231, "num_tokens": 2859018194.0, "step": 456 }, { "epoch": 0.08146167557932264, "grad_norm": 0.33203125, "learning_rate": 1.9994690346065464e-05, "loss": 1.1126, "num_tokens": 2865272812.0, "step": 457 }, { "epoch": 0.08163992869875222, "grad_norm": 0.388671875, "learning_rate": 1.9994600742196245e-05, "loss": 1.1353, "num_tokens": 2871506379.0, "step": 458 }, { "epoch": 0.08181818181818182, "grad_norm": 0.3203125, "learning_rate": 1.999451038880505e-05, "loss": 1.1293, "num_tokens": 2877792124.0, "step": 459 }, { "epoch": 0.08199643493761141, "grad_norm": 0.46484375, "learning_rate": 1.9994419285899398e-05, "loss": 1.0937, "num_tokens": 2884075155.0, "step": 460 }, { "epoch": 0.08217468805704099, "grad_norm": 0.40234375, "learning_rate": 1.9994327433486888e-05, "loss": 1.0618, "num_tokens": 2890328390.0, "step": 461 }, { "epoch": 0.08235294117647059, "grad_norm": 0.43359375, "learning_rate": 1.9994234831575167e-05, "loss": 1.1262, "num_tokens": 2896557761.0, "step": 462 }, { "epoch": 0.08253119429590018, "grad_norm": 0.439453125, "learning_rate": 1.9994141480171957e-05, "loss": 1.1277, "num_tokens": 2902801796.0, "step": 463 }, { "epoch": 0.08270944741532976, "grad_norm": 0.328125, "learning_rate": 1.9994047379285034e-05, "loss": 1.0988, "num_tokens": 2909085545.0, "step": 464 }, { "epoch": 0.08288770053475936, "grad_norm": 0.37109375, "learning_rate": 1.9993952528922244e-05, "loss": 1.102, "num_tokens": 2915370866.0, "step": 465 }, { "epoch": 0.08306595365418895, "grad_norm": 0.3125, "learning_rate": 1.9993856929091485e-05, "loss": 1.1495, "num_tokens": 2921625368.0, "step": 466 }, { "epoch": 0.08324420677361853, "grad_norm": 0.318359375, "learning_rate": 1.9993760579800726e-05, "loss": 1.1504, "num_tokens": 2927894168.0, "step": 467 }, { "epoch": 0.08342245989304813, "grad_norm": 0.34765625, "learning_rate": 1.9993663481058e-05, "loss": 1.1194, "num_tokens": 2934177248.0, "step": 468 }, { "epoch": 0.08360071301247772, "grad_norm": 0.294921875, "learning_rate": 1.999356563287139e-05, "loss": 1.0912, "num_tokens": 2940460272.0, "step": 469 }, { "epoch": 0.08377896613190731, "grad_norm": 0.4375, "learning_rate": 1.9993467035249053e-05, "loss": 1.1127, "num_tokens": 2946733756.0, "step": 470 }, { "epoch": 0.0839572192513369, "grad_norm": 0.33203125, "learning_rate": 1.9993367688199206e-05, "loss": 1.124, "num_tokens": 2953013628.0, "step": 471 }, { "epoch": 0.08413547237076649, "grad_norm": 0.47265625, "learning_rate": 1.999326759173013e-05, "loss": 1.1295, "num_tokens": 2959295060.0, "step": 472 }, { "epoch": 0.08431372549019608, "grad_norm": 0.408203125, "learning_rate": 1.9993166745850163e-05, "loss": 1.1037, "num_tokens": 2965577860.0, "step": 473 }, { "epoch": 0.08449197860962566, "grad_norm": 0.439453125, "learning_rate": 1.9993065150567707e-05, "loss": 1.1525, "num_tokens": 2971858443.0, "step": 474 }, { "epoch": 0.08467023172905526, "grad_norm": 0.42578125, "learning_rate": 1.9992962805891235e-05, "loss": 1.1221, "num_tokens": 2978142966.0, "step": 475 }, { "epoch": 0.08484848484848485, "grad_norm": 0.37109375, "learning_rate": 1.9992859711829268e-05, "loss": 1.1674, "num_tokens": 2984426195.0, "step": 476 }, { "epoch": 0.08502673796791443, "grad_norm": 0.408203125, "learning_rate": 1.9992755868390398e-05, "loss": 1.1258, "num_tokens": 2990677183.0, "step": 477 }, { "epoch": 0.08520499108734403, "grad_norm": 0.326171875, "learning_rate": 1.9992651275583283e-05, "loss": 1.1324, "num_tokens": 2996943477.0, "step": 478 }, { "epoch": 0.08538324420677362, "grad_norm": 0.330078125, "learning_rate": 1.9992545933416632e-05, "loss": 1.1265, "num_tokens": 3003197674.0, "step": 479 }, { "epoch": 0.0855614973262032, "grad_norm": 0.302734375, "learning_rate": 1.999243984189922e-05, "loss": 1.1517, "num_tokens": 3009440456.0, "step": 480 }, { "epoch": 0.0857397504456328, "grad_norm": 0.291015625, "learning_rate": 1.9992333001039908e-05, "loss": 1.103, "num_tokens": 3015698950.0, "step": 481 }, { "epoch": 0.08591800356506239, "grad_norm": 0.353515625, "learning_rate": 1.9992225410847578e-05, "loss": 1.1312, "num_tokens": 3021957797.0, "step": 482 }, { "epoch": 0.08609625668449197, "grad_norm": 0.306640625, "learning_rate": 1.9992117071331204e-05, "loss": 1.0966, "num_tokens": 3028194849.0, "step": 483 }, { "epoch": 0.08627450980392157, "grad_norm": 0.4296875, "learning_rate": 1.999200798249981e-05, "loss": 1.1709, "num_tokens": 3034476950.0, "step": 484 }, { "epoch": 0.08645276292335116, "grad_norm": 0.302734375, "learning_rate": 1.9991898144362494e-05, "loss": 1.1108, "num_tokens": 3040742042.0, "step": 485 }, { "epoch": 0.08663101604278074, "grad_norm": 0.421875, "learning_rate": 1.9991787556928403e-05, "loss": 1.0995, "num_tokens": 3046997793.0, "step": 486 }, { "epoch": 0.08680926916221034, "grad_norm": 0.328125, "learning_rate": 1.999167622020675e-05, "loss": 1.1059, "num_tokens": 3053266288.0, "step": 487 }, { "epoch": 0.08698752228163993, "grad_norm": 0.455078125, "learning_rate": 1.999156413420682e-05, "loss": 1.1009, "num_tokens": 3059539366.0, "step": 488 }, { "epoch": 0.08716577540106951, "grad_norm": 0.419921875, "learning_rate": 1.9991451298937943e-05, "loss": 1.1134, "num_tokens": 3065818555.0, "step": 489 }, { "epoch": 0.0873440285204991, "grad_norm": 0.380859375, "learning_rate": 1.9991337714409533e-05, "loss": 1.1266, "num_tokens": 3072076740.0, "step": 490 }, { "epoch": 0.0875222816399287, "grad_norm": 0.421875, "learning_rate": 1.999122338063105e-05, "loss": 1.1236, "num_tokens": 3078358542.0, "step": 491 }, { "epoch": 0.0877005347593583, "grad_norm": 0.318359375, "learning_rate": 1.999110829761202e-05, "loss": 1.1273, "num_tokens": 3084589977.0, "step": 492 }, { "epoch": 0.08787878787878788, "grad_norm": 0.322265625, "learning_rate": 1.9990992465362033e-05, "loss": 1.1004, "num_tokens": 3090847403.0, "step": 493 }, { "epoch": 0.08805704099821747, "grad_norm": 0.35546875, "learning_rate": 1.9990875883890743e-05, "loss": 1.1077, "num_tokens": 3097132536.0, "step": 494 }, { "epoch": 0.08823529411764706, "grad_norm": 0.326171875, "learning_rate": 1.9990758553207864e-05, "loss": 1.1156, "num_tokens": 3103415350.0, "step": 495 }, { "epoch": 0.08841354723707665, "grad_norm": 0.392578125, "learning_rate": 1.9990640473323174e-05, "loss": 1.1171, "num_tokens": 3109698054.0, "step": 496 }, { "epoch": 0.08859180035650624, "grad_norm": 0.37109375, "learning_rate": 1.999052164424651e-05, "loss": 1.1061, "num_tokens": 3115950515.0, "step": 497 }, { "epoch": 0.08877005347593583, "grad_norm": 0.412109375, "learning_rate": 1.999040206598778e-05, "loss": 1.1437, "num_tokens": 3122210420.0, "step": 498 }, { "epoch": 0.08894830659536541, "grad_norm": 0.36328125, "learning_rate": 1.999028173855694e-05, "loss": 1.1341, "num_tokens": 3128493554.0, "step": 499 }, { "epoch": 0.08912655971479501, "grad_norm": 0.361328125, "learning_rate": 1.9990160661964025e-05, "loss": 1.1432, "num_tokens": 3134752025.0, "step": 500 }, { "epoch": 0.0893048128342246, "grad_norm": 0.314453125, "learning_rate": 1.999003883621912e-05, "loss": 1.1277, "num_tokens": 3141001591.0, "step": 501 }, { "epoch": 0.08948306595365418, "grad_norm": 0.365234375, "learning_rate": 1.9989916261332375e-05, "loss": 1.1327, "num_tokens": 3147207014.0, "step": 502 }, { "epoch": 0.08966131907308378, "grad_norm": 0.302734375, "learning_rate": 1.998979293731401e-05, "loss": 1.1019, "num_tokens": 3153488717.0, "step": 503 }, { "epoch": 0.08983957219251337, "grad_norm": 0.384765625, "learning_rate": 1.9989668864174297e-05, "loss": 1.1322, "num_tokens": 3159731142.0, "step": 504 }, { "epoch": 0.09001782531194295, "grad_norm": 0.3046875, "learning_rate": 1.9989544041923573e-05, "loss": 1.1546, "num_tokens": 3166014872.0, "step": 505 }, { "epoch": 0.09019607843137255, "grad_norm": 0.345703125, "learning_rate": 1.998941847057225e-05, "loss": 1.1288, "num_tokens": 3172282917.0, "step": 506 }, { "epoch": 0.09037433155080214, "grad_norm": 0.333984375, "learning_rate": 1.998929215013078e-05, "loss": 1.1197, "num_tokens": 3178531637.0, "step": 507 }, { "epoch": 0.09055258467023172, "grad_norm": 0.431640625, "learning_rate": 1.9989165080609692e-05, "loss": 1.0899, "num_tokens": 3184806757.0, "step": 508 }, { "epoch": 0.09073083778966132, "grad_norm": 0.37890625, "learning_rate": 1.9989037262019582e-05, "loss": 1.1136, "num_tokens": 3191047046.0, "step": 509 }, { "epoch": 0.09090909090909091, "grad_norm": 0.37890625, "learning_rate": 1.998890869437109e-05, "loss": 1.1163, "num_tokens": 3197329120.0, "step": 510 }, { "epoch": 0.0910873440285205, "grad_norm": 0.38671875, "learning_rate": 1.9988779377674942e-05, "loss": 1.111, "num_tokens": 3203612141.0, "step": 511 }, { "epoch": 0.09126559714795009, "grad_norm": 0.3203125, "learning_rate": 1.9988649311941907e-05, "loss": 1.1156, "num_tokens": 3209857879.0, "step": 512 }, { "epoch": 0.09144385026737968, "grad_norm": 0.3828125, "learning_rate": 1.998851849718282e-05, "loss": 1.1058, "num_tokens": 3216141646.0, "step": 513 }, { "epoch": 0.09162210338680928, "grad_norm": 0.318359375, "learning_rate": 1.9988386933408585e-05, "loss": 1.1464, "num_tokens": 3222425526.0, "step": 514 }, { "epoch": 0.09180035650623886, "grad_norm": 0.3671875, "learning_rate": 1.998825462063017e-05, "loss": 1.1238, "num_tokens": 3228709363.0, "step": 515 }, { "epoch": 0.09197860962566845, "grad_norm": 0.283203125, "learning_rate": 1.9988121558858593e-05, "loss": 1.0978, "num_tokens": 3234961672.0, "step": 516 }, { "epoch": 0.09215686274509804, "grad_norm": 0.40625, "learning_rate": 1.9987987748104947e-05, "loss": 1.1355, "num_tokens": 3241193871.0, "step": 517 }, { "epoch": 0.09233511586452763, "grad_norm": 0.28515625, "learning_rate": 1.9987853188380382e-05, "loss": 1.1132, "num_tokens": 3247433762.0, "step": 518 }, { "epoch": 0.09251336898395722, "grad_norm": 0.53515625, "learning_rate": 1.9987717879696108e-05, "loss": 1.131, "num_tokens": 3253717295.0, "step": 519 }, { "epoch": 0.09269162210338681, "grad_norm": 0.396484375, "learning_rate": 1.9987581822063405e-05, "loss": 1.1132, "num_tokens": 3260001096.0, "step": 520 }, { "epoch": 0.0928698752228164, "grad_norm": 0.55859375, "learning_rate": 1.9987445015493606e-05, "loss": 1.1106, "num_tokens": 3266287448.0, "step": 521 }, { "epoch": 0.09304812834224599, "grad_norm": 0.52734375, "learning_rate": 1.998730745999811e-05, "loss": 1.1368, "num_tokens": 3272570813.0, "step": 522 }, { "epoch": 0.09322638146167558, "grad_norm": 0.310546875, "learning_rate": 1.998716915558839e-05, "loss": 1.0969, "num_tokens": 3278854635.0, "step": 523 }, { "epoch": 0.09340463458110516, "grad_norm": 0.45703125, "learning_rate": 1.9987030102275958e-05, "loss": 1.1214, "num_tokens": 3285138240.0, "step": 524 }, { "epoch": 0.09358288770053476, "grad_norm": 0.306640625, "learning_rate": 1.9986890300072412e-05, "loss": 1.106, "num_tokens": 3291351789.0, "step": 525 }, { "epoch": 0.09376114081996435, "grad_norm": 0.4453125, "learning_rate": 1.9986749748989392e-05, "loss": 1.0937, "num_tokens": 3297602994.0, "step": 526 }, { "epoch": 0.09393939393939393, "grad_norm": 0.416015625, "learning_rate": 1.9986608449038618e-05, "loss": 1.1503, "num_tokens": 3303885233.0, "step": 527 }, { "epoch": 0.09411764705882353, "grad_norm": 0.3984375, "learning_rate": 1.998646640023186e-05, "loss": 1.1288, "num_tokens": 3310146028.0, "step": 528 }, { "epoch": 0.09429590017825312, "grad_norm": 0.390625, "learning_rate": 1.9986323602580958e-05, "loss": 1.0932, "num_tokens": 3316427951.0, "step": 529 }, { "epoch": 0.0944741532976827, "grad_norm": 0.376953125, "learning_rate": 1.9986180056097807e-05, "loss": 1.1166, "num_tokens": 3322696447.0, "step": 530 }, { "epoch": 0.0946524064171123, "grad_norm": 0.349609375, "learning_rate": 1.9986035760794372e-05, "loss": 1.0717, "num_tokens": 3328981129.0, "step": 531 }, { "epoch": 0.09483065953654189, "grad_norm": 0.341796875, "learning_rate": 1.9985890716682678e-05, "loss": 1.1016, "num_tokens": 3335221384.0, "step": 532 }, { "epoch": 0.09500891265597149, "grad_norm": 0.349609375, "learning_rate": 1.998574492377481e-05, "loss": 1.153, "num_tokens": 3341469215.0, "step": 533 }, { "epoch": 0.09518716577540107, "grad_norm": 0.31640625, "learning_rate": 1.9985598382082917e-05, "loss": 1.1259, "num_tokens": 3347733868.0, "step": 534 }, { "epoch": 0.09536541889483066, "grad_norm": 0.359375, "learning_rate": 1.998545109161921e-05, "loss": 1.1283, "num_tokens": 3354008606.0, "step": 535 }, { "epoch": 0.09554367201426026, "grad_norm": 0.28515625, "learning_rate": 1.9985303052395962e-05, "loss": 1.1053, "num_tokens": 3360292570.0, "step": 536 }, { "epoch": 0.09572192513368984, "grad_norm": 0.41796875, "learning_rate": 1.9985154264425513e-05, "loss": 1.137, "num_tokens": 3366544637.0, "step": 537 }, { "epoch": 0.09590017825311943, "grad_norm": 0.302734375, "learning_rate": 1.998500472772026e-05, "loss": 1.1189, "num_tokens": 3372820879.0, "step": 538 }, { "epoch": 0.09607843137254903, "grad_norm": 0.404296875, "learning_rate": 1.998485444229266e-05, "loss": 1.1368, "num_tokens": 3379072265.0, "step": 539 }, { "epoch": 0.0962566844919786, "grad_norm": 0.318359375, "learning_rate": 1.9984703408155237e-05, "loss": 1.1348, "num_tokens": 3385355913.0, "step": 540 }, { "epoch": 0.0964349376114082, "grad_norm": 0.4296875, "learning_rate": 1.9984551625320584e-05, "loss": 1.1151, "num_tokens": 3391629473.0, "step": 541 }, { "epoch": 0.0966131907308378, "grad_norm": 0.357421875, "learning_rate": 1.998439909380134e-05, "loss": 1.108, "num_tokens": 3397892442.0, "step": 542 }, { "epoch": 0.09679144385026738, "grad_norm": 0.44921875, "learning_rate": 1.998424581361022e-05, "loss": 1.1055, "num_tokens": 3404152085.0, "step": 543 }, { "epoch": 0.09696969696969697, "grad_norm": 0.39453125, "learning_rate": 1.998409178476e-05, "loss": 1.0988, "num_tokens": 3410420330.0, "step": 544 }, { "epoch": 0.09714795008912656, "grad_norm": 0.384765625, "learning_rate": 1.998393700726351e-05, "loss": 1.0941, "num_tokens": 3416675081.0, "step": 545 }, { "epoch": 0.09732620320855614, "grad_norm": 0.3828125, "learning_rate": 1.9983781481133645e-05, "loss": 1.1041, "num_tokens": 3422939857.0, "step": 546 }, { "epoch": 0.09750445632798574, "grad_norm": 0.283203125, "learning_rate": 1.9983625206383374e-05, "loss": 1.1154, "num_tokens": 3429194560.0, "step": 547 }, { "epoch": 0.09768270944741533, "grad_norm": 0.3359375, "learning_rate": 1.9983468183025712e-05, "loss": 1.1148, "num_tokens": 3435450091.0, "step": 548 }, { "epoch": 0.09786096256684491, "grad_norm": 0.267578125, "learning_rate": 1.9983310411073746e-05, "loss": 1.1188, "num_tokens": 3441732843.0, "step": 549 }, { "epoch": 0.09803921568627451, "grad_norm": 0.29296875, "learning_rate": 1.9983151890540627e-05, "loss": 1.0894, "num_tokens": 3448016551.0, "step": 550 }, { "epoch": 0.0982174688057041, "grad_norm": 0.2421875, "learning_rate": 1.998299262143956e-05, "loss": 1.1372, "num_tokens": 3454240260.0, "step": 551 }, { "epoch": 0.09839572192513368, "grad_norm": 0.33203125, "learning_rate": 1.9982832603783816e-05, "loss": 1.135, "num_tokens": 3460518454.0, "step": 552 }, { "epoch": 0.09857397504456328, "grad_norm": 0.263671875, "learning_rate": 1.9982671837586732e-05, "loss": 1.1459, "num_tokens": 3466801988.0, "step": 553 }, { "epoch": 0.09875222816399287, "grad_norm": 0.4140625, "learning_rate": 1.9982510322861703e-05, "loss": 1.1161, "num_tokens": 3473087736.0, "step": 554 }, { "epoch": 0.09893048128342247, "grad_norm": 0.2890625, "learning_rate": 1.9982348059622193e-05, "loss": 1.1038, "num_tokens": 3479346911.0, "step": 555 }, { "epoch": 0.09910873440285205, "grad_norm": 0.404296875, "learning_rate": 1.9982185047881718e-05, "loss": 1.1312, "num_tokens": 3485616274.0, "step": 556 }, { "epoch": 0.09928698752228164, "grad_norm": 0.33203125, "learning_rate": 1.9982021287653863e-05, "loss": 1.1047, "num_tokens": 3491871542.0, "step": 557 }, { "epoch": 0.09946524064171124, "grad_norm": 0.447265625, "learning_rate": 1.9981856778952273e-05, "loss": 1.0862, "num_tokens": 3498155239.0, "step": 558 }, { "epoch": 0.09964349376114082, "grad_norm": 0.3671875, "learning_rate": 1.998169152179066e-05, "loss": 1.0934, "num_tokens": 3504437974.0, "step": 559 }, { "epoch": 0.09982174688057041, "grad_norm": 0.455078125, "learning_rate": 1.9981525516182793e-05, "loss": 1.1204, "num_tokens": 3510722442.0, "step": 560 }, { "epoch": 0.1, "grad_norm": 0.443359375, "learning_rate": 1.9981358762142506e-05, "loss": 1.1401, "num_tokens": 3516987966.0, "step": 561 }, { "epoch": 0.10017825311942959, "grad_norm": 0.38671875, "learning_rate": 1.9981191259683693e-05, "loss": 1.0923, "num_tokens": 3523261824.0, "step": 562 }, { "epoch": 0.10035650623885918, "grad_norm": 0.40234375, "learning_rate": 1.998102300882031e-05, "loss": 1.1297, "num_tokens": 3529531347.0, "step": 563 }, { "epoch": 0.10053475935828877, "grad_norm": 0.3671875, "learning_rate": 1.9980854009566385e-05, "loss": 1.0929, "num_tokens": 3535815073.0, "step": 564 }, { "epoch": 0.10071301247771836, "grad_norm": 0.337890625, "learning_rate": 1.998068426193599e-05, "loss": 1.0835, "num_tokens": 3542100787.0, "step": 565 }, { "epoch": 0.10089126559714795, "grad_norm": 0.390625, "learning_rate": 1.9980513765943283e-05, "loss": 1.1129, "num_tokens": 3548368508.0, "step": 566 }, { "epoch": 0.10106951871657754, "grad_norm": 0.296875, "learning_rate": 1.9980342521602463e-05, "loss": 1.123, "num_tokens": 3554650432.0, "step": 567 }, { "epoch": 0.10124777183600712, "grad_norm": 0.353515625, "learning_rate": 1.99801705289278e-05, "loss": 1.138, "num_tokens": 3560925694.0, "step": 568 }, { "epoch": 0.10142602495543672, "grad_norm": 0.287109375, "learning_rate": 1.997999778793363e-05, "loss": 1.0806, "num_tokens": 3567187260.0, "step": 569 }, { "epoch": 0.10160427807486631, "grad_norm": 0.333984375, "learning_rate": 1.9979824298634344e-05, "loss": 1.1222, "num_tokens": 3573442590.0, "step": 570 }, { "epoch": 0.1017825311942959, "grad_norm": 0.263671875, "learning_rate": 1.99796500610444e-05, "loss": 1.098, "num_tokens": 6260552.0, "step": 571 }, { "epoch": 0.10196078431372549, "grad_norm": 0.3984375, "learning_rate": 1.9979475075178313e-05, "loss": 1.1221, "num_tokens": 12546399.0, "step": 572 }, { "epoch": 0.10213903743315508, "grad_norm": 0.267578125, "learning_rate": 1.997929934105067e-05, "loss": 1.1171, "num_tokens": 18756944.0, "step": 573 }, { "epoch": 0.10231729055258466, "grad_norm": 0.482421875, "learning_rate": 1.997912285867612e-05, "loss": 1.0959, "num_tokens": 24984153.0, "step": 574 }, { "epoch": 0.10249554367201426, "grad_norm": 0.40234375, "learning_rate": 1.9978945628069356e-05, "loss": 1.1106, "num_tokens": 31239398.0, "step": 575 }, { "epoch": 0.10267379679144385, "grad_norm": 0.388671875, "learning_rate": 1.997876764924516e-05, "loss": 1.0924, "num_tokens": 37485580.0, "step": 576 }, { "epoch": 0.10285204991087345, "grad_norm": 0.37890625, "learning_rate": 1.9978588922218356e-05, "loss": 1.0915, "num_tokens": 43741948.0, "step": 577 }, { "epoch": 0.10303030303030303, "grad_norm": 0.341796875, "learning_rate": 1.9978409447003836e-05, "loss": 1.0873, "num_tokens": 50019865.0, "step": 578 }, { "epoch": 0.10320855614973262, "grad_norm": 0.337890625, "learning_rate": 1.997822922361656e-05, "loss": 1.1298, "num_tokens": 56304339.0, "step": 579 }, { "epoch": 0.10338680926916222, "grad_norm": 0.302734375, "learning_rate": 1.9978048252071542e-05, "loss": 1.1034, "num_tokens": 62587832.0, "step": 580 }, { "epoch": 0.1035650623885918, "grad_norm": 0.314453125, "learning_rate": 1.9977866532383868e-05, "loss": 1.1358, "num_tokens": 68838412.0, "step": 581 }, { "epoch": 0.10374331550802139, "grad_norm": 0.3203125, "learning_rate": 1.997768406456867e-05, "loss": 1.1426, "num_tokens": 75123015.0, "step": 582 }, { "epoch": 0.10392156862745099, "grad_norm": 0.294921875, "learning_rate": 1.9977500848641168e-05, "loss": 1.1085, "num_tokens": 81407560.0, "step": 583 }, { "epoch": 0.10409982174688057, "grad_norm": 0.33984375, "learning_rate": 1.997731688461662e-05, "loss": 1.115, "num_tokens": 87624277.0, "step": 584 }, { "epoch": 0.10427807486631016, "grad_norm": 0.298828125, "learning_rate": 1.997713217251035e-05, "loss": 1.0927, "num_tokens": 93909184.0, "step": 585 }, { "epoch": 0.10445632798573976, "grad_norm": 0.3515625, "learning_rate": 1.9976946712337766e-05, "loss": 1.1019, "num_tokens": 100185899.0, "step": 586 }, { "epoch": 0.10463458110516934, "grad_norm": 0.267578125, "learning_rate": 1.9976760504114314e-05, "loss": 1.0952, "num_tokens": 106469710.0, "step": 587 }, { "epoch": 0.10481283422459893, "grad_norm": 0.341796875, "learning_rate": 1.9976573547855503e-05, "loss": 1.1396, "num_tokens": 112753330.0, "step": 588 }, { "epoch": 0.10499108734402852, "grad_norm": 0.255859375, "learning_rate": 1.9976385843576927e-05, "loss": 1.1063, "num_tokens": 119007981.0, "step": 589 }, { "epoch": 0.1051693404634581, "grad_norm": 0.39453125, "learning_rate": 1.9976197391294218e-05, "loss": 1.0788, "num_tokens": 125265200.0, "step": 590 }, { "epoch": 0.1053475935828877, "grad_norm": 0.26171875, "learning_rate": 1.9976008191023085e-05, "loss": 1.1135, "num_tokens": 131530634.0, "step": 591 }, { "epoch": 0.1055258467023173, "grad_norm": 0.41015625, "learning_rate": 1.9975818242779286e-05, "loss": 1.1115, "num_tokens": 137814803.0, "step": 592 }, { "epoch": 0.10570409982174687, "grad_norm": 0.31640625, "learning_rate": 1.9975627546578658e-05, "loss": 1.0887, "num_tokens": 144098687.0, "step": 593 }, { "epoch": 0.10588235294117647, "grad_norm": 0.4375, "learning_rate": 1.997543610243709e-05, "loss": 1.0864, "num_tokens": 150359932.0, "step": 594 }, { "epoch": 0.10606060606060606, "grad_norm": 0.3984375, "learning_rate": 1.9975243910370526e-05, "loss": 1.1248, "num_tokens": 156641198.0, "step": 595 }, { "epoch": 0.10623885918003564, "grad_norm": 0.40234375, "learning_rate": 1.9975050970394993e-05, "loss": 1.0865, "num_tokens": 162897168.0, "step": 596 }, { "epoch": 0.10641711229946524, "grad_norm": 0.373046875, "learning_rate": 1.997485728252657e-05, "loss": 1.102, "num_tokens": 169166983.0, "step": 597 }, { "epoch": 0.10659536541889483, "grad_norm": 0.345703125, "learning_rate": 1.997466284678138e-05, "loss": 1.078, "num_tokens": 175424476.0, "step": 598 }, { "epoch": 0.10677361853832443, "grad_norm": 0.333984375, "learning_rate": 1.9974467663175647e-05, "loss": 1.1253, "num_tokens": 181676739.0, "step": 599 }, { "epoch": 0.10695187165775401, "grad_norm": 0.33984375, "learning_rate": 1.9974271731725624e-05, "loss": 1.1133, "num_tokens": 187919254.0, "step": 600 }, { "epoch": 0.1071301247771836, "grad_norm": 0.28125, "learning_rate": 1.9974075052447637e-05, "loss": 1.1448, "num_tokens": 194201750.0, "step": 601 }, { "epoch": 0.1073083778966132, "grad_norm": 0.396484375, "learning_rate": 1.997387762535808e-05, "loss": 1.0946, "num_tokens": 200488159.0, "step": 602 }, { "epoch": 0.10748663101604278, "grad_norm": 0.3359375, "learning_rate": 1.99736794504734e-05, "loss": 1.0971, "num_tokens": 206771829.0, "step": 603 }, { "epoch": 0.10766488413547237, "grad_norm": 0.40234375, "learning_rate": 1.9973480527810115e-05, "loss": 1.1012, "num_tokens": 213045083.0, "step": 604 }, { "epoch": 0.10784313725490197, "grad_norm": 0.345703125, "learning_rate": 1.9973280857384804e-05, "loss": 1.109, "num_tokens": 219315617.0, "step": 605 }, { "epoch": 0.10802139037433155, "grad_norm": 0.33984375, "learning_rate": 1.9973080439214097e-05, "loss": 1.0995, "num_tokens": 225599038.0, "step": 606 }, { "epoch": 0.10819964349376114, "grad_norm": 0.3203125, "learning_rate": 1.9972879273314702e-05, "loss": 1.1143, "num_tokens": 231840097.0, "step": 607 }, { "epoch": 0.10837789661319074, "grad_norm": 0.36328125, "learning_rate": 1.9972677359703373e-05, "loss": 1.0947, "num_tokens": 238124067.0, "step": 608 }, { "epoch": 0.10855614973262032, "grad_norm": 0.291015625, "learning_rate": 1.997247469839695e-05, "loss": 1.0802, "num_tokens": 244401528.0, "step": 609 }, { "epoch": 0.10873440285204991, "grad_norm": 0.359375, "learning_rate": 1.997227128941231e-05, "loss": 1.1083, "num_tokens": 250657188.0, "step": 610 }, { "epoch": 0.1089126559714795, "grad_norm": 0.30859375, "learning_rate": 1.9972067132766405e-05, "loss": 1.1112, "num_tokens": 256909920.0, "step": 611 }, { "epoch": 0.10909090909090909, "grad_norm": 0.39453125, "learning_rate": 1.9971862228476245e-05, "loss": 1.105, "num_tokens": 263185366.0, "step": 612 }, { "epoch": 0.10926916221033868, "grad_norm": 0.373046875, "learning_rate": 1.9971656576558912e-05, "loss": 1.129, "num_tokens": 269434362.0, "step": 613 }, { "epoch": 0.10944741532976827, "grad_norm": 0.38671875, "learning_rate": 1.997145017703154e-05, "loss": 1.0884, "num_tokens": 275717473.0, "step": 614 }, { "epoch": 0.10962566844919786, "grad_norm": 0.357421875, "learning_rate": 1.9971243029911326e-05, "loss": 1.0683, "num_tokens": 281993659.0, "step": 615 }, { "epoch": 0.10980392156862745, "grad_norm": 0.337890625, "learning_rate": 1.9971035135215533e-05, "loss": 1.0795, "num_tokens": 288251772.0, "step": 616 }, { "epoch": 0.10998217468805704, "grad_norm": 0.3359375, "learning_rate": 1.9970826492961486e-05, "loss": 1.1096, "num_tokens": 294536144.0, "step": 617 }, { "epoch": 0.11016042780748662, "grad_norm": 0.328125, "learning_rate": 1.997061710316657e-05, "loss": 1.0961, "num_tokens": 300806700.0, "step": 618 }, { "epoch": 0.11033868092691622, "grad_norm": 0.314453125, "learning_rate": 1.9970406965848235e-05, "loss": 1.1071, "num_tokens": 307090149.0, "step": 619 }, { "epoch": 0.11051693404634581, "grad_norm": 0.322265625, "learning_rate": 1.9970196081023987e-05, "loss": 1.0922, "num_tokens": 313362651.0, "step": 620 }, { "epoch": 0.11069518716577541, "grad_norm": 0.30859375, "learning_rate": 1.9969984448711404e-05, "loss": 1.0946, "num_tokens": 319585607.0, "step": 621 }, { "epoch": 0.11087344028520499, "grad_norm": 0.333984375, "learning_rate": 1.996977206892812e-05, "loss": 1.1315, "num_tokens": 325843660.0, "step": 622 }, { "epoch": 0.11105169340463458, "grad_norm": 0.291015625, "learning_rate": 1.996955894169183e-05, "loss": 1.0715, "num_tokens": 332108953.0, "step": 623 }, { "epoch": 0.11122994652406418, "grad_norm": 0.302734375, "learning_rate": 1.99693450670203e-05, "loss": 1.1069, "num_tokens": 338389340.0, "step": 624 }, { "epoch": 0.11140819964349376, "grad_norm": 0.310546875, "learning_rate": 1.9969130444931347e-05, "loss": 1.1038, "num_tokens": 344672524.0, "step": 625 }, { "epoch": 0.11158645276292335, "grad_norm": 0.322265625, "learning_rate": 1.9968915075442862e-05, "loss": 1.0904, "num_tokens": 350949385.0, "step": 626 }, { "epoch": 0.11176470588235295, "grad_norm": 0.30859375, "learning_rate": 1.9968698958572782e-05, "loss": 1.1091, "num_tokens": 357218782.0, "step": 627 }, { "epoch": 0.11194295900178253, "grad_norm": 0.37109375, "learning_rate": 1.996848209433912e-05, "loss": 1.1104, "num_tokens": 363494720.0, "step": 628 }, { "epoch": 0.11212121212121212, "grad_norm": 0.294921875, "learning_rate": 1.9968264482759948e-05, "loss": 1.1139, "num_tokens": 369739041.0, "step": 629 }, { "epoch": 0.11229946524064172, "grad_norm": 0.349609375, "learning_rate": 1.9968046123853403e-05, "loss": 1.0979, "num_tokens": 376024761.0, "step": 630 }, { "epoch": 0.1124777183600713, "grad_norm": 0.28515625, "learning_rate": 1.9967827017637678e-05, "loss": 1.1237, "num_tokens": 382306523.0, "step": 631 }, { "epoch": 0.11265597147950089, "grad_norm": 0.404296875, "learning_rate": 1.996760716413103e-05, "loss": 1.0955, "num_tokens": 388576136.0, "step": 632 }, { "epoch": 0.11283422459893049, "grad_norm": 0.298828125, "learning_rate": 1.9967386563351784e-05, "loss": 1.0777, "num_tokens": 394822560.0, "step": 633 }, { "epoch": 0.11301247771836007, "grad_norm": 0.384765625, "learning_rate": 1.996716521531831e-05, "loss": 1.0778, "num_tokens": 401107777.0, "step": 634 }, { "epoch": 0.11319073083778966, "grad_norm": 0.3203125, "learning_rate": 1.996694312004907e-05, "loss": 1.1229, "num_tokens": 407391988.0, "step": 635 }, { "epoch": 0.11336898395721925, "grad_norm": 0.392578125, "learning_rate": 1.996672027756256e-05, "loss": 1.0997, "num_tokens": 413675787.0, "step": 636 }, { "epoch": 0.11354723707664884, "grad_norm": 0.34375, "learning_rate": 1.9966496687877356e-05, "loss": 1.0981, "num_tokens": 419959370.0, "step": 637 }, { "epoch": 0.11372549019607843, "grad_norm": 0.408203125, "learning_rate": 1.9966272351012085e-05, "loss": 1.1008, "num_tokens": 426242564.0, "step": 638 }, { "epoch": 0.11390374331550802, "grad_norm": 0.357421875, "learning_rate": 1.9966047266985446e-05, "loss": 1.1114, "num_tokens": 432507358.0, "step": 639 }, { "epoch": 0.1140819964349376, "grad_norm": 0.359375, "learning_rate": 1.9965821435816187e-05, "loss": 1.0662, "num_tokens": 438790988.0, "step": 640 }, { "epoch": 0.1142602495543672, "grad_norm": 0.36328125, "learning_rate": 1.9965594857523133e-05, "loss": 1.1143, "num_tokens": 445067909.0, "step": 641 }, { "epoch": 0.1144385026737968, "grad_norm": 0.349609375, "learning_rate": 1.9965367532125163e-05, "loss": 1.109, "num_tokens": 451350547.0, "step": 642 }, { "epoch": 0.11461675579322639, "grad_norm": 0.322265625, "learning_rate": 1.9965139459641222e-05, "loss": 1.0902, "num_tokens": 457594063.0, "step": 643 }, { "epoch": 0.11479500891265597, "grad_norm": 0.33203125, "learning_rate": 1.9964910640090313e-05, "loss": 1.1057, "num_tokens": 463876267.0, "step": 644 }, { "epoch": 0.11497326203208556, "grad_norm": 0.30078125, "learning_rate": 1.9964681073491506e-05, "loss": 1.0717, "num_tokens": 470132780.0, "step": 645 }, { "epoch": 0.11515151515151516, "grad_norm": 0.31640625, "learning_rate": 1.9964450759863928e-05, "loss": 1.0833, "num_tokens": 476388325.0, "step": 646 }, { "epoch": 0.11532976827094474, "grad_norm": 0.294921875, "learning_rate": 1.996421969922677e-05, "loss": 1.106, "num_tokens": 482672882.0, "step": 647 }, { "epoch": 0.11550802139037433, "grad_norm": 0.328125, "learning_rate": 1.9963987891599293e-05, "loss": 1.1446, "num_tokens": 488935137.0, "step": 648 }, { "epoch": 0.11568627450980393, "grad_norm": 0.294921875, "learning_rate": 1.9963755337000807e-05, "loss": 1.0904, "num_tokens": 495219017.0, "step": 649 }, { "epoch": 0.11586452762923351, "grad_norm": 0.29296875, "learning_rate": 1.9963522035450697e-05, "loss": 1.1352, "num_tokens": 501502031.0, "step": 650 }, { "epoch": 0.1160427807486631, "grad_norm": 0.337890625, "learning_rate": 1.9963287986968394e-05, "loss": 1.1025, "num_tokens": 507762214.0, "step": 651 }, { "epoch": 0.1162210338680927, "grad_norm": 0.28515625, "learning_rate": 1.9963053191573408e-05, "loss": 1.0638, "num_tokens": 513998656.0, "step": 652 }, { "epoch": 0.11639928698752228, "grad_norm": 0.31640625, "learning_rate": 1.9962817649285307e-05, "loss": 1.0785, "num_tokens": 520254694.0, "step": 653 }, { "epoch": 0.11657754010695187, "grad_norm": 0.291015625, "learning_rate": 1.9962581360123714e-05, "loss": 1.088, "num_tokens": 526539149.0, "step": 654 }, { "epoch": 0.11675579322638147, "grad_norm": 0.36328125, "learning_rate": 1.996234432410832e-05, "loss": 1.0722, "num_tokens": 532793783.0, "step": 655 }, { "epoch": 0.11693404634581105, "grad_norm": 0.267578125, "learning_rate": 1.996210654125888e-05, "loss": 1.113, "num_tokens": 539077281.0, "step": 656 }, { "epoch": 0.11711229946524064, "grad_norm": 0.41015625, "learning_rate": 1.9961868011595205e-05, "loss": 1.0864, "num_tokens": 545363540.0, "step": 657 }, { "epoch": 0.11729055258467024, "grad_norm": 0.26953125, "learning_rate": 1.9961628735137176e-05, "loss": 1.097, "num_tokens": 551622530.0, "step": 658 }, { "epoch": 0.11746880570409982, "grad_norm": 0.45703125, "learning_rate": 1.996138871190473e-05, "loss": 1.1006, "num_tokens": 557887406.0, "step": 659 }, { "epoch": 0.11764705882352941, "grad_norm": 0.373046875, "learning_rate": 1.9961147941917866e-05, "loss": 1.0928, "num_tokens": 564149929.0, "step": 660 }, { "epoch": 0.117825311942959, "grad_norm": 0.48046875, "learning_rate": 1.9960906425196645e-05, "loss": 1.0994, "num_tokens": 570409297.0, "step": 661 }, { "epoch": 0.11800356506238859, "grad_norm": 0.455078125, "learning_rate": 1.99606641617612e-05, "loss": 1.127, "num_tokens": 576684464.0, "step": 662 }, { "epoch": 0.11818181818181818, "grad_norm": 0.296875, "learning_rate": 1.996042115163172e-05, "loss": 1.0711, "num_tokens": 582967703.0, "step": 663 }, { "epoch": 0.11836007130124777, "grad_norm": 0.412109375, "learning_rate": 1.9960177394828443e-05, "loss": 1.1074, "num_tokens": 589250341.0, "step": 664 }, { "epoch": 0.11853832442067737, "grad_norm": 0.271484375, "learning_rate": 1.995993289137169e-05, "loss": 1.0734, "num_tokens": 595512211.0, "step": 665 }, { "epoch": 0.11871657754010695, "grad_norm": 0.412109375, "learning_rate": 1.9959687641281837e-05, "loss": 1.0913, "num_tokens": 601795283.0, "step": 666 }, { "epoch": 0.11889483065953654, "grad_norm": 0.3046875, "learning_rate": 1.9959441644579313e-05, "loss": 1.1019, "num_tokens": 608060718.0, "step": 667 }, { "epoch": 0.11907308377896614, "grad_norm": 0.4140625, "learning_rate": 1.9959194901284625e-05, "loss": 1.1192, "num_tokens": 614344439.0, "step": 668 }, { "epoch": 0.11925133689839572, "grad_norm": 0.349609375, "learning_rate": 1.995894741141833e-05, "loss": 1.1065, "num_tokens": 620630162.0, "step": 669 }, { "epoch": 0.11942959001782531, "grad_norm": 0.404296875, "learning_rate": 1.995869917500106e-05, "loss": 1.1291, "num_tokens": 626897726.0, "step": 670 }, { "epoch": 0.11960784313725491, "grad_norm": 0.3671875, "learning_rate": 1.9958450192053484e-05, "loss": 1.0985, "num_tokens": 633162327.0, "step": 671 }, { "epoch": 0.11978609625668449, "grad_norm": 0.322265625, "learning_rate": 1.995820046259636e-05, "loss": 1.0934, "num_tokens": 639447024.0, "step": 672 }, { "epoch": 0.11996434937611408, "grad_norm": 0.33203125, "learning_rate": 1.99579499866505e-05, "loss": 1.078, "num_tokens": 645730818.0, "step": 673 }, { "epoch": 0.12014260249554368, "grad_norm": 0.314453125, "learning_rate": 1.9957698764236773e-05, "loss": 1.0853, "num_tokens": 652014875.0, "step": 674 }, { "epoch": 0.12032085561497326, "grad_norm": 0.271484375, "learning_rate": 1.9957446795376113e-05, "loss": 1.0618, "num_tokens": 658297120.0, "step": 675 }, { "epoch": 0.12049910873440285, "grad_norm": 0.328125, "learning_rate": 1.9957194080089516e-05, "loss": 1.0855, "num_tokens": 664582017.0, "step": 676 }, { "epoch": 0.12067736185383245, "grad_norm": 0.25, "learning_rate": 1.9956940618398045e-05, "loss": 1.1047, "num_tokens": 670849686.0, "step": 677 }, { "epoch": 0.12085561497326203, "grad_norm": 0.337890625, "learning_rate": 1.9956686410322814e-05, "loss": 1.0846, "num_tokens": 677103449.0, "step": 678 }, { "epoch": 0.12103386809269162, "grad_norm": 0.271484375, "learning_rate": 1.9956431455885013e-05, "loss": 1.1044, "num_tokens": 683367505.0, "step": 679 }, { "epoch": 0.12121212121212122, "grad_norm": 0.314453125, "learning_rate": 1.9956175755105883e-05, "loss": 1.089, "num_tokens": 689652255.0, "step": 680 }, { "epoch": 0.1213903743315508, "grad_norm": 0.28125, "learning_rate": 1.9955919308006735e-05, "loss": 1.1103, "num_tokens": 695925042.0, "step": 681 }, { "epoch": 0.12156862745098039, "grad_norm": 0.2421875, "learning_rate": 1.9955662114608937e-05, "loss": 1.1227, "num_tokens": 702204841.0, "step": 682 }, { "epoch": 0.12174688057040998, "grad_norm": 0.318359375, "learning_rate": 1.9955404174933916e-05, "loss": 1.0899, "num_tokens": 708487936.0, "step": 683 }, { "epoch": 0.12192513368983957, "grad_norm": 0.2578125, "learning_rate": 1.9955145489003177e-05, "loss": 1.0934, "num_tokens": 714651583.0, "step": 684 }, { "epoch": 0.12210338680926916, "grad_norm": 0.34765625, "learning_rate": 1.9954886056838268e-05, "loss": 1.1048, "num_tokens": 6282959.0, "step": 685 }, { "epoch": 0.12228163992869875, "grad_norm": 0.279296875, "learning_rate": 1.995462587846081e-05, "loss": 1.0821, "num_tokens": 12569622.0, "step": 686 }, { "epoch": 0.12245989304812835, "grad_norm": 0.341796875, "learning_rate": 1.9954364953892487e-05, "loss": 1.1007, "num_tokens": 18833792.0, "step": 687 }, { "epoch": 0.12263814616755793, "grad_norm": 0.29296875, "learning_rate": 1.9954103283155038e-05, "loss": 1.1139, "num_tokens": 25085850.0, "step": 688 }, { "epoch": 0.12281639928698752, "grad_norm": 0.333984375, "learning_rate": 1.9953840866270263e-05, "loss": 1.1076, "num_tokens": 31370621.0, "step": 689 }, { "epoch": 0.12299465240641712, "grad_norm": 0.298828125, "learning_rate": 1.995357770326004e-05, "loss": 1.0917, "num_tokens": 37644145.0, "step": 690 }, { "epoch": 0.1231729055258467, "grad_norm": 0.369140625, "learning_rate": 1.995331379414629e-05, "loss": 1.0985, "num_tokens": 43860041.0, "step": 691 }, { "epoch": 0.1233511586452763, "grad_norm": 0.32421875, "learning_rate": 1.9953049138951013e-05, "loss": 1.0842, "num_tokens": 50115643.0, "step": 692 }, { "epoch": 0.12352941176470589, "grad_norm": 0.328125, "learning_rate": 1.9952783737696257e-05, "loss": 1.1037, "num_tokens": 56371062.0, "step": 693 }, { "epoch": 0.12370766488413547, "grad_norm": 0.322265625, "learning_rate": 1.9952517590404135e-05, "loss": 1.1219, "num_tokens": 62650371.0, "step": 694 }, { "epoch": 0.12388591800356506, "grad_norm": 0.3203125, "learning_rate": 1.995225069709683e-05, "loss": 1.1074, "num_tokens": 68910571.0, "step": 695 }, { "epoch": 0.12406417112299466, "grad_norm": 0.310546875, "learning_rate": 1.9951983057796583e-05, "loss": 1.1162, "num_tokens": 75186422.0, "step": 696 }, { "epoch": 0.12424242424242424, "grad_norm": 0.33203125, "learning_rate": 1.9951714672525694e-05, "loss": 1.1055, "num_tokens": 81458960.0, "step": 697 }, { "epoch": 0.12442067736185383, "grad_norm": 0.30078125, "learning_rate": 1.995144554130653e-05, "loss": 1.084, "num_tokens": 87715977.0, "step": 698 }, { "epoch": 0.12459893048128343, "grad_norm": 0.318359375, "learning_rate": 1.995117566416151e-05, "loss": 1.0961, "num_tokens": 94001946.0, "step": 699 }, { "epoch": 0.12477718360071301, "grad_norm": 0.275390625, "learning_rate": 1.9950905041113135e-05, "loss": 1.0937, "num_tokens": 100284410.0, "step": 700 }, { "epoch": 0.1249554367201426, "grad_norm": 0.33203125, "learning_rate": 1.9950633672183946e-05, "loss": 1.1061, "num_tokens": 106567450.0, "step": 701 }, { "epoch": 0.12513368983957218, "grad_norm": 0.27734375, "learning_rate": 1.9950361557396565e-05, "loss": 1.1238, "num_tokens": 112792055.0, "step": 702 }, { "epoch": 0.12531194295900178, "grad_norm": 0.34765625, "learning_rate": 1.9950088696773654e-05, "loss": 1.1345, "num_tokens": 119053268.0, "step": 703 }, { "epoch": 0.12549019607843137, "grad_norm": 0.29296875, "learning_rate": 1.9949815090337966e-05, "loss": 1.0811, "num_tokens": 125336758.0, "step": 704 }, { "epoch": 0.12566844919786097, "grad_norm": 0.376953125, "learning_rate": 1.9949540738112295e-05, "loss": 1.0978, "num_tokens": 131612876.0, "step": 705 }, { "epoch": 0.12584670231729056, "grad_norm": 0.330078125, "learning_rate": 1.9949265640119498e-05, "loss": 1.0841, "num_tokens": 137885553.0, "step": 706 }, { "epoch": 0.12602495543672015, "grad_norm": 0.375, "learning_rate": 1.9948989796382503e-05, "loss": 1.0897, "num_tokens": 144139023.0, "step": 707 }, { "epoch": 0.12620320855614972, "grad_norm": 0.375, "learning_rate": 1.9948713206924294e-05, "loss": 1.1105, "num_tokens": 150423463.0, "step": 708 }, { "epoch": 0.12638146167557932, "grad_norm": 0.326171875, "learning_rate": 1.9948435871767925e-05, "loss": 1.0944, "num_tokens": 156707453.0, "step": 709 }, { "epoch": 0.1265597147950089, "grad_norm": 0.33203125, "learning_rate": 1.99481577909365e-05, "loss": 1.0896, "num_tokens": 162958464.0, "step": 710 }, { "epoch": 0.1267379679144385, "grad_norm": 0.318359375, "learning_rate": 1.9947878964453197e-05, "loss": 1.0838, "num_tokens": 169213914.0, "step": 711 }, { "epoch": 0.1269162210338681, "grad_norm": 0.326171875, "learning_rate": 1.9947599392341242e-05, "loss": 1.0757, "num_tokens": 175489952.0, "step": 712 }, { "epoch": 0.1270944741532977, "grad_norm": 0.251953125, "learning_rate": 1.994731907462394e-05, "loss": 1.1076, "num_tokens": 181773928.0, "step": 713 }, { "epoch": 0.12727272727272726, "grad_norm": 0.359375, "learning_rate": 1.994703801132465e-05, "loss": 1.0885, "num_tokens": 188057771.0, "step": 714 }, { "epoch": 0.12745098039215685, "grad_norm": 0.2734375, "learning_rate": 1.9946756202466783e-05, "loss": 1.124, "num_tokens": 194342037.0, "step": 715 }, { "epoch": 0.12762923351158645, "grad_norm": 0.412109375, "learning_rate": 1.9946473648073838e-05, "loss": 1.108, "num_tokens": 200624802.0, "step": 716 }, { "epoch": 0.12780748663101604, "grad_norm": 0.326171875, "learning_rate": 1.9946190348169344e-05, "loss": 1.077, "num_tokens": 206869568.0, "step": 717 }, { "epoch": 0.12798573975044564, "grad_norm": 0.357421875, "learning_rate": 1.9945906302776922e-05, "loss": 1.0753, "num_tokens": 213125536.0, "step": 718 }, { "epoch": 0.12816399286987523, "grad_norm": 0.375, "learning_rate": 1.9945621511920237e-05, "loss": 1.0762, "num_tokens": 219402266.0, "step": 719 }, { "epoch": 0.12834224598930483, "grad_norm": 0.326171875, "learning_rate": 1.9945335975623014e-05, "loss": 1.0985, "num_tokens": 225628796.0, "step": 720 }, { "epoch": 0.1285204991087344, "grad_norm": 0.34765625, "learning_rate": 1.994504969390906e-05, "loss": 1.0933, "num_tokens": 231904689.0, "step": 721 }, { "epoch": 0.128698752228164, "grad_norm": 0.2890625, "learning_rate": 1.9944762666802214e-05, "loss": 1.0917, "num_tokens": 238173567.0, "step": 722 }, { "epoch": 0.12887700534759358, "grad_norm": 0.33984375, "learning_rate": 1.994447489432641e-05, "loss": 1.1192, "num_tokens": 244441845.0, "step": 723 }, { "epoch": 0.12905525846702318, "grad_norm": 0.30078125, "learning_rate": 1.994418637650562e-05, "loss": 1.1101, "num_tokens": 250704031.0, "step": 724 }, { "epoch": 0.12923351158645277, "grad_norm": 0.337890625, "learning_rate": 1.9943897113363887e-05, "loss": 1.0784, "num_tokens": 256964092.0, "step": 725 }, { "epoch": 0.12941176470588237, "grad_norm": 0.302734375, "learning_rate": 1.9943607104925314e-05, "loss": 1.0898, "num_tokens": 263246504.0, "step": 726 }, { "epoch": 0.12959001782531193, "grad_norm": 0.365234375, "learning_rate": 1.9943316351214073e-05, "loss": 1.1097, "num_tokens": 269464116.0, "step": 727 }, { "epoch": 0.12976827094474153, "grad_norm": 0.33984375, "learning_rate": 1.9943024852254388e-05, "loss": 1.0924, "num_tokens": 275748221.0, "step": 728 }, { "epoch": 0.12994652406417112, "grad_norm": 0.34765625, "learning_rate": 1.994273260807055e-05, "loss": 1.0924, "num_tokens": 282006238.0, "step": 729 }, { "epoch": 0.13012477718360071, "grad_norm": 0.3203125, "learning_rate": 1.994243961868691e-05, "loss": 1.0766, "num_tokens": 288290016.0, "step": 730 }, { "epoch": 0.1303030303030303, "grad_norm": 0.396484375, "learning_rate": 1.994214588412789e-05, "loss": 1.1177, "num_tokens": 294571826.0, "step": 731 }, { "epoch": 0.1304812834224599, "grad_norm": 0.33984375, "learning_rate": 1.994185140441796e-05, "loss": 1.1096, "num_tokens": 300830861.0, "step": 732 }, { "epoch": 0.13065953654188947, "grad_norm": 0.396484375, "learning_rate": 1.9941556179581663e-05, "loss": 1.0972, "num_tokens": 307091929.0, "step": 733 }, { "epoch": 0.13083778966131906, "grad_norm": 0.34765625, "learning_rate": 1.9941260209643594e-05, "loss": 1.1159, "num_tokens": 313374818.0, "step": 734 }, { "epoch": 0.13101604278074866, "grad_norm": 0.365234375, "learning_rate": 1.9940963494628427e-05, "loss": 1.0729, "num_tokens": 319657480.0, "step": 735 }, { "epoch": 0.13119429590017825, "grad_norm": 0.36328125, "learning_rate": 1.994066603456088e-05, "loss": 1.147, "num_tokens": 325942494.0, "step": 736 }, { "epoch": 0.13137254901960785, "grad_norm": 0.375, "learning_rate": 1.9940367829465737e-05, "loss": 1.0974, "num_tokens": 332212805.0, "step": 737 }, { "epoch": 0.13155080213903744, "grad_norm": 0.365234375, "learning_rate": 1.994006887936786e-05, "loss": 1.0992, "num_tokens": 338450147.0, "step": 738 }, { "epoch": 0.131729055258467, "grad_norm": 0.31640625, "learning_rate": 1.9939769184292147e-05, "loss": 1.1224, "num_tokens": 344683927.0, "step": 739 }, { "epoch": 0.1319073083778966, "grad_norm": 0.322265625, "learning_rate": 1.9939468744263575e-05, "loss": 1.0605, "num_tokens": 350963246.0, "step": 740 }, { "epoch": 0.1320855614973262, "grad_norm": 0.33203125, "learning_rate": 1.9939167559307185e-05, "loss": 1.0871, "num_tokens": 357214711.0, "step": 741 }, { "epoch": 0.1322638146167558, "grad_norm": 0.2890625, "learning_rate": 1.9938865629448073e-05, "loss": 1.0979, "num_tokens": 363431378.0, "step": 742 }, { "epoch": 0.1324420677361854, "grad_norm": 0.390625, "learning_rate": 1.9938562954711393e-05, "loss": 1.1064, "num_tokens": 369714266.0, "step": 743 }, { "epoch": 0.13262032085561498, "grad_norm": 0.328125, "learning_rate": 1.9938259535122378e-05, "loss": 1.0969, "num_tokens": 375998699.0, "step": 744 }, { "epoch": 0.13279857397504458, "grad_norm": 0.396484375, "learning_rate": 1.9937955370706302e-05, "loss": 1.1098, "num_tokens": 382249728.0, "step": 745 }, { "epoch": 0.13297682709447414, "grad_norm": 0.341796875, "learning_rate": 1.9937650461488517e-05, "loss": 1.0978, "num_tokens": 388526753.0, "step": 746 }, { "epoch": 0.13315508021390374, "grad_norm": 0.37890625, "learning_rate": 1.9937344807494425e-05, "loss": 1.086, "num_tokens": 394810481.0, "step": 747 }, { "epoch": 0.13333333333333333, "grad_norm": 0.36328125, "learning_rate": 1.99370384087495e-05, "loss": 1.0736, "num_tokens": 401092453.0, "step": 748 }, { "epoch": 0.13351158645276293, "grad_norm": 0.3125, "learning_rate": 1.993673126527928e-05, "loss": 1.1003, "num_tokens": 407361723.0, "step": 749 }, { "epoch": 0.13368983957219252, "grad_norm": 0.345703125, "learning_rate": 1.993642337710935e-05, "loss": 1.1055, "num_tokens": 413616438.0, "step": 750 }, { "epoch": 0.13386809269162211, "grad_norm": 0.2578125, "learning_rate": 1.993611474426537e-05, "loss": 1.0827, "num_tokens": 419875322.0, "step": 751 }, { "epoch": 0.13404634581105168, "grad_norm": 0.287109375, "learning_rate": 1.993580536677306e-05, "loss": 1.0873, "num_tokens": 426134677.0, "step": 752 }, { "epoch": 0.13422459893048128, "grad_norm": 0.28515625, "learning_rate": 1.9935495244658195e-05, "loss": 1.1497, "num_tokens": 432419373.0, "step": 753 }, { "epoch": 0.13440285204991087, "grad_norm": 0.265625, "learning_rate": 1.9935184377946624e-05, "loss": 1.1056, "num_tokens": 438701459.0, "step": 754 }, { "epoch": 0.13458110516934046, "grad_norm": 0.353515625, "learning_rate": 1.993487276666425e-05, "loss": 1.0805, "num_tokens": 444987037.0, "step": 755 }, { "epoch": 0.13475935828877006, "grad_norm": 0.27734375, "learning_rate": 1.993456041083704e-05, "loss": 1.0736, "num_tokens": 451271894.0, "step": 756 }, { "epoch": 0.13493761140819965, "grad_norm": 0.37890625, "learning_rate": 1.993424731049102e-05, "loss": 1.0951, "num_tokens": 457556174.0, "step": 757 }, { "epoch": 0.13511586452762922, "grad_norm": 0.298828125, "learning_rate": 1.9933933465652283e-05, "loss": 1.0804, "num_tokens": 463838105.0, "step": 758 }, { "epoch": 0.13529411764705881, "grad_norm": 0.41015625, "learning_rate": 1.9933618876346976e-05, "loss": 1.0695, "num_tokens": 470102494.0, "step": 759 }, { "epoch": 0.1354723707664884, "grad_norm": 0.3359375, "learning_rate": 1.9933303542601326e-05, "loss": 1.0954, "num_tokens": 476386027.0, "step": 760 }, { "epoch": 0.135650623885918, "grad_norm": 0.41015625, "learning_rate": 1.9932987464441597e-05, "loss": 1.106, "num_tokens": 482628035.0, "step": 761 }, { "epoch": 0.1358288770053476, "grad_norm": 0.38671875, "learning_rate": 1.9932670641894135e-05, "loss": 1.0979, "num_tokens": 488892467.0, "step": 762 }, { "epoch": 0.1360071301247772, "grad_norm": 0.35546875, "learning_rate": 1.993235307498534e-05, "loss": 1.1044, "num_tokens": 495172700.0, "step": 763 }, { "epoch": 0.1361853832442068, "grad_norm": 0.36328125, "learning_rate": 1.993203476374167e-05, "loss": 1.0944, "num_tokens": 501455710.0, "step": 764 }, { "epoch": 0.13636363636363635, "grad_norm": 0.275390625, "learning_rate": 1.9931715708189663e-05, "loss": 1.1041, "num_tokens": 507724027.0, "step": 765 }, { "epoch": 0.13654188948306595, "grad_norm": 0.3125, "learning_rate": 1.9931395908355893e-05, "loss": 1.107, "num_tokens": 513983875.0, "step": 766 }, { "epoch": 0.13672014260249554, "grad_norm": 0.328125, "learning_rate": 1.9931075364267008e-05, "loss": 1.1221, "num_tokens": 520266357.0, "step": 767 }, { "epoch": 0.13689839572192514, "grad_norm": 0.28515625, "learning_rate": 1.993075407594973e-05, "loss": 1.0979, "num_tokens": 526514430.0, "step": 768 }, { "epoch": 0.13707664884135473, "grad_norm": 0.271484375, "learning_rate": 1.9930432043430824e-05, "loss": 1.0838, "num_tokens": 532791951.0, "step": 769 }, { "epoch": 0.13725490196078433, "grad_norm": 0.287109375, "learning_rate": 1.9930109266737128e-05, "loss": 1.0752, "num_tokens": 539015451.0, "step": 770 }, { "epoch": 0.1374331550802139, "grad_norm": 0.2890625, "learning_rate": 1.992978574589554e-05, "loss": 1.0965, "num_tokens": 545278989.0, "step": 771 }, { "epoch": 0.1376114081996435, "grad_norm": 0.26171875, "learning_rate": 1.992946148093301e-05, "loss": 1.1043, "num_tokens": 551551985.0, "step": 772 }, { "epoch": 0.13778966131907308, "grad_norm": 0.330078125, "learning_rate": 1.9929136471876577e-05, "loss": 1.0882, "num_tokens": 557801876.0, "step": 773 }, { "epoch": 0.13796791443850268, "grad_norm": 0.2490234375, "learning_rate": 1.992881071875331e-05, "loss": 1.0886, "num_tokens": 564059125.0, "step": 774 }, { "epoch": 0.13814616755793227, "grad_norm": 0.341796875, "learning_rate": 1.992848422159035e-05, "loss": 1.0793, "num_tokens": 570341680.0, "step": 775 }, { "epoch": 0.13832442067736186, "grad_norm": 0.283203125, "learning_rate": 1.992815698041492e-05, "loss": 1.0798, "num_tokens": 576585098.0, "step": 776 }, { "epoch": 0.13850267379679143, "grad_norm": 0.296875, "learning_rate": 1.9927828995254276e-05, "loss": 1.0809, "num_tokens": 582807319.0, "step": 777 }, { "epoch": 0.13868092691622103, "grad_norm": 0.296875, "learning_rate": 1.9927500266135755e-05, "loss": 1.0997, "num_tokens": 589082497.0, "step": 778 }, { "epoch": 0.13885918003565062, "grad_norm": 0.265625, "learning_rate": 1.992717079308675e-05, "loss": 1.1028, "num_tokens": 595308594.0, "step": 779 }, { "epoch": 0.13903743315508021, "grad_norm": 0.267578125, "learning_rate": 1.9926840576134714e-05, "loss": 1.0659, "num_tokens": 601592656.0, "step": 780 }, { "epoch": 0.1392156862745098, "grad_norm": 0.263671875, "learning_rate": 1.9926509615307167e-05, "loss": 1.0904, "num_tokens": 607861421.0, "step": 781 }, { "epoch": 0.1393939393939394, "grad_norm": 0.25390625, "learning_rate": 1.9926177910631684e-05, "loss": 1.061, "num_tokens": 614120039.0, "step": 782 }, { "epoch": 0.139572192513369, "grad_norm": 0.3125, "learning_rate": 1.9925845462135905e-05, "loss": 1.0971, "num_tokens": 620395733.0, "step": 783 }, { "epoch": 0.13975044563279856, "grad_norm": 0.310546875, "learning_rate": 1.992551226984754e-05, "loss": 1.0962, "num_tokens": 626651714.0, "step": 784 }, { "epoch": 0.13992869875222816, "grad_norm": 0.263671875, "learning_rate": 1.992517833379435e-05, "loss": 1.0665, "num_tokens": 632934755.0, "step": 785 }, { "epoch": 0.14010695187165775, "grad_norm": 0.322265625, "learning_rate": 1.992484365400416e-05, "loss": 1.0915, "num_tokens": 639216681.0, "step": 786 }, { "epoch": 0.14028520499108735, "grad_norm": 0.28125, "learning_rate": 1.992450823050486e-05, "loss": 1.1026, "num_tokens": 645499834.0, "step": 787 }, { "epoch": 0.14046345811051694, "grad_norm": 0.384765625, "learning_rate": 1.99241720633244e-05, "loss": 1.0918, "num_tokens": 651755273.0, "step": 788 }, { "epoch": 0.14064171122994654, "grad_norm": 0.30078125, "learning_rate": 1.99238351524908e-05, "loss": 1.0887, "num_tokens": 658038845.0, "step": 789 }, { "epoch": 0.1408199643493761, "grad_norm": 0.361328125, "learning_rate": 1.992349749803213e-05, "loss": 1.0932, "num_tokens": 664286290.0, "step": 790 }, { "epoch": 0.1409982174688057, "grad_norm": 0.294921875, "learning_rate": 1.9923159099976522e-05, "loss": 1.0958, "num_tokens": 670571218.0, "step": 791 }, { "epoch": 0.1411764705882353, "grad_norm": 0.34765625, "learning_rate": 1.9922819958352183e-05, "loss": 1.1031, "num_tokens": 676853597.0, "step": 792 }, { "epoch": 0.1413547237076649, "grad_norm": 0.294921875, "learning_rate": 1.9922480073187363e-05, "loss": 1.1126, "num_tokens": 683135375.0, "step": 793 }, { "epoch": 0.14153297682709448, "grad_norm": 0.33984375, "learning_rate": 1.99221394445104e-05, "loss": 1.083, "num_tokens": 689419228.0, "step": 794 }, { "epoch": 0.14171122994652408, "grad_norm": 0.29296875, "learning_rate": 1.9921798072349666e-05, "loss": 1.1091, "num_tokens": 695700536.0, "step": 795 }, { "epoch": 0.14188948306595364, "grad_norm": 0.337890625, "learning_rate": 1.9921455956733613e-05, "loss": 1.0664, "num_tokens": 701958688.0, "step": 796 }, { "epoch": 0.14206773618538324, "grad_norm": 0.287109375, "learning_rate": 1.9921113097690746e-05, "loss": 1.0819, "num_tokens": 708243389.0, "step": 797 }, { "epoch": 0.14224598930481283, "grad_norm": 0.330078125, "learning_rate": 1.992076949524964e-05, "loss": 1.0871, "num_tokens": 714526608.0, "step": 798 }, { "epoch": 0.14242424242424243, "grad_norm": 0.28515625, "learning_rate": 1.992042514943892e-05, "loss": 1.0626, "num_tokens": 720810120.0, "step": 799 }, { "epoch": 0.14260249554367202, "grad_norm": 0.34375, "learning_rate": 1.9920080060287293e-05, "loss": 1.1018, "num_tokens": 727092769.0, "step": 800 }, { "epoch": 0.14278074866310161, "grad_norm": 0.28515625, "learning_rate": 1.9919734227823506e-05, "loss": 1.0928, "num_tokens": 733375903.0, "step": 801 }, { "epoch": 0.14295900178253118, "grad_norm": 0.34375, "learning_rate": 1.991938765207638e-05, "loss": 1.0647, "num_tokens": 739659325.0, "step": 802 }, { "epoch": 0.14313725490196078, "grad_norm": 0.291015625, "learning_rate": 1.9919040333074795e-05, "loss": 1.0784, "num_tokens": 745943229.0, "step": 803 }, { "epoch": 0.14331550802139037, "grad_norm": 0.34765625, "learning_rate": 1.991869227084769e-05, "loss": 1.0934, "num_tokens": 752220583.0, "step": 804 }, { "epoch": 0.14349376114081996, "grad_norm": 0.30859375, "learning_rate": 1.9918343465424075e-05, "loss": 1.0656, "num_tokens": 758482295.0, "step": 805 }, { "epoch": 0.14367201426024956, "grad_norm": 0.412109375, "learning_rate": 1.9917993916833008e-05, "loss": 1.0818, "num_tokens": 764765768.0, "step": 806 }, { "epoch": 0.14385026737967915, "grad_norm": 0.376953125, "learning_rate": 1.9917643625103625e-05, "loss": 1.089, "num_tokens": 771012033.0, "step": 807 }, { "epoch": 0.14402852049910875, "grad_norm": 0.357421875, "learning_rate": 1.9917292590265116e-05, "loss": 1.0806, "num_tokens": 777295165.0, "step": 808 }, { "epoch": 0.14420677361853831, "grad_norm": 0.349609375, "learning_rate": 1.9916940812346726e-05, "loss": 1.0506, "num_tokens": 783553008.0, "step": 809 }, { "epoch": 0.1443850267379679, "grad_norm": 0.302734375, "learning_rate": 1.991658829137777e-05, "loss": 1.0629, "num_tokens": 789815780.0, "step": 810 }, { "epoch": 0.1445632798573975, "grad_norm": 0.34375, "learning_rate": 1.991623502738763e-05, "loss": 1.0563, "num_tokens": 796072538.0, "step": 811 }, { "epoch": 0.1447415329768271, "grad_norm": 0.291015625, "learning_rate": 1.991588102040574e-05, "loss": 1.1018, "num_tokens": 802340486.0, "step": 812 }, { "epoch": 0.1449197860962567, "grad_norm": 0.35546875, "learning_rate": 1.9915526270461598e-05, "loss": 1.0609, "num_tokens": 808603153.0, "step": 813 }, { "epoch": 0.1450980392156863, "grad_norm": 0.30078125, "learning_rate": 1.9915170777584767e-05, "loss": 1.0937, "num_tokens": 814871493.0, "step": 814 }, { "epoch": 0.14527629233511585, "grad_norm": 0.326171875, "learning_rate": 1.9914814541804868e-05, "loss": 1.0928, "num_tokens": 821156126.0, "step": 815 }, { "epoch": 0.14545454545454545, "grad_norm": 0.26953125, "learning_rate": 1.9914457563151586e-05, "loss": 1.1047, "num_tokens": 827440352.0, "step": 816 }, { "epoch": 0.14563279857397504, "grad_norm": 0.31640625, "learning_rate": 1.991409984165467e-05, "loss": 1.0783, "num_tokens": 833689459.0, "step": 817 }, { "epoch": 0.14581105169340464, "grad_norm": 0.28515625, "learning_rate": 1.9913741377343936e-05, "loss": 1.091, "num_tokens": 839920558.0, "step": 818 }, { "epoch": 0.14598930481283423, "grad_norm": 0.28515625, "learning_rate": 1.9913382170249244e-05, "loss": 1.0817, "num_tokens": 846178680.0, "step": 819 }, { "epoch": 0.14616755793226383, "grad_norm": 0.275390625, "learning_rate": 1.9913022220400525e-05, "loss": 1.0972, "num_tokens": 852434418.0, "step": 820 }, { "epoch": 0.1463458110516934, "grad_norm": 0.314453125, "learning_rate": 1.9912661527827788e-05, "loss": 1.1072, "num_tokens": 858717672.0, "step": 821 }, { "epoch": 0.146524064171123, "grad_norm": 0.296875, "learning_rate": 1.9912300092561076e-05, "loss": 1.0591, "num_tokens": 865001826.0, "step": 822 }, { "epoch": 0.14670231729055258, "grad_norm": 0.373046875, "learning_rate": 1.9911937914630516e-05, "loss": 1.088, "num_tokens": 871286302.0, "step": 823 }, { "epoch": 0.14688057040998218, "grad_norm": 0.279296875, "learning_rate": 1.9911574994066283e-05, "loss": 1.0805, "num_tokens": 877570850.0, "step": 824 }, { "epoch": 0.14705882352941177, "grad_norm": 0.390625, "learning_rate": 1.991121133089862e-05, "loss": 1.078, "num_tokens": 883833674.0, "step": 825 }, { "epoch": 0.14723707664884136, "grad_norm": 0.3203125, "learning_rate": 1.9910846925157833e-05, "loss": 1.0865, "num_tokens": 890090519.0, "step": 826 }, { "epoch": 0.14741532976827096, "grad_norm": 0.458984375, "learning_rate": 1.9910481776874286e-05, "loss": 1.0564, "num_tokens": 896374549.0, "step": 827 }, { "epoch": 0.14759358288770053, "grad_norm": 0.375, "learning_rate": 1.991011588607841e-05, "loss": 1.0623, "num_tokens": 902635589.0, "step": 828 }, { "epoch": 0.14777183600713012, "grad_norm": 0.384765625, "learning_rate": 1.990974925280069e-05, "loss": 1.0616, "num_tokens": 908912697.0, "step": 829 }, { "epoch": 0.14795008912655971, "grad_norm": 0.412109375, "learning_rate": 1.9909381877071684e-05, "loss": 1.0735, "num_tokens": 915158305.0, "step": 830 }, { "epoch": 0.1481283422459893, "grad_norm": 0.322265625, "learning_rate": 1.9909013758922e-05, "loss": 1.0976, "num_tokens": 921434119.0, "step": 831 }, { "epoch": 0.1483065953654189, "grad_norm": 0.353515625, "learning_rate": 1.9908644898382315e-05, "loss": 1.078, "num_tokens": 927679646.0, "step": 832 }, { "epoch": 0.1484848484848485, "grad_norm": 0.318359375, "learning_rate": 1.9908275295483366e-05, "loss": 1.1149, "num_tokens": 933956910.0, "step": 833 }, { "epoch": 0.14866310160427806, "grad_norm": 0.33203125, "learning_rate": 1.9907904950255957e-05, "loss": 1.1007, "num_tokens": 940211137.0, "step": 834 }, { "epoch": 0.14884135472370766, "grad_norm": 0.330078125, "learning_rate": 1.9907533862730943e-05, "loss": 1.0684, "num_tokens": 946488622.0, "step": 835 }, { "epoch": 0.14901960784313725, "grad_norm": 0.291015625, "learning_rate": 1.9907162032939245e-05, "loss": 1.069, "num_tokens": 952765983.0, "step": 836 }, { "epoch": 0.14919786096256685, "grad_norm": 0.275390625, "learning_rate": 1.9906789460911854e-05, "loss": 1.0809, "num_tokens": 959047537.0, "step": 837 }, { "epoch": 0.14937611408199644, "grad_norm": 0.302734375, "learning_rate": 1.990641614667981e-05, "loss": 1.0663, "num_tokens": 965298754.0, "step": 838 }, { "epoch": 0.14955436720142604, "grad_norm": 0.24609375, "learning_rate": 1.990604209027423e-05, "loss": 1.0809, "num_tokens": 971555616.0, "step": 839 }, { "epoch": 0.1497326203208556, "grad_norm": 0.388671875, "learning_rate": 1.990566729172628e-05, "loss": 1.1006, "num_tokens": 977832932.0, "step": 840 }, { "epoch": 0.1499108734402852, "grad_norm": 0.28125, "learning_rate": 1.9905291751067187e-05, "loss": 1.1175, "num_tokens": 984113159.0, "step": 841 }, { "epoch": 0.1500891265597148, "grad_norm": 0.458984375, "learning_rate": 1.9904915468328248e-05, "loss": 1.0859, "num_tokens": 990376884.0, "step": 842 }, { "epoch": 0.15026737967914439, "grad_norm": 0.384765625, "learning_rate": 1.9904538443540823e-05, "loss": 1.0742, "num_tokens": 996627561.0, "step": 843 }, { "epoch": 0.15044563279857398, "grad_norm": 0.384765625, "learning_rate": 1.9904160676736325e-05, "loss": 1.0816, "num_tokens": 1002912208.0, "step": 844 }, { "epoch": 0.15062388591800357, "grad_norm": 0.369140625, "learning_rate": 1.990378216794623e-05, "loss": 1.086, "num_tokens": 1009187143.0, "step": 845 }, { "epoch": 0.15080213903743314, "grad_norm": 0.357421875, "learning_rate": 1.9903402917202094e-05, "loss": 1.1046, "num_tokens": 1015439172.0, "step": 846 }, { "epoch": 0.15098039215686274, "grad_norm": 0.314453125, "learning_rate": 1.9903022924535506e-05, "loss": 1.0525, "num_tokens": 1021721519.0, "step": 847 }, { "epoch": 0.15115864527629233, "grad_norm": 0.3515625, "learning_rate": 1.9902642189978133e-05, "loss": 1.0767, "num_tokens": 1028003884.0, "step": 848 }, { "epoch": 0.15133689839572192, "grad_norm": 0.291015625, "learning_rate": 1.99022607135617e-05, "loss": 1.0789, "num_tokens": 1034287703.0, "step": 849 }, { "epoch": 0.15151515151515152, "grad_norm": 0.353515625, "learning_rate": 1.9901878495318006e-05, "loss": 1.0856, "num_tokens": 1040565924.0, "step": 850 }, { "epoch": 0.1516934046345811, "grad_norm": 0.265625, "learning_rate": 1.9901495535278892e-05, "loss": 1.0759, "num_tokens": 1046851251.0, "step": 851 }, { "epoch": 0.1518716577540107, "grad_norm": 0.453125, "learning_rate": 1.990111183347627e-05, "loss": 1.067, "num_tokens": 1053106951.0, "step": 852 }, { "epoch": 0.15204991087344027, "grad_norm": 0.337890625, "learning_rate": 1.9900727389942122e-05, "loss": 1.1034, "num_tokens": 1059392235.0, "step": 853 }, { "epoch": 0.15222816399286987, "grad_norm": 0.396484375, "learning_rate": 1.990034220470847e-05, "loss": 1.0611, "num_tokens": 1065675595.0, "step": 854 }, { "epoch": 0.15240641711229946, "grad_norm": 0.416015625, "learning_rate": 1.989995627780743e-05, "loss": 1.0874, "num_tokens": 1071941683.0, "step": 855 }, { "epoch": 0.15258467023172906, "grad_norm": 0.318359375, "learning_rate": 1.9899569609271145e-05, "loss": 1.0894, "num_tokens": 1078200212.0, "step": 856 }, { "epoch": 0.15276292335115865, "grad_norm": 0.322265625, "learning_rate": 1.989918219913184e-05, "loss": 1.0854, "num_tokens": 1084483462.0, "step": 857 }, { "epoch": 0.15294117647058825, "grad_norm": 0.3203125, "learning_rate": 1.9898794047421804e-05, "loss": 1.0734, "num_tokens": 1090742011.0, "step": 858 }, { "epoch": 0.1531194295900178, "grad_norm": 0.298828125, "learning_rate": 1.989840515417338e-05, "loss": 1.0852, "num_tokens": 1097020618.0, "step": 859 }, { "epoch": 0.1532976827094474, "grad_norm": 0.349609375, "learning_rate": 1.989801551941897e-05, "loss": 1.0978, "num_tokens": 1103305667.0, "step": 860 }, { "epoch": 0.153475935828877, "grad_norm": 0.2734375, "learning_rate": 1.9897625143191044e-05, "loss": 1.0824, "num_tokens": 1109589372.0, "step": 861 }, { "epoch": 0.1536541889483066, "grad_norm": 0.4296875, "learning_rate": 1.9897234025522136e-05, "loss": 1.0879, "num_tokens": 1115874711.0, "step": 862 }, { "epoch": 0.1538324420677362, "grad_norm": 0.337890625, "learning_rate": 1.989684216644483e-05, "loss": 1.0607, "num_tokens": 1122112210.0, "step": 863 }, { "epoch": 0.15401069518716579, "grad_norm": 0.470703125, "learning_rate": 1.9896449565991788e-05, "loss": 1.1101, "num_tokens": 1128350948.0, "step": 864 }, { "epoch": 0.15418894830659535, "grad_norm": 0.466796875, "learning_rate": 1.9896056224195723e-05, "loss": 1.0815, "num_tokens": 1134621796.0, "step": 865 }, { "epoch": 0.15436720142602495, "grad_norm": 0.30859375, "learning_rate": 1.989566214108941e-05, "loss": 1.0643, "num_tokens": 1140855121.0, "step": 866 }, { "epoch": 0.15454545454545454, "grad_norm": 0.357421875, "learning_rate": 1.9895267316705692e-05, "loss": 1.0803, "num_tokens": 1147092784.0, "step": 867 }, { "epoch": 0.15472370766488414, "grad_norm": 0.3125, "learning_rate": 1.9894871751077462e-05, "loss": 1.0937, "num_tokens": 1153374761.0, "step": 868 }, { "epoch": 0.15490196078431373, "grad_norm": 0.322265625, "learning_rate": 1.9894475444237694e-05, "loss": 1.1034, "num_tokens": 1159659742.0, "step": 869 }, { "epoch": 0.15508021390374332, "grad_norm": 0.2890625, "learning_rate": 1.9894078396219403e-05, "loss": 1.093, "num_tokens": 1165912511.0, "step": 870 }, { "epoch": 0.15525846702317292, "grad_norm": 0.2890625, "learning_rate": 1.989368060705568e-05, "loss": 1.0854, "num_tokens": 1172181539.0, "step": 871 }, { "epoch": 0.15543672014260249, "grad_norm": 0.302734375, "learning_rate": 1.989328207677967e-05, "loss": 1.0351, "num_tokens": 1178466025.0, "step": 872 }, { "epoch": 0.15561497326203208, "grad_norm": 0.298828125, "learning_rate": 1.9892882805424583e-05, "loss": 1.0518, "num_tokens": 1184751403.0, "step": 873 }, { "epoch": 0.15579322638146167, "grad_norm": 0.291015625, "learning_rate": 1.989248279302369e-05, "loss": 1.1112, "num_tokens": 1191036359.0, "step": 874 }, { "epoch": 0.15597147950089127, "grad_norm": 0.267578125, "learning_rate": 1.9892082039610326e-05, "loss": 1.0912, "num_tokens": 1197320150.0, "step": 875 }, { "epoch": 0.15614973262032086, "grad_norm": 0.310546875, "learning_rate": 1.989168054521789e-05, "loss": 1.045, "num_tokens": 1203604671.0, "step": 876 }, { "epoch": 0.15632798573975046, "grad_norm": 0.2578125, "learning_rate": 1.9891278309879828e-05, "loss": 1.113, "num_tokens": 1209862882.0, "step": 877 }, { "epoch": 0.15650623885918002, "grad_norm": 0.337890625, "learning_rate": 1.9890875333629667e-05, "loss": 1.1087, "num_tokens": 1216124469.0, "step": 878 }, { "epoch": 0.15668449197860962, "grad_norm": 0.271484375, "learning_rate": 1.989047161650098e-05, "loss": 1.1015, "num_tokens": 1222386779.0, "step": 879 }, { "epoch": 0.1568627450980392, "grad_norm": 0.42578125, "learning_rate": 1.9890067158527418e-05, "loss": 1.1022, "num_tokens": 1228672152.0, "step": 880 }, { "epoch": 0.1570409982174688, "grad_norm": 0.341796875, "learning_rate": 1.988966195974268e-05, "loss": 1.1194, "num_tokens": 1234945141.0, "step": 881 }, { "epoch": 0.1572192513368984, "grad_norm": 0.408203125, "learning_rate": 1.9889256020180525e-05, "loss": 1.0672, "num_tokens": 1241195184.0, "step": 882 }, { "epoch": 0.157397504456328, "grad_norm": 0.396484375, "learning_rate": 1.9888849339874788e-05, "loss": 1.0448, "num_tokens": 1247440809.0, "step": 883 }, { "epoch": 0.15757575757575756, "grad_norm": 0.361328125, "learning_rate": 1.988844191885936e-05, "loss": 1.062, "num_tokens": 1253699822.0, "step": 884 }, { "epoch": 0.15775401069518716, "grad_norm": 0.369140625, "learning_rate": 1.9888033757168183e-05, "loss": 1.0734, "num_tokens": 1259982446.0, "step": 885 }, { "epoch": 0.15793226381461675, "grad_norm": 0.31640625, "learning_rate": 1.9887624854835274e-05, "loss": 1.0902, "num_tokens": 1266267655.0, "step": 886 }, { "epoch": 0.15811051693404635, "grad_norm": 0.326171875, "learning_rate": 1.9887215211894707e-05, "loss": 1.1013, "num_tokens": 1272541218.0, "step": 887 }, { "epoch": 0.15828877005347594, "grad_norm": 0.291015625, "learning_rate": 1.9886804828380616e-05, "loss": 1.1009, "num_tokens": 1278814081.0, "step": 888 }, { "epoch": 0.15846702317290554, "grad_norm": 0.294921875, "learning_rate": 1.9886393704327202e-05, "loss": 1.0759, "num_tokens": 1285096682.0, "step": 889 }, { "epoch": 0.1586452762923351, "grad_norm": 0.30078125, "learning_rate": 1.988598183976872e-05, "loss": 1.0779, "num_tokens": 1291350979.0, "step": 890 }, { "epoch": 0.1588235294117647, "grad_norm": 0.25, "learning_rate": 1.9885569234739488e-05, "loss": 1.0865, "num_tokens": 1297633397.0, "step": 891 }, { "epoch": 0.1590017825311943, "grad_norm": 0.30078125, "learning_rate": 1.98851558892739e-05, "loss": 1.0918, "num_tokens": 1303917532.0, "step": 892 }, { "epoch": 0.15918003565062389, "grad_norm": 0.2412109375, "learning_rate": 1.9884741803406385e-05, "loss": 1.108, "num_tokens": 1310194516.0, "step": 893 }, { "epoch": 0.15935828877005348, "grad_norm": 0.291015625, "learning_rate": 1.988432697717146e-05, "loss": 1.0923, "num_tokens": 1316478551.0, "step": 894 }, { "epoch": 0.15953654188948307, "grad_norm": 0.25, "learning_rate": 1.9883911410603688e-05, "loss": 1.0917, "num_tokens": 1322762347.0, "step": 895 }, { "epoch": 0.15971479500891267, "grad_norm": 0.28515625, "learning_rate": 1.98834951037377e-05, "loss": 1.0581, "num_tokens": 1329043702.0, "step": 896 }, { "epoch": 0.15989304812834224, "grad_norm": 0.2734375, "learning_rate": 1.9883078056608187e-05, "loss": 1.0968, "num_tokens": 1335289543.0, "step": 897 }, { "epoch": 0.16007130124777183, "grad_norm": 0.275390625, "learning_rate": 1.98826602692499e-05, "loss": 1.0648, "num_tokens": 1341568667.0, "step": 898 }, { "epoch": 0.16024955436720142, "grad_norm": 0.33203125, "learning_rate": 1.9882241741697658e-05, "loss": 1.0946, "num_tokens": 1347847740.0, "step": 899 }, { "epoch": 0.16042780748663102, "grad_norm": 0.265625, "learning_rate": 1.988182247398633e-05, "loss": 1.1277, "num_tokens": 1354090190.0, "step": 900 }, { "epoch": 0.1606060606060606, "grad_norm": 0.326171875, "learning_rate": 1.9881402466150855e-05, "loss": 1.0756, "num_tokens": 1360345298.0, "step": 901 }, { "epoch": 0.1607843137254902, "grad_norm": 0.3046875, "learning_rate": 1.9880981718226236e-05, "loss": 1.0628, "num_tokens": 1366613173.0, "step": 902 }, { "epoch": 0.16096256684491977, "grad_norm": 0.29296875, "learning_rate": 1.988056023024753e-05, "loss": 1.041, "num_tokens": 1372874191.0, "step": 903 }, { "epoch": 0.16114081996434937, "grad_norm": 0.283203125, "learning_rate": 1.9880138002249865e-05, "loss": 1.0811, "num_tokens": 1379144088.0, "step": 904 }, { "epoch": 0.16131907308377896, "grad_norm": 0.2392578125, "learning_rate": 1.9879715034268422e-05, "loss": 1.058, "num_tokens": 1385429159.0, "step": 905 }, { "epoch": 0.16149732620320856, "grad_norm": 0.302734375, "learning_rate": 1.9879291326338447e-05, "loss": 1.0543, "num_tokens": 1391652326.0, "step": 906 }, { "epoch": 0.16167557932263815, "grad_norm": 0.271484375, "learning_rate": 1.9878866878495248e-05, "loss": 1.0477, "num_tokens": 1397909878.0, "step": 907 }, { "epoch": 0.16185383244206775, "grad_norm": 0.33203125, "learning_rate": 1.9878441690774194e-05, "loss": 1.117, "num_tokens": 1404194228.0, "step": 908 }, { "epoch": 0.1620320855614973, "grad_norm": 0.28125, "learning_rate": 1.9878015763210715e-05, "loss": 1.0755, "num_tokens": 1410477221.0, "step": 909 }, { "epoch": 0.1622103386809269, "grad_norm": 0.32421875, "learning_rate": 1.9877589095840305e-05, "loss": 1.0578, "num_tokens": 1416759279.0, "step": 910 }, { "epoch": 0.1623885918003565, "grad_norm": 0.294921875, "learning_rate": 1.987716168869852e-05, "loss": 1.0777, "num_tokens": 1423042389.0, "step": 911 }, { "epoch": 0.1625668449197861, "grad_norm": 0.294921875, "learning_rate": 1.9876733541820975e-05, "loss": 1.0959, "num_tokens": 1429301724.0, "step": 912 }, { "epoch": 0.1627450980392157, "grad_norm": 0.29296875, "learning_rate": 1.9876304655243342e-05, "loss": 1.0956, "num_tokens": 1435577988.0, "step": 913 }, { "epoch": 0.16292335115864529, "grad_norm": 0.29296875, "learning_rate": 1.9875875029001367e-05, "loss": 1.0802, "num_tokens": 1441864507.0, "step": 914 }, { "epoch": 0.16310160427807488, "grad_norm": 0.28515625, "learning_rate": 1.987544466313085e-05, "loss": 1.0784, "num_tokens": 1448126316.0, "step": 915 }, { "epoch": 0.16327985739750445, "grad_norm": 0.296875, "learning_rate": 1.9875013557667652e-05, "loss": 1.0977, "num_tokens": 1454384501.0, "step": 916 }, { "epoch": 0.16345811051693404, "grad_norm": 0.26171875, "learning_rate": 1.9874581712647696e-05, "loss": 1.0936, "num_tokens": 1460615166.0, "step": 917 }, { "epoch": 0.16363636363636364, "grad_norm": 0.31640625, "learning_rate": 1.987414912810697e-05, "loss": 1.0925, "num_tokens": 1466894608.0, "step": 918 }, { "epoch": 0.16381461675579323, "grad_norm": 0.251953125, "learning_rate": 1.9873715804081523e-05, "loss": 1.0888, "num_tokens": 1473150918.0, "step": 919 }, { "epoch": 0.16399286987522282, "grad_norm": 0.34375, "learning_rate": 1.987328174060746e-05, "loss": 1.0937, "num_tokens": 1479412075.0, "step": 920 }, { "epoch": 0.16417112299465242, "grad_norm": 0.294921875, "learning_rate": 1.987284693772095e-05, "loss": 1.059, "num_tokens": 1485693034.0, "step": 921 }, { "epoch": 0.16434937611408199, "grad_norm": 0.3671875, "learning_rate": 1.9872411395458233e-05, "loss": 1.0659, "num_tokens": 1491950788.0, "step": 922 }, { "epoch": 0.16452762923351158, "grad_norm": 0.3203125, "learning_rate": 1.9871975113855594e-05, "loss": 1.0775, "num_tokens": 1498234991.0, "step": 923 }, { "epoch": 0.16470588235294117, "grad_norm": 0.3671875, "learning_rate": 1.9871538092949393e-05, "loss": 1.0683, "num_tokens": 1504518392.0, "step": 924 }, { "epoch": 0.16488413547237077, "grad_norm": 0.31640625, "learning_rate": 1.9871100332776047e-05, "loss": 1.096, "num_tokens": 1510802502.0, "step": 925 }, { "epoch": 0.16506238859180036, "grad_norm": 0.3046875, "learning_rate": 1.987066183337203e-05, "loss": 1.099, "num_tokens": 1517084685.0, "step": 926 }, { "epoch": 0.16524064171122996, "grad_norm": 0.3125, "learning_rate": 1.9870222594773893e-05, "loss": 1.0929, "num_tokens": 1523368640.0, "step": 927 }, { "epoch": 0.16541889483065952, "grad_norm": 0.298828125, "learning_rate": 1.9869782617018228e-05, "loss": 1.0814, "num_tokens": 1529653016.0, "step": 928 }, { "epoch": 0.16559714795008912, "grad_norm": 0.318359375, "learning_rate": 1.9869341900141705e-05, "loss": 1.0947, "num_tokens": 1535936945.0, "step": 929 }, { "epoch": 0.1657754010695187, "grad_norm": 0.294921875, "learning_rate": 1.9868900444181044e-05, "loss": 1.0637, "num_tokens": 1542220215.0, "step": 930 }, { "epoch": 0.1659536541889483, "grad_norm": 0.32421875, "learning_rate": 1.9868458249173034e-05, "loss": 1.0407, "num_tokens": 1548506345.0, "step": 931 }, { "epoch": 0.1661319073083779, "grad_norm": 0.322265625, "learning_rate": 1.986801531515452e-05, "loss": 1.0779, "num_tokens": 1554756602.0, "step": 932 }, { "epoch": 0.1663101604278075, "grad_norm": 0.328125, "learning_rate": 1.9867571642162416e-05, "loss": 1.1117, "num_tokens": 1561040809.0, "step": 933 }, { "epoch": 0.16648841354723706, "grad_norm": 0.326171875, "learning_rate": 1.9867127230233688e-05, "loss": 1.0976, "num_tokens": 1567312850.0, "step": 934 }, { "epoch": 0.16666666666666666, "grad_norm": 0.318359375, "learning_rate": 1.9866682079405376e-05, "loss": 1.0715, "num_tokens": 1573596278.0, "step": 935 }, { "epoch": 0.16684491978609625, "grad_norm": 0.328125, "learning_rate": 1.9866236189714575e-05, "loss": 1.0761, "num_tokens": 1579841292.0, "step": 936 }, { "epoch": 0.16702317290552585, "grad_norm": 0.3125, "learning_rate": 1.986578956119843e-05, "loss": 1.0669, "num_tokens": 1586126575.0, "step": 937 }, { "epoch": 0.16720142602495544, "grad_norm": 0.310546875, "learning_rate": 1.986534219389417e-05, "loss": 1.065, "num_tokens": 1592409479.0, "step": 938 }, { "epoch": 0.16737967914438504, "grad_norm": 0.349609375, "learning_rate": 1.9864894087839072e-05, "loss": 1.0785, "num_tokens": 1598679728.0, "step": 939 }, { "epoch": 0.16755793226381463, "grad_norm": 0.263671875, "learning_rate": 1.9864445243070472e-05, "loss": 1.0682, "num_tokens": 1604917462.0, "step": 940 }, { "epoch": 0.1677361853832442, "grad_norm": 0.31640625, "learning_rate": 1.9863995659625774e-05, "loss": 1.0951, "num_tokens": 1611199223.0, "step": 941 }, { "epoch": 0.1679144385026738, "grad_norm": 0.27734375, "learning_rate": 1.9863545337542443e-05, "loss": 1.0621, "num_tokens": 1617454114.0, "step": 942 }, { "epoch": 0.16809269162210339, "grad_norm": 0.310546875, "learning_rate": 1.9863094276858008e-05, "loss": 1.0793, "num_tokens": 1623706540.0, "step": 943 }, { "epoch": 0.16827094474153298, "grad_norm": 0.28125, "learning_rate": 1.986264247761005e-05, "loss": 1.0746, "num_tokens": 1629947864.0, "step": 944 }, { "epoch": 0.16844919786096257, "grad_norm": 0.291015625, "learning_rate": 1.986218993983622e-05, "loss": 1.0786, "num_tokens": 1636209699.0, "step": 945 }, { "epoch": 0.16862745098039217, "grad_norm": 0.296875, "learning_rate": 1.986173666357423e-05, "loss": 1.065, "num_tokens": 1642456913.0, "step": 946 }, { "epoch": 0.16880570409982174, "grad_norm": 0.251953125, "learning_rate": 1.9861282648861847e-05, "loss": 1.0846, "num_tokens": 1648729369.0, "step": 947 }, { "epoch": 0.16898395721925133, "grad_norm": 0.322265625, "learning_rate": 1.986082789573691e-05, "loss": 1.0931, "num_tokens": 1655012452.0, "step": 948 }, { "epoch": 0.16916221033868092, "grad_norm": 0.2734375, "learning_rate": 1.9860372404237306e-05, "loss": 1.0964, "num_tokens": 1661298226.0, "step": 949 }, { "epoch": 0.16934046345811052, "grad_norm": 0.34375, "learning_rate": 1.9859916174401e-05, "loss": 1.0993, "num_tokens": 1667540955.0, "step": 950 }, { "epoch": 0.1695187165775401, "grad_norm": 0.39453125, "learning_rate": 1.9859459206266e-05, "loss": 1.0797, "num_tokens": 1673826441.0, "step": 951 }, { "epoch": 0.1696969696969697, "grad_norm": 0.2890625, "learning_rate": 1.9859001499870395e-05, "loss": 1.1036, "num_tokens": 1680092741.0, "step": 952 }, { "epoch": 0.16987522281639927, "grad_norm": 0.296875, "learning_rate": 1.9858543055252322e-05, "loss": 1.0867, "num_tokens": 1686352619.0, "step": 953 }, { "epoch": 0.17005347593582887, "grad_norm": 0.265625, "learning_rate": 1.9858083872449986e-05, "loss": 1.0642, "num_tokens": 1692635955.0, "step": 954 }, { "epoch": 0.17023172905525846, "grad_norm": 0.369140625, "learning_rate": 1.985762395150164e-05, "loss": 1.0558, "num_tokens": 1698901955.0, "step": 955 }, { "epoch": 0.17040998217468806, "grad_norm": 0.28515625, "learning_rate": 1.9857163292445622e-05, "loss": 1.0823, "num_tokens": 1705137117.0, "step": 956 }, { "epoch": 0.17058823529411765, "grad_norm": 0.3828125, "learning_rate": 1.9856701895320312e-05, "loss": 1.0421, "num_tokens": 1711420856.0, "step": 957 }, { "epoch": 0.17076648841354725, "grad_norm": 0.283203125, "learning_rate": 1.985623976016416e-05, "loss": 1.0831, "num_tokens": 1717661564.0, "step": 958 }, { "epoch": 0.17094474153297684, "grad_norm": 0.361328125, "learning_rate": 1.9855776887015675e-05, "loss": 1.0842, "num_tokens": 1723946523.0, "step": 959 }, { "epoch": 0.1711229946524064, "grad_norm": 0.279296875, "learning_rate": 1.985531327591343e-05, "loss": 1.0939, "num_tokens": 1730230161.0, "step": 960 }, { "epoch": 0.171301247771836, "grad_norm": 0.38671875, "learning_rate": 1.9854848926896055e-05, "loss": 1.0882, "num_tokens": 1736481305.0, "step": 961 }, { "epoch": 0.1714795008912656, "grad_norm": 0.28515625, "learning_rate": 1.9854383840002253e-05, "loss": 1.0839, "num_tokens": 1742764982.0, "step": 962 }, { "epoch": 0.1716577540106952, "grad_norm": 0.42578125, "learning_rate": 1.9853918015270766e-05, "loss": 1.0971, "num_tokens": 1749046994.0, "step": 963 }, { "epoch": 0.17183600713012478, "grad_norm": 0.35546875, "learning_rate": 1.985345145274042e-05, "loss": 1.0974, "num_tokens": 1755328856.0, "step": 964 }, { "epoch": 0.17201426024955438, "grad_norm": 0.388671875, "learning_rate": 1.9852984152450092e-05, "loss": 1.0804, "num_tokens": 1761613160.0, "step": 965 }, { "epoch": 0.17219251336898395, "grad_norm": 0.380859375, "learning_rate": 1.9852516114438722e-05, "loss": 1.0904, "num_tokens": 1767880862.0, "step": 966 }, { "epoch": 0.17237076648841354, "grad_norm": 0.33203125, "learning_rate": 1.9852047338745316e-05, "loss": 1.0427, "num_tokens": 1774163376.0, "step": 967 }, { "epoch": 0.17254901960784313, "grad_norm": 0.353515625, "learning_rate": 1.9851577825408926e-05, "loss": 1.0842, "num_tokens": 1780422691.0, "step": 968 }, { "epoch": 0.17272727272727273, "grad_norm": 0.30078125, "learning_rate": 1.9851107574468688e-05, "loss": 1.0627, "num_tokens": 1786674367.0, "step": 969 }, { "epoch": 0.17290552584670232, "grad_norm": 0.267578125, "learning_rate": 1.9850636585963784e-05, "loss": 1.0733, "num_tokens": 1792933842.0, "step": 970 }, { "epoch": 0.17308377896613192, "grad_norm": 0.333984375, "learning_rate": 1.9850164859933462e-05, "loss": 1.1343, "num_tokens": 1799213575.0, "step": 971 }, { "epoch": 0.17326203208556148, "grad_norm": 0.2734375, "learning_rate": 1.9849692396417032e-05, "loss": 1.1176, "num_tokens": 1805419153.0, "step": 972 }, { "epoch": 0.17344028520499108, "grad_norm": 0.357421875, "learning_rate": 1.984921919545386e-05, "loss": 1.0384, "num_tokens": 1811701537.0, "step": 973 }, { "epoch": 0.17361853832442067, "grad_norm": 0.291015625, "learning_rate": 1.984874525708338e-05, "loss": 1.0748, "num_tokens": 1817976921.0, "step": 974 }, { "epoch": 0.17379679144385027, "grad_norm": 0.34765625, "learning_rate": 1.984827058134509e-05, "loss": 1.0747, "num_tokens": 1824256679.0, "step": 975 }, { "epoch": 0.17397504456327986, "grad_norm": 0.27734375, "learning_rate": 1.9847795168278538e-05, "loss": 1.0653, "num_tokens": 1830538854.0, "step": 976 }, { "epoch": 0.17415329768270946, "grad_norm": 0.326171875, "learning_rate": 1.9847319017923347e-05, "loss": 1.1061, "num_tokens": 1836808350.0, "step": 977 }, { "epoch": 0.17433155080213902, "grad_norm": 0.28125, "learning_rate": 1.984684213031919e-05, "loss": 1.0434, "num_tokens": 1843074511.0, "step": 978 }, { "epoch": 0.17450980392156862, "grad_norm": 0.361328125, "learning_rate": 1.9846364505505805e-05, "loss": 1.116, "num_tokens": 1849357482.0, "step": 979 }, { "epoch": 0.1746880570409982, "grad_norm": 0.294921875, "learning_rate": 1.9845886143522995e-05, "loss": 1.0836, "num_tokens": 1855643089.0, "step": 980 }, { "epoch": 0.1748663101604278, "grad_norm": 0.353515625, "learning_rate": 1.9845407044410622e-05, "loss": 1.0736, "num_tokens": 1861898617.0, "step": 981 }, { "epoch": 0.1750445632798574, "grad_norm": 0.314453125, "learning_rate": 1.9844927208208614e-05, "loss": 1.0649, "num_tokens": 1868156803.0, "step": 982 }, { "epoch": 0.175222816399287, "grad_norm": 0.359375, "learning_rate": 1.9844446634956946e-05, "loss": 1.1161, "num_tokens": 1874438707.0, "step": 983 }, { "epoch": 0.1754010695187166, "grad_norm": 0.3359375, "learning_rate": 1.984396532469567e-05, "loss": 1.0541, "num_tokens": 1880722511.0, "step": 984 }, { "epoch": 0.17557932263814616, "grad_norm": 0.345703125, "learning_rate": 1.9843483277464894e-05, "loss": 1.077, "num_tokens": 1887006618.0, "step": 985 }, { "epoch": 0.17575757575757575, "grad_norm": 0.337890625, "learning_rate": 1.9843000493304788e-05, "loss": 1.0784, "num_tokens": 1893265845.0, "step": 986 }, { "epoch": 0.17593582887700535, "grad_norm": 0.333984375, "learning_rate": 1.984251697225558e-05, "loss": 1.0883, "num_tokens": 1899523277.0, "step": 987 }, { "epoch": 0.17611408199643494, "grad_norm": 0.322265625, "learning_rate": 1.984203271435756e-05, "loss": 1.0761, "num_tokens": 1905807448.0, "step": 988 }, { "epoch": 0.17629233511586453, "grad_norm": 0.275390625, "learning_rate": 1.9841547719651083e-05, "loss": 1.0693, "num_tokens": 1912091463.0, "step": 989 }, { "epoch": 0.17647058823529413, "grad_norm": 0.326171875, "learning_rate": 1.984106198817657e-05, "loss": 1.0795, "num_tokens": 1918348660.0, "step": 990 }, { "epoch": 0.1766488413547237, "grad_norm": 0.294921875, "learning_rate": 1.984057551997449e-05, "loss": 1.1056, "num_tokens": 1924631487.0, "step": 991 }, { "epoch": 0.1768270944741533, "grad_norm": 0.275390625, "learning_rate": 1.984008831508538e-05, "loss": 1.0591, "num_tokens": 1930913360.0, "step": 992 }, { "epoch": 0.17700534759358288, "grad_norm": 0.3203125, "learning_rate": 1.9839600373549847e-05, "loss": 1.0844, "num_tokens": 1937200074.0, "step": 993 }, { "epoch": 0.17718360071301248, "grad_norm": 0.29296875, "learning_rate": 1.983911169540854e-05, "loss": 1.0589, "num_tokens": 1943460246.0, "step": 994 }, { "epoch": 0.17736185383244207, "grad_norm": 0.345703125, "learning_rate": 1.9838622280702186e-05, "loss": 1.1008, "num_tokens": 1949743187.0, "step": 995 }, { "epoch": 0.17754010695187167, "grad_norm": 0.294921875, "learning_rate": 1.9838132129471568e-05, "loss": 1.0664, "num_tokens": 1956026786.0, "step": 996 }, { "epoch": 0.17771836007130123, "grad_norm": 0.345703125, "learning_rate": 1.9837641241757535e-05, "loss": 1.0828, "num_tokens": 1962283294.0, "step": 997 }, { "epoch": 0.17789661319073083, "grad_norm": 0.314453125, "learning_rate": 1.9837149617600983e-05, "loss": 1.0755, "num_tokens": 1968541339.0, "step": 998 }, { "epoch": 0.17807486631016042, "grad_norm": 0.373046875, "learning_rate": 1.9836657257042887e-05, "loss": 1.0442, "num_tokens": 1974826072.0, "step": 999 }, { "epoch": 0.17825311942959002, "grad_norm": 0.30859375, "learning_rate": 1.983616416012427e-05, "loss": 1.0356, "num_tokens": 1981109082.0, "step": 1000 }, { "epoch": 0.1784313725490196, "grad_norm": 0.365234375, "learning_rate": 1.983567032688623e-05, "loss": 1.0719, "num_tokens": 1987393478.0, "step": 1001 }, { "epoch": 0.1786096256684492, "grad_norm": 0.353515625, "learning_rate": 1.983517575736991e-05, "loss": 1.1225, "num_tokens": 1993676005.0, "step": 1002 }, { "epoch": 0.1787878787878788, "grad_norm": 0.361328125, "learning_rate": 1.9834680451616524e-05, "loss": 1.109, "num_tokens": 1999935526.0, "step": 1003 }, { "epoch": 0.17896613190730837, "grad_norm": 0.32421875, "learning_rate": 1.983418440966735e-05, "loss": 1.0675, "num_tokens": 2006220568.0, "step": 1004 }, { "epoch": 0.17914438502673796, "grad_norm": 0.296875, "learning_rate": 1.983368763156372e-05, "loss": 1.0716, "num_tokens": 2012477787.0, "step": 1005 }, { "epoch": 0.17932263814616756, "grad_norm": 0.291015625, "learning_rate": 1.983319011734703e-05, "loss": 1.0656, "num_tokens": 2018761087.0, "step": 1006 }, { "epoch": 0.17950089126559715, "grad_norm": 0.27734375, "learning_rate": 1.9832691867058744e-05, "loss": 1.0859, "num_tokens": 2025039064.0, "step": 1007 }, { "epoch": 0.17967914438502675, "grad_norm": 0.26953125, "learning_rate": 1.9832192880740374e-05, "loss": 1.0353, "num_tokens": 2031298173.0, "step": 1008 }, { "epoch": 0.17985739750445634, "grad_norm": 0.361328125, "learning_rate": 1.9831693158433504e-05, "loss": 1.0572, "num_tokens": 2037583001.0, "step": 1009 }, { "epoch": 0.1800356506238859, "grad_norm": 0.2890625, "learning_rate": 1.9831192700179774e-05, "loss": 1.0719, "num_tokens": 2043867782.0, "step": 1010 }, { "epoch": 0.1802139037433155, "grad_norm": 0.380859375, "learning_rate": 1.9830691506020893e-05, "loss": 1.0697, "num_tokens": 2050133066.0, "step": 1011 }, { "epoch": 0.1803921568627451, "grad_norm": 0.359375, "learning_rate": 1.9830189575998615e-05, "loss": 1.105, "num_tokens": 2056415300.0, "step": 1012 }, { "epoch": 0.1805704099821747, "grad_norm": 0.314453125, "learning_rate": 1.9829686910154773e-05, "loss": 1.0785, "num_tokens": 2062682097.0, "step": 1013 }, { "epoch": 0.18074866310160428, "grad_norm": 0.318359375, "learning_rate": 1.9829183508531256e-05, "loss": 1.1088, "num_tokens": 2068944356.0, "step": 1014 }, { "epoch": 0.18092691622103388, "grad_norm": 0.314453125, "learning_rate": 1.9828679371170008e-05, "loss": 1.0848, "num_tokens": 2075227559.0, "step": 1015 }, { "epoch": 0.18110516934046345, "grad_norm": 0.283203125, "learning_rate": 1.9828174498113046e-05, "loss": 1.0528, "num_tokens": 2081512804.0, "step": 1016 }, { "epoch": 0.18128342245989304, "grad_norm": 0.3125, "learning_rate": 1.982766888940243e-05, "loss": 1.0734, "num_tokens": 2087794406.0, "step": 1017 }, { "epoch": 0.18146167557932263, "grad_norm": 0.28515625, "learning_rate": 1.9827162545080303e-05, "loss": 1.0906, "num_tokens": 2094065463.0, "step": 1018 }, { "epoch": 0.18163992869875223, "grad_norm": 0.33984375, "learning_rate": 1.982665546518885e-05, "loss": 1.0678, "num_tokens": 2100337233.0, "step": 1019 }, { "epoch": 0.18181818181818182, "grad_norm": 0.30859375, "learning_rate": 1.9826147649770333e-05, "loss": 1.0748, "num_tokens": 2106588251.0, "step": 1020 }, { "epoch": 0.18199643493761142, "grad_norm": 0.328125, "learning_rate": 1.9825639098867067e-05, "loss": 1.0428, "num_tokens": 2112866131.0, "step": 1021 }, { "epoch": 0.182174688057041, "grad_norm": 0.3125, "learning_rate": 1.9825129812521424e-05, "loss": 1.0906, "num_tokens": 2119148040.0, "step": 1022 }, { "epoch": 0.18235294117647058, "grad_norm": 0.326171875, "learning_rate": 1.982461979077585e-05, "loss": 1.0606, "num_tokens": 2125400123.0, "step": 1023 }, { "epoch": 0.18253119429590017, "grad_norm": 0.265625, "learning_rate": 1.9824109033672843e-05, "loss": 1.0738, "num_tokens": 2131684096.0, "step": 1024 }, { "epoch": 0.18270944741532977, "grad_norm": 0.314453125, "learning_rate": 1.9823597541254964e-05, "loss": 1.0581, "num_tokens": 2137944093.0, "step": 1025 }, { "epoch": 0.18288770053475936, "grad_norm": 0.267578125, "learning_rate": 1.9823085313564833e-05, "loss": 1.0611, "num_tokens": 2144224056.0, "step": 1026 }, { "epoch": 0.18306595365418896, "grad_norm": 0.353515625, "learning_rate": 1.982257235064514e-05, "loss": 1.0868, "num_tokens": 2150488540.0, "step": 1027 }, { "epoch": 0.18324420677361855, "grad_norm": 0.26171875, "learning_rate": 1.982205865253863e-05, "loss": 1.0518, "num_tokens": 2156771789.0, "step": 1028 }, { "epoch": 0.18342245989304812, "grad_norm": 0.40234375, "learning_rate": 1.98215442192881e-05, "loss": 1.0962, "num_tokens": 2163032114.0, "step": 1029 }, { "epoch": 0.1836007130124777, "grad_norm": 0.3125, "learning_rate": 1.982102905093643e-05, "loss": 1.0476, "num_tokens": 2169316929.0, "step": 1030 }, { "epoch": 0.1837789661319073, "grad_norm": 0.388671875, "learning_rate": 1.9820513147526543e-05, "loss": 1.0828, "num_tokens": 2175569878.0, "step": 1031 }, { "epoch": 0.1839572192513369, "grad_norm": 0.3515625, "learning_rate": 1.9819996509101424e-05, "loss": 1.0963, "num_tokens": 2181849713.0, "step": 1032 }, { "epoch": 0.1841354723707665, "grad_norm": 0.373046875, "learning_rate": 1.9819479135704136e-05, "loss": 1.1047, "num_tokens": 2188105052.0, "step": 1033 }, { "epoch": 0.1843137254901961, "grad_norm": 0.373046875, "learning_rate": 1.9818961027377787e-05, "loss": 1.0511, "num_tokens": 2194387698.0, "step": 1034 }, { "epoch": 0.18449197860962566, "grad_norm": 0.318359375, "learning_rate": 1.9818442184165547e-05, "loss": 1.0858, "num_tokens": 2200671297.0, "step": 1035 }, { "epoch": 0.18467023172905525, "grad_norm": 0.337890625, "learning_rate": 1.981792260611066e-05, "loss": 1.0508, "num_tokens": 2206902355.0, "step": 1036 }, { "epoch": 0.18484848484848485, "grad_norm": 0.263671875, "learning_rate": 1.9817402293256413e-05, "loss": 1.0814, "num_tokens": 2213185594.0, "step": 1037 }, { "epoch": 0.18502673796791444, "grad_norm": 0.31640625, "learning_rate": 1.9816881245646166e-05, "loss": 1.098, "num_tokens": 2219464063.0, "step": 1038 }, { "epoch": 0.18520499108734403, "grad_norm": 0.24609375, "learning_rate": 1.9816359463323346e-05, "loss": 1.0857, "num_tokens": 2225748327.0, "step": 1039 }, { "epoch": 0.18538324420677363, "grad_norm": 0.32421875, "learning_rate": 1.9815836946331425e-05, "loss": 1.0733, "num_tokens": 2232011061.0, "step": 1040 }, { "epoch": 0.1855614973262032, "grad_norm": 0.255859375, "learning_rate": 1.9815313694713945e-05, "loss": 1.0461, "num_tokens": 2238260382.0, "step": 1041 }, { "epoch": 0.1857397504456328, "grad_norm": 0.34375, "learning_rate": 1.9814789708514514e-05, "loss": 1.0874, "num_tokens": 2244541974.0, "step": 1042 }, { "epoch": 0.18591800356506238, "grad_norm": 0.298828125, "learning_rate": 1.981426498777679e-05, "loss": 1.0723, "num_tokens": 2250823696.0, "step": 1043 }, { "epoch": 0.18609625668449198, "grad_norm": 0.349609375, "learning_rate": 1.98137395325445e-05, "loss": 1.064, "num_tokens": 2257070324.0, "step": 1044 }, { "epoch": 0.18627450980392157, "grad_norm": 0.298828125, "learning_rate": 1.981321334286143e-05, "loss": 1.0446, "num_tokens": 2263327937.0, "step": 1045 }, { "epoch": 0.18645276292335117, "grad_norm": 0.322265625, "learning_rate": 1.9812686418771435e-05, "loss": 1.0983, "num_tokens": 2269609466.0, "step": 1046 }, { "epoch": 0.18663101604278076, "grad_norm": 0.283203125, "learning_rate": 1.9812158760318408e-05, "loss": 1.0802, "num_tokens": 2275892175.0, "step": 1047 }, { "epoch": 0.18680926916221033, "grad_norm": 0.333984375, "learning_rate": 1.9811630367546336e-05, "loss": 1.0886, "num_tokens": 2282176609.0, "step": 1048 }, { "epoch": 0.18698752228163992, "grad_norm": 0.28125, "learning_rate": 1.9811101240499237e-05, "loss": 1.0582, "num_tokens": 2288459900.0, "step": 1049 }, { "epoch": 0.18716577540106952, "grad_norm": 0.341796875, "learning_rate": 1.9810571379221208e-05, "loss": 1.1146, "num_tokens": 2294742143.0, "step": 1050 }, { "epoch": 0.1873440285204991, "grad_norm": 0.255859375, "learning_rate": 1.9810040783756402e-05, "loss": 1.0287, "num_tokens": 2301027469.0, "step": 1051 }, { "epoch": 0.1875222816399287, "grad_norm": 0.3671875, "learning_rate": 1.980950945414904e-05, "loss": 1.0402, "num_tokens": 2307311770.0, "step": 1052 }, { "epoch": 0.1877005347593583, "grad_norm": 0.302734375, "learning_rate": 1.9808977390443388e-05, "loss": 1.0798, "num_tokens": 2313577469.0, "step": 1053 }, { "epoch": 0.18787878787878787, "grad_norm": 0.376953125, "learning_rate": 1.9808444592683785e-05, "loss": 1.0565, "num_tokens": 2319860771.0, "step": 1054 }, { "epoch": 0.18805704099821746, "grad_norm": 0.361328125, "learning_rate": 1.9807911060914636e-05, "loss": 1.0735, "num_tokens": 2326143518.0, "step": 1055 }, { "epoch": 0.18823529411764706, "grad_norm": 0.3359375, "learning_rate": 1.9807376795180393e-05, "loss": 1.0826, "num_tokens": 2332411276.0, "step": 1056 }, { "epoch": 0.18841354723707665, "grad_norm": 0.353515625, "learning_rate": 1.980684179552558e-05, "loss": 1.0761, "num_tokens": 2338694654.0, "step": 1057 }, { "epoch": 0.18859180035650625, "grad_norm": 0.265625, "learning_rate": 1.9806306061994775e-05, "loss": 1.0371, "num_tokens": 2344979087.0, "step": 1058 }, { "epoch": 0.18877005347593584, "grad_norm": 0.330078125, "learning_rate": 1.9805769594632628e-05, "loss": 1.0542, "num_tokens": 2351247263.0, "step": 1059 }, { "epoch": 0.1889483065953654, "grad_norm": 0.275390625, "learning_rate": 1.9805232393483832e-05, "loss": 1.0609, "num_tokens": 2357530306.0, "step": 1060 }, { "epoch": 0.189126559714795, "grad_norm": 0.33984375, "learning_rate": 1.9804694458593162e-05, "loss": 1.084, "num_tokens": 2363814179.0, "step": 1061 }, { "epoch": 0.1893048128342246, "grad_norm": 0.2734375, "learning_rate": 1.9804155790005444e-05, "loss": 1.0811, "num_tokens": 2370071751.0, "step": 1062 }, { "epoch": 0.1894830659536542, "grad_norm": 0.333984375, "learning_rate": 1.980361638776556e-05, "loss": 1.0564, "num_tokens": 2376350776.0, "step": 1063 }, { "epoch": 0.18966131907308378, "grad_norm": 0.298828125, "learning_rate": 1.980307625191846e-05, "loss": 1.1066, "num_tokens": 2382588015.0, "step": 1064 }, { "epoch": 0.18983957219251338, "grad_norm": 0.33984375, "learning_rate": 1.980253538250915e-05, "loss": 1.0479, "num_tokens": 2388843533.0, "step": 1065 }, { "epoch": 0.19001782531194297, "grad_norm": 0.3046875, "learning_rate": 1.980199377958271e-05, "loss": 1.0782, "num_tokens": 2395064693.0, "step": 1066 }, { "epoch": 0.19019607843137254, "grad_norm": 0.330078125, "learning_rate": 1.9801451443184266e-05, "loss": 1.0458, "num_tokens": 2401330971.0, "step": 1067 }, { "epoch": 0.19037433155080213, "grad_norm": 0.3125, "learning_rate": 1.9800908373359012e-05, "loss": 1.1086, "num_tokens": 2407614835.0, "step": 1068 }, { "epoch": 0.19055258467023173, "grad_norm": 0.357421875, "learning_rate": 1.98003645701522e-05, "loss": 1.0897, "num_tokens": 2413870311.0, "step": 1069 }, { "epoch": 0.19073083778966132, "grad_norm": 0.3203125, "learning_rate": 1.979982003360915e-05, "loss": 1.0816, "num_tokens": 2420151753.0, "step": 1070 }, { "epoch": 0.19090909090909092, "grad_norm": 0.32421875, "learning_rate": 1.9799274763775234e-05, "loss": 1.0684, "num_tokens": 2426435365.0, "step": 1071 }, { "epoch": 0.1910873440285205, "grad_norm": 0.349609375, "learning_rate": 1.9798728760695893e-05, "loss": 1.0921, "num_tokens": 2432704817.0, "step": 1072 }, { "epoch": 0.19126559714795008, "grad_norm": 0.291015625, "learning_rate": 1.979818202441662e-05, "loss": 1.0776, "num_tokens": 2438960563.0, "step": 1073 }, { "epoch": 0.19144385026737967, "grad_norm": 0.318359375, "learning_rate": 1.979763455498298e-05, "loss": 1.0707, "num_tokens": 2445244570.0, "step": 1074 }, { "epoch": 0.19162210338680927, "grad_norm": 0.2470703125, "learning_rate": 1.9797086352440593e-05, "loss": 1.042, "num_tokens": 2451502924.0, "step": 1075 }, { "epoch": 0.19180035650623886, "grad_norm": 0.298828125, "learning_rate": 1.979653741683514e-05, "loss": 1.0814, "num_tokens": 2457785745.0, "step": 1076 }, { "epoch": 0.19197860962566846, "grad_norm": 0.2255859375, "learning_rate": 1.979598774821236e-05, "loss": 1.1015, "num_tokens": 2464060520.0, "step": 1077 }, { "epoch": 0.19215686274509805, "grad_norm": 0.318359375, "learning_rate": 1.9795437346618064e-05, "loss": 1.0753, "num_tokens": 2470344082.0, "step": 1078 }, { "epoch": 0.19233511586452762, "grad_norm": 0.259765625, "learning_rate": 1.979488621209811e-05, "loss": 1.0632, "num_tokens": 2476605382.0, "step": 1079 }, { "epoch": 0.1925133689839572, "grad_norm": 0.3046875, "learning_rate": 1.9794334344698432e-05, "loss": 1.0598, "num_tokens": 2482856854.0, "step": 1080 }, { "epoch": 0.1926916221033868, "grad_norm": 0.259765625, "learning_rate": 1.9793781744465014e-05, "loss": 1.089, "num_tokens": 2489125644.0, "step": 1081 }, { "epoch": 0.1928698752228164, "grad_norm": 0.283203125, "learning_rate": 1.9793228411443905e-05, "loss": 1.0667, "num_tokens": 2495384120.0, "step": 1082 }, { "epoch": 0.193048128342246, "grad_norm": 0.25, "learning_rate": 1.9792674345681207e-05, "loss": 1.0859, "num_tokens": 2501656365.0, "step": 1083 }, { "epoch": 0.1932263814616756, "grad_norm": 0.341796875, "learning_rate": 1.97921195472231e-05, "loss": 1.0831, "num_tokens": 2507937479.0, "step": 1084 }, { "epoch": 0.19340463458110516, "grad_norm": 0.275390625, "learning_rate": 1.9791564016115812e-05, "loss": 1.0523, "num_tokens": 2514222372.0, "step": 1085 }, { "epoch": 0.19358288770053475, "grad_norm": 0.33984375, "learning_rate": 1.9791007752405635e-05, "loss": 1.0619, "num_tokens": 2520506367.0, "step": 1086 }, { "epoch": 0.19376114081996434, "grad_norm": 0.310546875, "learning_rate": 1.979045075613892e-05, "loss": 1.0902, "num_tokens": 2526790375.0, "step": 1087 }, { "epoch": 0.19393939393939394, "grad_norm": 0.341796875, "learning_rate": 1.9789893027362087e-05, "loss": 1.0988, "num_tokens": 2533073732.0, "step": 1088 }, { "epoch": 0.19411764705882353, "grad_norm": 0.341796875, "learning_rate": 1.9789334566121606e-05, "loss": 1.058, "num_tokens": 2539357479.0, "step": 1089 }, { "epoch": 0.19429590017825313, "grad_norm": 0.3125, "learning_rate": 1.9788775372464017e-05, "loss": 1.1014, "num_tokens": 2545542687.0, "step": 1090 }, { "epoch": 0.19447415329768272, "grad_norm": 0.32421875, "learning_rate": 1.978821544643592e-05, "loss": 1.0964, "num_tokens": 2551827643.0, "step": 1091 }, { "epoch": 0.1946524064171123, "grad_norm": 0.310546875, "learning_rate": 1.9787654788083968e-05, "loss": 1.0778, "num_tokens": 2558112005.0, "step": 1092 }, { "epoch": 0.19483065953654188, "grad_norm": 0.27734375, "learning_rate": 1.978709339745489e-05, "loss": 1.0391, "num_tokens": 2564397135.0, "step": 1093 }, { "epoch": 0.19500891265597148, "grad_norm": 0.328125, "learning_rate": 1.978653127459546e-05, "loss": 1.0653, "num_tokens": 2570635916.0, "step": 1094 }, { "epoch": 0.19518716577540107, "grad_norm": 0.265625, "learning_rate": 1.9785968419552518e-05, "loss": 1.0957, "num_tokens": 2576895673.0, "step": 1095 }, { "epoch": 0.19536541889483067, "grad_norm": 0.314453125, "learning_rate": 1.9785404832372968e-05, "loss": 1.0641, "num_tokens": 2583179659.0, "step": 1096 }, { "epoch": 0.19554367201426026, "grad_norm": 0.271484375, "learning_rate": 1.9784840513103777e-05, "loss": 1.0485, "num_tokens": 2589463118.0, "step": 1097 }, { "epoch": 0.19572192513368983, "grad_norm": 0.3203125, "learning_rate": 1.978427546179197e-05, "loss": 1.0433, "num_tokens": 2595747292.0, "step": 1098 }, { "epoch": 0.19590017825311942, "grad_norm": 0.306640625, "learning_rate": 1.978370967848463e-05, "loss": 1.0442, "num_tokens": 2602033296.0, "step": 1099 }, { "epoch": 0.19607843137254902, "grad_norm": 0.302734375, "learning_rate": 1.97831431632289e-05, "loss": 1.0255, "num_tokens": 2608311989.0, "step": 1100 }, { "epoch": 0.1962566844919786, "grad_norm": 0.275390625, "learning_rate": 1.9782575916071998e-05, "loss": 1.0859, "num_tokens": 2614569031.0, "step": 1101 }, { "epoch": 0.1964349376114082, "grad_norm": 0.3046875, "learning_rate": 1.978200793706119e-05, "loss": 1.0404, "num_tokens": 2620852950.0, "step": 1102 }, { "epoch": 0.1966131907308378, "grad_norm": 0.2734375, "learning_rate": 1.9781439226243798e-05, "loss": 1.0585, "num_tokens": 2627127977.0, "step": 1103 }, { "epoch": 0.19679144385026737, "grad_norm": 0.2734375, "learning_rate": 1.978086978366722e-05, "loss": 1.0774, "num_tokens": 2633408123.0, "step": 1104 }, { "epoch": 0.19696969696969696, "grad_norm": 0.267578125, "learning_rate": 1.9780299609378906e-05, "loss": 1.064, "num_tokens": 2639692503.0, "step": 1105 }, { "epoch": 0.19714795008912656, "grad_norm": 0.283203125, "learning_rate": 1.9779728703426368e-05, "loss": 1.0642, "num_tokens": 2645976249.0, "step": 1106 }, { "epoch": 0.19732620320855615, "grad_norm": 0.294921875, "learning_rate": 1.9779157065857182e-05, "loss": 1.0641, "num_tokens": 2652259323.0, "step": 1107 }, { "epoch": 0.19750445632798574, "grad_norm": 0.30078125, "learning_rate": 1.977858469671898e-05, "loss": 1.0648, "num_tokens": 2658541951.0, "step": 1108 }, { "epoch": 0.19768270944741534, "grad_norm": 0.30078125, "learning_rate": 1.977801159605946e-05, "loss": 1.1102, "num_tokens": 2664797895.0, "step": 1109 }, { "epoch": 0.19786096256684493, "grad_norm": 0.28125, "learning_rate": 1.9777437763926377e-05, "loss": 1.0303, "num_tokens": 2671080866.0, "step": 1110 }, { "epoch": 0.1980392156862745, "grad_norm": 0.29296875, "learning_rate": 1.9776863200367547e-05, "loss": 1.0599, "num_tokens": 2677332666.0, "step": 1111 }, { "epoch": 0.1982174688057041, "grad_norm": 0.2734375, "learning_rate": 1.9776287905430848e-05, "loss": 1.0789, "num_tokens": 2683579182.0, "step": 1112 }, { "epoch": 0.1983957219251337, "grad_norm": 0.296875, "learning_rate": 1.977571187916423e-05, "loss": 1.0665, "num_tokens": 2689862344.0, "step": 1113 }, { "epoch": 0.19857397504456328, "grad_norm": 0.267578125, "learning_rate": 1.9775135121615675e-05, "loss": 1.0517, "num_tokens": 2696104612.0, "step": 1114 }, { "epoch": 0.19875222816399288, "grad_norm": 0.287109375, "learning_rate": 1.977455763283326e-05, "loss": 1.0551, "num_tokens": 2702382044.0, "step": 1115 }, { "epoch": 0.19893048128342247, "grad_norm": 0.291015625, "learning_rate": 1.9773979412865102e-05, "loss": 1.0618, "num_tokens": 2708663540.0, "step": 1116 }, { "epoch": 0.19910873440285204, "grad_norm": 0.2421875, "learning_rate": 1.9773400461759384e-05, "loss": 1.0989, "num_tokens": 2714945117.0, "step": 1117 }, { "epoch": 0.19928698752228163, "grad_norm": 0.296875, "learning_rate": 1.977282077956435e-05, "loss": 1.0849, "num_tokens": 2721211230.0, "step": 1118 }, { "epoch": 0.19946524064171123, "grad_norm": 0.265625, "learning_rate": 1.9772240366328305e-05, "loss": 1.0684, "num_tokens": 2727474958.0, "step": 1119 }, { "epoch": 0.19964349376114082, "grad_norm": 0.27734375, "learning_rate": 1.977165922209961e-05, "loss": 1.0716, "num_tokens": 2733759474.0, "step": 1120 }, { "epoch": 0.19982174688057042, "grad_norm": 0.28515625, "learning_rate": 1.9771077346926706e-05, "loss": 1.0665, "num_tokens": 2740042817.0, "step": 1121 }, { "epoch": 0.2, "grad_norm": 0.263671875, "learning_rate": 1.9770494740858065e-05, "loss": 1.0817, "num_tokens": 2746298759.0, "step": 1122 }, { "epoch": 0.20017825311942958, "grad_norm": 0.291015625, "learning_rate": 1.976991140394225e-05, "loss": 1.0688, "num_tokens": 2752576624.0, "step": 1123 }, { "epoch": 0.20035650623885917, "grad_norm": 0.2890625, "learning_rate": 1.9769327336227856e-05, "loss": 1.0772, "num_tokens": 2758861876.0, "step": 1124 }, { "epoch": 0.20053475935828877, "grad_norm": 0.263671875, "learning_rate": 1.976874253776356e-05, "loss": 1.0767, "num_tokens": 2765142970.0, "step": 1125 }, { "epoch": 0.20071301247771836, "grad_norm": 0.3046875, "learning_rate": 1.9768157008598093e-05, "loss": 1.0667, "num_tokens": 2771424813.0, "step": 1126 }, { "epoch": 0.20089126559714796, "grad_norm": 0.26171875, "learning_rate": 1.9767570748780258e-05, "loss": 1.0652, "num_tokens": 2777708311.0, "step": 1127 }, { "epoch": 0.20106951871657755, "grad_norm": 0.259765625, "learning_rate": 1.976698375835889e-05, "loss": 1.0699, "num_tokens": 2783990530.0, "step": 1128 }, { "epoch": 0.20124777183600712, "grad_norm": 0.267578125, "learning_rate": 1.976639603738291e-05, "loss": 1.0571, "num_tokens": 2790261283.0, "step": 1129 }, { "epoch": 0.2014260249554367, "grad_norm": 0.271484375, "learning_rate": 1.9765807585901298e-05, "loss": 1.0826, "num_tokens": 2796522683.0, "step": 1130 }, { "epoch": 0.2016042780748663, "grad_norm": 0.26953125, "learning_rate": 1.9765218403963085e-05, "loss": 1.067, "num_tokens": 2802803906.0, "step": 1131 }, { "epoch": 0.2017825311942959, "grad_norm": 0.2578125, "learning_rate": 1.976462849161737e-05, "loss": 1.0542, "num_tokens": 2809087399.0, "step": 1132 }, { "epoch": 0.2019607843137255, "grad_norm": 0.255859375, "learning_rate": 1.9764037848913307e-05, "loss": 1.0552, "num_tokens": 2815349481.0, "step": 1133 }, { "epoch": 0.2021390374331551, "grad_norm": 0.265625, "learning_rate": 1.976344647590012e-05, "loss": 1.1022, "num_tokens": 2821631271.0, "step": 1134 }, { "epoch": 0.20231729055258468, "grad_norm": 0.283203125, "learning_rate": 1.976285437262708e-05, "loss": 1.0726, "num_tokens": 2827914091.0, "step": 1135 }, { "epoch": 0.20249554367201425, "grad_norm": 0.302734375, "learning_rate": 1.9762261539143534e-05, "loss": 1.0834, "num_tokens": 2834150836.0, "step": 1136 }, { "epoch": 0.20267379679144384, "grad_norm": 0.275390625, "learning_rate": 1.976166797549888e-05, "loss": 1.0684, "num_tokens": 2840435682.0, "step": 1137 }, { "epoch": 0.20285204991087344, "grad_norm": 0.318359375, "learning_rate": 1.9761073681742588e-05, "loss": 1.0836, "num_tokens": 2846717758.0, "step": 1138 }, { "epoch": 0.20303030303030303, "grad_norm": 0.26171875, "learning_rate": 1.9760478657924166e-05, "loss": 1.0856, "num_tokens": 2852930935.0, "step": 1139 }, { "epoch": 0.20320855614973263, "grad_norm": 0.30859375, "learning_rate": 1.9759882904093204e-05, "loss": 1.0669, "num_tokens": 2859207192.0, "step": 1140 }, { "epoch": 0.20338680926916222, "grad_norm": 0.25, "learning_rate": 1.975928642029935e-05, "loss": 1.0602, "num_tokens": 2865459809.0, "step": 1141 }, { "epoch": 0.2035650623885918, "grad_norm": 0.291015625, "learning_rate": 1.9758689206592302e-05, "loss": 1.0563, "num_tokens": 2871742669.0, "step": 1142 }, { "epoch": 0.20374331550802138, "grad_norm": 0.296875, "learning_rate": 1.9758091263021835e-05, "loss": 1.0671, "num_tokens": 2878022846.0, "step": 1143 }, { "epoch": 0.20392156862745098, "grad_norm": 0.2578125, "learning_rate": 1.9757492589637768e-05, "loss": 1.0568, "num_tokens": 2884307429.0, "step": 1144 }, { "epoch": 0.20409982174688057, "grad_norm": 0.279296875, "learning_rate": 1.9756893186489995e-05, "loss": 1.0599, "num_tokens": 2890589896.0, "step": 1145 }, { "epoch": 0.20427807486631017, "grad_norm": 0.259765625, "learning_rate": 1.9756293053628458e-05, "loss": 1.0809, "num_tokens": 2896875801.0, "step": 1146 }, { "epoch": 0.20445632798573976, "grad_norm": 0.314453125, "learning_rate": 1.9755692191103165e-05, "loss": 1.0843, "num_tokens": 2903150510.0, "step": 1147 }, { "epoch": 0.20463458110516933, "grad_norm": 0.2734375, "learning_rate": 1.9755090598964196e-05, "loss": 1.0734, "num_tokens": 2909418069.0, "step": 1148 }, { "epoch": 0.20481283422459892, "grad_norm": 0.32421875, "learning_rate": 1.9754488277261676e-05, "loss": 1.0811, "num_tokens": 2915702690.0, "step": 1149 }, { "epoch": 0.20499108734402852, "grad_norm": 0.244140625, "learning_rate": 1.9753885226045792e-05, "loss": 1.0469, "num_tokens": 2921986645.0, "step": 1150 }, { "epoch": 0.2051693404634581, "grad_norm": 0.341796875, "learning_rate": 1.9753281445366804e-05, "loss": 1.055, "num_tokens": 2928203540.0, "step": 1151 }, { "epoch": 0.2053475935828877, "grad_norm": 0.2734375, "learning_rate": 1.9752676935275023e-05, "loss": 1.0678, "num_tokens": 2934483550.0, "step": 1152 }, { "epoch": 0.2055258467023173, "grad_norm": 0.328125, "learning_rate": 1.975207169582082e-05, "loss": 1.0625, "num_tokens": 2940768149.0, "step": 1153 }, { "epoch": 0.2057040998217469, "grad_norm": 0.30078125, "learning_rate": 1.975146572705463e-05, "loss": 1.1038, "num_tokens": 2947040810.0, "step": 1154 }, { "epoch": 0.20588235294117646, "grad_norm": 0.333984375, "learning_rate": 1.9750859029026954e-05, "loss": 1.0584, "num_tokens": 2953299835.0, "step": 1155 }, { "epoch": 0.20606060606060606, "grad_norm": 0.275390625, "learning_rate": 1.9750251601788343e-05, "loss": 1.0337, "num_tokens": 2959553248.0, "step": 1156 }, { "epoch": 0.20623885918003565, "grad_norm": 0.330078125, "learning_rate": 1.9749643445389415e-05, "loss": 1.0618, "num_tokens": 2965837864.0, "step": 1157 }, { "epoch": 0.20641711229946524, "grad_norm": 0.283203125, "learning_rate": 1.9749034559880846e-05, "loss": 1.0825, "num_tokens": 2972120421.0, "step": 1158 }, { "epoch": 0.20659536541889484, "grad_norm": 0.28515625, "learning_rate": 1.974842494531338e-05, "loss": 1.0409, "num_tokens": 2978342079.0, "step": 1159 }, { "epoch": 0.20677361853832443, "grad_norm": 0.29296875, "learning_rate": 1.9747814601737812e-05, "loss": 1.0902, "num_tokens": 2984573511.0, "step": 1160 }, { "epoch": 0.206951871657754, "grad_norm": 0.26953125, "learning_rate": 1.9747203529205004e-05, "loss": 1.041, "num_tokens": 2990853380.0, "step": 1161 }, { "epoch": 0.2071301247771836, "grad_norm": 0.26953125, "learning_rate": 1.9746591727765874e-05, "loss": 1.0436, "num_tokens": 2997139543.0, "step": 1162 }, { "epoch": 0.2073083778966132, "grad_norm": 0.26171875, "learning_rate": 1.9745979197471406e-05, "loss": 1.046, "num_tokens": 3003424034.0, "step": 1163 }, { "epoch": 0.20748663101604278, "grad_norm": 0.298828125, "learning_rate": 1.9745365938372643e-05, "loss": 1.0668, "num_tokens": 3009685313.0, "step": 1164 }, { "epoch": 0.20766488413547238, "grad_norm": 0.263671875, "learning_rate": 1.974475195052069e-05, "loss": 1.0747, "num_tokens": 3015969820.0, "step": 1165 }, { "epoch": 0.20784313725490197, "grad_norm": 0.298828125, "learning_rate": 1.9744137233966703e-05, "loss": 1.076, "num_tokens": 3022234122.0, "step": 1166 }, { "epoch": 0.20802139037433154, "grad_norm": 0.24609375, "learning_rate": 1.974352178876191e-05, "loss": 1.0736, "num_tokens": 3028473469.0, "step": 1167 }, { "epoch": 0.20819964349376113, "grad_norm": 0.234375, "learning_rate": 1.9742905614957603e-05, "loss": 1.0742, "num_tokens": 3034754417.0, "step": 1168 }, { "epoch": 0.20837789661319073, "grad_norm": 0.28125, "learning_rate": 1.9742288712605118e-05, "loss": 1.0611, "num_tokens": 3041034670.0, "step": 1169 }, { "epoch": 0.20855614973262032, "grad_norm": 0.263671875, "learning_rate": 1.9741671081755866e-05, "loss": 1.0565, "num_tokens": 3047304668.0, "step": 1170 }, { "epoch": 0.20873440285204992, "grad_norm": 0.267578125, "learning_rate": 1.9741052722461314e-05, "loss": 1.0406, "num_tokens": 3053588378.0, "step": 1171 }, { "epoch": 0.2089126559714795, "grad_norm": 0.2734375, "learning_rate": 1.974043363477299e-05, "loss": 1.0856, "num_tokens": 3059872023.0, "step": 1172 }, { "epoch": 0.20909090909090908, "grad_norm": 0.26953125, "learning_rate": 1.9739813818742483e-05, "loss": 1.0446, "num_tokens": 3066157719.0, "step": 1173 }, { "epoch": 0.20926916221033867, "grad_norm": 0.251953125, "learning_rate": 1.9739193274421443e-05, "loss": 1.0857, "num_tokens": 3072441323.0, "step": 1174 }, { "epoch": 0.20944741532976827, "grad_norm": 0.294921875, "learning_rate": 1.973857200186158e-05, "loss": 1.0468, "num_tokens": 3078723836.0, "step": 1175 }, { "epoch": 0.20962566844919786, "grad_norm": 0.2314453125, "learning_rate": 1.973795000111466e-05, "loss": 1.0719, "num_tokens": 3084996137.0, "step": 1176 }, { "epoch": 0.20980392156862746, "grad_norm": 0.26953125, "learning_rate": 1.9737327272232522e-05, "loss": 1.0455, "num_tokens": 3091272684.0, "step": 1177 }, { "epoch": 0.20998217468805705, "grad_norm": 0.25390625, "learning_rate": 1.9736703815267052e-05, "loss": 1.0365, "num_tokens": 3097530615.0, "step": 1178 }, { "epoch": 0.21016042780748664, "grad_norm": 0.283203125, "learning_rate": 1.9736079630270208e-05, "loss": 1.0677, "num_tokens": 3103755384.0, "step": 1179 }, { "epoch": 0.2103386809269162, "grad_norm": 0.271484375, "learning_rate": 1.9735454717294004e-05, "loss": 1.0483, "num_tokens": 3110008486.0, "step": 1180 }, { "epoch": 0.2105169340463458, "grad_norm": 0.263671875, "learning_rate": 1.9734829076390505e-05, "loss": 1.0428, "num_tokens": 3116257831.0, "step": 1181 }, { "epoch": 0.2106951871657754, "grad_norm": 0.283203125, "learning_rate": 1.9734202707611853e-05, "loss": 1.0609, "num_tokens": 3122542666.0, "step": 1182 }, { "epoch": 0.210873440285205, "grad_norm": 0.294921875, "learning_rate": 1.9733575611010244e-05, "loss": 1.073, "num_tokens": 3128809960.0, "step": 1183 }, { "epoch": 0.2110516934046346, "grad_norm": 0.33984375, "learning_rate": 1.9732947786637927e-05, "loss": 1.0735, "num_tokens": 3135086510.0, "step": 1184 }, { "epoch": 0.21122994652406418, "grad_norm": 0.275390625, "learning_rate": 1.973231923454723e-05, "loss": 1.0357, "num_tokens": 3141369521.0, "step": 1185 }, { "epoch": 0.21140819964349375, "grad_norm": 0.25390625, "learning_rate": 1.9731689954790523e-05, "loss": 1.047, "num_tokens": 3147628088.0, "step": 1186 }, { "epoch": 0.21158645276292334, "grad_norm": 0.251953125, "learning_rate": 1.9731059947420246e-05, "loss": 1.0694, "num_tokens": 3153911177.0, "step": 1187 }, { "epoch": 0.21176470588235294, "grad_norm": 0.27734375, "learning_rate": 1.9730429212488894e-05, "loss": 1.0868, "num_tokens": 3160194986.0, "step": 1188 }, { "epoch": 0.21194295900178253, "grad_norm": 0.26953125, "learning_rate": 1.9729797750049034e-05, "loss": 1.0491, "num_tokens": 3166478620.0, "step": 1189 }, { "epoch": 0.21212121212121213, "grad_norm": 0.25390625, "learning_rate": 1.972916556015328e-05, "loss": 1.0839, "num_tokens": 3172735292.0, "step": 1190 }, { "epoch": 0.21229946524064172, "grad_norm": 0.263671875, "learning_rate": 1.972853264285431e-05, "loss": 1.0715, "num_tokens": 3179018631.0, "step": 1191 }, { "epoch": 0.2124777183600713, "grad_norm": 0.271484375, "learning_rate": 1.972789899820487e-05, "loss": 1.0333, "num_tokens": 3185285445.0, "step": 1192 }, { "epoch": 0.21265597147950088, "grad_norm": 0.263671875, "learning_rate": 1.972726462625777e-05, "loss": 1.0513, "num_tokens": 3191568342.0, "step": 1193 }, { "epoch": 0.21283422459893048, "grad_norm": 0.263671875, "learning_rate": 1.972662952706585e-05, "loss": 1.0651, "num_tokens": 3197809649.0, "step": 1194 }, { "epoch": 0.21301247771836007, "grad_norm": 0.2578125, "learning_rate": 1.972599370068205e-05, "loss": 1.0654, "num_tokens": 3204091639.0, "step": 1195 }, { "epoch": 0.21319073083778967, "grad_norm": 0.255859375, "learning_rate": 1.9725357147159355e-05, "loss": 1.0478, "num_tokens": 3210320014.0, "step": 1196 }, { "epoch": 0.21336898395721926, "grad_norm": 0.251953125, "learning_rate": 1.9724719866550795e-05, "loss": 1.0604, "num_tokens": 3216603944.0, "step": 1197 }, { "epoch": 0.21354723707664885, "grad_norm": 0.2392578125, "learning_rate": 1.9724081858909492e-05, "loss": 1.0494, "num_tokens": 3222831660.0, "step": 1198 }, { "epoch": 0.21372549019607842, "grad_norm": 0.28515625, "learning_rate": 1.9723443124288596e-05, "loss": 1.0568, "num_tokens": 3229081744.0, "step": 1199 }, { "epoch": 0.21390374331550802, "grad_norm": 0.259765625, "learning_rate": 1.9722803662741345e-05, "loss": 1.0611, "num_tokens": 3235333007.0, "step": 1200 }, { "epoch": 0.2140819964349376, "grad_norm": 0.287109375, "learning_rate": 1.9722163474321013e-05, "loss": 1.0747, "num_tokens": 3241607691.0, "step": 1201 }, { "epoch": 0.2142602495543672, "grad_norm": 0.26953125, "learning_rate": 1.972152255908096e-05, "loss": 1.0433, "num_tokens": 3247875631.0, "step": 1202 }, { "epoch": 0.2144385026737968, "grad_norm": 0.279296875, "learning_rate": 1.9720880917074584e-05, "loss": 1.0306, "num_tokens": 3254118364.0, "step": 1203 }, { "epoch": 0.2146167557932264, "grad_norm": 0.28125, "learning_rate": 1.972023854835536e-05, "loss": 1.0632, "num_tokens": 3260370953.0, "step": 1204 }, { "epoch": 0.21479500891265596, "grad_norm": 0.3046875, "learning_rate": 1.9719595452976815e-05, "loss": 1.0439, "num_tokens": 3266626814.0, "step": 1205 }, { "epoch": 0.21497326203208555, "grad_norm": 0.267578125, "learning_rate": 1.971895163099253e-05, "loss": 1.0598, "num_tokens": 3272911627.0, "step": 1206 }, { "epoch": 0.21515151515151515, "grad_norm": 0.2431640625, "learning_rate": 1.9718307082456163e-05, "loss": 1.0732, "num_tokens": 3279196387.0, "step": 1207 }, { "epoch": 0.21532976827094474, "grad_norm": 0.259765625, "learning_rate": 1.971766180742143e-05, "loss": 1.1081, "num_tokens": 3285443027.0, "step": 1208 }, { "epoch": 0.21550802139037434, "grad_norm": 0.2890625, "learning_rate": 1.971701580594209e-05, "loss": 1.0368, "num_tokens": 3291715449.0, "step": 1209 }, { "epoch": 0.21568627450980393, "grad_norm": 0.251953125, "learning_rate": 1.971636907807198e-05, "loss": 1.0732, "num_tokens": 3297986435.0, "step": 1210 }, { "epoch": 0.2158645276292335, "grad_norm": 0.29296875, "learning_rate": 1.9715721623864993e-05, "loss": 1.068, "num_tokens": 3304244337.0, "step": 1211 }, { "epoch": 0.2160427807486631, "grad_norm": 0.2451171875, "learning_rate": 1.9715073443375076e-05, "loss": 1.0494, "num_tokens": 3310528743.0, "step": 1212 }, { "epoch": 0.2162210338680927, "grad_norm": 0.33203125, "learning_rate": 1.9714424536656248e-05, "loss": 1.0618, "num_tokens": 3316812463.0, "step": 1213 }, { "epoch": 0.21639928698752228, "grad_norm": 0.255859375, "learning_rate": 1.9713774903762582e-05, "loss": 1.0562, "num_tokens": 3323079817.0, "step": 1214 }, { "epoch": 0.21657754010695188, "grad_norm": 0.30859375, "learning_rate": 1.9713124544748208e-05, "loss": 1.0568, "num_tokens": 3329333678.0, "step": 1215 }, { "epoch": 0.21675579322638147, "grad_norm": 0.267578125, "learning_rate": 1.9712473459667326e-05, "loss": 1.0465, "num_tokens": 3335561905.0, "step": 1216 }, { "epoch": 0.21693404634581104, "grad_norm": 0.265625, "learning_rate": 1.9711821648574185e-05, "loss": 1.0721, "num_tokens": 3341846230.0, "step": 1217 }, { "epoch": 0.21711229946524063, "grad_norm": 0.326171875, "learning_rate": 1.9711169111523103e-05, "loss": 1.0786, "num_tokens": 3348129787.0, "step": 1218 }, { "epoch": 0.21729055258467023, "grad_norm": 0.248046875, "learning_rate": 1.971051584856846e-05, "loss": 1.0447, "num_tokens": 3354412826.0, "step": 1219 }, { "epoch": 0.21746880570409982, "grad_norm": 0.33984375, "learning_rate": 1.9709861859764685e-05, "loss": 1.0418, "num_tokens": 3360678582.0, "step": 1220 }, { "epoch": 0.21764705882352942, "grad_norm": 0.25, "learning_rate": 1.9709207145166282e-05, "loss": 1.0766, "num_tokens": 3366950313.0, "step": 1221 }, { "epoch": 0.217825311942959, "grad_norm": 0.3359375, "learning_rate": 1.9708551704827807e-05, "loss": 1.0498, "num_tokens": 3373234574.0, "step": 1222 }, { "epoch": 0.2180035650623886, "grad_norm": 0.267578125, "learning_rate": 1.9707895538803873e-05, "loss": 1.0457, "num_tokens": 3379516445.0, "step": 1223 }, { "epoch": 0.21818181818181817, "grad_norm": 0.34375, "learning_rate": 1.9707238647149168e-05, "loss": 1.0741, "num_tokens": 3385711081.0, "step": 1224 }, { "epoch": 0.21836007130124777, "grad_norm": 0.28125, "learning_rate": 1.970658102991842e-05, "loss": 1.0745, "num_tokens": 3391993257.0, "step": 1225 }, { "epoch": 0.21853832442067736, "grad_norm": 0.310546875, "learning_rate": 1.9705922687166433e-05, "loss": 1.0217, "num_tokens": 3398261843.0, "step": 1226 }, { "epoch": 0.21871657754010695, "grad_norm": 0.28515625, "learning_rate": 1.970526361894807e-05, "loss": 1.0321, "num_tokens": 3404507771.0, "step": 1227 }, { "epoch": 0.21889483065953655, "grad_norm": 0.251953125, "learning_rate": 1.970460382531825e-05, "loss": 1.0745, "num_tokens": 3410782244.0, "step": 1228 }, { "epoch": 0.21907308377896614, "grad_norm": 0.28125, "learning_rate": 1.970394330633195e-05, "loss": 1.0372, "num_tokens": 3417035150.0, "step": 1229 }, { "epoch": 0.2192513368983957, "grad_norm": 0.2578125, "learning_rate": 1.970328206204421e-05, "loss": 1.1067, "num_tokens": 3423302017.0, "step": 1230 }, { "epoch": 0.2194295900178253, "grad_norm": 0.29296875, "learning_rate": 1.970262009251014e-05, "loss": 1.0507, "num_tokens": 3429547132.0, "step": 1231 }, { "epoch": 0.2196078431372549, "grad_norm": 0.26953125, "learning_rate": 1.9701957397784898e-05, "loss": 1.0608, "num_tokens": 3435831878.0, "step": 1232 }, { "epoch": 0.2197860962566845, "grad_norm": 0.275390625, "learning_rate": 1.9701293977923703e-05, "loss": 1.1125, "num_tokens": 3442066193.0, "step": 1233 }, { "epoch": 0.2199643493761141, "grad_norm": 0.251953125, "learning_rate": 1.970062983298184e-05, "loss": 1.0484, "num_tokens": 3448334913.0, "step": 1234 }, { "epoch": 0.22014260249554368, "grad_norm": 0.263671875, "learning_rate": 1.9699964963014654e-05, "loss": 1.0444, "num_tokens": 3454618652.0, "step": 1235 }, { "epoch": 0.22032085561497325, "grad_norm": 0.28125, "learning_rate": 1.9699299368077547e-05, "loss": 1.0595, "num_tokens": 3460900842.0, "step": 1236 }, { "epoch": 0.22049910873440284, "grad_norm": 0.259765625, "learning_rate": 1.969863304822599e-05, "loss": 1.0439, "num_tokens": 3467184346.0, "step": 1237 }, { "epoch": 0.22067736185383244, "grad_norm": 0.2734375, "learning_rate": 1.9697966003515496e-05, "loss": 1.0397, "num_tokens": 3473469347.0, "step": 1238 }, { "epoch": 0.22085561497326203, "grad_norm": 0.263671875, "learning_rate": 1.9697298234001654e-05, "loss": 1.0526, "num_tokens": 3479754744.0, "step": 1239 }, { "epoch": 0.22103386809269163, "grad_norm": 0.251953125, "learning_rate": 1.969662973974011e-05, "loss": 1.0609, "num_tokens": 3486040315.0, "step": 1240 }, { "epoch": 0.22121212121212122, "grad_norm": 0.2890625, "learning_rate": 1.9695960520786576e-05, "loss": 1.0752, "num_tokens": 3492306708.0, "step": 1241 }, { "epoch": 0.22139037433155082, "grad_norm": 0.25, "learning_rate": 1.969529057719681e-05, "loss": 1.0549, "num_tokens": 3498580189.0, "step": 1242 }, { "epoch": 0.22156862745098038, "grad_norm": 0.28125, "learning_rate": 1.9694619909026643e-05, "loss": 1.0736, "num_tokens": 3504857413.0, "step": 1243 }, { "epoch": 0.22174688057040998, "grad_norm": 0.271484375, "learning_rate": 1.9693948516331955e-05, "loss": 1.0764, "num_tokens": 3511142526.0, "step": 1244 }, { "epoch": 0.22192513368983957, "grad_norm": 0.255859375, "learning_rate": 1.9693276399168702e-05, "loss": 1.0378, "num_tokens": 3517425766.0, "step": 1245 }, { "epoch": 0.22210338680926917, "grad_norm": 0.310546875, "learning_rate": 1.9692603557592892e-05, "loss": 1.0134, "num_tokens": 3523671995.0, "step": 1246 }, { "epoch": 0.22228163992869876, "grad_norm": 0.25, "learning_rate": 1.9691929991660584e-05, "loss": 1.0509, "num_tokens": 3529958009.0, "step": 1247 }, { "epoch": 0.22245989304812835, "grad_norm": 0.26171875, "learning_rate": 1.9691255701427912e-05, "loss": 1.0693, "num_tokens": 3536230984.0, "step": 1248 }, { "epoch": 0.22263814616755792, "grad_norm": 0.251953125, "learning_rate": 1.9690580686951065e-05, "loss": 1.0427, "num_tokens": 3542507120.0, "step": 1249 }, { "epoch": 0.22281639928698752, "grad_norm": 0.291015625, "learning_rate": 1.9689904948286294e-05, "loss": 1.0401, "num_tokens": 3548733522.0, "step": 1250 }, { "epoch": 0.2229946524064171, "grad_norm": 0.24609375, "learning_rate": 1.96892284854899e-05, "loss": 1.0729, "num_tokens": 3554998068.0, "step": 1251 }, { "epoch": 0.2231729055258467, "grad_norm": 0.2490234375, "learning_rate": 1.9688551298618263e-05, "loss": 1.0619, "num_tokens": 3561227525.0, "step": 1252 }, { "epoch": 0.2233511586452763, "grad_norm": 0.283203125, "learning_rate": 1.9687873387727808e-05, "loss": 1.0416, "num_tokens": 3567481179.0, "step": 1253 }, { "epoch": 0.2235294117647059, "grad_norm": 0.2392578125, "learning_rate": 1.9687194752875027e-05, "loss": 1.0498, "num_tokens": 3573746984.0, "step": 1254 }, { "epoch": 0.22370766488413546, "grad_norm": 0.31640625, "learning_rate": 1.9686515394116468e-05, "loss": 1.0533, "num_tokens": 3580024633.0, "step": 1255 }, { "epoch": 0.22388591800356505, "grad_norm": 0.251953125, "learning_rate": 1.9685835311508746e-05, "loss": 1.0691, "num_tokens": 3586259136.0, "step": 1256 }, { "epoch": 0.22406417112299465, "grad_norm": 0.283203125, "learning_rate": 1.968515450510853e-05, "loss": 1.0429, "num_tokens": 3592543490.0, "step": 1257 }, { "epoch": 0.22424242424242424, "grad_norm": 0.283203125, "learning_rate": 1.9684472974972552e-05, "loss": 1.0293, "num_tokens": 3598805298.0, "step": 1258 }, { "epoch": 0.22442067736185384, "grad_norm": 0.27734375, "learning_rate": 1.9683790721157606e-05, "loss": 1.0886, "num_tokens": 3605088386.0, "step": 1259 }, { "epoch": 0.22459893048128343, "grad_norm": 0.29296875, "learning_rate": 1.968310774372054e-05, "loss": 1.0819, "num_tokens": 3611369259.0, "step": 1260 }, { "epoch": 0.224777183600713, "grad_norm": 0.271484375, "learning_rate": 1.9682424042718272e-05, "loss": 1.057, "num_tokens": 3617654189.0, "step": 1261 }, { "epoch": 0.2249554367201426, "grad_norm": 0.3515625, "learning_rate": 1.968173961820777e-05, "loss": 1.0837, "num_tokens": 3623939724.0, "step": 1262 }, { "epoch": 0.2251336898395722, "grad_norm": 0.29296875, "learning_rate": 1.9681054470246073e-05, "loss": 1.0709, "num_tokens": 3630191892.0, "step": 1263 }, { "epoch": 0.22531194295900178, "grad_norm": 0.390625, "learning_rate": 1.9680368598890266e-05, "loss": 1.042, "num_tokens": 3636460660.0, "step": 1264 }, { "epoch": 0.22549019607843138, "grad_norm": 0.380859375, "learning_rate": 1.967968200419751e-05, "loss": 1.0852, "num_tokens": 3642721597.0, "step": 1265 }, { "epoch": 0.22566844919786097, "grad_norm": 0.2890625, "learning_rate": 1.967899468622502e-05, "loss": 1.0785, "num_tokens": 3649005476.0, "step": 1266 }, { "epoch": 0.22584670231729057, "grad_norm": 0.330078125, "learning_rate": 1.9678306645030064e-05, "loss": 1.0549, "num_tokens": 3655288723.0, "step": 1267 }, { "epoch": 0.22602495543672013, "grad_norm": 0.263671875, "learning_rate": 1.967761788066998e-05, "loss": 1.0427, "num_tokens": 3661572562.0, "step": 1268 }, { "epoch": 0.22620320855614973, "grad_norm": 0.291015625, "learning_rate": 1.9676928393202165e-05, "loss": 1.0648, "num_tokens": 3667852194.0, "step": 1269 }, { "epoch": 0.22638146167557932, "grad_norm": 0.31640625, "learning_rate": 1.967623818268407e-05, "loss": 1.0782, "num_tokens": 3674135654.0, "step": 1270 }, { "epoch": 0.22655971479500892, "grad_norm": 0.296875, "learning_rate": 1.9675547249173214e-05, "loss": 1.065, "num_tokens": 3680419786.0, "step": 1271 }, { "epoch": 0.2267379679144385, "grad_norm": 0.326171875, "learning_rate": 1.967485559272717e-05, "loss": 1.0614, "num_tokens": 3686671501.0, "step": 1272 }, { "epoch": 0.2269162210338681, "grad_norm": 0.29296875, "learning_rate": 1.9674163213403576e-05, "loss": 1.0619, "num_tokens": 3692947736.0, "step": 1273 }, { "epoch": 0.22709447415329767, "grad_norm": 0.384765625, "learning_rate": 1.9673470111260124e-05, "loss": 1.0768, "num_tokens": 3699203399.0, "step": 1274 }, { "epoch": 0.22727272727272727, "grad_norm": 0.3359375, "learning_rate": 1.9672776286354575e-05, "loss": 1.0587, "num_tokens": 3705445828.0, "step": 1275 }, { "epoch": 0.22745098039215686, "grad_norm": 0.359375, "learning_rate": 1.9672081738744744e-05, "loss": 1.0636, "num_tokens": 3711663815.0, "step": 1276 }, { "epoch": 0.22762923351158645, "grad_norm": 0.392578125, "learning_rate": 1.9671386468488507e-05, "loss": 1.0398, "num_tokens": 3717943088.0, "step": 1277 }, { "epoch": 0.22780748663101605, "grad_norm": 0.26171875, "learning_rate": 1.9670690475643803e-05, "loss": 1.0674, "num_tokens": 3724208126.0, "step": 1278 }, { "epoch": 0.22798573975044564, "grad_norm": 0.30859375, "learning_rate": 1.9669993760268625e-05, "loss": 1.0958, "num_tokens": 3730459221.0, "step": 1279 }, { "epoch": 0.2281639928698752, "grad_norm": 0.2158203125, "learning_rate": 1.966929632242104e-05, "loss": 1.0573, "num_tokens": 3736741636.0, "step": 1280 }, { "epoch": 0.2283422459893048, "grad_norm": 0.29296875, "learning_rate": 1.9668598162159153e-05, "loss": 1.0455, "num_tokens": 3743007211.0, "step": 1281 }, { "epoch": 0.2285204991087344, "grad_norm": 0.2451171875, "learning_rate": 1.966789927954115e-05, "loss": 1.0954, "num_tokens": 3749244710.0, "step": 1282 }, { "epoch": 0.228698752228164, "grad_norm": 0.322265625, "learning_rate": 1.9667199674625267e-05, "loss": 1.0504, "num_tokens": 3755512737.0, "step": 1283 }, { "epoch": 0.2288770053475936, "grad_norm": 0.26171875, "learning_rate": 1.96664993474698e-05, "loss": 1.0392, "num_tokens": 3761779190.0, "step": 1284 }, { "epoch": 0.22905525846702318, "grad_norm": 0.30859375, "learning_rate": 1.9665798298133112e-05, "loss": 1.0841, "num_tokens": 3768060473.0, "step": 1285 }, { "epoch": 0.22923351158645278, "grad_norm": 0.267578125, "learning_rate": 1.966509652667362e-05, "loss": 1.0719, "num_tokens": 3774325768.0, "step": 1286 }, { "epoch": 0.22941176470588234, "grad_norm": 0.283203125, "learning_rate": 1.9664394033149798e-05, "loss": 1.0863, "num_tokens": 3780609407.0, "step": 1287 }, { "epoch": 0.22959001782531194, "grad_norm": 0.265625, "learning_rate": 1.966369081762019e-05, "loss": 1.0945, "num_tokens": 3786890165.0, "step": 1288 }, { "epoch": 0.22976827094474153, "grad_norm": 0.287109375, "learning_rate": 1.9662986880143397e-05, "loss": 1.0882, "num_tokens": 3793176989.0, "step": 1289 }, { "epoch": 0.22994652406417113, "grad_norm": 0.25, "learning_rate": 1.9662282220778074e-05, "loss": 1.0626, "num_tokens": 3799444305.0, "step": 1290 }, { "epoch": 0.23012477718360072, "grad_norm": 0.2421875, "learning_rate": 1.966157683958294e-05, "loss": 1.0606, "num_tokens": 3805710150.0, "step": 1291 }, { "epoch": 0.23030303030303031, "grad_norm": 0.255859375, "learning_rate": 1.9660870736616776e-05, "loss": 1.1099, "num_tokens": 3811991974.0, "step": 1292 }, { "epoch": 0.23048128342245988, "grad_norm": 0.224609375, "learning_rate": 1.9660163911938423e-05, "loss": 1.0687, "num_tokens": 3818274572.0, "step": 1293 }, { "epoch": 0.23065953654188948, "grad_norm": 0.29296875, "learning_rate": 1.965945636560678e-05, "loss": 1.0376, "num_tokens": 3824486708.0, "step": 1294 }, { "epoch": 0.23083778966131907, "grad_norm": 0.25390625, "learning_rate": 1.9658748097680805e-05, "loss": 1.0533, "num_tokens": 3830759737.0, "step": 1295 }, { "epoch": 0.23101604278074866, "grad_norm": 0.259765625, "learning_rate": 1.9658039108219522e-05, "loss": 1.0698, "num_tokens": 3837025965.0, "step": 1296 }, { "epoch": 0.23119429590017826, "grad_norm": 0.296875, "learning_rate": 1.965732939728201e-05, "loss": 1.055, "num_tokens": 3843311021.0, "step": 1297 }, { "epoch": 0.23137254901960785, "grad_norm": 0.2578125, "learning_rate": 1.9656618964927408e-05, "loss": 1.0573, "num_tokens": 3849595249.0, "step": 1298 }, { "epoch": 0.23155080213903742, "grad_norm": 0.3203125, "learning_rate": 1.965590781121491e-05, "loss": 1.0294, "num_tokens": 3855853074.0, "step": 1299 }, { "epoch": 0.23172905525846701, "grad_norm": 0.2578125, "learning_rate": 1.9655195936203793e-05, "loss": 1.0633, "num_tokens": 3862137010.0, "step": 1300 }, { "epoch": 0.2319073083778966, "grad_norm": 0.296875, "learning_rate": 1.9654483339953363e-05, "loss": 1.0839, "num_tokens": 3868419162.0, "step": 1301 }, { "epoch": 0.2320855614973262, "grad_norm": 0.267578125, "learning_rate": 1.9653770022523005e-05, "loss": 1.07, "num_tokens": 3874703818.0, "step": 1302 }, { "epoch": 0.2322638146167558, "grad_norm": 0.298828125, "learning_rate": 1.9653055983972164e-05, "loss": 1.032, "num_tokens": 3880961319.0, "step": 1303 }, { "epoch": 0.2324420677361854, "grad_norm": 0.294921875, "learning_rate": 1.965234122436034e-05, "loss": 1.0096, "num_tokens": 3887224919.0, "step": 1304 }, { "epoch": 0.232620320855615, "grad_norm": 0.251953125, "learning_rate": 1.9651625743747086e-05, "loss": 1.0594, "num_tokens": 3893508335.0, "step": 1305 }, { "epoch": 0.23279857397504455, "grad_norm": 0.294921875, "learning_rate": 1.9650909542192034e-05, "loss": 1.038, "num_tokens": 3899791523.0, "step": 1306 }, { "epoch": 0.23297682709447415, "grad_norm": 0.275390625, "learning_rate": 1.9650192619754856e-05, "loss": 1.087, "num_tokens": 3906062055.0, "step": 1307 }, { "epoch": 0.23315508021390374, "grad_norm": 0.287109375, "learning_rate": 1.9649474976495304e-05, "loss": 1.071, "num_tokens": 3912338115.0, "step": 1308 }, { "epoch": 0.23333333333333334, "grad_norm": 0.24609375, "learning_rate": 1.9648756612473167e-05, "loss": 1.0352, "num_tokens": 3918622900.0, "step": 1309 }, { "epoch": 0.23351158645276293, "grad_norm": 0.28515625, "learning_rate": 1.9648037527748313e-05, "loss": 1.0591, "num_tokens": 3924860225.0, "step": 1310 }, { "epoch": 0.23368983957219253, "grad_norm": 0.267578125, "learning_rate": 1.9647317722380662e-05, "loss": 1.0878, "num_tokens": 3931143347.0, "step": 1311 }, { "epoch": 0.2338680926916221, "grad_norm": 0.306640625, "learning_rate": 1.96465971964302e-05, "loss": 1.0315, "num_tokens": 3937425933.0, "step": 1312 }, { "epoch": 0.2340463458110517, "grad_norm": 0.259765625, "learning_rate": 1.9645875949956967e-05, "loss": 1.0506, "num_tokens": 3943679453.0, "step": 1313 }, { "epoch": 0.23422459893048128, "grad_norm": 0.3125, "learning_rate": 1.9645153983021056e-05, "loss": 1.0748, "num_tokens": 3949901486.0, "step": 1314 }, { "epoch": 0.23440285204991088, "grad_norm": 0.28125, "learning_rate": 1.964443129568264e-05, "loss": 1.0857, "num_tokens": 3956161613.0, "step": 1315 }, { "epoch": 0.23458110516934047, "grad_norm": 0.32421875, "learning_rate": 1.9643707888001934e-05, "loss": 1.0936, "num_tokens": 3962405172.0, "step": 1316 }, { "epoch": 0.23475935828877006, "grad_norm": 0.314453125, "learning_rate": 1.9642983760039223e-05, "loss": 1.087, "num_tokens": 3968688050.0, "step": 1317 }, { "epoch": 0.23493761140819963, "grad_norm": 0.328125, "learning_rate": 1.9642258911854846e-05, "loss": 1.0531, "num_tokens": 3974970990.0, "step": 1318 }, { "epoch": 0.23511586452762923, "grad_norm": 0.33203125, "learning_rate": 1.9641533343509206e-05, "loss": 1.064, "num_tokens": 3981230686.0, "step": 1319 }, { "epoch": 0.23529411764705882, "grad_norm": 0.26953125, "learning_rate": 1.964080705506277e-05, "loss": 1.0668, "num_tokens": 3987488050.0, "step": 1320 }, { "epoch": 0.23547237076648841, "grad_norm": 0.3203125, "learning_rate": 1.9640080046576045e-05, "loss": 1.0521, "num_tokens": 3993771033.0, "step": 1321 }, { "epoch": 0.235650623885918, "grad_norm": 0.3046875, "learning_rate": 1.963935231810963e-05, "loss": 1.0415, "num_tokens": 3999983410.0, "step": 1322 }, { "epoch": 0.2358288770053476, "grad_norm": 0.306640625, "learning_rate": 1.963862386972416e-05, "loss": 1.0229, "num_tokens": 4006242911.0, "step": 1323 }, { "epoch": 0.23600713012477717, "grad_norm": 0.287109375, "learning_rate": 1.963789470148033e-05, "loss": 1.0908, "num_tokens": 4012510016.0, "step": 1324 }, { "epoch": 0.23618538324420676, "grad_norm": 0.30078125, "learning_rate": 1.963716481343891e-05, "loss": 1.0587, "num_tokens": 4018763854.0, "step": 1325 }, { "epoch": 0.23636363636363636, "grad_norm": 0.296875, "learning_rate": 1.963643420566072e-05, "loss": 1.0734, "num_tokens": 4025048042.0, "step": 1326 }, { "epoch": 0.23654188948306595, "grad_norm": 0.26953125, "learning_rate": 1.963570287820664e-05, "loss": 1.081, "num_tokens": 4031330629.0, "step": 1327 }, { "epoch": 0.23672014260249555, "grad_norm": 0.294921875, "learning_rate": 1.9634970831137617e-05, "loss": 1.1015, "num_tokens": 4037611731.0, "step": 1328 }, { "epoch": 0.23689839572192514, "grad_norm": 0.2734375, "learning_rate": 1.9634238064514647e-05, "loss": 1.0387, "num_tokens": 4043897142.0, "step": 1329 }, { "epoch": 0.23707664884135474, "grad_norm": 0.271484375, "learning_rate": 1.963350457839879e-05, "loss": 1.0708, "num_tokens": 4050149527.0, "step": 1330 }, { "epoch": 0.2372549019607843, "grad_norm": 0.31640625, "learning_rate": 1.963277037285117e-05, "loss": 1.0759, "num_tokens": 4056431557.0, "step": 1331 }, { "epoch": 0.2374331550802139, "grad_norm": 0.2412109375, "learning_rate": 1.9632035447932974e-05, "loss": 1.0551, "num_tokens": 4062690904.0, "step": 1332 }, { "epoch": 0.2376114081996435, "grad_norm": 0.341796875, "learning_rate": 1.9631299803705433e-05, "loss": 1.0367, "num_tokens": 4068973145.0, "step": 1333 }, { "epoch": 0.2377896613190731, "grad_norm": 0.287109375, "learning_rate": 1.963056344022986e-05, "loss": 1.0616, "num_tokens": 4075226551.0, "step": 1334 }, { "epoch": 0.23796791443850268, "grad_norm": 0.279296875, "learning_rate": 1.962982635756761e-05, "loss": 1.068, "num_tokens": 4081447328.0, "step": 1335 }, { "epoch": 0.23814616755793228, "grad_norm": 0.31640625, "learning_rate": 1.96290885557801e-05, "loss": 1.0763, "num_tokens": 4087730037.0, "step": 1336 }, { "epoch": 0.23832442067736184, "grad_norm": 0.27734375, "learning_rate": 1.9628350034928816e-05, "loss": 1.0853, "num_tokens": 4094005328.0, "step": 1337 }, { "epoch": 0.23850267379679144, "grad_norm": 0.287109375, "learning_rate": 1.96276107950753e-05, "loss": 1.0246, "num_tokens": 4100289635.0, "step": 1338 }, { "epoch": 0.23868092691622103, "grad_norm": 0.25390625, "learning_rate": 1.9626870836281156e-05, "loss": 1.0573, "num_tokens": 4106572961.0, "step": 1339 }, { "epoch": 0.23885918003565063, "grad_norm": 0.3046875, "learning_rate": 1.9626130158608038e-05, "loss": 1.0554, "num_tokens": 4112856829.0, "step": 1340 }, { "epoch": 0.23903743315508022, "grad_norm": 0.283203125, "learning_rate": 1.9625388762117668e-05, "loss": 1.0679, "num_tokens": 4119104083.0, "step": 1341 }, { "epoch": 0.23921568627450981, "grad_norm": 0.30078125, "learning_rate": 1.962464664687183e-05, "loss": 1.0549, "num_tokens": 4125387173.0, "step": 1342 }, { "epoch": 0.23939393939393938, "grad_norm": 0.26953125, "learning_rate": 1.962390381293236e-05, "loss": 1.0395, "num_tokens": 4131669376.0, "step": 1343 }, { "epoch": 0.23957219251336898, "grad_norm": 0.30859375, "learning_rate": 1.9623160260361163e-05, "loss": 1.0163, "num_tokens": 4137938224.0, "step": 1344 }, { "epoch": 0.23975044563279857, "grad_norm": 0.279296875, "learning_rate": 1.96224159892202e-05, "loss": 1.0539, "num_tokens": 4144168086.0, "step": 1345 }, { "epoch": 0.23992869875222816, "grad_norm": 0.298828125, "learning_rate": 1.9621670999571486e-05, "loss": 1.0294, "num_tokens": 4150451221.0, "step": 1346 }, { "epoch": 0.24010695187165776, "grad_norm": 0.259765625, "learning_rate": 1.9620925291477104e-05, "loss": 1.0235, "num_tokens": 4156735707.0, "step": 1347 }, { "epoch": 0.24028520499108735, "grad_norm": 0.26171875, "learning_rate": 1.96201788649992e-05, "loss": 1.0078, "num_tokens": 4163021178.0, "step": 1348 }, { "epoch": 0.24046345811051695, "grad_norm": 0.2734375, "learning_rate": 1.961943172019996e-05, "loss": 1.0697, "num_tokens": 4169280523.0, "step": 1349 }, { "epoch": 0.24064171122994651, "grad_norm": 0.28515625, "learning_rate": 1.9618683857141658e-05, "loss": 1.0471, "num_tokens": 4175520536.0, "step": 1350 }, { "epoch": 0.2408199643493761, "grad_norm": 0.291015625, "learning_rate": 1.9617935275886603e-05, "loss": 1.0774, "num_tokens": 4181802941.0, "step": 1351 }, { "epoch": 0.2409982174688057, "grad_norm": 0.255859375, "learning_rate": 1.9617185976497185e-05, "loss": 1.057, "num_tokens": 4188086781.0, "step": 1352 }, { "epoch": 0.2411764705882353, "grad_norm": 0.306640625, "learning_rate": 1.9616435959035832e-05, "loss": 1.074, "num_tokens": 4194369427.0, "step": 1353 }, { "epoch": 0.2413547237076649, "grad_norm": 0.2392578125, "learning_rate": 1.961568522356505e-05, "loss": 1.0376, "num_tokens": 4200652967.0, "step": 1354 }, { "epoch": 0.2415329768270945, "grad_norm": 0.259765625, "learning_rate": 1.9614933770147395e-05, "loss": 1.074, "num_tokens": 4206937038.0, "step": 1355 }, { "epoch": 0.24171122994652405, "grad_norm": 0.25390625, "learning_rate": 1.9614181598845488e-05, "loss": 1.0723, "num_tokens": 4213216473.0, "step": 1356 }, { "epoch": 0.24188948306595365, "grad_norm": 0.29296875, "learning_rate": 1.9613428709722006e-05, "loss": 1.046, "num_tokens": 4219479908.0, "step": 1357 }, { "epoch": 0.24206773618538324, "grad_norm": 0.255859375, "learning_rate": 1.9612675102839687e-05, "loss": 1.0697, "num_tokens": 4225731780.0, "step": 1358 }, { "epoch": 0.24224598930481284, "grad_norm": 0.271484375, "learning_rate": 1.9611920778261335e-05, "loss": 1.0617, "num_tokens": 4231969266.0, "step": 1359 }, { "epoch": 0.24242424242424243, "grad_norm": 0.283203125, "learning_rate": 1.96111657360498e-05, "loss": 1.0449, "num_tokens": 4238212355.0, "step": 1360 }, { "epoch": 0.24260249554367203, "grad_norm": 0.2451171875, "learning_rate": 1.9610409976268002e-05, "loss": 1.0352, "num_tokens": 4244492973.0, "step": 1361 }, { "epoch": 0.2427807486631016, "grad_norm": 0.2890625, "learning_rate": 1.960965349897892e-05, "loss": 1.0521, "num_tokens": 4250753047.0, "step": 1362 }, { "epoch": 0.2429590017825312, "grad_norm": 0.240234375, "learning_rate": 1.9608896304245595e-05, "loss": 1.0552, "num_tokens": 4257038008.0, "step": 1363 }, { "epoch": 0.24313725490196078, "grad_norm": 0.29296875, "learning_rate": 1.9608138392131115e-05, "loss": 1.0683, "num_tokens": 4263297786.0, "step": 1364 }, { "epoch": 0.24331550802139038, "grad_norm": 0.25390625, "learning_rate": 1.9607379762698644e-05, "loss": 1.0444, "num_tokens": 4269562910.0, "step": 1365 }, { "epoch": 0.24349376114081997, "grad_norm": 0.3359375, "learning_rate": 1.96066204160114e-05, "loss": 1.0385, "num_tokens": 4275846753.0, "step": 1366 }, { "epoch": 0.24367201426024956, "grad_norm": 0.28125, "learning_rate": 1.9605860352132652e-05, "loss": 1.0401, "num_tokens": 4282129377.0, "step": 1367 }, { "epoch": 0.24385026737967913, "grad_norm": 0.291015625, "learning_rate": 1.9605099571125745e-05, "loss": 1.0389, "num_tokens": 4288415162.0, "step": 1368 }, { "epoch": 0.24402852049910873, "grad_norm": 0.33203125, "learning_rate": 1.9604338073054066e-05, "loss": 1.0473, "num_tokens": 4294661358.0, "step": 1369 }, { "epoch": 0.24420677361853832, "grad_norm": 0.271484375, "learning_rate": 1.9603575857981083e-05, "loss": 1.0472, "num_tokens": 4300931504.0, "step": 1370 }, { "epoch": 0.24438502673796791, "grad_norm": 0.3046875, "learning_rate": 1.96028129259703e-05, "loss": 1.0576, "num_tokens": 4307214723.0, "step": 1371 }, { "epoch": 0.2445632798573975, "grad_norm": 0.28125, "learning_rate": 1.9602049277085294e-05, "loss": 1.0329, "num_tokens": 4313473200.0, "step": 1372 }, { "epoch": 0.2447415329768271, "grad_norm": 0.333984375, "learning_rate": 1.9601284911389705e-05, "loss": 1.0415, "num_tokens": 4319730053.0, "step": 1373 }, { "epoch": 0.2449197860962567, "grad_norm": 0.2890625, "learning_rate": 1.9600519828947227e-05, "loss": 1.0469, "num_tokens": 4325987139.0, "step": 1374 }, { "epoch": 0.24509803921568626, "grad_norm": 0.3984375, "learning_rate": 1.959975402982161e-05, "loss": 1.0634, "num_tokens": 4332269402.0, "step": 1375 }, { "epoch": 0.24527629233511586, "grad_norm": 0.3125, "learning_rate": 1.9598987514076674e-05, "loss": 1.0732, "num_tokens": 4338546550.0, "step": 1376 }, { "epoch": 0.24545454545454545, "grad_norm": 0.359375, "learning_rate": 1.9598220281776287e-05, "loss": 1.0628, "num_tokens": 4344828732.0, "step": 1377 }, { "epoch": 0.24563279857397505, "grad_norm": 0.359375, "learning_rate": 1.9597452332984385e-05, "loss": 1.0629, "num_tokens": 4351110819.0, "step": 1378 }, { "epoch": 0.24581105169340464, "grad_norm": 0.3046875, "learning_rate": 1.959668366776496e-05, "loss": 1.0733, "num_tokens": 4357396014.0, "step": 1379 }, { "epoch": 0.24598930481283424, "grad_norm": 0.330078125, "learning_rate": 1.9595914286182073e-05, "loss": 1.0727, "num_tokens": 4363659019.0, "step": 1380 }, { "epoch": 0.2461675579322638, "grad_norm": 0.298828125, "learning_rate": 1.9595144188299827e-05, "loss": 1.0443, "num_tokens": 4369943544.0, "step": 1381 }, { "epoch": 0.2463458110516934, "grad_norm": 0.279296875, "learning_rate": 1.95943733741824e-05, "loss": 1.062, "num_tokens": 4376221359.0, "step": 1382 }, { "epoch": 0.246524064171123, "grad_norm": 0.32421875, "learning_rate": 1.9593601843894024e-05, "loss": 1.0462, "num_tokens": 4382503946.0, "step": 1383 }, { "epoch": 0.2467023172905526, "grad_norm": 0.275390625, "learning_rate": 1.9592829597498985e-05, "loss": 1.0735, "num_tokens": 4388788378.0, "step": 1384 }, { "epoch": 0.24688057040998218, "grad_norm": 0.38671875, "learning_rate": 1.9592056635061636e-05, "loss": 1.062, "num_tokens": 4395034269.0, "step": 1385 }, { "epoch": 0.24705882352941178, "grad_norm": 0.37109375, "learning_rate": 1.9591282956646397e-05, "loss": 1.0751, "num_tokens": 4401292549.0, "step": 1386 }, { "epoch": 0.24723707664884134, "grad_norm": 0.302734375, "learning_rate": 1.9590508562317727e-05, "loss": 1.0522, "num_tokens": 4407545705.0, "step": 1387 }, { "epoch": 0.24741532976827094, "grad_norm": 0.333984375, "learning_rate": 1.9589733452140167e-05, "loss": 1.0312, "num_tokens": 4413826610.0, "step": 1388 }, { "epoch": 0.24759358288770053, "grad_norm": 0.2734375, "learning_rate": 1.9588957626178296e-05, "loss": 1.0552, "num_tokens": 4420105393.0, "step": 1389 }, { "epoch": 0.24777183600713013, "grad_norm": 0.271484375, "learning_rate": 1.9588181084496772e-05, "loss": 1.06, "num_tokens": 4426360257.0, "step": 1390 }, { "epoch": 0.24795008912655972, "grad_norm": 0.279296875, "learning_rate": 1.95874038271603e-05, "loss": 1.0734, "num_tokens": 4432643162.0, "step": 1391 }, { "epoch": 0.24812834224598931, "grad_norm": 0.28515625, "learning_rate": 1.9586625854233657e-05, "loss": 1.0316, "num_tokens": 4438926952.0, "step": 1392 }, { "epoch": 0.2483065953654189, "grad_norm": 0.314453125, "learning_rate": 1.958584716578166e-05, "loss": 1.0529, "num_tokens": 4445206072.0, "step": 1393 }, { "epoch": 0.24848484848484848, "grad_norm": 0.2734375, "learning_rate": 1.9585067761869205e-05, "loss": 1.059, "num_tokens": 4451488008.0, "step": 1394 }, { "epoch": 0.24866310160427807, "grad_norm": 0.376953125, "learning_rate": 1.9584287642561236e-05, "loss": 1.0554, "num_tokens": 4457754768.0, "step": 1395 }, { "epoch": 0.24884135472370766, "grad_norm": 0.3203125, "learning_rate": 1.9583506807922766e-05, "loss": 1.0652, "num_tokens": 4464040053.0, "step": 1396 }, { "epoch": 0.24901960784313726, "grad_norm": 0.318359375, "learning_rate": 1.9582725258018856e-05, "loss": 1.0527, "num_tokens": 4470323037.0, "step": 1397 }, { "epoch": 0.24919786096256685, "grad_norm": 0.298828125, "learning_rate": 1.9581942992914638e-05, "loss": 1.0612, "num_tokens": 4476604593.0, "step": 1398 }, { "epoch": 0.24937611408199645, "grad_norm": 0.314453125, "learning_rate": 1.958116001267529e-05, "loss": 1.0436, "num_tokens": 4482889666.0, "step": 1399 }, { "epoch": 0.24955436720142601, "grad_norm": 0.3203125, "learning_rate": 1.9580376317366065e-05, "loss": 1.0755, "num_tokens": 4489151399.0, "step": 1400 }, { "epoch": 0.2497326203208556, "grad_norm": 0.296875, "learning_rate": 1.957959190705227e-05, "loss": 1.0791, "num_tokens": 4495435994.0, "step": 1401 }, { "epoch": 0.2499108734402852, "grad_norm": 0.291015625, "learning_rate": 1.9578806781799263e-05, "loss": 1.0431, "num_tokens": 4501717257.0, "step": 1402 }, { "epoch": 0.25008912655971477, "grad_norm": 0.283203125, "learning_rate": 1.9578020941672476e-05, "loss": 1.0452, "num_tokens": 4507991796.0, "step": 1403 }, { "epoch": 0.25026737967914436, "grad_norm": 0.28125, "learning_rate": 1.9577234386737387e-05, "loss": 1.0557, "num_tokens": 4514245657.0, "step": 1404 }, { "epoch": 0.25044563279857396, "grad_norm": 0.28125, "learning_rate": 1.9576447117059546e-05, "loss": 1.0452, "num_tokens": 4520503307.0, "step": 1405 }, { "epoch": 0.25062388591800355, "grad_norm": 0.2578125, "learning_rate": 1.957565913270455e-05, "loss": 1.0669, "num_tokens": 4526785248.0, "step": 1406 }, { "epoch": 0.25080213903743315, "grad_norm": 0.259765625, "learning_rate": 1.9574870433738063e-05, "loss": 1.0423, "num_tokens": 4533069503.0, "step": 1407 }, { "epoch": 0.25098039215686274, "grad_norm": 0.3046875, "learning_rate": 1.9574081020225816e-05, "loss": 1.0586, "num_tokens": 4539353306.0, "step": 1408 }, { "epoch": 0.25115864527629234, "grad_norm": 0.2421875, "learning_rate": 1.9573290892233577e-05, "loss": 1.0548, "num_tokens": 4545614214.0, "step": 1409 }, { "epoch": 0.25133689839572193, "grad_norm": 0.359375, "learning_rate": 1.95725000498272e-05, "loss": 1.0487, "num_tokens": 4551895185.0, "step": 1410 }, { "epoch": 0.2515151515151515, "grad_norm": 0.25390625, "learning_rate": 1.957170849307258e-05, "loss": 1.0496, "num_tokens": 4558179947.0, "step": 1411 }, { "epoch": 0.2516934046345811, "grad_norm": 0.337890625, "learning_rate": 1.9570916222035677e-05, "loss": 1.0664, "num_tokens": 4564403124.0, "step": 1412 }, { "epoch": 0.2518716577540107, "grad_norm": 0.259765625, "learning_rate": 1.957012323678251e-05, "loss": 1.0436, "num_tokens": 4570681444.0, "step": 1413 }, { "epoch": 0.2520499108734403, "grad_norm": 0.330078125, "learning_rate": 1.956932953737917e-05, "loss": 1.0484, "num_tokens": 4576941318.0, "step": 1414 }, { "epoch": 0.2522281639928699, "grad_norm": 0.265625, "learning_rate": 1.956853512389178e-05, "loss": 1.0405, "num_tokens": 4583226230.0, "step": 1415 }, { "epoch": 0.25240641711229944, "grad_norm": 0.359375, "learning_rate": 1.956773999638655e-05, "loss": 1.0404, "num_tokens": 4589471168.0, "step": 1416 }, { "epoch": 0.25258467023172904, "grad_norm": 0.341796875, "learning_rate": 1.956694415492973e-05, "loss": 1.0273, "num_tokens": 4595754114.0, "step": 1417 }, { "epoch": 0.25276292335115863, "grad_norm": 0.3125, "learning_rate": 1.9566147599587644e-05, "loss": 1.0577, "num_tokens": 4602038951.0, "step": 1418 }, { "epoch": 0.2529411764705882, "grad_norm": 0.33984375, "learning_rate": 1.9565350330426666e-05, "loss": 1.0565, "num_tokens": 4608324026.0, "step": 1419 }, { "epoch": 0.2531194295900178, "grad_norm": 0.291015625, "learning_rate": 1.9564552347513235e-05, "loss": 1.0853, "num_tokens": 4614595473.0, "step": 1420 }, { "epoch": 0.2532976827094474, "grad_norm": 0.28125, "learning_rate": 1.9563753650913844e-05, "loss": 1.0699, "num_tokens": 4620878954.0, "step": 1421 }, { "epoch": 0.253475935828877, "grad_norm": 0.302734375, "learning_rate": 1.956295424069505e-05, "loss": 1.0672, "num_tokens": 4627163685.0, "step": 1422 }, { "epoch": 0.2536541889483066, "grad_norm": 0.2578125, "learning_rate": 1.9562154116923467e-05, "loss": 1.0499, "num_tokens": 4633416202.0, "step": 1423 }, { "epoch": 0.2538324420677362, "grad_norm": 0.330078125, "learning_rate": 1.9561353279665776e-05, "loss": 1.0865, "num_tokens": 4639699902.0, "step": 1424 }, { "epoch": 0.2540106951871658, "grad_norm": 0.26171875, "learning_rate": 1.95605517289887e-05, "loss": 1.0105, "num_tokens": 4645973704.0, "step": 1425 }, { "epoch": 0.2541889483065954, "grad_norm": 0.318359375, "learning_rate": 1.9559749464959044e-05, "loss": 1.0572, "num_tokens": 4652250838.0, "step": 1426 }, { "epoch": 0.254367201426025, "grad_norm": 0.263671875, "learning_rate": 1.955894648764365e-05, "loss": 1.0463, "num_tokens": 4658535669.0, "step": 1427 }, { "epoch": 0.2545454545454545, "grad_norm": 0.37109375, "learning_rate": 1.9558142797109435e-05, "loss": 1.0639, "num_tokens": 4664791205.0, "step": 1428 }, { "epoch": 0.2547237076648841, "grad_norm": 0.3203125, "learning_rate": 1.9557338393423372e-05, "loss": 1.0693, "num_tokens": 4671074277.0, "step": 1429 }, { "epoch": 0.2549019607843137, "grad_norm": 0.345703125, "learning_rate": 1.9556533276652496e-05, "loss": 1.0468, "num_tokens": 4677341538.0, "step": 1430 }, { "epoch": 0.2550802139037433, "grad_norm": 0.318359375, "learning_rate": 1.9555727446863883e-05, "loss": 1.0481, "num_tokens": 4683624572.0, "step": 1431 }, { "epoch": 0.2552584670231729, "grad_norm": 0.298828125, "learning_rate": 1.95549209041247e-05, "loss": 1.0639, "num_tokens": 4689909021.0, "step": 1432 }, { "epoch": 0.2554367201426025, "grad_norm": 0.287109375, "learning_rate": 1.9554113648502145e-05, "loss": 1.0535, "num_tokens": 4696192310.0, "step": 1433 }, { "epoch": 0.2556149732620321, "grad_norm": 0.2470703125, "learning_rate": 1.955330568006349e-05, "loss": 1.0514, "num_tokens": 4702448241.0, "step": 1434 }, { "epoch": 0.2557932263814617, "grad_norm": 0.26171875, "learning_rate": 1.9552496998876067e-05, "loss": 1.0838, "num_tokens": 4708733268.0, "step": 1435 }, { "epoch": 0.2559714795008913, "grad_norm": 0.26953125, "learning_rate": 1.9551687605007258e-05, "loss": 1.0715, "num_tokens": 4715016894.0, "step": 1436 }, { "epoch": 0.25614973262032087, "grad_norm": 0.26953125, "learning_rate": 1.955087749852451e-05, "loss": 1.0604, "num_tokens": 4721297981.0, "step": 1437 }, { "epoch": 0.25632798573975046, "grad_norm": 0.33203125, "learning_rate": 1.9550066679495337e-05, "loss": 1.0552, "num_tokens": 4727583369.0, "step": 1438 }, { "epoch": 0.25650623885918006, "grad_norm": 0.271484375, "learning_rate": 1.9549255147987298e-05, "loss": 1.087, "num_tokens": 4733868427.0, "step": 1439 }, { "epoch": 0.25668449197860965, "grad_norm": 0.3359375, "learning_rate": 1.9548442904068018e-05, "loss": 1.0598, "num_tokens": 4740124989.0, "step": 1440 }, { "epoch": 0.2568627450980392, "grad_norm": 0.326171875, "learning_rate": 1.9547629947805184e-05, "loss": 1.091, "num_tokens": 4746408315.0, "step": 1441 }, { "epoch": 0.2570409982174688, "grad_norm": 0.2890625, "learning_rate": 1.9546816279266537e-05, "loss": 1.0632, "num_tokens": 4752693020.0, "step": 1442 }, { "epoch": 0.2572192513368984, "grad_norm": 0.31640625, "learning_rate": 1.954600189851988e-05, "loss": 1.0392, "num_tokens": 4758968141.0, "step": 1443 }, { "epoch": 0.257397504456328, "grad_norm": 0.3046875, "learning_rate": 1.9545186805633086e-05, "loss": 1.0698, "num_tokens": 4765225534.0, "step": 1444 }, { "epoch": 0.25757575757575757, "grad_norm": 0.302734375, "learning_rate": 1.954437100067406e-05, "loss": 1.0585, "num_tokens": 4771498480.0, "step": 1445 }, { "epoch": 0.25775401069518716, "grad_norm": 0.32421875, "learning_rate": 1.9543554483710797e-05, "loss": 1.0711, "num_tokens": 4777783472.0, "step": 1446 }, { "epoch": 0.25793226381461676, "grad_norm": 0.298828125, "learning_rate": 1.954273725481133e-05, "loss": 1.0399, "num_tokens": 4783979618.0, "step": 1447 }, { "epoch": 0.25811051693404635, "grad_norm": 0.302734375, "learning_rate": 1.954191931404376e-05, "loss": 1.0451, "num_tokens": 4790243736.0, "step": 1448 }, { "epoch": 0.25828877005347595, "grad_norm": 0.275390625, "learning_rate": 1.9541100661476243e-05, "loss": 1.0396, "num_tokens": 4796502417.0, "step": 1449 }, { "epoch": 0.25846702317290554, "grad_norm": 0.2578125, "learning_rate": 1.9540281297177008e-05, "loss": 1.0653, "num_tokens": 4802759129.0, "step": 1450 }, { "epoch": 0.25864527629233514, "grad_norm": 0.302734375, "learning_rate": 1.9539461221214324e-05, "loss": 1.0365, "num_tokens": 4809043616.0, "step": 1451 }, { "epoch": 0.25882352941176473, "grad_norm": 0.2470703125, "learning_rate": 1.953864043365653e-05, "loss": 1.0476, "num_tokens": 4815298864.0, "step": 1452 }, { "epoch": 0.25900178253119427, "grad_norm": 0.287109375, "learning_rate": 1.9537818934572027e-05, "loss": 1.0698, "num_tokens": 4821529278.0, "step": 1453 }, { "epoch": 0.25918003565062386, "grad_norm": 0.255859375, "learning_rate": 1.953699672402926e-05, "loss": 1.0726, "num_tokens": 4827811761.0, "step": 1454 }, { "epoch": 0.25935828877005346, "grad_norm": 0.2373046875, "learning_rate": 1.9536173802096756e-05, "loss": 1.0683, "num_tokens": 4834094217.0, "step": 1455 }, { "epoch": 0.25953654188948305, "grad_norm": 0.271484375, "learning_rate": 1.953535016884308e-05, "loss": 1.06, "num_tokens": 4840347234.0, "step": 1456 }, { "epoch": 0.25971479500891265, "grad_norm": 0.248046875, "learning_rate": 1.9534525824336868e-05, "loss": 1.0847, "num_tokens": 4846580412.0, "step": 1457 }, { "epoch": 0.25989304812834224, "grad_norm": 0.265625, "learning_rate": 1.953370076864682e-05, "loss": 1.0327, "num_tokens": 4852865439.0, "step": 1458 }, { "epoch": 0.26007130124777184, "grad_norm": 0.255859375, "learning_rate": 1.953287500184168e-05, "loss": 1.0884, "num_tokens": 4859091250.0, "step": 1459 }, { "epoch": 0.26024955436720143, "grad_norm": 0.3046875, "learning_rate": 1.9532048523990258e-05, "loss": 1.0484, "num_tokens": 4865375993.0, "step": 1460 }, { "epoch": 0.260427807486631, "grad_norm": 0.271484375, "learning_rate": 1.9531221335161435e-05, "loss": 1.0743, "num_tokens": 4871578946.0, "step": 1461 }, { "epoch": 0.2606060606060606, "grad_norm": 0.271484375, "learning_rate": 1.953039343542413e-05, "loss": 1.0713, "num_tokens": 4877861660.0, "step": 1462 }, { "epoch": 0.2607843137254902, "grad_norm": 0.287109375, "learning_rate": 1.9529564824847337e-05, "loss": 1.081, "num_tokens": 4884144552.0, "step": 1463 }, { "epoch": 0.2609625668449198, "grad_norm": 0.267578125, "learning_rate": 1.95287355035001e-05, "loss": 1.0649, "num_tokens": 4890428731.0, "step": 1464 }, { "epoch": 0.2611408199643494, "grad_norm": 0.26953125, "learning_rate": 1.952790547145153e-05, "loss": 1.0549, "num_tokens": 4896651150.0, "step": 1465 }, { "epoch": 0.26131907308377894, "grad_norm": 0.248046875, "learning_rate": 1.9527074728770797e-05, "loss": 1.05, "num_tokens": 4902932903.0, "step": 1466 }, { "epoch": 0.26149732620320854, "grad_norm": 0.28515625, "learning_rate": 1.9526243275527123e-05, "loss": 1.032, "num_tokens": 4909187763.0, "step": 1467 }, { "epoch": 0.26167557932263813, "grad_norm": 0.2412109375, "learning_rate": 1.952541111178979e-05, "loss": 1.0499, "num_tokens": 4915455459.0, "step": 1468 }, { "epoch": 0.2618538324420677, "grad_norm": 0.333984375, "learning_rate": 1.952457823762815e-05, "loss": 1.0603, "num_tokens": 4921740582.0, "step": 1469 }, { "epoch": 0.2620320855614973, "grad_norm": 0.25, "learning_rate": 1.95237446531116e-05, "loss": 1.0817, "num_tokens": 4928019073.0, "step": 1470 }, { "epoch": 0.2622103386809269, "grad_norm": 0.328125, "learning_rate": 1.9522910358309607e-05, "loss": 1.0782, "num_tokens": 4934299189.0, "step": 1471 }, { "epoch": 0.2623885918003565, "grad_norm": 0.28125, "learning_rate": 1.9522075353291688e-05, "loss": 1.0874, "num_tokens": 4940583790.0, "step": 1472 }, { "epoch": 0.2625668449197861, "grad_norm": 0.3359375, "learning_rate": 1.9521239638127435e-05, "loss": 1.0519, "num_tokens": 4946869131.0, "step": 1473 }, { "epoch": 0.2627450980392157, "grad_norm": 0.322265625, "learning_rate": 1.9520403212886473e-05, "loss": 1.0538, "num_tokens": 4953140199.0, "step": 1474 }, { "epoch": 0.2629233511586453, "grad_norm": 0.279296875, "learning_rate": 1.9519566077638514e-05, "loss": 1.0541, "num_tokens": 4959422314.0, "step": 1475 }, { "epoch": 0.2631016042780749, "grad_norm": 0.32421875, "learning_rate": 1.951872823245331e-05, "loss": 1.0366, "num_tokens": 4965697583.0, "step": 1476 }, { "epoch": 0.2632798573975045, "grad_norm": 0.298828125, "learning_rate": 1.9517889677400683e-05, "loss": 1.0688, "num_tokens": 4971980044.0, "step": 1477 }, { "epoch": 0.263458110516934, "grad_norm": 0.259765625, "learning_rate": 1.9517050412550504e-05, "loss": 1.0304, "num_tokens": 4978265066.0, "step": 1478 }, { "epoch": 0.2636363636363636, "grad_norm": 0.30859375, "learning_rate": 1.951621043797272e-05, "loss": 1.0607, "num_tokens": 4984540786.0, "step": 1479 }, { "epoch": 0.2638146167557932, "grad_norm": 0.259765625, "learning_rate": 1.951536975373731e-05, "loss": 1.0571, "num_tokens": 4990781305.0, "step": 1480 }, { "epoch": 0.2639928698752228, "grad_norm": 0.291015625, "learning_rate": 1.9514528359914343e-05, "loss": 1.0949, "num_tokens": 4997024613.0, "step": 1481 }, { "epoch": 0.2641711229946524, "grad_norm": 0.267578125, "learning_rate": 1.9513686256573924e-05, "loss": 1.0767, "num_tokens": 5003308721.0, "step": 1482 }, { "epoch": 0.264349376114082, "grad_norm": 0.259765625, "learning_rate": 1.9512843443786235e-05, "loss": 1.055, "num_tokens": 5009591613.0, "step": 1483 }, { "epoch": 0.2645276292335116, "grad_norm": 0.283203125, "learning_rate": 1.9511999921621492e-05, "loss": 1.0542, "num_tokens": 5015876676.0, "step": 1484 }, { "epoch": 0.2647058823529412, "grad_norm": 0.263671875, "learning_rate": 1.951115569015e-05, "loss": 1.0283, "num_tokens": 5022160459.0, "step": 1485 }, { "epoch": 0.2648841354723708, "grad_norm": 0.29296875, "learning_rate": 1.951031074944211e-05, "loss": 1.0391, "num_tokens": 5028443444.0, "step": 1486 }, { "epoch": 0.26506238859180037, "grad_norm": 0.271484375, "learning_rate": 1.950946509956822e-05, "loss": 1.0367, "num_tokens": 5034721047.0, "step": 1487 }, { "epoch": 0.26524064171122996, "grad_norm": 0.296875, "learning_rate": 1.9508618740598803e-05, "loss": 1.0649, "num_tokens": 5040993429.0, "step": 1488 }, { "epoch": 0.26541889483065956, "grad_norm": 0.2890625, "learning_rate": 1.950777167260439e-05, "loss": 1.0417, "num_tokens": 5047254707.0, "step": 1489 }, { "epoch": 0.26559714795008915, "grad_norm": 0.244140625, "learning_rate": 1.950692389565556e-05, "loss": 1.1051, "num_tokens": 5053537521.0, "step": 1490 }, { "epoch": 0.2657754010695187, "grad_norm": 0.29296875, "learning_rate": 1.9506075409822964e-05, "loss": 1.0427, "num_tokens": 5059820984.0, "step": 1491 }, { "epoch": 0.2659536541889483, "grad_norm": 0.2333984375, "learning_rate": 1.9505226215177308e-05, "loss": 1.0266, "num_tokens": 5066104037.0, "step": 1492 }, { "epoch": 0.2661319073083779, "grad_norm": 0.2734375, "learning_rate": 1.9504376311789352e-05, "loss": 1.0403, "num_tokens": 5072386705.0, "step": 1493 }, { "epoch": 0.2663101604278075, "grad_norm": 0.255859375, "learning_rate": 1.950352569972992e-05, "loss": 1.0405, "num_tokens": 5078671017.0, "step": 1494 }, { "epoch": 0.26648841354723707, "grad_norm": 0.267578125, "learning_rate": 1.9502674379069888e-05, "loss": 1.0619, "num_tokens": 5084954940.0, "step": 1495 }, { "epoch": 0.26666666666666666, "grad_norm": 0.275390625, "learning_rate": 1.9501822349880207e-05, "loss": 1.031, "num_tokens": 5091239248.0, "step": 1496 }, { "epoch": 0.26684491978609626, "grad_norm": 0.275390625, "learning_rate": 1.9500969612231867e-05, "loss": 1.0437, "num_tokens": 5097499068.0, "step": 1497 }, { "epoch": 0.26702317290552585, "grad_norm": 0.26953125, "learning_rate": 1.9500116166195935e-05, "loss": 1.0627, "num_tokens": 5103769287.0, "step": 1498 }, { "epoch": 0.26720142602495545, "grad_norm": 0.29296875, "learning_rate": 1.9499262011843524e-05, "loss": 1.0374, "num_tokens": 5110037262.0, "step": 1499 }, { "epoch": 0.26737967914438504, "grad_norm": 0.27734375, "learning_rate": 1.949840714924581e-05, "loss": 1.0477, "num_tokens": 5116321016.0, "step": 1500 }, { "epoch": 0.26755793226381464, "grad_norm": 0.302734375, "learning_rate": 1.9497551578474033e-05, "loss": 1.0552, "num_tokens": 5122602541.0, "step": 1501 }, { "epoch": 0.26773618538324423, "grad_norm": 0.26953125, "learning_rate": 1.949669529959949e-05, "loss": 1.0633, "num_tokens": 5128871199.0, "step": 1502 }, { "epoch": 0.2679144385026738, "grad_norm": 0.3203125, "learning_rate": 1.9495838312693523e-05, "loss": 1.0362, "num_tokens": 5135153248.0, "step": 1503 }, { "epoch": 0.26809269162210336, "grad_norm": 0.267578125, "learning_rate": 1.949498061782756e-05, "loss": 1.0318, "num_tokens": 5141406322.0, "step": 1504 }, { "epoch": 0.26827094474153296, "grad_norm": 0.32421875, "learning_rate": 1.949412221507306e-05, "loss": 1.0461, "num_tokens": 5147676061.0, "step": 1505 }, { "epoch": 0.26844919786096255, "grad_norm": 0.267578125, "learning_rate": 1.9493263104501562e-05, "loss": 1.0643, "num_tokens": 5153955023.0, "step": 1506 }, { "epoch": 0.26862745098039215, "grad_norm": 0.291015625, "learning_rate": 1.9492403286184655e-05, "loss": 1.0533, "num_tokens": 5160237971.0, "step": 1507 }, { "epoch": 0.26880570409982174, "grad_norm": 0.26171875, "learning_rate": 1.9491542760193984e-05, "loss": 1.0709, "num_tokens": 5166499700.0, "step": 1508 }, { "epoch": 0.26898395721925134, "grad_norm": 0.30078125, "learning_rate": 1.9490681526601262e-05, "loss": 1.0716, "num_tokens": 5172747997.0, "step": 1509 }, { "epoch": 0.26916221033868093, "grad_norm": 0.275390625, "learning_rate": 1.9489819585478246e-05, "loss": 1.0511, "num_tokens": 5178995058.0, "step": 1510 }, { "epoch": 0.2693404634581105, "grad_norm": 0.3046875, "learning_rate": 1.9488956936896772e-05, "loss": 0.9972, "num_tokens": 5185250889.0, "step": 1511 }, { "epoch": 0.2695187165775401, "grad_norm": 0.24609375, "learning_rate": 1.9488093580928722e-05, "loss": 1.0591, "num_tokens": 5191532757.0, "step": 1512 }, { "epoch": 0.2696969696969697, "grad_norm": 0.3203125, "learning_rate": 1.9487229517646037e-05, "loss": 1.048, "num_tokens": 5197786981.0, "step": 1513 }, { "epoch": 0.2698752228163993, "grad_norm": 0.2578125, "learning_rate": 1.9486364747120724e-05, "loss": 1.0644, "num_tokens": 5204070601.0, "step": 1514 }, { "epoch": 0.2700534759358289, "grad_norm": 0.30859375, "learning_rate": 1.948549926942484e-05, "loss": 1.0616, "num_tokens": 5210353607.0, "step": 1515 }, { "epoch": 0.27023172905525844, "grad_norm": 0.259765625, "learning_rate": 1.9484633084630508e-05, "loss": 1.0643, "num_tokens": 5216625535.0, "step": 1516 }, { "epoch": 0.27040998217468803, "grad_norm": 0.330078125, "learning_rate": 1.9483766192809905e-05, "loss": 1.0402, "num_tokens": 5222911443.0, "step": 1517 }, { "epoch": 0.27058823529411763, "grad_norm": 0.25390625, "learning_rate": 1.9482898594035276e-05, "loss": 1.0577, "num_tokens": 5229195244.0, "step": 1518 }, { "epoch": 0.2707664884135472, "grad_norm": 0.30859375, "learning_rate": 1.948203028837891e-05, "loss": 1.064, "num_tokens": 5235478314.0, "step": 1519 }, { "epoch": 0.2709447415329768, "grad_norm": 0.26953125, "learning_rate": 1.9481161275913162e-05, "loss": 1.0511, "num_tokens": 5241761530.0, "step": 1520 }, { "epoch": 0.2711229946524064, "grad_norm": 0.2890625, "learning_rate": 1.9480291556710456e-05, "loss": 1.0625, "num_tokens": 5248026963.0, "step": 1521 }, { "epoch": 0.271301247771836, "grad_norm": 0.28515625, "learning_rate": 1.9479421130843262e-05, "loss": 1.0662, "num_tokens": 5254311324.0, "step": 1522 }, { "epoch": 0.2714795008912656, "grad_norm": 0.310546875, "learning_rate": 1.947854999838411e-05, "loss": 1.0463, "num_tokens": 5260596501.0, "step": 1523 }, { "epoch": 0.2716577540106952, "grad_norm": 0.265625, "learning_rate": 1.947767815940559e-05, "loss": 1.0072, "num_tokens": 5266847116.0, "step": 1524 }, { "epoch": 0.2718360071301248, "grad_norm": 0.2490234375, "learning_rate": 1.9476805613980363e-05, "loss": 1.0579, "num_tokens": 5273088921.0, "step": 1525 }, { "epoch": 0.2720142602495544, "grad_norm": 0.29296875, "learning_rate": 1.947593236218113e-05, "loss": 1.0551, "num_tokens": 5279352252.0, "step": 1526 }, { "epoch": 0.272192513368984, "grad_norm": 0.28125, "learning_rate": 1.9475058404080657e-05, "loss": 1.0413, "num_tokens": 5285635246.0, "step": 1527 }, { "epoch": 0.2723707664884136, "grad_norm": 0.259765625, "learning_rate": 1.9474183739751777e-05, "loss": 1.0644, "num_tokens": 5291915068.0, "step": 1528 }, { "epoch": 0.2725490196078431, "grad_norm": 0.2392578125, "learning_rate": 1.9473308369267375e-05, "loss": 1.058, "num_tokens": 5298179945.0, "step": 1529 }, { "epoch": 0.2727272727272727, "grad_norm": 0.279296875, "learning_rate": 1.947243229270039e-05, "loss": 1.0554, "num_tokens": 5304463832.0, "step": 1530 }, { "epoch": 0.2729055258467023, "grad_norm": 0.2578125, "learning_rate": 1.9471555510123836e-05, "loss": 1.0692, "num_tokens": 5310746212.0, "step": 1531 }, { "epoch": 0.2730837789661319, "grad_norm": 0.2578125, "learning_rate": 1.9470678021610768e-05, "loss": 1.0411, "num_tokens": 5317012745.0, "step": 1532 }, { "epoch": 0.2732620320855615, "grad_norm": 0.287109375, "learning_rate": 1.946979982723431e-05, "loss": 1.0969, "num_tokens": 5323295222.0, "step": 1533 }, { "epoch": 0.2734402852049911, "grad_norm": 0.267578125, "learning_rate": 1.9468920927067638e-05, "loss": 1.0521, "num_tokens": 5329577294.0, "step": 1534 }, { "epoch": 0.2736185383244207, "grad_norm": 0.255859375, "learning_rate": 1.9468041321183993e-05, "loss": 1.0406, "num_tokens": 5335844841.0, "step": 1535 }, { "epoch": 0.2737967914438503, "grad_norm": 0.25390625, "learning_rate": 1.946716100965668e-05, "loss": 1.0576, "num_tokens": 5342115118.0, "step": 1536 }, { "epoch": 0.27397504456327987, "grad_norm": 0.2890625, "learning_rate": 1.946627999255904e-05, "loss": 1.0246, "num_tokens": 5348400066.0, "step": 1537 }, { "epoch": 0.27415329768270946, "grad_norm": 0.2421875, "learning_rate": 1.9465398269964508e-05, "loss": 1.0898, "num_tokens": 5354684451.0, "step": 1538 }, { "epoch": 0.27433155080213906, "grad_norm": 0.294921875, "learning_rate": 1.9464515841946542e-05, "loss": 1.0673, "num_tokens": 5360944995.0, "step": 1539 }, { "epoch": 0.27450980392156865, "grad_norm": 0.29296875, "learning_rate": 1.9463632708578682e-05, "loss": 1.0303, "num_tokens": 5367215182.0, "step": 1540 }, { "epoch": 0.2746880570409982, "grad_norm": 0.271484375, "learning_rate": 1.9462748869934516e-05, "loss": 1.0732, "num_tokens": 5373454049.0, "step": 1541 }, { "epoch": 0.2748663101604278, "grad_norm": 0.27734375, "learning_rate": 1.94618643260877e-05, "loss": 1.0481, "num_tokens": 5379710316.0, "step": 1542 }, { "epoch": 0.2750445632798574, "grad_norm": 0.2373046875, "learning_rate": 1.946097907711194e-05, "loss": 1.0501, "num_tokens": 5385994194.0, "step": 1543 }, { "epoch": 0.275222816399287, "grad_norm": 0.28515625, "learning_rate": 1.9460093123081007e-05, "loss": 1.0649, "num_tokens": 5392277467.0, "step": 1544 }, { "epoch": 0.27540106951871657, "grad_norm": 0.259765625, "learning_rate": 1.945920646406872e-05, "loss": 1.0589, "num_tokens": 5398537349.0, "step": 1545 }, { "epoch": 0.27557932263814616, "grad_norm": 0.267578125, "learning_rate": 1.9458319100148973e-05, "loss": 1.0364, "num_tokens": 5404822353.0, "step": 1546 }, { "epoch": 0.27575757575757576, "grad_norm": 0.2451171875, "learning_rate": 1.9457431031395702e-05, "loss": 1.0603, "num_tokens": 5411107738.0, "step": 1547 }, { "epoch": 0.27593582887700535, "grad_norm": 0.32421875, "learning_rate": 1.945654225788292e-05, "loss": 1.0428, "num_tokens": 5417392153.0, "step": 1548 }, { "epoch": 0.27611408199643495, "grad_norm": 0.275390625, "learning_rate": 1.945565277968468e-05, "loss": 1.0721, "num_tokens": 5423653033.0, "step": 1549 }, { "epoch": 0.27629233511586454, "grad_norm": 0.310546875, "learning_rate": 1.9454762596875107e-05, "loss": 1.0496, "num_tokens": 5429936441.0, "step": 1550 }, { "epoch": 0.27647058823529413, "grad_norm": 0.279296875, "learning_rate": 1.9453871709528377e-05, "loss": 1.0896, "num_tokens": 5436164945.0, "step": 1551 }, { "epoch": 0.27664884135472373, "grad_norm": 0.267578125, "learning_rate": 1.945298011771873e-05, "loss": 1.0587, "num_tokens": 5442391902.0, "step": 1552 }, { "epoch": 0.2768270944741533, "grad_norm": 0.271484375, "learning_rate": 1.945208782152046e-05, "loss": 1.0461, "num_tokens": 5448640984.0, "step": 1553 }, { "epoch": 0.27700534759358286, "grad_norm": 0.271484375, "learning_rate": 1.945119482100793e-05, "loss": 1.0758, "num_tokens": 5454925655.0, "step": 1554 }, { "epoch": 0.27718360071301246, "grad_norm": 0.33203125, "learning_rate": 1.9450301116255545e-05, "loss": 1.0397, "num_tokens": 5461210698.0, "step": 1555 }, { "epoch": 0.27736185383244205, "grad_norm": 0.263671875, "learning_rate": 1.944940670733778e-05, "loss": 1.0834, "num_tokens": 5467495860.0, "step": 1556 }, { "epoch": 0.27754010695187165, "grad_norm": 0.365234375, "learning_rate": 1.9448511594329167e-05, "loss": 1.0518, "num_tokens": 5473759428.0, "step": 1557 }, { "epoch": 0.27771836007130124, "grad_norm": 0.32421875, "learning_rate": 1.9447615777304298e-05, "loss": 1.0764, "num_tokens": 5480043653.0, "step": 1558 }, { "epoch": 0.27789661319073083, "grad_norm": 0.328125, "learning_rate": 1.9446719256337818e-05, "loss": 1.0629, "num_tokens": 5486295397.0, "step": 1559 }, { "epoch": 0.27807486631016043, "grad_norm": 0.326171875, "learning_rate": 1.9445822031504438e-05, "loss": 1.0543, "num_tokens": 5492561248.0, "step": 1560 }, { "epoch": 0.27825311942959, "grad_norm": 0.29296875, "learning_rate": 1.944492410287892e-05, "loss": 1.0582, "num_tokens": 5498834556.0, "step": 1561 }, { "epoch": 0.2784313725490196, "grad_norm": 0.287109375, "learning_rate": 1.9444025470536093e-05, "loss": 1.0517, "num_tokens": 5505118665.0, "step": 1562 }, { "epoch": 0.2786096256684492, "grad_norm": 0.3125, "learning_rate": 1.9443126134550836e-05, "loss": 1.0493, "num_tokens": 5511400732.0, "step": 1563 }, { "epoch": 0.2787878787878788, "grad_norm": 0.3125, "learning_rate": 1.9442226094998092e-05, "loss": 1.0513, "num_tokens": 5517618112.0, "step": 1564 }, { "epoch": 0.2789661319073084, "grad_norm": 0.28125, "learning_rate": 1.9441325351952864e-05, "loss": 1.0501, "num_tokens": 5523885904.0, "step": 1565 }, { "epoch": 0.279144385026738, "grad_norm": 0.291015625, "learning_rate": 1.944042390549021e-05, "loss": 1.0598, "num_tokens": 5530169344.0, "step": 1566 }, { "epoch": 0.27932263814616753, "grad_norm": 0.275390625, "learning_rate": 1.9439521755685245e-05, "loss": 1.0286, "num_tokens": 5536431843.0, "step": 1567 }, { "epoch": 0.27950089126559713, "grad_norm": 0.2734375, "learning_rate": 1.9438618902613145e-05, "loss": 1.0519, "num_tokens": 5542717094.0, "step": 1568 }, { "epoch": 0.2796791443850267, "grad_norm": 0.28515625, "learning_rate": 1.943771534634915e-05, "loss": 1.0527, "num_tokens": 5549002425.0, "step": 1569 }, { "epoch": 0.2798573975044563, "grad_norm": 0.275390625, "learning_rate": 1.943681108696855e-05, "loss": 1.0536, "num_tokens": 5555272759.0, "step": 1570 }, { "epoch": 0.2800356506238859, "grad_norm": 0.3125, "learning_rate": 1.9435906124546696e-05, "loss": 1.0563, "num_tokens": 5561557170.0, "step": 1571 }, { "epoch": 0.2802139037433155, "grad_norm": 0.296875, "learning_rate": 1.9435000459159003e-05, "loss": 1.0548, "num_tokens": 5567841832.0, "step": 1572 }, { "epoch": 0.2803921568627451, "grad_norm": 0.33984375, "learning_rate": 1.943409409088094e-05, "loss": 1.0725, "num_tokens": 5574115443.0, "step": 1573 }, { "epoch": 0.2805704099821747, "grad_norm": 0.294921875, "learning_rate": 1.9433187019788028e-05, "loss": 1.0597, "num_tokens": 5580364114.0, "step": 1574 }, { "epoch": 0.2807486631016043, "grad_norm": 0.26171875, "learning_rate": 1.9432279245955863e-05, "loss": 1.0338, "num_tokens": 5586645163.0, "step": 1575 }, { "epoch": 0.2809269162210339, "grad_norm": 0.271484375, "learning_rate": 1.943137076946008e-05, "loss": 1.0377, "num_tokens": 5592915408.0, "step": 1576 }, { "epoch": 0.2811051693404635, "grad_norm": 0.275390625, "learning_rate": 1.9430461590376394e-05, "loss": 1.0557, "num_tokens": 5599132753.0, "step": 1577 }, { "epoch": 0.2812834224598931, "grad_norm": 0.28125, "learning_rate": 1.9429551708780555e-05, "loss": 1.0556, "num_tokens": 5605362447.0, "step": 1578 }, { "epoch": 0.2814616755793226, "grad_norm": 0.251953125, "learning_rate": 1.942864112474839e-05, "loss": 1.0456, "num_tokens": 5611619861.0, "step": 1579 }, { "epoch": 0.2816399286987522, "grad_norm": 0.283203125, "learning_rate": 1.942772983835578e-05, "loss": 1.0123, "num_tokens": 5617878932.0, "step": 1580 }, { "epoch": 0.2818181818181818, "grad_norm": 0.251953125, "learning_rate": 1.942681784967866e-05, "loss": 1.0652, "num_tokens": 5624134023.0, "step": 1581 }, { "epoch": 0.2819964349376114, "grad_norm": 0.30859375, "learning_rate": 1.9425905158793026e-05, "loss": 1.0709, "num_tokens": 5630407163.0, "step": 1582 }, { "epoch": 0.282174688057041, "grad_norm": 0.2490234375, "learning_rate": 1.9424991765774935e-05, "loss": 1.0707, "num_tokens": 5636633191.0, "step": 1583 }, { "epoch": 0.2823529411764706, "grad_norm": 0.298828125, "learning_rate": 1.9424077670700494e-05, "loss": 1.0591, "num_tokens": 5642919513.0, "step": 1584 }, { "epoch": 0.2825311942959002, "grad_norm": 0.2578125, "learning_rate": 1.942316287364588e-05, "loss": 1.0635, "num_tokens": 5649204527.0, "step": 1585 }, { "epoch": 0.2827094474153298, "grad_norm": 0.302734375, "learning_rate": 1.9422247374687325e-05, "loss": 1.0185, "num_tokens": 5655472716.0, "step": 1586 }, { "epoch": 0.28288770053475937, "grad_norm": 0.283203125, "learning_rate": 1.9421331173901114e-05, "loss": 1.0466, "num_tokens": 5661757058.0, "step": 1587 }, { "epoch": 0.28306595365418896, "grad_norm": 0.271484375, "learning_rate": 1.9420414271363596e-05, "loss": 1.0422, "num_tokens": 5668040845.0, "step": 1588 }, { "epoch": 0.28324420677361856, "grad_norm": 0.326171875, "learning_rate": 1.9419496667151174e-05, "loss": 1.0484, "num_tokens": 5674324164.0, "step": 1589 }, { "epoch": 0.28342245989304815, "grad_norm": 0.25390625, "learning_rate": 1.9418578361340315e-05, "loss": 1.0638, "num_tokens": 5680606629.0, "step": 1590 }, { "epoch": 0.28360071301247775, "grad_norm": 0.29296875, "learning_rate": 1.941765935400754e-05, "loss": 1.0992, "num_tokens": 5686885275.0, "step": 1591 }, { "epoch": 0.2837789661319073, "grad_norm": 0.265625, "learning_rate": 1.941673964522943e-05, "loss": 1.0734, "num_tokens": 5693164992.0, "step": 1592 }, { "epoch": 0.2839572192513369, "grad_norm": 0.27734375, "learning_rate": 1.941581923508263e-05, "loss": 1.1025, "num_tokens": 5699446442.0, "step": 1593 }, { "epoch": 0.2841354723707665, "grad_norm": 0.25390625, "learning_rate": 1.941489812364383e-05, "loss": 1.0625, "num_tokens": 5705691121.0, "step": 1594 }, { "epoch": 0.28431372549019607, "grad_norm": 0.271484375, "learning_rate": 1.941397631098979e-05, "loss": 1.0499, "num_tokens": 5711962679.0, "step": 1595 }, { "epoch": 0.28449197860962566, "grad_norm": 0.291015625, "learning_rate": 1.9413053797197327e-05, "loss": 1.0332, "num_tokens": 5718247025.0, "step": 1596 }, { "epoch": 0.28467023172905526, "grad_norm": 0.263671875, "learning_rate": 1.941213058234331e-05, "loss": 1.065, "num_tokens": 5724530271.0, "step": 1597 }, { "epoch": 0.28484848484848485, "grad_norm": 0.298828125, "learning_rate": 1.941120666650467e-05, "loss": 1.078, "num_tokens": 5730803615.0, "step": 1598 }, { "epoch": 0.28502673796791445, "grad_norm": 0.265625, "learning_rate": 1.9410282049758402e-05, "loss": 1.0216, "num_tokens": 5737061376.0, "step": 1599 }, { "epoch": 0.28520499108734404, "grad_norm": 0.26953125, "learning_rate": 1.940935673218155e-05, "loss": 1.0704, "num_tokens": 5743345308.0, "step": 1600 }, { "epoch": 0.28538324420677363, "grad_norm": 0.25, "learning_rate": 1.940843071385123e-05, "loss": 1.1023, "num_tokens": 5749627973.0, "step": 1601 }, { "epoch": 0.28556149732620323, "grad_norm": 0.248046875, "learning_rate": 1.9407503994844593e-05, "loss": 1.0308, "num_tokens": 5755898838.0, "step": 1602 }, { "epoch": 0.2857397504456328, "grad_norm": 0.2578125, "learning_rate": 1.940657657523887e-05, "loss": 1.0407, "num_tokens": 5762181916.0, "step": 1603 }, { "epoch": 0.28591800356506236, "grad_norm": 0.2734375, "learning_rate": 1.940564845511135e-05, "loss": 1.041, "num_tokens": 5768440224.0, "step": 1604 }, { "epoch": 0.28609625668449196, "grad_norm": 0.240234375, "learning_rate": 1.9404719634539363e-05, "loss": 1.0612, "num_tokens": 5774722821.0, "step": 1605 }, { "epoch": 0.28627450980392155, "grad_norm": 0.255859375, "learning_rate": 1.9403790113600306e-05, "loss": 1.0183, "num_tokens": 5780978757.0, "step": 1606 }, { "epoch": 0.28645276292335115, "grad_norm": 0.27734375, "learning_rate": 1.9402859892371648e-05, "loss": 1.0553, "num_tokens": 5787249433.0, "step": 1607 }, { "epoch": 0.28663101604278074, "grad_norm": 0.259765625, "learning_rate": 1.9401928970930897e-05, "loss": 1.0253, "num_tokens": 5793533263.0, "step": 1608 }, { "epoch": 0.28680926916221033, "grad_norm": 0.2490234375, "learning_rate": 1.9400997349355627e-05, "loss": 1.0507, "num_tokens": 5799787488.0, "step": 1609 }, { "epoch": 0.28698752228163993, "grad_norm": 0.255859375, "learning_rate": 1.9400065027723468e-05, "loss": 1.0426, "num_tokens": 5806061653.0, "step": 1610 }, { "epoch": 0.2871657754010695, "grad_norm": 0.2353515625, "learning_rate": 1.9399132006112118e-05, "loss": 1.052, "num_tokens": 5812320763.0, "step": 1611 }, { "epoch": 0.2873440285204991, "grad_norm": 0.291015625, "learning_rate": 1.9398198284599324e-05, "loss": 1.0286, "num_tokens": 5818605492.0, "step": 1612 }, { "epoch": 0.2875222816399287, "grad_norm": 0.26953125, "learning_rate": 1.9397263863262888e-05, "loss": 1.057, "num_tokens": 5824887515.0, "step": 1613 }, { "epoch": 0.2877005347593583, "grad_norm": 0.296875, "learning_rate": 1.9396328742180676e-05, "loss": 1.0475, "num_tokens": 5831172015.0, "step": 1614 }, { "epoch": 0.2878787878787879, "grad_norm": 0.275390625, "learning_rate": 1.9395392921430614e-05, "loss": 1.0124, "num_tokens": 5837403576.0, "step": 1615 }, { "epoch": 0.2880570409982175, "grad_norm": 0.25, "learning_rate": 1.939445640109069e-05, "loss": 1.0408, "num_tokens": 5843688063.0, "step": 1616 }, { "epoch": 0.28823529411764703, "grad_norm": 0.263671875, "learning_rate": 1.9393519181238934e-05, "loss": 1.0553, "num_tokens": 5849973038.0, "step": 1617 }, { "epoch": 0.28841354723707663, "grad_norm": 0.26171875, "learning_rate": 1.9392581261953453e-05, "loss": 1.0505, "num_tokens": 5856231487.0, "step": 1618 }, { "epoch": 0.2885918003565062, "grad_norm": 0.255859375, "learning_rate": 1.9391642643312397e-05, "loss": 1.0502, "num_tokens": 5862517065.0, "step": 1619 }, { "epoch": 0.2887700534759358, "grad_norm": 0.28125, "learning_rate": 1.9390703325393984e-05, "loss": 1.054, "num_tokens": 5868782398.0, "step": 1620 }, { "epoch": 0.2889483065953654, "grad_norm": 0.26171875, "learning_rate": 1.9389763308276493e-05, "loss": 1.0478, "num_tokens": 5875024932.0, "step": 1621 }, { "epoch": 0.289126559714795, "grad_norm": 0.2490234375, "learning_rate": 1.938882259203825e-05, "loss": 1.04, "num_tokens": 5881281528.0, "step": 1622 }, { "epoch": 0.2893048128342246, "grad_norm": 0.275390625, "learning_rate": 1.9387881176757642e-05, "loss": 1.0336, "num_tokens": 5887562273.0, "step": 1623 }, { "epoch": 0.2894830659536542, "grad_norm": 0.2373046875, "learning_rate": 1.9386939062513124e-05, "loss": 1.0498, "num_tokens": 5893847655.0, "step": 1624 }, { "epoch": 0.2896613190730838, "grad_norm": 0.279296875, "learning_rate": 1.9385996249383203e-05, "loss": 1.027, "num_tokens": 5900111016.0, "step": 1625 }, { "epoch": 0.2898395721925134, "grad_norm": 0.25, "learning_rate": 1.938505273744644e-05, "loss": 1.0345, "num_tokens": 5906372766.0, "step": 1626 }, { "epoch": 0.290017825311943, "grad_norm": 0.298828125, "learning_rate": 1.9384108526781457e-05, "loss": 1.0524, "num_tokens": 5912575085.0, "step": 1627 }, { "epoch": 0.2901960784313726, "grad_norm": 0.248046875, "learning_rate": 1.938316361746694e-05, "loss": 1.0424, "num_tokens": 5918829659.0, "step": 1628 }, { "epoch": 0.2903743315508021, "grad_norm": 0.291015625, "learning_rate": 1.9382218009581625e-05, "loss": 1.0278, "num_tokens": 5925115399.0, "step": 1629 }, { "epoch": 0.2905525846702317, "grad_norm": 0.244140625, "learning_rate": 1.9381271703204308e-05, "loss": 1.0484, "num_tokens": 5931400270.0, "step": 1630 }, { "epoch": 0.2907308377896613, "grad_norm": 0.29296875, "learning_rate": 1.9380324698413848e-05, "loss": 1.0486, "num_tokens": 5937683565.0, "step": 1631 }, { "epoch": 0.2909090909090909, "grad_norm": 0.255859375, "learning_rate": 1.9379376995289162e-05, "loss": 1.0243, "num_tokens": 5943946122.0, "step": 1632 }, { "epoch": 0.2910873440285205, "grad_norm": 0.306640625, "learning_rate": 1.9378428593909217e-05, "loss": 1.0708, "num_tokens": 5950211275.0, "step": 1633 }, { "epoch": 0.2912655971479501, "grad_norm": 0.271484375, "learning_rate": 1.9377479494353044e-05, "loss": 1.0185, "num_tokens": 5956463299.0, "step": 1634 }, { "epoch": 0.2914438502673797, "grad_norm": 0.259765625, "learning_rate": 1.9376529696699733e-05, "loss": 1.0639, "num_tokens": 5962721967.0, "step": 1635 }, { "epoch": 0.2916221033868093, "grad_norm": 0.279296875, "learning_rate": 1.937557920102843e-05, "loss": 1.0544, "num_tokens": 5968985845.0, "step": 1636 }, { "epoch": 0.29180035650623887, "grad_norm": 0.26953125, "learning_rate": 1.9374628007418344e-05, "loss": 1.0393, "num_tokens": 5975270354.0, "step": 1637 }, { "epoch": 0.29197860962566846, "grad_norm": 0.259765625, "learning_rate": 1.9373676115948736e-05, "loss": 1.0564, "num_tokens": 5981534545.0, "step": 1638 }, { "epoch": 0.29215686274509806, "grad_norm": 0.283203125, "learning_rate": 1.9372723526698925e-05, "loss": 1.0779, "num_tokens": 5987817354.0, "step": 1639 }, { "epoch": 0.29233511586452765, "grad_norm": 0.2431640625, "learning_rate": 1.937177023974829e-05, "loss": 1.0271, "num_tokens": 5994081562.0, "step": 1640 }, { "epoch": 0.29251336898395724, "grad_norm": 0.2890625, "learning_rate": 1.937081625517627e-05, "loss": 1.0532, "num_tokens": 6000359583.0, "step": 1641 }, { "epoch": 0.2926916221033868, "grad_norm": 0.25390625, "learning_rate": 1.9369861573062365e-05, "loss": 1.0559, "num_tokens": 6006642771.0, "step": 1642 }, { "epoch": 0.2928698752228164, "grad_norm": 0.2333984375, "learning_rate": 1.936890619348612e-05, "loss": 1.0643, "num_tokens": 6012916944.0, "step": 1643 }, { "epoch": 0.293048128342246, "grad_norm": 0.259765625, "learning_rate": 1.9367950116527158e-05, "loss": 1.0391, "num_tokens": 6019199715.0, "step": 1644 }, { "epoch": 0.29322638146167557, "grad_norm": 0.26171875, "learning_rate": 1.9366993342265137e-05, "loss": 1.0611, "num_tokens": 6025458165.0, "step": 1645 }, { "epoch": 0.29340463458110516, "grad_norm": 0.26171875, "learning_rate": 1.9366035870779797e-05, "loss": 1.0381, "num_tokens": 6031740470.0, "step": 1646 }, { "epoch": 0.29358288770053476, "grad_norm": 0.275390625, "learning_rate": 1.9365077702150915e-05, "loss": 1.0699, "num_tokens": 6037990070.0, "step": 1647 }, { "epoch": 0.29376114081996435, "grad_norm": 0.24609375, "learning_rate": 1.9364118836458336e-05, "loss": 1.0579, "num_tokens": 6044273955.0, "step": 1648 }, { "epoch": 0.29393939393939394, "grad_norm": 0.255859375, "learning_rate": 1.9363159273781973e-05, "loss": 1.0415, "num_tokens": 6050493001.0, "step": 1649 }, { "epoch": 0.29411764705882354, "grad_norm": 0.263671875, "learning_rate": 1.936219901420177e-05, "loss": 1.0402, "num_tokens": 6056736557.0, "step": 1650 }, { "epoch": 0.29429590017825313, "grad_norm": 0.27734375, "learning_rate": 1.936123805779776e-05, "loss": 1.0698, "num_tokens": 6062994597.0, "step": 1651 }, { "epoch": 0.29447415329768273, "grad_norm": 0.26171875, "learning_rate": 1.9360276404650017e-05, "loss": 1.0673, "num_tokens": 6069278648.0, "step": 1652 }, { "epoch": 0.2946524064171123, "grad_norm": 0.2890625, "learning_rate": 1.9359314054838673e-05, "loss": 1.0461, "num_tokens": 6075564059.0, "step": 1653 }, { "epoch": 0.2948306595365419, "grad_norm": 0.251953125, "learning_rate": 1.9358351008443916e-05, "loss": 1.0654, "num_tokens": 6081847438.0, "step": 1654 }, { "epoch": 0.29500891265597146, "grad_norm": 0.275390625, "learning_rate": 1.9357387265546004e-05, "loss": 1.0322, "num_tokens": 6088132357.0, "step": 1655 }, { "epoch": 0.29518716577540105, "grad_norm": 0.26953125, "learning_rate": 1.9356422826225244e-05, "loss": 1.0818, "num_tokens": 6094416175.0, "step": 1656 }, { "epoch": 0.29536541889483064, "grad_norm": 0.265625, "learning_rate": 1.9355457690562e-05, "loss": 1.0268, "num_tokens": 6100699977.0, "step": 1657 }, { "epoch": 0.29554367201426024, "grad_norm": 0.27734375, "learning_rate": 1.9354491858636707e-05, "loss": 1.0727, "num_tokens": 6106976649.0, "step": 1658 }, { "epoch": 0.29572192513368983, "grad_norm": 0.2470703125, "learning_rate": 1.9353525330529832e-05, "loss": 1.0402, "num_tokens": 6113210872.0, "step": 1659 }, { "epoch": 0.29590017825311943, "grad_norm": 0.22265625, "learning_rate": 1.935255810632193e-05, "loss": 1.0282, "num_tokens": 6119496929.0, "step": 1660 }, { "epoch": 0.296078431372549, "grad_norm": 0.25390625, "learning_rate": 1.935159018609359e-05, "loss": 1.0778, "num_tokens": 6125778198.0, "step": 1661 }, { "epoch": 0.2962566844919786, "grad_norm": 0.2353515625, "learning_rate": 1.935062156992548e-05, "loss": 1.0444, "num_tokens": 6132061796.0, "step": 1662 }, { "epoch": 0.2964349376114082, "grad_norm": 0.275390625, "learning_rate": 1.9349652257898304e-05, "loss": 1.0501, "num_tokens": 6138345903.0, "step": 1663 }, { "epoch": 0.2966131907308378, "grad_norm": 0.232421875, "learning_rate": 1.9348682250092838e-05, "loss": 1.0037, "num_tokens": 6144613383.0, "step": 1664 }, { "epoch": 0.2967914438502674, "grad_norm": 0.26171875, "learning_rate": 1.9347711546589918e-05, "loss": 1.0379, "num_tokens": 6150866625.0, "step": 1665 }, { "epoch": 0.296969696969697, "grad_norm": 0.240234375, "learning_rate": 1.934674014747043e-05, "loss": 1.0349, "num_tokens": 6157099248.0, "step": 1666 }, { "epoch": 0.29714795008912653, "grad_norm": 0.26953125, "learning_rate": 1.934576805281532e-05, "loss": 1.0441, "num_tokens": 6163353579.0, "step": 1667 }, { "epoch": 0.29732620320855613, "grad_norm": 0.25, "learning_rate": 1.934479526270559e-05, "loss": 1.0499, "num_tokens": 6169635558.0, "step": 1668 }, { "epoch": 0.2975044563279857, "grad_norm": 0.26171875, "learning_rate": 1.9343821777222308e-05, "loss": 1.0504, "num_tokens": 6175903639.0, "step": 1669 }, { "epoch": 0.2976827094474153, "grad_norm": 0.236328125, "learning_rate": 1.9342847596446594e-05, "loss": 1.0682, "num_tokens": 6182171894.0, "step": 1670 }, { "epoch": 0.2978609625668449, "grad_norm": 0.26953125, "learning_rate": 1.934187272045962e-05, "loss": 1.0575, "num_tokens": 6188456048.0, "step": 1671 }, { "epoch": 0.2980392156862745, "grad_norm": 0.2392578125, "learning_rate": 1.9340897149342634e-05, "loss": 1.0309, "num_tokens": 6194710231.0, "step": 1672 }, { "epoch": 0.2982174688057041, "grad_norm": 0.279296875, "learning_rate": 1.9339920883176923e-05, "loss": 1.0519, "num_tokens": 6200994724.0, "step": 1673 }, { "epoch": 0.2983957219251337, "grad_norm": 0.25, "learning_rate": 1.9338943922043837e-05, "loss": 1.0617, "num_tokens": 6207245961.0, "step": 1674 }, { "epoch": 0.2985739750445633, "grad_norm": 0.255859375, "learning_rate": 1.9337966266024796e-05, "loss": 1.0482, "num_tokens": 6213503039.0, "step": 1675 }, { "epoch": 0.2987522281639929, "grad_norm": 0.25, "learning_rate": 1.9336987915201263e-05, "loss": 1.0516, "num_tokens": 6219786221.0, "step": 1676 }, { "epoch": 0.2989304812834225, "grad_norm": 0.26171875, "learning_rate": 1.933600886965476e-05, "loss": 1.0937, "num_tokens": 6226051407.0, "step": 1677 }, { "epoch": 0.2991087344028521, "grad_norm": 0.265625, "learning_rate": 1.9335029129466875e-05, "loss": 1.0597, "num_tokens": 6232330337.0, "step": 1678 }, { "epoch": 0.29928698752228167, "grad_norm": 0.25, "learning_rate": 1.9334048694719252e-05, "loss": 1.0522, "num_tokens": 6238614075.0, "step": 1679 }, { "epoch": 0.2994652406417112, "grad_norm": 0.2578125, "learning_rate": 1.9333067565493588e-05, "loss": 1.0495, "num_tokens": 6244891388.0, "step": 1680 }, { "epoch": 0.2996434937611408, "grad_norm": 0.23828125, "learning_rate": 1.933208574187164e-05, "loss": 1.0661, "num_tokens": 6251175185.0, "step": 1681 }, { "epoch": 0.2998217468805704, "grad_norm": 0.26171875, "learning_rate": 1.9331103223935227e-05, "loss": 1.0792, "num_tokens": 6257457313.0, "step": 1682 }, { "epoch": 0.3, "grad_norm": 0.26171875, "learning_rate": 1.9330120011766224e-05, "loss": 1.0781, "num_tokens": 6263724829.0, "step": 1683 }, { "epoch": 0.3001782531194296, "grad_norm": 0.240234375, "learning_rate": 1.932913610544655e-05, "loss": 1.0512, "num_tokens": 6269992400.0, "step": 1684 }, { "epoch": 0.3003565062388592, "grad_norm": 0.2890625, "learning_rate": 1.932815150505821e-05, "loss": 1.0696, "num_tokens": 6276276016.0, "step": 1685 }, { "epoch": 0.30053475935828877, "grad_norm": 0.24609375, "learning_rate": 1.932716621068324e-05, "loss": 1.0617, "num_tokens": 6282520904.0, "step": 1686 }, { "epoch": 0.30071301247771837, "grad_norm": 0.298828125, "learning_rate": 1.9326180222403754e-05, "loss": 1.0672, "num_tokens": 6288803161.0, "step": 1687 }, { "epoch": 0.30089126559714796, "grad_norm": 0.279296875, "learning_rate": 1.93251935403019e-05, "loss": 1.0493, "num_tokens": 6295086430.0, "step": 1688 }, { "epoch": 0.30106951871657756, "grad_norm": 0.28515625, "learning_rate": 1.9324206164459914e-05, "loss": 1.0706, "num_tokens": 6301353112.0, "step": 1689 }, { "epoch": 0.30124777183600715, "grad_norm": 0.26953125, "learning_rate": 1.9323218094960065e-05, "loss": 1.0769, "num_tokens": 6307637213.0, "step": 1690 }, { "epoch": 0.30142602495543674, "grad_norm": 0.298828125, "learning_rate": 1.9322229331884695e-05, "loss": 1.0272, "num_tokens": 6313918995.0, "step": 1691 }, { "epoch": 0.3016042780748663, "grad_norm": 0.2490234375, "learning_rate": 1.932123987531619e-05, "loss": 1.0427, "num_tokens": 6320201209.0, "step": 1692 }, { "epoch": 0.3017825311942959, "grad_norm": 0.248046875, "learning_rate": 1.9320249725337012e-05, "loss": 1.0321, "num_tokens": 6326477780.0, "step": 1693 }, { "epoch": 0.30196078431372547, "grad_norm": 0.263671875, "learning_rate": 1.931925888202966e-05, "loss": 1.0586, "num_tokens": 6332704910.0, "step": 1694 }, { "epoch": 0.30213903743315507, "grad_norm": 0.24609375, "learning_rate": 1.931826734547671e-05, "loss": 1.0687, "num_tokens": 6338988654.0, "step": 1695 }, { "epoch": 0.30231729055258466, "grad_norm": 0.271484375, "learning_rate": 1.9317275115760785e-05, "loss": 1.0379, "num_tokens": 6345266309.0, "step": 1696 }, { "epoch": 0.30249554367201426, "grad_norm": 0.2373046875, "learning_rate": 1.9316282192964562e-05, "loss": 1.0561, "num_tokens": 6351546403.0, "step": 1697 }, { "epoch": 0.30267379679144385, "grad_norm": 0.275390625, "learning_rate": 1.931528857717078e-05, "loss": 1.0318, "num_tokens": 6357818868.0, "step": 1698 }, { "epoch": 0.30285204991087344, "grad_norm": 0.255859375, "learning_rate": 1.931429426846225e-05, "loss": 1.0445, "num_tokens": 6364086384.0, "step": 1699 }, { "epoch": 0.30303030303030304, "grad_norm": 0.263671875, "learning_rate": 1.9313299266921817e-05, "loss": 1.0603, "num_tokens": 6370370916.0, "step": 1700 }, { "epoch": 0.30320855614973263, "grad_norm": 0.25390625, "learning_rate": 1.93123035726324e-05, "loss": 1.0468, "num_tokens": 6376604438.0, "step": 1701 }, { "epoch": 0.3033868092691622, "grad_norm": 0.2314453125, "learning_rate": 1.931130718567697e-05, "loss": 1.018, "num_tokens": 6382874908.0, "step": 1702 }, { "epoch": 0.3035650623885918, "grad_norm": 0.259765625, "learning_rate": 1.931031010613855e-05, "loss": 1.047, "num_tokens": 6389156756.0, "step": 1703 }, { "epoch": 0.3037433155080214, "grad_norm": 0.2470703125, "learning_rate": 1.930931233410023e-05, "loss": 1.046, "num_tokens": 6395441666.0, "step": 1704 }, { "epoch": 0.30392156862745096, "grad_norm": 0.23828125, "learning_rate": 1.9308313869645158e-05, "loss": 1.058, "num_tokens": 6401713321.0, "step": 1705 }, { "epoch": 0.30409982174688055, "grad_norm": 0.265625, "learning_rate": 1.9307314712856537e-05, "loss": 1.0543, "num_tokens": 6407970190.0, "step": 1706 }, { "epoch": 0.30427807486631014, "grad_norm": 0.259765625, "learning_rate": 1.930631486381762e-05, "loss": 1.0438, "num_tokens": 6414255197.0, "step": 1707 }, { "epoch": 0.30445632798573974, "grad_norm": 0.255859375, "learning_rate": 1.9305314322611734e-05, "loss": 1.0263, "num_tokens": 6420538719.0, "step": 1708 }, { "epoch": 0.30463458110516933, "grad_norm": 0.26171875, "learning_rate": 1.9304313089322243e-05, "loss": 1.0083, "num_tokens": 6426791163.0, "step": 1709 }, { "epoch": 0.3048128342245989, "grad_norm": 0.2890625, "learning_rate": 1.930331116403259e-05, "loss": 1.058, "num_tokens": 6433073008.0, "step": 1710 }, { "epoch": 0.3049910873440285, "grad_norm": 0.255859375, "learning_rate": 1.930230854682626e-05, "loss": 1.0672, "num_tokens": 6439356411.0, "step": 1711 }, { "epoch": 0.3051693404634581, "grad_norm": 0.25, "learning_rate": 1.9301305237786805e-05, "loss": 1.0543, "num_tokens": 6445612169.0, "step": 1712 }, { "epoch": 0.3053475935828877, "grad_norm": 0.275390625, "learning_rate": 1.9300301236997828e-05, "loss": 1.0478, "num_tokens": 6451872772.0, "step": 1713 }, { "epoch": 0.3055258467023173, "grad_norm": 0.2734375, "learning_rate": 1.929929654454299e-05, "loss": 1.0311, "num_tokens": 6458156829.0, "step": 1714 }, { "epoch": 0.3057040998217469, "grad_norm": 0.2353515625, "learning_rate": 1.929829116050602e-05, "loss": 1.0548, "num_tokens": 6464440620.0, "step": 1715 }, { "epoch": 0.3058823529411765, "grad_norm": 0.291015625, "learning_rate": 1.929728508497069e-05, "loss": 1.0507, "num_tokens": 6470699999.0, "step": 1716 }, { "epoch": 0.30606060606060603, "grad_norm": 0.26171875, "learning_rate": 1.929627831802084e-05, "loss": 1.0424, "num_tokens": 6476935987.0, "step": 1717 }, { "epoch": 0.3062388591800356, "grad_norm": 0.3203125, "learning_rate": 1.9295270859740365e-05, "loss": 1.043, "num_tokens": 6483205008.0, "step": 1718 }, { "epoch": 0.3064171122994652, "grad_norm": 0.291015625, "learning_rate": 1.9294262710213215e-05, "loss": 1.0532, "num_tokens": 6489442970.0, "step": 1719 }, { "epoch": 0.3065953654188948, "grad_norm": 0.291015625, "learning_rate": 1.92932538695234e-05, "loss": 1.0374, "num_tokens": 6495716846.0, "step": 1720 }, { "epoch": 0.3067736185383244, "grad_norm": 0.255859375, "learning_rate": 1.929224433775498e-05, "loss": 1.0646, "num_tokens": 6501990489.0, "step": 1721 }, { "epoch": 0.306951871657754, "grad_norm": 0.3359375, "learning_rate": 1.929123411499209e-05, "loss": 1.0428, "num_tokens": 6508206536.0, "step": 1722 }, { "epoch": 0.3071301247771836, "grad_norm": 0.267578125, "learning_rate": 1.9290223201318908e-05, "loss": 1.0622, "num_tokens": 6514491044.0, "step": 1723 }, { "epoch": 0.3073083778966132, "grad_norm": 0.330078125, "learning_rate": 1.9289211596819674e-05, "loss": 1.0217, "num_tokens": 6520750073.0, "step": 1724 }, { "epoch": 0.3074866310160428, "grad_norm": 0.259765625, "learning_rate": 1.928819930157868e-05, "loss": 1.0434, "num_tokens": 6527032242.0, "step": 1725 }, { "epoch": 0.3076648841354724, "grad_norm": 0.326171875, "learning_rate": 1.928718631568029e-05, "loss": 1.0419, "num_tokens": 6533315370.0, "step": 1726 }, { "epoch": 0.307843137254902, "grad_norm": 0.26953125, "learning_rate": 1.9286172639208913e-05, "loss": 1.0486, "num_tokens": 6539574478.0, "step": 1727 }, { "epoch": 0.30802139037433157, "grad_norm": 0.28125, "learning_rate": 1.928515827224901e-05, "loss": 1.0438, "num_tokens": 6545828668.0, "step": 1728 }, { "epoch": 0.30819964349376117, "grad_norm": 0.25390625, "learning_rate": 1.9284143214885125e-05, "loss": 1.0495, "num_tokens": 6552111490.0, "step": 1729 }, { "epoch": 0.3083778966131907, "grad_norm": 0.33984375, "learning_rate": 1.928312746720183e-05, "loss": 1.0729, "num_tokens": 6558376750.0, "step": 1730 }, { "epoch": 0.3085561497326203, "grad_norm": 0.263671875, "learning_rate": 1.928211102928377e-05, "loss": 1.0557, "num_tokens": 6564619667.0, "step": 1731 }, { "epoch": 0.3087344028520499, "grad_norm": 0.310546875, "learning_rate": 1.9281093901215648e-05, "loss": 1.0551, "num_tokens": 6570876851.0, "step": 1732 }, { "epoch": 0.3089126559714795, "grad_norm": 0.259765625, "learning_rate": 1.928007608308222e-05, "loss": 1.0543, "num_tokens": 6577161068.0, "step": 1733 }, { "epoch": 0.3090909090909091, "grad_norm": 0.318359375, "learning_rate": 1.92790575749683e-05, "loss": 1.0368, "num_tokens": 6583426334.0, "step": 1734 }, { "epoch": 0.3092691622103387, "grad_norm": 0.259765625, "learning_rate": 1.9278038376958764e-05, "loss": 1.0544, "num_tokens": 6589700182.0, "step": 1735 }, { "epoch": 0.30944741532976827, "grad_norm": 0.32421875, "learning_rate": 1.9277018489138536e-05, "loss": 1.0238, "num_tokens": 6595983726.0, "step": 1736 }, { "epoch": 0.30962566844919787, "grad_norm": 0.3203125, "learning_rate": 1.9275997911592607e-05, "loss": 1.0411, "num_tokens": 6602253684.0, "step": 1737 }, { "epoch": 0.30980392156862746, "grad_norm": 0.29296875, "learning_rate": 1.9274976644406026e-05, "loss": 1.0786, "num_tokens": 6608516945.0, "step": 1738 }, { "epoch": 0.30998217468805705, "grad_norm": 0.302734375, "learning_rate": 1.9273954687663888e-05, "loss": 1.0316, "num_tokens": 6614800168.0, "step": 1739 }, { "epoch": 0.31016042780748665, "grad_norm": 0.2578125, "learning_rate": 1.9272932041451357e-05, "loss": 1.0514, "num_tokens": 6621082082.0, "step": 1740 }, { "epoch": 0.31033868092691624, "grad_norm": 0.318359375, "learning_rate": 1.9271908705853646e-05, "loss": 1.0372, "num_tokens": 6627366050.0, "step": 1741 }, { "epoch": 0.31051693404634584, "grad_norm": 0.255859375, "learning_rate": 1.9270884680956036e-05, "loss": 1.0419, "num_tokens": 6633628072.0, "step": 1742 }, { "epoch": 0.3106951871657754, "grad_norm": 0.29296875, "learning_rate": 1.9269859966843855e-05, "loss": 1.0401, "num_tokens": 6639881548.0, "step": 1743 }, { "epoch": 0.31087344028520497, "grad_norm": 0.291015625, "learning_rate": 1.9268834563602496e-05, "loss": 1.0447, "num_tokens": 6646148051.0, "step": 1744 }, { "epoch": 0.31105169340463457, "grad_norm": 0.26953125, "learning_rate": 1.9267808471317403e-05, "loss": 1.0501, "num_tokens": 6652428561.0, "step": 1745 }, { "epoch": 0.31122994652406416, "grad_norm": 0.291015625, "learning_rate": 1.926678169007408e-05, "loss": 1.0583, "num_tokens": 6658667878.0, "step": 1746 }, { "epoch": 0.31140819964349375, "grad_norm": 0.263671875, "learning_rate": 1.9265754219958094e-05, "loss": 1.0402, "num_tokens": 6664931421.0, "step": 1747 }, { "epoch": 0.31158645276292335, "grad_norm": 0.287109375, "learning_rate": 1.926472606105506e-05, "loss": 1.0549, "num_tokens": 6671188498.0, "step": 1748 }, { "epoch": 0.31176470588235294, "grad_norm": 0.275390625, "learning_rate": 1.926369721345065e-05, "loss": 1.0472, "num_tokens": 6677432314.0, "step": 1749 }, { "epoch": 0.31194295900178254, "grad_norm": 0.3359375, "learning_rate": 1.926266767723061e-05, "loss": 1.0375, "num_tokens": 6683714794.0, "step": 1750 }, { "epoch": 0.31212121212121213, "grad_norm": 0.3046875, "learning_rate": 1.9261637452480725e-05, "loss": 1.048, "num_tokens": 6689965608.0, "step": 1751 }, { "epoch": 0.3122994652406417, "grad_norm": 0.302734375, "learning_rate": 1.9260606539286843e-05, "loss": 1.043, "num_tokens": 6696251563.0, "step": 1752 }, { "epoch": 0.3124777183600713, "grad_norm": 0.302734375, "learning_rate": 1.9259574937734868e-05, "loss": 1.0504, "num_tokens": 6702486371.0, "step": 1753 }, { "epoch": 0.3126559714795009, "grad_norm": 0.29296875, "learning_rate": 1.925854264791077e-05, "loss": 1.0629, "num_tokens": 6708768941.0, "step": 1754 }, { "epoch": 0.31283422459893045, "grad_norm": 0.283203125, "learning_rate": 1.925750966990056e-05, "loss": 1.0477, "num_tokens": 6715020893.0, "step": 1755 }, { "epoch": 0.31301247771836005, "grad_norm": 0.31640625, "learning_rate": 1.9256476003790332e-05, "loss": 1.0785, "num_tokens": 6721286230.0, "step": 1756 }, { "epoch": 0.31319073083778964, "grad_norm": 0.26953125, "learning_rate": 1.9255441649666208e-05, "loss": 1.0105, "num_tokens": 6727570888.0, "step": 1757 }, { "epoch": 0.31336898395721924, "grad_norm": 0.294921875, "learning_rate": 1.9254406607614385e-05, "loss": 1.0317, "num_tokens": 6733853107.0, "step": 1758 }, { "epoch": 0.31354723707664883, "grad_norm": 0.244140625, "learning_rate": 1.9253370877721116e-05, "loss": 1.0571, "num_tokens": 6740122340.0, "step": 1759 }, { "epoch": 0.3137254901960784, "grad_norm": 0.34765625, "learning_rate": 1.9252334460072706e-05, "loss": 1.0496, "num_tokens": 6746370899.0, "step": 1760 }, { "epoch": 0.313903743315508, "grad_norm": 0.287109375, "learning_rate": 1.925129735475552e-05, "loss": 1.0589, "num_tokens": 6752654692.0, "step": 1761 }, { "epoch": 0.3140819964349376, "grad_norm": 0.30078125, "learning_rate": 1.9250259561855977e-05, "loss": 1.0515, "num_tokens": 6758924290.0, "step": 1762 }, { "epoch": 0.3142602495543672, "grad_norm": 0.2734375, "learning_rate": 1.9249221081460564e-05, "loss": 1.0343, "num_tokens": 6765178146.0, "step": 1763 }, { "epoch": 0.3144385026737968, "grad_norm": 0.296875, "learning_rate": 1.9248181913655813e-05, "loss": 1.0669, "num_tokens": 6771455627.0, "step": 1764 }, { "epoch": 0.3146167557932264, "grad_norm": 0.265625, "learning_rate": 1.9247142058528315e-05, "loss": 1.0242, "num_tokens": 6777734012.0, "step": 1765 }, { "epoch": 0.314795008912656, "grad_norm": 0.349609375, "learning_rate": 1.924610151616473e-05, "loss": 1.0893, "num_tokens": 6783971368.0, "step": 1766 }, { "epoch": 0.3149732620320856, "grad_norm": 0.25390625, "learning_rate": 1.924506028665176e-05, "loss": 1.0275, "num_tokens": 6790226922.0, "step": 1767 }, { "epoch": 0.3151515151515151, "grad_norm": 0.310546875, "learning_rate": 1.9244018370076176e-05, "loss": 1.0262, "num_tokens": 6796509732.0, "step": 1768 }, { "epoch": 0.3153297682709447, "grad_norm": 0.2578125, "learning_rate": 1.9242975766524793e-05, "loss": 1.0139, "num_tokens": 6802734825.0, "step": 1769 }, { "epoch": 0.3155080213903743, "grad_norm": 0.279296875, "learning_rate": 1.9241932476084494e-05, "loss": 1.023, "num_tokens": 6809018981.0, "step": 1770 }, { "epoch": 0.3156862745098039, "grad_norm": 0.2578125, "learning_rate": 1.9240888498842224e-05, "loss": 1.0678, "num_tokens": 6815246926.0, "step": 1771 }, { "epoch": 0.3158645276292335, "grad_norm": 0.3046875, "learning_rate": 1.923984383488497e-05, "loss": 1.0452, "num_tokens": 6821528987.0, "step": 1772 }, { "epoch": 0.3160427807486631, "grad_norm": 0.294921875, "learning_rate": 1.9238798484299786e-05, "loss": 1.0461, "num_tokens": 6827814205.0, "step": 1773 }, { "epoch": 0.3162210338680927, "grad_norm": 0.31640625, "learning_rate": 1.9237752447173785e-05, "loss": 1.0333, "num_tokens": 6834096814.0, "step": 1774 }, { "epoch": 0.3163992869875223, "grad_norm": 0.3046875, "learning_rate": 1.9236705723594126e-05, "loss": 1.0253, "num_tokens": 6840356322.0, "step": 1775 }, { "epoch": 0.3165775401069519, "grad_norm": 0.28515625, "learning_rate": 1.923565831364804e-05, "loss": 1.0937, "num_tokens": 6846596964.0, "step": 1776 }, { "epoch": 0.3167557932263815, "grad_norm": 0.28125, "learning_rate": 1.9234610217422808e-05, "loss": 1.048, "num_tokens": 6852849648.0, "step": 1777 }, { "epoch": 0.31693404634581107, "grad_norm": 0.267578125, "learning_rate": 1.923356143500576e-05, "loss": 1.0458, "num_tokens": 6859133142.0, "step": 1778 }, { "epoch": 0.31711229946524067, "grad_norm": 0.279296875, "learning_rate": 1.9232511966484298e-05, "loss": 1.0453, "num_tokens": 6865406913.0, "step": 1779 }, { "epoch": 0.3172905525846702, "grad_norm": 0.287109375, "learning_rate": 1.923146181194587e-05, "loss": 1.0841, "num_tokens": 6871688704.0, "step": 1780 }, { "epoch": 0.3174688057040998, "grad_norm": 0.283203125, "learning_rate": 1.9230410971477993e-05, "loss": 1.0134, "num_tokens": 6877972893.0, "step": 1781 }, { "epoch": 0.3176470588235294, "grad_norm": 0.30859375, "learning_rate": 1.922935944516823e-05, "loss": 1.0737, "num_tokens": 6884233538.0, "step": 1782 }, { "epoch": 0.317825311942959, "grad_norm": 0.25390625, "learning_rate": 1.92283072331042e-05, "loss": 1.0307, "num_tokens": 6890505131.0, "step": 1783 }, { "epoch": 0.3180035650623886, "grad_norm": 0.244140625, "learning_rate": 1.922725433537359e-05, "loss": 1.0699, "num_tokens": 6896788710.0, "step": 1784 }, { "epoch": 0.3181818181818182, "grad_norm": 0.2431640625, "learning_rate": 1.9226200752064134e-05, "loss": 1.0083, "num_tokens": 6903051542.0, "step": 1785 }, { "epoch": 0.31836007130124777, "grad_norm": 0.2734375, "learning_rate": 1.9225146483263632e-05, "loss": 1.0255, "num_tokens": 6909305588.0, "step": 1786 }, { "epoch": 0.31853832442067737, "grad_norm": 0.2373046875, "learning_rate": 1.9224091529059933e-05, "loss": 1.0462, "num_tokens": 6915511328.0, "step": 1787 }, { "epoch": 0.31871657754010696, "grad_norm": 0.330078125, "learning_rate": 1.922303588954095e-05, "loss": 1.0594, "num_tokens": 6921789665.0, "step": 1788 }, { "epoch": 0.31889483065953655, "grad_norm": 0.2373046875, "learning_rate": 1.9221979564794643e-05, "loss": 1.0657, "num_tokens": 6928073437.0, "step": 1789 }, { "epoch": 0.31907308377896615, "grad_norm": 0.314453125, "learning_rate": 1.9220922554909045e-05, "loss": 1.0526, "num_tokens": 6934356478.0, "step": 1790 }, { "epoch": 0.31925133689839574, "grad_norm": 0.279296875, "learning_rate": 1.921986485997223e-05, "loss": 1.0619, "num_tokens": 6940609349.0, "step": 1791 }, { "epoch": 0.31942959001782534, "grad_norm": 0.349609375, "learning_rate": 1.9218806480072338e-05, "loss": 1.0062, "num_tokens": 6946888356.0, "step": 1792 }, { "epoch": 0.3196078431372549, "grad_norm": 0.263671875, "learning_rate": 1.921774741529756e-05, "loss": 1.0392, "num_tokens": 6953135012.0, "step": 1793 }, { "epoch": 0.31978609625668447, "grad_norm": 0.34765625, "learning_rate": 1.921668766573616e-05, "loss": 1.0404, "num_tokens": 6959418818.0, "step": 1794 }, { "epoch": 0.31996434937611407, "grad_norm": 0.326171875, "learning_rate": 1.9215627231476432e-05, "loss": 1.0499, "num_tokens": 6965704496.0, "step": 1795 }, { "epoch": 0.32014260249554366, "grad_norm": 0.328125, "learning_rate": 1.921456611260675e-05, "loss": 1.0531, "num_tokens": 6971972762.0, "step": 1796 }, { "epoch": 0.32032085561497325, "grad_norm": 0.298828125, "learning_rate": 1.921350430921554e-05, "loss": 1.0628, "num_tokens": 6978240743.0, "step": 1797 }, { "epoch": 0.32049910873440285, "grad_norm": 0.27734375, "learning_rate": 1.9212441821391276e-05, "loss": 1.0119, "num_tokens": 6984520248.0, "step": 1798 }, { "epoch": 0.32067736185383244, "grad_norm": 0.29296875, "learning_rate": 1.9211378649222497e-05, "loss": 1.0464, "num_tokens": 6990779521.0, "step": 1799 }, { "epoch": 0.32085561497326204, "grad_norm": 0.259765625, "learning_rate": 1.9210314792797803e-05, "loss": 1.0247, "num_tokens": 6997063736.0, "step": 1800 }, { "epoch": 0.32103386809269163, "grad_norm": 0.310546875, "learning_rate": 1.9209250252205838e-05, "loss": 1.0312, "num_tokens": 7003302759.0, "step": 1801 }, { "epoch": 0.3212121212121212, "grad_norm": 0.2314453125, "learning_rate": 1.9208185027535312e-05, "loss": 1.0435, "num_tokens": 7009530908.0, "step": 1802 }, { "epoch": 0.3213903743315508, "grad_norm": 0.318359375, "learning_rate": 1.9207119118874996e-05, "loss": 1.031, "num_tokens": 7015761735.0, "step": 1803 }, { "epoch": 0.3215686274509804, "grad_norm": 0.265625, "learning_rate": 1.9206052526313704e-05, "loss": 1.0088, "num_tokens": 7022038794.0, "step": 1804 }, { "epoch": 0.32174688057041, "grad_norm": 0.310546875, "learning_rate": 1.9204985249940326e-05, "loss": 1.074, "num_tokens": 7028322338.0, "step": 1805 }, { "epoch": 0.32192513368983955, "grad_norm": 0.279296875, "learning_rate": 1.920391728984379e-05, "loss": 1.0675, "num_tokens": 7034606981.0, "step": 1806 }, { "epoch": 0.32210338680926914, "grad_norm": 0.2890625, "learning_rate": 1.9202848646113083e-05, "loss": 1.0757, "num_tokens": 7040891603.0, "step": 1807 }, { "epoch": 0.32228163992869874, "grad_norm": 0.28125, "learning_rate": 1.920177931883727e-05, "loss": 1.0636, "num_tokens": 7047171110.0, "step": 1808 }, { "epoch": 0.32245989304812833, "grad_norm": 0.265625, "learning_rate": 1.920070930810545e-05, "loss": 1.0679, "num_tokens": 7053432181.0, "step": 1809 }, { "epoch": 0.3226381461675579, "grad_norm": 0.267578125, "learning_rate": 1.919963861400679e-05, "loss": 1.0371, "num_tokens": 7059667214.0, "step": 1810 }, { "epoch": 0.3228163992869875, "grad_norm": 0.283203125, "learning_rate": 1.919856723663051e-05, "loss": 1.0761, "num_tokens": 7065949922.0, "step": 1811 }, { "epoch": 0.3229946524064171, "grad_norm": 0.25390625, "learning_rate": 1.919749517606589e-05, "loss": 1.0578, "num_tokens": 7072203337.0, "step": 1812 }, { "epoch": 0.3231729055258467, "grad_norm": 0.275390625, "learning_rate": 1.919642243240226e-05, "loss": 1.0329, "num_tokens": 7078465884.0, "step": 1813 }, { "epoch": 0.3233511586452763, "grad_norm": 0.2451171875, "learning_rate": 1.919534900572902e-05, "loss": 1.0549, "num_tokens": 7084749961.0, "step": 1814 }, { "epoch": 0.3235294117647059, "grad_norm": 0.298828125, "learning_rate": 1.9194274896135612e-05, "loss": 1.0459, "num_tokens": 7091033105.0, "step": 1815 }, { "epoch": 0.3237076648841355, "grad_norm": 0.2470703125, "learning_rate": 1.919320010371154e-05, "loss": 1.0436, "num_tokens": 7097316255.0, "step": 1816 }, { "epoch": 0.3238859180035651, "grad_norm": 0.298828125, "learning_rate": 1.9192124628546376e-05, "loss": 1.0734, "num_tokens": 7103600650.0, "step": 1817 }, { "epoch": 0.3240641711229946, "grad_norm": 0.2353515625, "learning_rate": 1.9191048470729732e-05, "loss": 1.1019, "num_tokens": 7109884143.0, "step": 1818 }, { "epoch": 0.3242424242424242, "grad_norm": 0.271484375, "learning_rate": 1.9189971630351286e-05, "loss": 1.0512, "num_tokens": 7116164331.0, "step": 1819 }, { "epoch": 0.3244206773618538, "grad_norm": 0.255859375, "learning_rate": 1.9188894107500772e-05, "loss": 1.0531, "num_tokens": 7122419915.0, "step": 1820 }, { "epoch": 0.3245989304812834, "grad_norm": 0.26953125, "learning_rate": 1.9187815902267983e-05, "loss": 1.0571, "num_tokens": 7128675524.0, "step": 1821 }, { "epoch": 0.324777183600713, "grad_norm": 0.296875, "learning_rate": 1.918673701474276e-05, "loss": 1.0624, "num_tokens": 7134915663.0, "step": 1822 }, { "epoch": 0.3249554367201426, "grad_norm": 0.25, "learning_rate": 1.918565744501501e-05, "loss": 1.0376, "num_tokens": 7141199643.0, "step": 1823 }, { "epoch": 0.3251336898395722, "grad_norm": 0.3359375, "learning_rate": 1.9184577193174696e-05, "loss": 1.0216, "num_tokens": 7147422394.0, "step": 1824 }, { "epoch": 0.3253119429590018, "grad_norm": 0.28125, "learning_rate": 1.9183496259311835e-05, "loss": 1.0247, "num_tokens": 7153706381.0, "step": 1825 }, { "epoch": 0.3254901960784314, "grad_norm": 0.33984375, "learning_rate": 1.9182414643516496e-05, "loss": 1.068, "num_tokens": 7159981349.0, "step": 1826 }, { "epoch": 0.325668449197861, "grad_norm": 0.3203125, "learning_rate": 1.918133234587882e-05, "loss": 1.0615, "num_tokens": 7166264491.0, "step": 1827 }, { "epoch": 0.32584670231729057, "grad_norm": 0.298828125, "learning_rate": 1.9180249366488985e-05, "loss": 1.0727, "num_tokens": 7172548538.0, "step": 1828 }, { "epoch": 0.32602495543672017, "grad_norm": 0.29296875, "learning_rate": 1.917916570543724e-05, "loss": 1.0277, "num_tokens": 7178831675.0, "step": 1829 }, { "epoch": 0.32620320855614976, "grad_norm": 0.259765625, "learning_rate": 1.9178081362813888e-05, "loss": 1.049, "num_tokens": 7185103997.0, "step": 1830 }, { "epoch": 0.3263814616755793, "grad_norm": 0.287109375, "learning_rate": 1.9176996338709284e-05, "loss": 1.0628, "num_tokens": 7191333862.0, "step": 1831 }, { "epoch": 0.3265597147950089, "grad_norm": 0.29296875, "learning_rate": 1.9175910633213847e-05, "loss": 1.0516, "num_tokens": 7197617828.0, "step": 1832 }, { "epoch": 0.3267379679144385, "grad_norm": 0.26953125, "learning_rate": 1.917482424641805e-05, "loss": 1.0522, "num_tokens": 7203892637.0, "step": 1833 }, { "epoch": 0.3269162210338681, "grad_norm": 0.2734375, "learning_rate": 1.917373717841242e-05, "loss": 1.0294, "num_tokens": 7210175121.0, "step": 1834 }, { "epoch": 0.3270944741532977, "grad_norm": 0.28515625, "learning_rate": 1.917264942928754e-05, "loss": 1.0587, "num_tokens": 7216453310.0, "step": 1835 }, { "epoch": 0.32727272727272727, "grad_norm": 0.283203125, "learning_rate": 1.917156099913406e-05, "loss": 1.0476, "num_tokens": 7222714952.0, "step": 1836 }, { "epoch": 0.32745098039215687, "grad_norm": 0.234375, "learning_rate": 1.9170471888042667e-05, "loss": 1.04, "num_tokens": 7228965654.0, "step": 1837 }, { "epoch": 0.32762923351158646, "grad_norm": 0.29296875, "learning_rate": 1.9169382096104125e-05, "loss": 1.0546, "num_tokens": 7235199486.0, "step": 1838 }, { "epoch": 0.32780748663101605, "grad_norm": 0.25, "learning_rate": 1.916829162340925e-05, "loss": 1.0617, "num_tokens": 7241480779.0, "step": 1839 }, { "epoch": 0.32798573975044565, "grad_norm": 0.28125, "learning_rate": 1.9167200470048898e-05, "loss": 1.0388, "num_tokens": 7247762816.0, "step": 1840 }, { "epoch": 0.32816399286987524, "grad_norm": 0.26953125, "learning_rate": 1.916610863611401e-05, "loss": 1.0568, "num_tokens": 7254042671.0, "step": 1841 }, { "epoch": 0.32834224598930484, "grad_norm": 0.294921875, "learning_rate": 1.9165016121695562e-05, "loss": 1.0452, "num_tokens": 7260297258.0, "step": 1842 }, { "epoch": 0.3285204991087344, "grad_norm": 0.248046875, "learning_rate": 1.916392292688459e-05, "loss": 1.0342, "num_tokens": 7266550403.0, "step": 1843 }, { "epoch": 0.32869875222816397, "grad_norm": 0.314453125, "learning_rate": 1.9162829051772197e-05, "loss": 1.0474, "num_tokens": 7272770826.0, "step": 1844 }, { "epoch": 0.32887700534759357, "grad_norm": 0.296875, "learning_rate": 1.9161734496449534e-05, "loss": 1.0597, "num_tokens": 7279013589.0, "step": 1845 }, { "epoch": 0.32905525846702316, "grad_norm": 0.275390625, "learning_rate": 1.9160639261007806e-05, "loss": 1.0485, "num_tokens": 7285284922.0, "step": 1846 }, { "epoch": 0.32923351158645275, "grad_norm": 0.302734375, "learning_rate": 1.9159543345538284e-05, "loss": 0.9994, "num_tokens": 7291540367.0, "step": 1847 }, { "epoch": 0.32941176470588235, "grad_norm": 0.27734375, "learning_rate": 1.9158446750132292e-05, "loss": 1.048, "num_tokens": 7297826334.0, "step": 1848 }, { "epoch": 0.32959001782531194, "grad_norm": 0.267578125, "learning_rate": 1.9157349474881203e-05, "loss": 1.0513, "num_tokens": 7304091908.0, "step": 1849 }, { "epoch": 0.32976827094474154, "grad_norm": 0.2578125, "learning_rate": 1.9156251519876457e-05, "loss": 1.046, "num_tokens": 7310357872.0, "step": 1850 }, { "epoch": 0.32994652406417113, "grad_norm": 0.25390625, "learning_rate": 1.915515288520955e-05, "loss": 1.0564, "num_tokens": 7316611270.0, "step": 1851 }, { "epoch": 0.3301247771836007, "grad_norm": 0.25390625, "learning_rate": 1.915405357097203e-05, "loss": 1.0366, "num_tokens": 7322888528.0, "step": 1852 }, { "epoch": 0.3303030303030303, "grad_norm": 0.248046875, "learning_rate": 1.9152953577255494e-05, "loss": 1.0425, "num_tokens": 7329172488.0, "step": 1853 }, { "epoch": 0.3304812834224599, "grad_norm": 0.267578125, "learning_rate": 1.915185290415162e-05, "loss": 1.0698, "num_tokens": 7335456675.0, "step": 1854 }, { "epoch": 0.3306595365418895, "grad_norm": 0.2470703125, "learning_rate": 1.9150751551752114e-05, "loss": 1.0138, "num_tokens": 7341739620.0, "step": 1855 }, { "epoch": 0.33083778966131905, "grad_norm": 0.263671875, "learning_rate": 1.9149649520148758e-05, "loss": 1.0355, "num_tokens": 7348024002.0, "step": 1856 }, { "epoch": 0.33101604278074864, "grad_norm": 0.2578125, "learning_rate": 1.914854680943339e-05, "loss": 1.0471, "num_tokens": 7354255325.0, "step": 1857 }, { "epoch": 0.33119429590017824, "grad_norm": 0.251953125, "learning_rate": 1.9147443419697886e-05, "loss": 1.0433, "num_tokens": 7360522089.0, "step": 1858 }, { "epoch": 0.33137254901960783, "grad_norm": 0.25, "learning_rate": 1.9146339351034204e-05, "loss": 1.0545, "num_tokens": 7366781263.0, "step": 1859 }, { "epoch": 0.3315508021390374, "grad_norm": 0.234375, "learning_rate": 1.9145234603534337e-05, "loss": 1.0699, "num_tokens": 7373066934.0, "step": 1860 }, { "epoch": 0.331729055258467, "grad_norm": 0.2412109375, "learning_rate": 1.914412917729035e-05, "loss": 1.0282, "num_tokens": 7379302698.0, "step": 1861 }, { "epoch": 0.3319073083778966, "grad_norm": 0.244140625, "learning_rate": 1.9143023072394353e-05, "loss": 1.0475, "num_tokens": 7385586365.0, "step": 1862 }, { "epoch": 0.3320855614973262, "grad_norm": 0.2578125, "learning_rate": 1.9141916288938525e-05, "loss": 1.0168, "num_tokens": 7391866263.0, "step": 1863 }, { "epoch": 0.3322638146167558, "grad_norm": 0.2431640625, "learning_rate": 1.9140808827015092e-05, "loss": 1.0683, "num_tokens": 7398145845.0, "step": 1864 }, { "epoch": 0.3324420677361854, "grad_norm": 0.287109375, "learning_rate": 1.9139700686716335e-05, "loss": 1.0475, "num_tokens": 7404428320.0, "step": 1865 }, { "epoch": 0.332620320855615, "grad_norm": 0.2421875, "learning_rate": 1.9138591868134597e-05, "loss": 1.036, "num_tokens": 7410680009.0, "step": 1866 }, { "epoch": 0.3327985739750446, "grad_norm": 0.283203125, "learning_rate": 1.9137482371362282e-05, "loss": 1.0274, "num_tokens": 7416933228.0, "step": 1867 }, { "epoch": 0.3329768270944741, "grad_norm": 0.2197265625, "learning_rate": 1.9136372196491835e-05, "loss": 1.0301, "num_tokens": 7423218040.0, "step": 1868 }, { "epoch": 0.3331550802139037, "grad_norm": 0.271484375, "learning_rate": 1.9135261343615774e-05, "loss": 1.0323, "num_tokens": 7429503837.0, "step": 1869 }, { "epoch": 0.3333333333333333, "grad_norm": 0.275390625, "learning_rate": 1.9134149812826665e-05, "loss": 1.0301, "num_tokens": 7435787255.0, "step": 1870 }, { "epoch": 0.3335115864527629, "grad_norm": 0.279296875, "learning_rate": 1.9133037604217132e-05, "loss": 1.0568, "num_tokens": 7442071663.0, "step": 1871 }, { "epoch": 0.3336898395721925, "grad_norm": 0.294921875, "learning_rate": 1.913192471787986e-05, "loss": 1.0278, "num_tokens": 7448345377.0, "step": 1872 }, { "epoch": 0.3338680926916221, "grad_norm": 0.283203125, "learning_rate": 1.913081115390757e-05, "loss": 1.0364, "num_tokens": 7454597654.0, "step": 1873 }, { "epoch": 0.3340463458110517, "grad_norm": 0.345703125, "learning_rate": 1.9129696912393076e-05, "loss": 1.0663, "num_tokens": 7460880774.0, "step": 1874 }, { "epoch": 0.3342245989304813, "grad_norm": 0.291015625, "learning_rate": 1.912858199342922e-05, "loss": 1.0771, "num_tokens": 7467099604.0, "step": 1875 }, { "epoch": 0.3344028520499109, "grad_norm": 0.345703125, "learning_rate": 1.9127466397108903e-05, "loss": 1.0408, "num_tokens": 7473354034.0, "step": 1876 }, { "epoch": 0.3345811051693405, "grad_norm": 0.353515625, "learning_rate": 1.9126350123525092e-05, "loss": 1.0345, "num_tokens": 7479629000.0, "step": 1877 }, { "epoch": 0.33475935828877007, "grad_norm": 0.275390625, "learning_rate": 1.9125233172770806e-05, "loss": 1.0414, "num_tokens": 7485892467.0, "step": 1878 }, { "epoch": 0.33493761140819966, "grad_norm": 0.337890625, "learning_rate": 1.9124115544939128e-05, "loss": 1.0319, "num_tokens": 7492147277.0, "step": 1879 }, { "epoch": 0.33511586452762926, "grad_norm": 0.2451171875, "learning_rate": 1.9122997240123176e-05, "loss": 1.0686, "num_tokens": 7498411359.0, "step": 1880 }, { "epoch": 0.3352941176470588, "grad_norm": 0.40234375, "learning_rate": 1.9121878258416153e-05, "loss": 1.0492, "num_tokens": 7504695088.0, "step": 1881 }, { "epoch": 0.3354723707664884, "grad_norm": 0.29296875, "learning_rate": 1.912075859991129e-05, "loss": 1.047, "num_tokens": 7510976933.0, "step": 1882 }, { "epoch": 0.335650623885918, "grad_norm": 0.40234375, "learning_rate": 1.9119638264701902e-05, "loss": 1.0399, "num_tokens": 7517258727.0, "step": 1883 }, { "epoch": 0.3358288770053476, "grad_norm": 0.4140625, "learning_rate": 1.9118517252881334e-05, "loss": 1.0585, "num_tokens": 7523515831.0, "step": 1884 }, { "epoch": 0.3360071301247772, "grad_norm": 0.259765625, "learning_rate": 1.911739556454301e-05, "loss": 1.0449, "num_tokens": 7529755865.0, "step": 1885 }, { "epoch": 0.33618538324420677, "grad_norm": 0.37890625, "learning_rate": 1.9116273199780396e-05, "loss": 1.0276, "num_tokens": 7536037556.0, "step": 1886 }, { "epoch": 0.33636363636363636, "grad_norm": 0.310546875, "learning_rate": 1.911515015868702e-05, "loss": 1.0465, "num_tokens": 7542319904.0, "step": 1887 }, { "epoch": 0.33654188948306596, "grad_norm": 0.330078125, "learning_rate": 1.9114026441356466e-05, "loss": 1.0406, "num_tokens": 7548586060.0, "step": 1888 }, { "epoch": 0.33672014260249555, "grad_norm": 0.31640625, "learning_rate": 1.9112902047882373e-05, "loss": 1.0344, "num_tokens": 7554842227.0, "step": 1889 }, { "epoch": 0.33689839572192515, "grad_norm": 0.296875, "learning_rate": 1.9111776978358433e-05, "loss": 1.0532, "num_tokens": 7561125939.0, "step": 1890 }, { "epoch": 0.33707664884135474, "grad_norm": 0.296875, "learning_rate": 1.9110651232878406e-05, "loss": 1.0246, "num_tokens": 7567410123.0, "step": 1891 }, { "epoch": 0.33725490196078434, "grad_norm": 0.263671875, "learning_rate": 1.9109524811536097e-05, "loss": 1.0659, "num_tokens": 7573692145.0, "step": 1892 }, { "epoch": 0.33743315508021393, "grad_norm": 0.26953125, "learning_rate": 1.9108397714425365e-05, "loss": 1.0489, "num_tokens": 7579975029.0, "step": 1893 }, { "epoch": 0.33761140819964347, "grad_norm": 0.240234375, "learning_rate": 1.910726994164014e-05, "loss": 1.0428, "num_tokens": 7586236414.0, "step": 1894 }, { "epoch": 0.33778966131907306, "grad_norm": 0.2490234375, "learning_rate": 1.9106141493274396e-05, "loss": 1.0452, "num_tokens": 7592518855.0, "step": 1895 }, { "epoch": 0.33796791443850266, "grad_norm": 0.2314453125, "learning_rate": 1.910501236942217e-05, "loss": 1.0257, "num_tokens": 7598803553.0, "step": 1896 }, { "epoch": 0.33814616755793225, "grad_norm": 0.255859375, "learning_rate": 1.9103882570177544e-05, "loss": 1.0293, "num_tokens": 7605086303.0, "step": 1897 }, { "epoch": 0.33832442067736185, "grad_norm": 0.2392578125, "learning_rate": 1.9102752095634673e-05, "loss": 1.0193, "num_tokens": 7611371162.0, "step": 1898 }, { "epoch": 0.33850267379679144, "grad_norm": 0.263671875, "learning_rate": 1.9101620945887752e-05, "loss": 1.0565, "num_tokens": 7617648189.0, "step": 1899 }, { "epoch": 0.33868092691622104, "grad_norm": 0.240234375, "learning_rate": 1.9100489121031048e-05, "loss": 1.0401, "num_tokens": 7623932266.0, "step": 1900 }, { "epoch": 0.33885918003565063, "grad_norm": 0.2470703125, "learning_rate": 1.909935662115887e-05, "loss": 1.0174, "num_tokens": 7630196201.0, "step": 1901 }, { "epoch": 0.3390374331550802, "grad_norm": 0.26171875, "learning_rate": 1.9098223446365596e-05, "loss": 1.0506, "num_tokens": 7636478881.0, "step": 1902 }, { "epoch": 0.3392156862745098, "grad_norm": 0.2255859375, "learning_rate": 1.909708959674565e-05, "loss": 1.0615, "num_tokens": 7642762313.0, "step": 1903 }, { "epoch": 0.3393939393939394, "grad_norm": 0.255859375, "learning_rate": 1.909595507239351e-05, "loss": 1.0351, "num_tokens": 7649047163.0, "step": 1904 }, { "epoch": 0.339572192513369, "grad_norm": 0.2451171875, "learning_rate": 1.9094819873403723e-05, "loss": 1.0678, "num_tokens": 7655300272.0, "step": 1905 }, { "epoch": 0.33975044563279855, "grad_norm": 0.271484375, "learning_rate": 1.9093683999870888e-05, "loss": 1.0268, "num_tokens": 7661547038.0, "step": 1906 }, { "epoch": 0.33992869875222814, "grad_norm": 0.232421875, "learning_rate": 1.909254745188965e-05, "loss": 1.0161, "num_tokens": 7667830894.0, "step": 1907 }, { "epoch": 0.34010695187165774, "grad_norm": 0.287109375, "learning_rate": 1.909141022955472e-05, "loss": 1.05, "num_tokens": 7674115994.0, "step": 1908 }, { "epoch": 0.34028520499108733, "grad_norm": 0.267578125, "learning_rate": 1.9090272332960865e-05, "loss": 1.0336, "num_tokens": 7680341921.0, "step": 1909 }, { "epoch": 0.3404634581105169, "grad_norm": 0.2490234375, "learning_rate": 1.9089133762202913e-05, "loss": 1.0335, "num_tokens": 7686625805.0, "step": 1910 }, { "epoch": 0.3406417112299465, "grad_norm": 0.27734375, "learning_rate": 1.9087994517375725e-05, "loss": 1.0414, "num_tokens": 7692906189.0, "step": 1911 }, { "epoch": 0.3408199643493761, "grad_norm": 0.2734375, "learning_rate": 1.9086854598574246e-05, "loss": 1.0522, "num_tokens": 7699188445.0, "step": 1912 }, { "epoch": 0.3409982174688057, "grad_norm": 0.294921875, "learning_rate": 1.9085714005893463e-05, "loss": 1.0228, "num_tokens": 7705443491.0, "step": 1913 }, { "epoch": 0.3411764705882353, "grad_norm": 0.251953125, "learning_rate": 1.9084572739428423e-05, "loss": 1.0335, "num_tokens": 7711726676.0, "step": 1914 }, { "epoch": 0.3413547237076649, "grad_norm": 0.2734375, "learning_rate": 1.9083430799274227e-05, "loss": 1.0279, "num_tokens": 7717994283.0, "step": 1915 }, { "epoch": 0.3415329768270945, "grad_norm": 0.2314453125, "learning_rate": 1.908228818552603e-05, "loss": 1.0366, "num_tokens": 7724260260.0, "step": 1916 }, { "epoch": 0.3417112299465241, "grad_norm": 0.255859375, "learning_rate": 1.908114489827905e-05, "loss": 1.0352, "num_tokens": 7730501690.0, "step": 1917 }, { "epoch": 0.3418894830659537, "grad_norm": 0.24609375, "learning_rate": 1.908000093762856e-05, "loss": 1.0256, "num_tokens": 7736759713.0, "step": 1918 }, { "epoch": 0.3420677361853832, "grad_norm": 0.25, "learning_rate": 1.9078856303669878e-05, "loss": 1.0654, "num_tokens": 7743011956.0, "step": 1919 }, { "epoch": 0.3422459893048128, "grad_norm": 0.2490234375, "learning_rate": 1.90777109964984e-05, "loss": 1.0495, "num_tokens": 7749288876.0, "step": 1920 }, { "epoch": 0.3424242424242424, "grad_norm": 0.2255859375, "learning_rate": 1.9076565016209548e-05, "loss": 1.0724, "num_tokens": 7755564196.0, "step": 1921 }, { "epoch": 0.342602495543672, "grad_norm": 0.259765625, "learning_rate": 1.907541836289883e-05, "loss": 1.0635, "num_tokens": 7761835368.0, "step": 1922 }, { "epoch": 0.3427807486631016, "grad_norm": 0.2392578125, "learning_rate": 1.907427103666179e-05, "loss": 1.0512, "num_tokens": 7768120440.0, "step": 1923 }, { "epoch": 0.3429590017825312, "grad_norm": 0.251953125, "learning_rate": 1.9073123037594038e-05, "loss": 1.0602, "num_tokens": 7774404718.0, "step": 1924 }, { "epoch": 0.3431372549019608, "grad_norm": 0.2412109375, "learning_rate": 1.907197436579123e-05, "loss": 1.0555, "num_tokens": 7780688077.0, "step": 1925 }, { "epoch": 0.3433155080213904, "grad_norm": 0.2421875, "learning_rate": 1.90708250213491e-05, "loss": 1.0341, "num_tokens": 7786970577.0, "step": 1926 }, { "epoch": 0.34349376114082, "grad_norm": 0.263671875, "learning_rate": 1.9069675004363412e-05, "loss": 1.0431, "num_tokens": 7793237614.0, "step": 1927 }, { "epoch": 0.34367201426024957, "grad_norm": 0.2490234375, "learning_rate": 1.9068524314929995e-05, "loss": 1.0158, "num_tokens": 7799507857.0, "step": 1928 }, { "epoch": 0.34385026737967916, "grad_norm": 0.26171875, "learning_rate": 1.9067372953144742e-05, "loss": 1.0521, "num_tokens": 7805790493.0, "step": 1929 }, { "epoch": 0.34402852049910876, "grad_norm": 0.248046875, "learning_rate": 1.9066220919103598e-05, "loss": 1.0538, "num_tokens": 7812074276.0, "step": 1930 }, { "epoch": 0.3442067736185383, "grad_norm": 0.2412109375, "learning_rate": 1.9065068212902552e-05, "loss": 1.0448, "num_tokens": 7818353289.0, "step": 1931 }, { "epoch": 0.3443850267379679, "grad_norm": 0.2373046875, "learning_rate": 1.9063914834637674e-05, "loss": 1.0286, "num_tokens": 7824638669.0, "step": 1932 }, { "epoch": 0.3445632798573975, "grad_norm": 0.296875, "learning_rate": 1.9062760784405063e-05, "loss": 1.0487, "num_tokens": 7830916689.0, "step": 1933 }, { "epoch": 0.3447415329768271, "grad_norm": 0.2353515625, "learning_rate": 1.9061606062300893e-05, "loss": 1.0372, "num_tokens": 7837173546.0, "step": 1934 }, { "epoch": 0.3449197860962567, "grad_norm": 0.28125, "learning_rate": 1.9060450668421385e-05, "loss": 0.9948, "num_tokens": 7843409463.0, "step": 1935 }, { "epoch": 0.34509803921568627, "grad_norm": 0.24609375, "learning_rate": 1.9059294602862814e-05, "loss": 1.0501, "num_tokens": 7849669867.0, "step": 1936 }, { "epoch": 0.34527629233511586, "grad_norm": 0.2392578125, "learning_rate": 1.905813786572152e-05, "loss": 1.0351, "num_tokens": 7855918311.0, "step": 1937 }, { "epoch": 0.34545454545454546, "grad_norm": 0.25390625, "learning_rate": 1.9056980457093896e-05, "loss": 1.0575, "num_tokens": 7862201955.0, "step": 1938 }, { "epoch": 0.34563279857397505, "grad_norm": 0.271484375, "learning_rate": 1.9055822377076386e-05, "loss": 1.0499, "num_tokens": 7868429919.0, "step": 1939 }, { "epoch": 0.34581105169340465, "grad_norm": 0.23046875, "learning_rate": 1.9054663625765496e-05, "loss": 1.0096, "num_tokens": 7874712587.0, "step": 1940 }, { "epoch": 0.34598930481283424, "grad_norm": 0.248046875, "learning_rate": 1.905350420325778e-05, "loss": 1.055, "num_tokens": 7880996972.0, "step": 1941 }, { "epoch": 0.34616755793226384, "grad_norm": 0.234375, "learning_rate": 1.9052344109649856e-05, "loss": 1.0635, "num_tokens": 7887268948.0, "step": 1942 }, { "epoch": 0.34634581105169343, "grad_norm": 0.255859375, "learning_rate": 1.9051183345038395e-05, "loss": 1.0221, "num_tokens": 7893551664.0, "step": 1943 }, { "epoch": 0.34652406417112297, "grad_norm": 0.2451171875, "learning_rate": 1.9050021909520123e-05, "loss": 1.0821, "num_tokens": 7899836105.0, "step": 1944 }, { "epoch": 0.34670231729055256, "grad_norm": 0.240234375, "learning_rate": 1.904885980319182e-05, "loss": 1.024, "num_tokens": 7906119837.0, "step": 1945 }, { "epoch": 0.34688057040998216, "grad_norm": 0.244140625, "learning_rate": 1.9047697026150332e-05, "loss": 1.037, "num_tokens": 7912403274.0, "step": 1946 }, { "epoch": 0.34705882352941175, "grad_norm": 0.2578125, "learning_rate": 1.9046533578492544e-05, "loss": 1.0414, "num_tokens": 7918686966.0, "step": 1947 }, { "epoch": 0.34723707664884135, "grad_norm": 0.240234375, "learning_rate": 1.9045369460315418e-05, "loss": 1.0108, "num_tokens": 7924950951.0, "step": 1948 }, { "epoch": 0.34741532976827094, "grad_norm": 0.251953125, "learning_rate": 1.904420467171595e-05, "loss": 1.0261, "num_tokens": 7931233281.0, "step": 1949 }, { "epoch": 0.34759358288770054, "grad_norm": 0.2392578125, "learning_rate": 1.90430392127912e-05, "loss": 1.0178, "num_tokens": 7937517389.0, "step": 1950 }, { "epoch": 0.34777183600713013, "grad_norm": 0.265625, "learning_rate": 1.9041873083638297e-05, "loss": 1.05, "num_tokens": 7943802584.0, "step": 1951 }, { "epoch": 0.3479500891265597, "grad_norm": 0.2265625, "learning_rate": 1.904070628435441e-05, "loss": 1.0376, "num_tokens": 7950065881.0, "step": 1952 }, { "epoch": 0.3481283422459893, "grad_norm": 0.275390625, "learning_rate": 1.9039538815036766e-05, "loss": 1.0378, "num_tokens": 7956328080.0, "step": 1953 }, { "epoch": 0.3483065953654189, "grad_norm": 0.2578125, "learning_rate": 1.903837067578265e-05, "loss": 1.0385, "num_tokens": 7962566232.0, "step": 1954 }, { "epoch": 0.3484848484848485, "grad_norm": 0.275390625, "learning_rate": 1.903720186668941e-05, "loss": 1.026, "num_tokens": 7968830675.0, "step": 1955 }, { "epoch": 0.34866310160427805, "grad_norm": 0.240234375, "learning_rate": 1.9036032387854438e-05, "loss": 1.0317, "num_tokens": 7975098744.0, "step": 1956 }, { "epoch": 0.34884135472370764, "grad_norm": 0.27734375, "learning_rate": 1.9034862239375186e-05, "loss": 1.0444, "num_tokens": 7981333056.0, "step": 1957 }, { "epoch": 0.34901960784313724, "grad_norm": 0.23828125, "learning_rate": 1.9033691421349166e-05, "loss": 1.0327, "num_tokens": 7987615074.0, "step": 1958 }, { "epoch": 0.34919786096256683, "grad_norm": 0.26171875, "learning_rate": 1.903251993387394e-05, "loss": 1.0314, "num_tokens": 7993895978.0, "step": 1959 }, { "epoch": 0.3493761140819964, "grad_norm": 0.23046875, "learning_rate": 1.9031347777047128e-05, "loss": 1.0323, "num_tokens": 8000174900.0, "step": 1960 }, { "epoch": 0.349554367201426, "grad_norm": 0.291015625, "learning_rate": 1.9030174950966406e-05, "loss": 1.0538, "num_tokens": 8006455832.0, "step": 1961 }, { "epoch": 0.3497326203208556, "grad_norm": 0.2734375, "learning_rate": 1.902900145572951e-05, "loss": 1.0275, "num_tokens": 8012705479.0, "step": 1962 }, { "epoch": 0.3499108734402852, "grad_norm": 0.2734375, "learning_rate": 1.9027827291434223e-05, "loss": 1.0318, "num_tokens": 8018979971.0, "step": 1963 }, { "epoch": 0.3500891265597148, "grad_norm": 0.2392578125, "learning_rate": 1.902665245817839e-05, "loss": 1.0544, "num_tokens": 8025211608.0, "step": 1964 }, { "epoch": 0.3502673796791444, "grad_norm": 0.283203125, "learning_rate": 1.902547695605991e-05, "loss": 1.0481, "num_tokens": 8031494174.0, "step": 1965 }, { "epoch": 0.350445632798574, "grad_norm": 0.2470703125, "learning_rate": 1.902430078517674e-05, "loss": 1.0342, "num_tokens": 8037779081.0, "step": 1966 }, { "epoch": 0.3506238859180036, "grad_norm": 0.306640625, "learning_rate": 1.9023123945626888e-05, "loss": 1.0351, "num_tokens": 8044045931.0, "step": 1967 }, { "epoch": 0.3508021390374332, "grad_norm": 0.263671875, "learning_rate": 1.902194643750842e-05, "loss": 1.0689, "num_tokens": 8050305736.0, "step": 1968 }, { "epoch": 0.3509803921568627, "grad_norm": 0.265625, "learning_rate": 1.902076826091946e-05, "loss": 1.0605, "num_tokens": 8056557154.0, "step": 1969 }, { "epoch": 0.3511586452762923, "grad_norm": 0.28125, "learning_rate": 1.901958941595818e-05, "loss": 1.0029, "num_tokens": 8062841470.0, "step": 1970 }, { "epoch": 0.3513368983957219, "grad_norm": 0.2890625, "learning_rate": 1.9018409902722824e-05, "loss": 1.0639, "num_tokens": 8069116261.0, "step": 1971 }, { "epoch": 0.3515151515151515, "grad_norm": 0.296875, "learning_rate": 1.9017229721311673e-05, "loss": 1.0433, "num_tokens": 8075387919.0, "step": 1972 }, { "epoch": 0.3516934046345811, "grad_norm": 0.26171875, "learning_rate": 1.9016048871823074e-05, "loss": 1.0474, "num_tokens": 8081644930.0, "step": 1973 }, { "epoch": 0.3518716577540107, "grad_norm": 0.27734375, "learning_rate": 1.9014867354355427e-05, "loss": 1.0221, "num_tokens": 8087929225.0, "step": 1974 }, { "epoch": 0.3520499108734403, "grad_norm": 0.251953125, "learning_rate": 1.901368516900719e-05, "loss": 1.0468, "num_tokens": 8094192514.0, "step": 1975 }, { "epoch": 0.3522281639928699, "grad_norm": 0.263671875, "learning_rate": 1.901250231587687e-05, "loss": 1.0433, "num_tokens": 8100477010.0, "step": 1976 }, { "epoch": 0.3524064171122995, "grad_norm": 0.2412109375, "learning_rate": 1.9011318795063035e-05, "loss": 1.0121, "num_tokens": 8106743894.0, "step": 1977 }, { "epoch": 0.35258467023172907, "grad_norm": 0.26171875, "learning_rate": 1.9010134606664315e-05, "loss": 1.0623, "num_tokens": 8113028349.0, "step": 1978 }, { "epoch": 0.35276292335115866, "grad_norm": 0.2431640625, "learning_rate": 1.9008949750779387e-05, "loss": 1.0438, "num_tokens": 8119270222.0, "step": 1979 }, { "epoch": 0.35294117647058826, "grad_norm": 0.263671875, "learning_rate": 1.9007764227506977e-05, "loss": 1.0636, "num_tokens": 8125540185.0, "step": 1980 }, { "epoch": 0.35311942959001785, "grad_norm": 0.2431640625, "learning_rate": 1.900657803694588e-05, "loss": 1.03, "num_tokens": 8131824220.0, "step": 1981 }, { "epoch": 0.3532976827094474, "grad_norm": 0.265625, "learning_rate": 1.9005391179194944e-05, "loss": 1.0408, "num_tokens": 8138078486.0, "step": 1982 }, { "epoch": 0.353475935828877, "grad_norm": 0.23046875, "learning_rate": 1.9004203654353072e-05, "loss": 1.0332, "num_tokens": 8144358787.0, "step": 1983 }, { "epoch": 0.3536541889483066, "grad_norm": 0.248046875, "learning_rate": 1.9003015462519212e-05, "loss": 1.0623, "num_tokens": 8150615090.0, "step": 1984 }, { "epoch": 0.3538324420677362, "grad_norm": 0.263671875, "learning_rate": 1.9001826603792382e-05, "loss": 1.0467, "num_tokens": 8156869464.0, "step": 1985 }, { "epoch": 0.35401069518716577, "grad_norm": 0.24609375, "learning_rate": 1.9000637078271652e-05, "loss": 1.0644, "num_tokens": 8163153151.0, "step": 1986 }, { "epoch": 0.35418894830659536, "grad_norm": 0.275390625, "learning_rate": 1.8999446886056133e-05, "loss": 1.0574, "num_tokens": 8169411231.0, "step": 1987 }, { "epoch": 0.35436720142602496, "grad_norm": 0.25390625, "learning_rate": 1.8998256027245022e-05, "loss": 1.038, "num_tokens": 8175694796.0, "step": 1988 }, { "epoch": 0.35454545454545455, "grad_norm": 0.3359375, "learning_rate": 1.899706450193754e-05, "loss": 1.0684, "num_tokens": 8181957424.0, "step": 1989 }, { "epoch": 0.35472370766488415, "grad_norm": 0.2734375, "learning_rate": 1.899587231023299e-05, "loss": 1.0689, "num_tokens": 8188243201.0, "step": 1990 }, { "epoch": 0.35490196078431374, "grad_norm": 0.298828125, "learning_rate": 1.89946794522307e-05, "loss": 1.0115, "num_tokens": 8194505134.0, "step": 1991 }, { "epoch": 0.35508021390374334, "grad_norm": 0.294921875, "learning_rate": 1.8993485928030082e-05, "loss": 1.0212, "num_tokens": 8200787566.0, "step": 1992 }, { "epoch": 0.35525846702317293, "grad_norm": 0.279296875, "learning_rate": 1.8992291737730596e-05, "loss": 1.0541, "num_tokens": 8207070362.0, "step": 1993 }, { "epoch": 0.35543672014260247, "grad_norm": 0.271484375, "learning_rate": 1.8991096881431742e-05, "loss": 1.0567, "num_tokens": 8213354507.0, "step": 1994 }, { "epoch": 0.35561497326203206, "grad_norm": 0.2890625, "learning_rate": 1.89899013592331e-05, "loss": 1.055, "num_tokens": 8219595238.0, "step": 1995 }, { "epoch": 0.35579322638146166, "grad_norm": 0.27734375, "learning_rate": 1.8988705171234287e-05, "loss": 1.0364, "num_tokens": 8225793385.0, "step": 1996 }, { "epoch": 0.35597147950089125, "grad_norm": 0.306640625, "learning_rate": 1.8987508317534985e-05, "loss": 1.0342, "num_tokens": 8232078442.0, "step": 1997 }, { "epoch": 0.35614973262032085, "grad_norm": 0.291015625, "learning_rate": 1.898631079823492e-05, "loss": 1.0384, "num_tokens": 8238361179.0, "step": 1998 }, { "epoch": 0.35632798573975044, "grad_norm": 0.26953125, "learning_rate": 1.8985112613433893e-05, "loss": 1.0428, "num_tokens": 8244609085.0, "step": 1999 }, { "epoch": 0.35650623885918004, "grad_norm": 0.26171875, "learning_rate": 1.8983913763231738e-05, "loss": 1.0199, "num_tokens": 8250858906.0, "step": 2000 }, { "epoch": 0.35668449197860963, "grad_norm": 0.271484375, "learning_rate": 1.8982714247728364e-05, "loss": 1.0512, "num_tokens": 8257131405.0, "step": 2001 }, { "epoch": 0.3568627450980392, "grad_norm": 0.310546875, "learning_rate": 1.898151406702372e-05, "loss": 0.999, "num_tokens": 8263416049.0, "step": 2002 }, { "epoch": 0.3570409982174688, "grad_norm": 0.275390625, "learning_rate": 1.8980313221217827e-05, "loss": 1.0384, "num_tokens": 8269698751.0, "step": 2003 }, { "epoch": 0.3572192513368984, "grad_norm": 0.271484375, "learning_rate": 1.897911171041074e-05, "loss": 1.061, "num_tokens": 8275954871.0, "step": 2004 }, { "epoch": 0.357397504456328, "grad_norm": 0.2578125, "learning_rate": 1.8977909534702594e-05, "loss": 1.066, "num_tokens": 8282238152.0, "step": 2005 }, { "epoch": 0.3575757575757576, "grad_norm": 0.259765625, "learning_rate": 1.897670669419355e-05, "loss": 1.0596, "num_tokens": 8288523019.0, "step": 2006 }, { "epoch": 0.35775401069518714, "grad_norm": 0.263671875, "learning_rate": 1.897550318898385e-05, "loss": 1.0164, "num_tokens": 8294786248.0, "step": 2007 }, { "epoch": 0.35793226381461674, "grad_norm": 0.314453125, "learning_rate": 1.8974299019173793e-05, "loss": 1.0354, "num_tokens": 8301069150.0, "step": 2008 }, { "epoch": 0.35811051693404633, "grad_norm": 0.265625, "learning_rate": 1.8973094184863704e-05, "loss": 1.0294, "num_tokens": 8307328917.0, "step": 2009 }, { "epoch": 0.3582887700534759, "grad_norm": 0.30859375, "learning_rate": 1.897188868615399e-05, "loss": 1.0561, "num_tokens": 8313570698.0, "step": 2010 }, { "epoch": 0.3584670231729055, "grad_norm": 0.3046875, "learning_rate": 1.897068252314511e-05, "loss": 1.0449, "num_tokens": 8319847127.0, "step": 2011 }, { "epoch": 0.3586452762923351, "grad_norm": 0.302734375, "learning_rate": 1.896947569593757e-05, "loss": 1.0704, "num_tokens": 8326110875.0, "step": 2012 }, { "epoch": 0.3588235294117647, "grad_norm": 0.275390625, "learning_rate": 1.8968268204631934e-05, "loss": 1.052, "num_tokens": 8332373199.0, "step": 2013 }, { "epoch": 0.3590017825311943, "grad_norm": 0.310546875, "learning_rate": 1.896706004932882e-05, "loss": 1.0869, "num_tokens": 8338631031.0, "step": 2014 }, { "epoch": 0.3591800356506239, "grad_norm": 0.306640625, "learning_rate": 1.8965851230128908e-05, "loss": 1.0392, "num_tokens": 8344892155.0, "step": 2015 }, { "epoch": 0.3593582887700535, "grad_norm": 0.26953125, "learning_rate": 1.8964641747132933e-05, "loss": 1.0959, "num_tokens": 8351159594.0, "step": 2016 }, { "epoch": 0.3595365418894831, "grad_norm": 0.29296875, "learning_rate": 1.8963431600441673e-05, "loss": 1.0418, "num_tokens": 8357385232.0, "step": 2017 }, { "epoch": 0.3597147950089127, "grad_norm": 0.287109375, "learning_rate": 1.8962220790155973e-05, "loss": 1.02, "num_tokens": 8363654196.0, "step": 2018 }, { "epoch": 0.3598930481283422, "grad_norm": 0.271484375, "learning_rate": 1.896100931637673e-05, "loss": 1.0253, "num_tokens": 8369910423.0, "step": 2019 }, { "epoch": 0.3600713012477718, "grad_norm": 0.2421875, "learning_rate": 1.89597971792049e-05, "loss": 1.0741, "num_tokens": 8376193704.0, "step": 2020 }, { "epoch": 0.3602495543672014, "grad_norm": 0.2578125, "learning_rate": 1.8958584378741487e-05, "loss": 1.0743, "num_tokens": 8382359087.0, "step": 2021 }, { "epoch": 0.360427807486631, "grad_norm": 0.283203125, "learning_rate": 1.8957370915087556e-05, "loss": 1.0357, "num_tokens": 8388639871.0, "step": 2022 }, { "epoch": 0.3606060606060606, "grad_norm": 0.244140625, "learning_rate": 1.895615678834422e-05, "loss": 1.0585, "num_tokens": 8394859868.0, "step": 2023 }, { "epoch": 0.3607843137254902, "grad_norm": 0.287109375, "learning_rate": 1.895494199861266e-05, "loss": 1.0292, "num_tokens": 8401141671.0, "step": 2024 }, { "epoch": 0.3609625668449198, "grad_norm": 0.28125, "learning_rate": 1.8953726545994096e-05, "loss": 1.0743, "num_tokens": 8407393652.0, "step": 2025 }, { "epoch": 0.3611408199643494, "grad_norm": 0.28515625, "learning_rate": 1.895251043058982e-05, "loss": 1.0306, "num_tokens": 8413633868.0, "step": 2026 }, { "epoch": 0.361319073083779, "grad_norm": 0.279296875, "learning_rate": 1.895129365250117e-05, "loss": 1.0381, "num_tokens": 8419917678.0, "step": 2027 }, { "epoch": 0.36149732620320857, "grad_norm": 0.275390625, "learning_rate": 1.8950076211829534e-05, "loss": 1.0541, "num_tokens": 8426169063.0, "step": 2028 }, { "epoch": 0.36167557932263816, "grad_norm": 0.29296875, "learning_rate": 1.894885810867637e-05, "loss": 1.013, "num_tokens": 8432453123.0, "step": 2029 }, { "epoch": 0.36185383244206776, "grad_norm": 0.26953125, "learning_rate": 1.8947639343143175e-05, "loss": 1.0337, "num_tokens": 8438737017.0, "step": 2030 }, { "epoch": 0.36203208556149735, "grad_norm": 0.267578125, "learning_rate": 1.894641991533152e-05, "loss": 1.0236, "num_tokens": 8445020968.0, "step": 2031 }, { "epoch": 0.3622103386809269, "grad_norm": 0.29296875, "learning_rate": 1.8945199825343005e-05, "loss": 1.052, "num_tokens": 8451305697.0, "step": 2032 }, { "epoch": 0.3623885918003565, "grad_norm": 0.23046875, "learning_rate": 1.894397907327931e-05, "loss": 1.0351, "num_tokens": 8457588515.0, "step": 2033 }, { "epoch": 0.3625668449197861, "grad_norm": 0.29296875, "learning_rate": 1.8942757659242158e-05, "loss": 1.0495, "num_tokens": 8463870441.0, "step": 2034 }, { "epoch": 0.3627450980392157, "grad_norm": 0.2353515625, "learning_rate": 1.8941535583333333e-05, "loss": 1.0225, "num_tokens": 8470155256.0, "step": 2035 }, { "epoch": 0.36292335115864527, "grad_norm": 0.29296875, "learning_rate": 1.8940312845654667e-05, "loss": 1.0222, "num_tokens": 8476410319.0, "step": 2036 }, { "epoch": 0.36310160427807486, "grad_norm": 0.2578125, "learning_rate": 1.8939089446308053e-05, "loss": 1.0447, "num_tokens": 8482679838.0, "step": 2037 }, { "epoch": 0.36327985739750446, "grad_norm": 0.291015625, "learning_rate": 1.893786538539544e-05, "loss": 1.0285, "num_tokens": 8488914550.0, "step": 2038 }, { "epoch": 0.36345811051693405, "grad_norm": 0.25390625, "learning_rate": 1.8936640663018818e-05, "loss": 1.0569, "num_tokens": 8495193136.0, "step": 2039 }, { "epoch": 0.36363636363636365, "grad_norm": 0.2890625, "learning_rate": 1.8935415279280254e-05, "loss": 1.0369, "num_tokens": 8501477645.0, "step": 2040 }, { "epoch": 0.36381461675579324, "grad_norm": 0.283203125, "learning_rate": 1.8934189234281858e-05, "loss": 1.0668, "num_tokens": 8507738762.0, "step": 2041 }, { "epoch": 0.36399286987522284, "grad_norm": 0.294921875, "learning_rate": 1.8932962528125794e-05, "loss": 1.0587, "num_tokens": 8514023638.0, "step": 2042 }, { "epoch": 0.36417112299465243, "grad_norm": 0.251953125, "learning_rate": 1.8931735160914287e-05, "loss": 1.0258, "num_tokens": 8520275540.0, "step": 2043 }, { "epoch": 0.364349376114082, "grad_norm": 0.2734375, "learning_rate": 1.893050713274961e-05, "loss": 1.0648, "num_tokens": 8526536438.0, "step": 2044 }, { "epoch": 0.36452762923351156, "grad_norm": 0.25390625, "learning_rate": 1.8929278443734098e-05, "loss": 1.0485, "num_tokens": 8532816744.0, "step": 2045 }, { "epoch": 0.36470588235294116, "grad_norm": 0.263671875, "learning_rate": 1.8928049093970133e-05, "loss": 1.0578, "num_tokens": 8539092084.0, "step": 2046 }, { "epoch": 0.36488413547237075, "grad_norm": 0.2392578125, "learning_rate": 1.8926819083560165e-05, "loss": 1.0291, "num_tokens": 8545352142.0, "step": 2047 }, { "epoch": 0.36506238859180035, "grad_norm": 0.259765625, "learning_rate": 1.892558841260668e-05, "loss": 1.0556, "num_tokens": 8551635142.0, "step": 2048 }, { "epoch": 0.36524064171122994, "grad_norm": 0.251953125, "learning_rate": 1.8924357081212242e-05, "loss": 1.0487, "num_tokens": 8557898266.0, "step": 2049 }, { "epoch": 0.36541889483065954, "grad_norm": 0.248046875, "learning_rate": 1.892312508947945e-05, "loss": 1.0857, "num_tokens": 8564180984.0, "step": 2050 }, { "epoch": 0.36559714795008913, "grad_norm": 0.275390625, "learning_rate": 1.892189243751097e-05, "loss": 1.0309, "num_tokens": 8570438244.0, "step": 2051 }, { "epoch": 0.3657754010695187, "grad_norm": 0.234375, "learning_rate": 1.8920659125409517e-05, "loss": 1.0049, "num_tokens": 8576720805.0, "step": 2052 }, { "epoch": 0.3659536541889483, "grad_norm": 0.244140625, "learning_rate": 1.8919425153277865e-05, "loss": 1.0346, "num_tokens": 8583002498.0, "step": 2053 }, { "epoch": 0.3661319073083779, "grad_norm": 0.251953125, "learning_rate": 1.8918190521218838e-05, "loss": 1.0323, "num_tokens": 8589276562.0, "step": 2054 }, { "epoch": 0.3663101604278075, "grad_norm": 0.271484375, "learning_rate": 1.891695522933532e-05, "loss": 1.0534, "num_tokens": 8595557571.0, "step": 2055 }, { "epoch": 0.3664884135472371, "grad_norm": 0.25, "learning_rate": 1.8915719277730253e-05, "loss": 1.0398, "num_tokens": 8601839929.0, "step": 2056 }, { "epoch": 0.36666666666666664, "grad_norm": 0.2490234375, "learning_rate": 1.891448266650662e-05, "loss": 1.0443, "num_tokens": 8608120380.0, "step": 2057 }, { "epoch": 0.36684491978609624, "grad_norm": 0.263671875, "learning_rate": 1.891324539576747e-05, "loss": 1.0174, "num_tokens": 8614406058.0, "step": 2058 }, { "epoch": 0.36702317290552583, "grad_norm": 0.275390625, "learning_rate": 1.8912007465615916e-05, "loss": 1.0101, "num_tokens": 8620686201.0, "step": 2059 }, { "epoch": 0.3672014260249554, "grad_norm": 0.2421875, "learning_rate": 1.8910768876155103e-05, "loss": 1.0401, "num_tokens": 8626958338.0, "step": 2060 }, { "epoch": 0.367379679144385, "grad_norm": 0.2470703125, "learning_rate": 1.8909529627488243e-05, "loss": 1.0359, "num_tokens": 8633238880.0, "step": 2061 }, { "epoch": 0.3675579322638146, "grad_norm": 0.244140625, "learning_rate": 1.890828971971861e-05, "loss": 1.0307, "num_tokens": 8639523880.0, "step": 2062 }, { "epoch": 0.3677361853832442, "grad_norm": 0.263671875, "learning_rate": 1.8907049152949523e-05, "loss": 1.0803, "num_tokens": 8645770304.0, "step": 2063 }, { "epoch": 0.3679144385026738, "grad_norm": 0.267578125, "learning_rate": 1.8905807927284356e-05, "loss": 1.0458, "num_tokens": 8652029580.0, "step": 2064 }, { "epoch": 0.3680926916221034, "grad_norm": 0.255859375, "learning_rate": 1.8904566042826544e-05, "loss": 1.0572, "num_tokens": 8658291915.0, "step": 2065 }, { "epoch": 0.368270944741533, "grad_norm": 0.228515625, "learning_rate": 1.8903323499679567e-05, "loss": 1.0617, "num_tokens": 8664567723.0, "step": 2066 }, { "epoch": 0.3684491978609626, "grad_norm": 0.267578125, "learning_rate": 1.890208029794698e-05, "loss": 1.0537, "num_tokens": 8670840009.0, "step": 2067 }, { "epoch": 0.3686274509803922, "grad_norm": 0.236328125, "learning_rate": 1.8900836437732362e-05, "loss": 1.062, "num_tokens": 8677099857.0, "step": 2068 }, { "epoch": 0.3688057040998218, "grad_norm": 0.283203125, "learning_rate": 1.8899591919139378e-05, "loss": 1.0185, "num_tokens": 8683367303.0, "step": 2069 }, { "epoch": 0.3689839572192513, "grad_norm": 0.2431640625, "learning_rate": 1.8898346742271727e-05, "loss": 1.0327, "num_tokens": 8689652528.0, "step": 2070 }, { "epoch": 0.3691622103386809, "grad_norm": 0.2578125, "learning_rate": 1.889710090723317e-05, "loss": 1.0394, "num_tokens": 8695897248.0, "step": 2071 }, { "epoch": 0.3693404634581105, "grad_norm": 0.2421875, "learning_rate": 1.8895854414127527e-05, "loss": 1.0664, "num_tokens": 8702181662.0, "step": 2072 }, { "epoch": 0.3695187165775401, "grad_norm": 0.2275390625, "learning_rate": 1.8894607263058665e-05, "loss": 1.0242, "num_tokens": 8708451682.0, "step": 2073 }, { "epoch": 0.3696969696969697, "grad_norm": 0.251953125, "learning_rate": 1.889335945413051e-05, "loss": 1.0395, "num_tokens": 8714735387.0, "step": 2074 }, { "epoch": 0.3698752228163993, "grad_norm": 0.2275390625, "learning_rate": 1.889211098744704e-05, "loss": 1.0365, "num_tokens": 8721018168.0, "step": 2075 }, { "epoch": 0.3700534759358289, "grad_norm": 0.2734375, "learning_rate": 1.8890861863112298e-05, "loss": 1.0453, "num_tokens": 8727272625.0, "step": 2076 }, { "epoch": 0.3702317290552585, "grad_norm": 0.2333984375, "learning_rate": 1.8889612081230364e-05, "loss": 1.0369, "num_tokens": 8733555183.0, "step": 2077 }, { "epoch": 0.37040998217468807, "grad_norm": 0.29296875, "learning_rate": 1.888836164190539e-05, "loss": 1.0375, "num_tokens": 8739789201.0, "step": 2078 }, { "epoch": 0.37058823529411766, "grad_norm": 0.248046875, "learning_rate": 1.8887110545241567e-05, "loss": 1.0464, "num_tokens": 8746072181.0, "step": 2079 }, { "epoch": 0.37076648841354726, "grad_norm": 0.26953125, "learning_rate": 1.888585879134316e-05, "loss": 1.0428, "num_tokens": 8752322140.0, "step": 2080 }, { "epoch": 0.37094474153297685, "grad_norm": 0.2578125, "learning_rate": 1.8884606380314465e-05, "loss": 1.0316, "num_tokens": 8758604752.0, "step": 2081 }, { "epoch": 0.3711229946524064, "grad_norm": 0.29296875, "learning_rate": 1.888335331225986e-05, "loss": 1.0309, "num_tokens": 8764869653.0, "step": 2082 }, { "epoch": 0.371301247771836, "grad_norm": 0.255859375, "learning_rate": 1.8882099587283752e-05, "loss": 1.0296, "num_tokens": 8771128396.0, "step": 2083 }, { "epoch": 0.3714795008912656, "grad_norm": 0.283203125, "learning_rate": 1.888084520549062e-05, "loss": 1.0191, "num_tokens": 8777411325.0, "step": 2084 }, { "epoch": 0.3716577540106952, "grad_norm": 0.26171875, "learning_rate": 1.887959016698499e-05, "loss": 1.0479, "num_tokens": 8783694455.0, "step": 2085 }, { "epoch": 0.37183600713012477, "grad_norm": 0.2890625, "learning_rate": 1.8878334471871445e-05, "loss": 1.0574, "num_tokens": 8789977151.0, "step": 2086 }, { "epoch": 0.37201426024955436, "grad_norm": 0.279296875, "learning_rate": 1.8877078120254622e-05, "loss": 1.0332, "num_tokens": 8796218330.0, "step": 2087 }, { "epoch": 0.37219251336898396, "grad_norm": 0.2890625, "learning_rate": 1.8875821112239215e-05, "loss": 1.0617, "num_tokens": 8802491921.0, "step": 2088 }, { "epoch": 0.37237076648841355, "grad_norm": 0.2421875, "learning_rate": 1.8874563447929965e-05, "loss": 1.0279, "num_tokens": 8808759868.0, "step": 2089 }, { "epoch": 0.37254901960784315, "grad_norm": 0.287109375, "learning_rate": 1.8873305127431683e-05, "loss": 1.0247, "num_tokens": 8815044855.0, "step": 2090 }, { "epoch": 0.37272727272727274, "grad_norm": 0.255859375, "learning_rate": 1.8872046150849216e-05, "loss": 1.0165, "num_tokens": 8821329401.0, "step": 2091 }, { "epoch": 0.37290552584670233, "grad_norm": 0.26953125, "learning_rate": 1.887078651828748e-05, "loss": 1.0384, "num_tokens": 8827600534.0, "step": 2092 }, { "epoch": 0.37308377896613193, "grad_norm": 0.263671875, "learning_rate": 1.8869526229851437e-05, "loss": 1.0132, "num_tokens": 8833882574.0, "step": 2093 }, { "epoch": 0.3732620320855615, "grad_norm": 0.236328125, "learning_rate": 1.886826528564611e-05, "loss": 1.0159, "num_tokens": 8840136359.0, "step": 2094 }, { "epoch": 0.37344028520499106, "grad_norm": 0.2890625, "learning_rate": 1.8867003685776572e-05, "loss": 1.0674, "num_tokens": 8846419910.0, "step": 2095 }, { "epoch": 0.37361853832442066, "grad_norm": 0.25, "learning_rate": 1.8865741430347957e-05, "loss": 1.045, "num_tokens": 8852703341.0, "step": 2096 }, { "epoch": 0.37379679144385025, "grad_norm": 0.3359375, "learning_rate": 1.886447851946544e-05, "loss": 1.0159, "num_tokens": 8858986626.0, "step": 2097 }, { "epoch": 0.37397504456327985, "grad_norm": 0.271484375, "learning_rate": 1.8863214953234268e-05, "loss": 1.0406, "num_tokens": 8865174089.0, "step": 2098 }, { "epoch": 0.37415329768270944, "grad_norm": 0.26953125, "learning_rate": 1.8861950731759732e-05, "loss": 1.0449, "num_tokens": 8871449906.0, "step": 2099 }, { "epoch": 0.37433155080213903, "grad_norm": 0.279296875, "learning_rate": 1.8860685855147173e-05, "loss": 1.0077, "num_tokens": 8877734790.0, "step": 2100 }, { "epoch": 0.37450980392156863, "grad_norm": 0.271484375, "learning_rate": 1.8859420323502004e-05, "loss": 1.0246, "num_tokens": 8884020509.0, "step": 2101 }, { "epoch": 0.3746880570409982, "grad_norm": 0.2578125, "learning_rate": 1.885815413692968e-05, "loss": 1.0414, "num_tokens": 8890271272.0, "step": 2102 }, { "epoch": 0.3748663101604278, "grad_norm": 0.263671875, "learning_rate": 1.8856887295535704e-05, "loss": 1.0241, "num_tokens": 8896555321.0, "step": 2103 }, { "epoch": 0.3750445632798574, "grad_norm": 0.26953125, "learning_rate": 1.885561979942565e-05, "loss": 1.0185, "num_tokens": 8902838538.0, "step": 2104 }, { "epoch": 0.375222816399287, "grad_norm": 0.267578125, "learning_rate": 1.885435164870514e-05, "loss": 1.0637, "num_tokens": 8909091502.0, "step": 2105 }, { "epoch": 0.3754010695187166, "grad_norm": 0.28125, "learning_rate": 1.8853082843479842e-05, "loss": 1.0379, "num_tokens": 8915343820.0, "step": 2106 }, { "epoch": 0.37557932263814614, "grad_norm": 0.263671875, "learning_rate": 1.8851813383855488e-05, "loss": 1.0299, "num_tokens": 8921618888.0, "step": 2107 }, { "epoch": 0.37575757575757573, "grad_norm": 0.28125, "learning_rate": 1.885054326993787e-05, "loss": 1.0445, "num_tokens": 8927871251.0, "step": 2108 }, { "epoch": 0.37593582887700533, "grad_norm": 0.263671875, "learning_rate": 1.8849272501832817e-05, "loss": 1.0216, "num_tokens": 8934154247.0, "step": 2109 }, { "epoch": 0.3761140819964349, "grad_norm": 0.2734375, "learning_rate": 1.884800107964623e-05, "loss": 1.0283, "num_tokens": 8940437595.0, "step": 2110 }, { "epoch": 0.3762923351158645, "grad_norm": 0.251953125, "learning_rate": 1.884672900348405e-05, "loss": 1.0296, "num_tokens": 8946712144.0, "step": 2111 }, { "epoch": 0.3764705882352941, "grad_norm": 0.283203125, "learning_rate": 1.8845456273452282e-05, "loss": 1.0419, "num_tokens": 8952974563.0, "step": 2112 }, { "epoch": 0.3766488413547237, "grad_norm": 0.283203125, "learning_rate": 1.8844182889656986e-05, "loss": 1.0098, "num_tokens": 8959258043.0, "step": 2113 }, { "epoch": 0.3768270944741533, "grad_norm": 0.30859375, "learning_rate": 1.884290885220427e-05, "loss": 1.056, "num_tokens": 8965541532.0, "step": 2114 }, { "epoch": 0.3770053475935829, "grad_norm": 0.302734375, "learning_rate": 1.8841634161200303e-05, "loss": 1.0495, "num_tokens": 8971798814.0, "step": 2115 }, { "epoch": 0.3771836007130125, "grad_norm": 0.28515625, "learning_rate": 1.8840358816751298e-05, "loss": 1.0527, "num_tokens": 8978047974.0, "step": 2116 }, { "epoch": 0.3773618538324421, "grad_norm": 0.26953125, "learning_rate": 1.8839082818963537e-05, "loss": 1.023, "num_tokens": 8984289590.0, "step": 2117 }, { "epoch": 0.3775401069518717, "grad_norm": 0.271484375, "learning_rate": 1.8837806167943348e-05, "loss": 1.0569, "num_tokens": 8990556724.0, "step": 2118 }, { "epoch": 0.3777183600713013, "grad_norm": 0.2392578125, "learning_rate": 1.8836528863797113e-05, "loss": 1.0477, "num_tokens": 8996825154.0, "step": 2119 }, { "epoch": 0.3778966131907308, "grad_norm": 0.28515625, "learning_rate": 1.8835250906631272e-05, "loss": 1.037, "num_tokens": 9003049499.0, "step": 2120 }, { "epoch": 0.3780748663101604, "grad_norm": 0.236328125, "learning_rate": 1.8833972296552313e-05, "loss": 1.0522, "num_tokens": 9009329981.0, "step": 2121 }, { "epoch": 0.37825311942959, "grad_norm": 0.296875, "learning_rate": 1.8832693033666786e-05, "loss": 1.0565, "num_tokens": 9015614549.0, "step": 2122 }, { "epoch": 0.3784313725490196, "grad_norm": 0.251953125, "learning_rate": 1.883141311808129e-05, "loss": 1.0178, "num_tokens": 9021896524.0, "step": 2123 }, { "epoch": 0.3786096256684492, "grad_norm": 0.265625, "learning_rate": 1.883013254990249e-05, "loss": 1.0245, "num_tokens": 9028181895.0, "step": 2124 }, { "epoch": 0.3787878787878788, "grad_norm": 0.271484375, "learning_rate": 1.8828851329237083e-05, "loss": 1.0434, "num_tokens": 9034467357.0, "step": 2125 }, { "epoch": 0.3789661319073084, "grad_norm": 0.279296875, "learning_rate": 1.882756945619184e-05, "loss": 1.0276, "num_tokens": 9040726765.0, "step": 2126 }, { "epoch": 0.379144385026738, "grad_norm": 0.263671875, "learning_rate": 1.8826286930873578e-05, "loss": 1.0429, "num_tokens": 9047010928.0, "step": 2127 }, { "epoch": 0.37932263814616757, "grad_norm": 0.259765625, "learning_rate": 1.8825003753389174e-05, "loss": 1.0338, "num_tokens": 9053267119.0, "step": 2128 }, { "epoch": 0.37950089126559716, "grad_norm": 0.240234375, "learning_rate": 1.882371992384555e-05, "loss": 1.061, "num_tokens": 9059504402.0, "step": 2129 }, { "epoch": 0.37967914438502676, "grad_norm": 0.271484375, "learning_rate": 1.882243544234969e-05, "loss": 1.0333, "num_tokens": 9065781499.0, "step": 2130 }, { "epoch": 0.37985739750445635, "grad_norm": 0.26171875, "learning_rate": 1.8821150309008632e-05, "loss": 1.0365, "num_tokens": 9072066913.0, "step": 2131 }, { "epoch": 0.38003565062388595, "grad_norm": 0.263671875, "learning_rate": 1.8819864523929462e-05, "loss": 1.0478, "num_tokens": 9078352323.0, "step": 2132 }, { "epoch": 0.3802139037433155, "grad_norm": 0.2578125, "learning_rate": 1.881857808721933e-05, "loss": 1.0221, "num_tokens": 9084636231.0, "step": 2133 }, { "epoch": 0.3803921568627451, "grad_norm": 0.2734375, "learning_rate": 1.881729099898543e-05, "loss": 1.037, "num_tokens": 9090921693.0, "step": 2134 }, { "epoch": 0.3805704099821747, "grad_norm": 0.2490234375, "learning_rate": 1.881600325933502e-05, "loss": 1.0739, "num_tokens": 9097159646.0, "step": 2135 }, { "epoch": 0.38074866310160427, "grad_norm": 0.259765625, "learning_rate": 1.8814714868375406e-05, "loss": 1.0494, "num_tokens": 9103428386.0, "step": 2136 }, { "epoch": 0.38092691622103386, "grad_norm": 0.28515625, "learning_rate": 1.8813425826213945e-05, "loss": 1.0237, "num_tokens": 9109703697.0, "step": 2137 }, { "epoch": 0.38110516934046346, "grad_norm": 0.25390625, "learning_rate": 1.8812136132958065e-05, "loss": 1.0354, "num_tokens": 9115971402.0, "step": 2138 }, { "epoch": 0.38128342245989305, "grad_norm": 0.2578125, "learning_rate": 1.881084578871522e-05, "loss": 1.0238, "num_tokens": 9122211380.0, "step": 2139 }, { "epoch": 0.38146167557932265, "grad_norm": 0.263671875, "learning_rate": 1.880955479359295e-05, "loss": 1.0236, "num_tokens": 9128495194.0, "step": 2140 }, { "epoch": 0.38163992869875224, "grad_norm": 0.2470703125, "learning_rate": 1.8808263147698825e-05, "loss": 0.9971, "num_tokens": 9134778243.0, "step": 2141 }, { "epoch": 0.38181818181818183, "grad_norm": 0.2490234375, "learning_rate": 1.880697085114048e-05, "loss": 1.0332, "num_tokens": 9141043555.0, "step": 2142 }, { "epoch": 0.38199643493761143, "grad_norm": 0.2421875, "learning_rate": 1.8805677904025602e-05, "loss": 1.0712, "num_tokens": 9147326169.0, "step": 2143 }, { "epoch": 0.382174688057041, "grad_norm": 0.259765625, "learning_rate": 1.8804384306461937e-05, "loss": 1.0353, "num_tokens": 9153579816.0, "step": 2144 }, { "epoch": 0.38235294117647056, "grad_norm": 0.244140625, "learning_rate": 1.8803090058557274e-05, "loss": 1.0526, "num_tokens": 9159845182.0, "step": 2145 }, { "epoch": 0.38253119429590016, "grad_norm": 0.259765625, "learning_rate": 1.8801795160419468e-05, "loss": 1.0481, "num_tokens": 9166115999.0, "step": 2146 }, { "epoch": 0.38270944741532975, "grad_norm": 0.251953125, "learning_rate": 1.880049961215642e-05, "loss": 1.0431, "num_tokens": 9172399558.0, "step": 2147 }, { "epoch": 0.38288770053475935, "grad_norm": 0.234375, "learning_rate": 1.8799203413876093e-05, "loss": 1.0563, "num_tokens": 9178669622.0, "step": 2148 }, { "epoch": 0.38306595365418894, "grad_norm": 0.271484375, "learning_rate": 1.8797906565686493e-05, "loss": 1.0345, "num_tokens": 9184874347.0, "step": 2149 }, { "epoch": 0.38324420677361853, "grad_norm": 0.2412109375, "learning_rate": 1.879660906769569e-05, "loss": 1.0115, "num_tokens": 9191127966.0, "step": 2150 }, { "epoch": 0.38342245989304813, "grad_norm": 0.291015625, "learning_rate": 1.8795310920011805e-05, "loss": 1.0201, "num_tokens": 9197413136.0, "step": 2151 }, { "epoch": 0.3836007130124777, "grad_norm": 0.2392578125, "learning_rate": 1.8794012122743012e-05, "loss": 1.035, "num_tokens": 9203685708.0, "step": 2152 }, { "epoch": 0.3837789661319073, "grad_norm": 0.27734375, "learning_rate": 1.879271267599754e-05, "loss": 1.0419, "num_tokens": 9209968426.0, "step": 2153 }, { "epoch": 0.3839572192513369, "grad_norm": 0.25390625, "learning_rate": 1.8791412579883676e-05, "loss": 1.0464, "num_tokens": 9216253265.0, "step": 2154 }, { "epoch": 0.3841354723707665, "grad_norm": 0.28515625, "learning_rate": 1.8790111834509753e-05, "loss": 1.0364, "num_tokens": 9222530310.0, "step": 2155 }, { "epoch": 0.3843137254901961, "grad_norm": 0.2353515625, "learning_rate": 1.8788810439984162e-05, "loss": 1.0036, "num_tokens": 9228812856.0, "step": 2156 }, { "epoch": 0.3844919786096257, "grad_norm": 0.33203125, "learning_rate": 1.8787508396415347e-05, "loss": 1.0153, "num_tokens": 9235061527.0, "step": 2157 }, { "epoch": 0.38467023172905523, "grad_norm": 0.265625, "learning_rate": 1.8786205703911816e-05, "loss": 1.0539, "num_tokens": 9241320087.0, "step": 2158 }, { "epoch": 0.38484848484848483, "grad_norm": 0.29296875, "learning_rate": 1.8784902362582115e-05, "loss": 1.0424, "num_tokens": 9247600562.0, "step": 2159 }, { "epoch": 0.3850267379679144, "grad_norm": 0.2578125, "learning_rate": 1.8783598372534854e-05, "loss": 1.0408, "num_tokens": 9253860567.0, "step": 2160 }, { "epoch": 0.385204991087344, "grad_norm": 0.271484375, "learning_rate": 1.878229373387869e-05, "loss": 1.0284, "num_tokens": 9260133238.0, "step": 2161 }, { "epoch": 0.3853832442067736, "grad_norm": 0.224609375, "learning_rate": 1.8780988446722346e-05, "loss": 1.0382, "num_tokens": 9266414725.0, "step": 2162 }, { "epoch": 0.3855614973262032, "grad_norm": 0.232421875, "learning_rate": 1.8779682511174592e-05, "loss": 1.0116, "num_tokens": 9272697625.0, "step": 2163 }, { "epoch": 0.3857397504456328, "grad_norm": 0.244140625, "learning_rate": 1.8778375927344245e-05, "loss": 1.0555, "num_tokens": 9278982084.0, "step": 2164 }, { "epoch": 0.3859180035650624, "grad_norm": 0.263671875, "learning_rate": 1.877706869534019e-05, "loss": 1.0387, "num_tokens": 9285245921.0, "step": 2165 }, { "epoch": 0.386096256684492, "grad_norm": 0.2333984375, "learning_rate": 1.8775760815271354e-05, "loss": 1.021, "num_tokens": 9291529448.0, "step": 2166 }, { "epoch": 0.3862745098039216, "grad_norm": 0.32421875, "learning_rate": 1.8774452287246728e-05, "loss": 1.0413, "num_tokens": 9297812284.0, "step": 2167 }, { "epoch": 0.3864527629233512, "grad_norm": 0.236328125, "learning_rate": 1.877314311137534e-05, "loss": 1.0368, "num_tokens": 9304074283.0, "step": 2168 }, { "epoch": 0.3866310160427808, "grad_norm": 0.341796875, "learning_rate": 1.8771833287766304e-05, "loss": 0.9915, "num_tokens": 9310306751.0, "step": 2169 }, { "epoch": 0.3868092691622103, "grad_norm": 0.244140625, "learning_rate": 1.8770522816528753e-05, "loss": 1.0503, "num_tokens": 9316570216.0, "step": 2170 }, { "epoch": 0.3869875222816399, "grad_norm": 0.3203125, "learning_rate": 1.876921169777189e-05, "loss": 1.0686, "num_tokens": 9322837419.0, "step": 2171 }, { "epoch": 0.3871657754010695, "grad_norm": 0.28515625, "learning_rate": 1.8767899931604973e-05, "loss": 1.0796, "num_tokens": 9329114269.0, "step": 2172 }, { "epoch": 0.3873440285204991, "grad_norm": 0.287109375, "learning_rate": 1.876658751813731e-05, "loss": 1.0356, "num_tokens": 9335360819.0, "step": 2173 }, { "epoch": 0.3875222816399287, "grad_norm": 0.30859375, "learning_rate": 1.8765274457478273e-05, "loss": 1.0551, "num_tokens": 9341621326.0, "step": 2174 }, { "epoch": 0.3877005347593583, "grad_norm": 0.263671875, "learning_rate": 1.876396074973727e-05, "loss": 1.0224, "num_tokens": 9347905016.0, "step": 2175 }, { "epoch": 0.3878787878787879, "grad_norm": 0.2890625, "learning_rate": 1.8762646395023777e-05, "loss": 1.0698, "num_tokens": 9354109580.0, "step": 2176 }, { "epoch": 0.3880570409982175, "grad_norm": 0.2392578125, "learning_rate": 1.8761331393447313e-05, "loss": 1.0365, "num_tokens": 9360376541.0, "step": 2177 }, { "epoch": 0.38823529411764707, "grad_norm": 0.279296875, "learning_rate": 1.876001574511747e-05, "loss": 1.0501, "num_tokens": 9366601157.0, "step": 2178 }, { "epoch": 0.38841354723707666, "grad_norm": 0.21875, "learning_rate": 1.8758699450143867e-05, "loss": 1.0107, "num_tokens": 9372884281.0, "step": 2179 }, { "epoch": 0.38859180035650626, "grad_norm": 0.271484375, "learning_rate": 1.8757382508636202e-05, "loss": 1.0241, "num_tokens": 9379167720.0, "step": 2180 }, { "epoch": 0.38877005347593585, "grad_norm": 0.248046875, "learning_rate": 1.875606492070421e-05, "loss": 1.0426, "num_tokens": 9385425057.0, "step": 2181 }, { "epoch": 0.38894830659536545, "grad_norm": 0.298828125, "learning_rate": 1.8754746686457687e-05, "loss": 1.05, "num_tokens": 9391689235.0, "step": 2182 }, { "epoch": 0.389126559714795, "grad_norm": 0.271484375, "learning_rate": 1.8753427806006486e-05, "loss": 1.0284, "num_tokens": 9397946790.0, "step": 2183 }, { "epoch": 0.3893048128342246, "grad_norm": 0.31640625, "learning_rate": 1.8752108279460504e-05, "loss": 1.0406, "num_tokens": 9404212343.0, "step": 2184 }, { "epoch": 0.3894830659536542, "grad_norm": 0.275390625, "learning_rate": 1.8750788106929704e-05, "loss": 1.0293, "num_tokens": 9410496498.0, "step": 2185 }, { "epoch": 0.38966131907308377, "grad_norm": 0.28515625, "learning_rate": 1.8749467288524084e-05, "loss": 1.0672, "num_tokens": 9416756169.0, "step": 2186 }, { "epoch": 0.38983957219251336, "grad_norm": 0.267578125, "learning_rate": 1.8748145824353722e-05, "loss": 1.0432, "num_tokens": 9423011977.0, "step": 2187 }, { "epoch": 0.39001782531194296, "grad_norm": 0.275390625, "learning_rate": 1.874682371452873e-05, "loss": 1.0331, "num_tokens": 9429295373.0, "step": 2188 }, { "epoch": 0.39019607843137255, "grad_norm": 0.275390625, "learning_rate": 1.8745500959159278e-05, "loss": 1.0265, "num_tokens": 9435548676.0, "step": 2189 }, { "epoch": 0.39037433155080214, "grad_norm": 0.267578125, "learning_rate": 1.874417755835559e-05, "loss": 1.0226, "num_tokens": 9441830918.0, "step": 2190 }, { "epoch": 0.39055258467023174, "grad_norm": 0.275390625, "learning_rate": 1.8742853512227953e-05, "loss": 1.047, "num_tokens": 9448113618.0, "step": 2191 }, { "epoch": 0.39073083778966133, "grad_norm": 0.271484375, "learning_rate": 1.8741528820886694e-05, "loss": 1.0311, "num_tokens": 9454396736.0, "step": 2192 }, { "epoch": 0.39090909090909093, "grad_norm": 0.25, "learning_rate": 1.8740203484442197e-05, "loss": 1.0627, "num_tokens": 9460653588.0, "step": 2193 }, { "epoch": 0.3910873440285205, "grad_norm": 0.279296875, "learning_rate": 1.873887750300491e-05, "loss": 1.0354, "num_tokens": 9466935714.0, "step": 2194 }, { "epoch": 0.39126559714795006, "grad_norm": 0.251953125, "learning_rate": 1.8737550876685323e-05, "loss": 1.0108, "num_tokens": 9473221357.0, "step": 2195 }, { "epoch": 0.39144385026737966, "grad_norm": 0.21875, "learning_rate": 1.873622360559399e-05, "loss": 1.0432, "num_tokens": 9479469242.0, "step": 2196 }, { "epoch": 0.39162210338680925, "grad_norm": 0.2734375, "learning_rate": 1.8734895689841503e-05, "loss": 1.0419, "num_tokens": 9485750205.0, "step": 2197 }, { "epoch": 0.39180035650623884, "grad_norm": 0.255859375, "learning_rate": 1.873356712953852e-05, "loss": 1.0061, "num_tokens": 9492036307.0, "step": 2198 }, { "epoch": 0.39197860962566844, "grad_norm": 0.267578125, "learning_rate": 1.873223792479576e-05, "loss": 1.0392, "num_tokens": 9498292875.0, "step": 2199 }, { "epoch": 0.39215686274509803, "grad_norm": 0.248046875, "learning_rate": 1.8730908075723975e-05, "loss": 1.0246, "num_tokens": 9504575899.0, "step": 2200 }, { "epoch": 0.39233511586452763, "grad_norm": 0.24609375, "learning_rate": 1.872957758243398e-05, "loss": 1.0306, "num_tokens": 9510859437.0, "step": 2201 }, { "epoch": 0.3925133689839572, "grad_norm": 0.2890625, "learning_rate": 1.8728246445036657e-05, "loss": 1.0452, "num_tokens": 9517127928.0, "step": 2202 }, { "epoch": 0.3926916221033868, "grad_norm": 0.25390625, "learning_rate": 1.8726914663642925e-05, "loss": 1.0471, "num_tokens": 9523410898.0, "step": 2203 }, { "epoch": 0.3928698752228164, "grad_norm": 0.24609375, "learning_rate": 1.872558223836375e-05, "loss": 1.0529, "num_tokens": 9529677216.0, "step": 2204 }, { "epoch": 0.393048128342246, "grad_norm": 0.26953125, "learning_rate": 1.8724249169310184e-05, "loss": 1.0281, "num_tokens": 9535891337.0, "step": 2205 }, { "epoch": 0.3932263814616756, "grad_norm": 0.255859375, "learning_rate": 1.8722915456593294e-05, "loss": 1.0371, "num_tokens": 9542148013.0, "step": 2206 }, { "epoch": 0.3934046345811052, "grad_norm": 0.267578125, "learning_rate": 1.8721581100324227e-05, "loss": 1.0383, "num_tokens": 9548393929.0, "step": 2207 }, { "epoch": 0.39358288770053473, "grad_norm": 0.267578125, "learning_rate": 1.872024610061418e-05, "loss": 1.0583, "num_tokens": 9554670234.0, "step": 2208 }, { "epoch": 0.39376114081996433, "grad_norm": 0.283203125, "learning_rate": 1.8718910457574383e-05, "loss": 1.0567, "num_tokens": 9560953250.0, "step": 2209 }, { "epoch": 0.3939393939393939, "grad_norm": 0.234375, "learning_rate": 1.871757417131615e-05, "loss": 1.0292, "num_tokens": 9567216208.0, "step": 2210 }, { "epoch": 0.3941176470588235, "grad_norm": 0.279296875, "learning_rate": 1.871623724195083e-05, "loss": 0.9932, "num_tokens": 9573448838.0, "step": 2211 }, { "epoch": 0.3942959001782531, "grad_norm": 0.228515625, "learning_rate": 1.8714899669589828e-05, "loss": 1.0493, "num_tokens": 9579732377.0, "step": 2212 }, { "epoch": 0.3944741532976827, "grad_norm": 0.25, "learning_rate": 1.871356145434461e-05, "loss": 1.0476, "num_tokens": 9586016279.0, "step": 2213 }, { "epoch": 0.3946524064171123, "grad_norm": 0.267578125, "learning_rate": 1.8712222596326676e-05, "loss": 1.0438, "num_tokens": 9592258034.0, "step": 2214 }, { "epoch": 0.3948306595365419, "grad_norm": 0.228515625, "learning_rate": 1.8710883095647605e-05, "loss": 1.0416, "num_tokens": 9598542128.0, "step": 2215 }, { "epoch": 0.3950089126559715, "grad_norm": 0.271484375, "learning_rate": 1.8709542952419014e-05, "loss": 1.0315, "num_tokens": 9604826496.0, "step": 2216 }, { "epoch": 0.3951871657754011, "grad_norm": 0.2275390625, "learning_rate": 1.870820216675258e-05, "loss": 1.0579, "num_tokens": 9611108570.0, "step": 2217 }, { "epoch": 0.3953654188948307, "grad_norm": 0.259765625, "learning_rate": 1.8706860738760032e-05, "loss": 1.0374, "num_tokens": 9617391791.0, "step": 2218 }, { "epoch": 0.3955436720142603, "grad_norm": 0.2373046875, "learning_rate": 1.8705518668553146e-05, "loss": 1.039, "num_tokens": 9623641098.0, "step": 2219 }, { "epoch": 0.39572192513368987, "grad_norm": 0.265625, "learning_rate": 1.870417595624376e-05, "loss": 1.0412, "num_tokens": 9629924766.0, "step": 2220 }, { "epoch": 0.3959001782531194, "grad_norm": 0.23828125, "learning_rate": 1.870283260194376e-05, "loss": 1.0054, "num_tokens": 9636208288.0, "step": 2221 }, { "epoch": 0.396078431372549, "grad_norm": 0.236328125, "learning_rate": 1.8701488605765093e-05, "loss": 1.0204, "num_tokens": 9642493674.0, "step": 2222 }, { "epoch": 0.3962566844919786, "grad_norm": 0.2373046875, "learning_rate": 1.8700143967819752e-05, "loss": 1.0413, "num_tokens": 9648756219.0, "step": 2223 }, { "epoch": 0.3964349376114082, "grad_norm": 0.2314453125, "learning_rate": 1.8698798688219785e-05, "loss": 1.0105, "num_tokens": 9655038553.0, "step": 2224 }, { "epoch": 0.3966131907308378, "grad_norm": 0.2490234375, "learning_rate": 1.8697452767077293e-05, "loss": 1.0632, "num_tokens": 9661292689.0, "step": 2225 }, { "epoch": 0.3967914438502674, "grad_norm": 0.251953125, "learning_rate": 1.8696106204504435e-05, "loss": 1.0337, "num_tokens": 9667565586.0, "step": 2226 }, { "epoch": 0.396969696969697, "grad_norm": 0.2421875, "learning_rate": 1.8694759000613422e-05, "loss": 1.0213, "num_tokens": 9673820809.0, "step": 2227 }, { "epoch": 0.39714795008912657, "grad_norm": 0.259765625, "learning_rate": 1.8693411155516513e-05, "loss": 1.0338, "num_tokens": 9680105327.0, "step": 2228 }, { "epoch": 0.39732620320855616, "grad_norm": 0.2431640625, "learning_rate": 1.8692062669326024e-05, "loss": 1.0394, "num_tokens": 9686379878.0, "step": 2229 }, { "epoch": 0.39750445632798576, "grad_norm": 0.255859375, "learning_rate": 1.8690713542154323e-05, "loss": 1.0324, "num_tokens": 9692664931.0, "step": 2230 }, { "epoch": 0.39768270944741535, "grad_norm": 0.2470703125, "learning_rate": 1.8689363774113842e-05, "loss": 1.0379, "num_tokens": 9698947562.0, "step": 2231 }, { "epoch": 0.39786096256684494, "grad_norm": 0.267578125, "learning_rate": 1.8688013365317047e-05, "loss": 1.0394, "num_tokens": 9705188781.0, "step": 2232 }, { "epoch": 0.3980392156862745, "grad_norm": 0.259765625, "learning_rate": 1.8686662315876477e-05, "loss": 1.0067, "num_tokens": 9711473023.0, "step": 2233 }, { "epoch": 0.3982174688057041, "grad_norm": 0.2421875, "learning_rate": 1.868531062590471e-05, "loss": 1.0619, "num_tokens": 9717740208.0, "step": 2234 }, { "epoch": 0.3983957219251337, "grad_norm": 0.25390625, "learning_rate": 1.868395829551438e-05, "loss": 1.0277, "num_tokens": 9724004495.0, "step": 2235 }, { "epoch": 0.39857397504456327, "grad_norm": 0.2421875, "learning_rate": 1.8682605324818182e-05, "loss": 1.0348, "num_tokens": 9730290199.0, "step": 2236 }, { "epoch": 0.39875222816399286, "grad_norm": 0.259765625, "learning_rate": 1.8681251713928858e-05, "loss": 1.0223, "num_tokens": 9736544640.0, "step": 2237 }, { "epoch": 0.39893048128342246, "grad_norm": 0.25390625, "learning_rate": 1.86798974629592e-05, "loss": 1.0644, "num_tokens": 9742828847.0, "step": 2238 }, { "epoch": 0.39910873440285205, "grad_norm": 0.25, "learning_rate": 1.8678542572022067e-05, "loss": 1.0365, "num_tokens": 9749113802.0, "step": 2239 }, { "epoch": 0.39928698752228164, "grad_norm": 0.259765625, "learning_rate": 1.8677187041230354e-05, "loss": 1.0178, "num_tokens": 9755399075.0, "step": 2240 }, { "epoch": 0.39946524064171124, "grad_norm": 0.25390625, "learning_rate": 1.8675830870697025e-05, "loss": 1.0728, "num_tokens": 9761656401.0, "step": 2241 }, { "epoch": 0.39964349376114083, "grad_norm": 0.251953125, "learning_rate": 1.8674474060535088e-05, "loss": 1.0471, "num_tokens": 9767939548.0, "step": 2242 }, { "epoch": 0.39982174688057043, "grad_norm": 0.2734375, "learning_rate": 1.86731166108576e-05, "loss": 1.0319, "num_tokens": 9774222037.0, "step": 2243 }, { "epoch": 0.4, "grad_norm": 0.2490234375, "learning_rate": 1.8671758521777684e-05, "loss": 1.0311, "num_tokens": 9780503165.0, "step": 2244 }, { "epoch": 0.4001782531194296, "grad_norm": 0.302734375, "learning_rate": 1.8670399793408505e-05, "loss": 1.0198, "num_tokens": 9786787802.0, "step": 2245 }, { "epoch": 0.40035650623885916, "grad_norm": 0.263671875, "learning_rate": 1.8669040425863293e-05, "loss": 1.0268, "num_tokens": 9793044781.0, "step": 2246 }, { "epoch": 0.40053475935828875, "grad_norm": 0.255859375, "learning_rate": 1.8667680419255315e-05, "loss": 1.0297, "num_tokens": 9799327453.0, "step": 2247 }, { "epoch": 0.40071301247771834, "grad_norm": 0.251953125, "learning_rate": 1.8666319773697912e-05, "loss": 1.0319, "num_tokens": 9805587484.0, "step": 2248 }, { "epoch": 0.40089126559714794, "grad_norm": 0.259765625, "learning_rate": 1.866495848930446e-05, "loss": 1.0384, "num_tokens": 9811841431.0, "step": 2249 }, { "epoch": 0.40106951871657753, "grad_norm": 0.240234375, "learning_rate": 1.8663596566188394e-05, "loss": 1.0154, "num_tokens": 9818071188.0, "step": 2250 }, { "epoch": 0.4012477718360071, "grad_norm": 0.267578125, "learning_rate": 1.8662234004463205e-05, "loss": 1.0394, "num_tokens": 9824324017.0, "step": 2251 }, { "epoch": 0.4014260249554367, "grad_norm": 0.2451171875, "learning_rate": 1.866087080424244e-05, "loss": 1.0035, "num_tokens": 9830573234.0, "step": 2252 }, { "epoch": 0.4016042780748663, "grad_norm": 0.251953125, "learning_rate": 1.8659506965639687e-05, "loss": 1.02, "num_tokens": 9836826538.0, "step": 2253 }, { "epoch": 0.4017825311942959, "grad_norm": 0.267578125, "learning_rate": 1.8658142488768597e-05, "loss": 1.0258, "num_tokens": 9843077594.0, "step": 2254 }, { "epoch": 0.4019607843137255, "grad_norm": 0.251953125, "learning_rate": 1.8656777373742877e-05, "loss": 1.0412, "num_tokens": 9849356905.0, "step": 2255 }, { "epoch": 0.4021390374331551, "grad_norm": 0.2578125, "learning_rate": 1.8655411620676278e-05, "loss": 1.0385, "num_tokens": 9855641152.0, "step": 2256 }, { "epoch": 0.4023172905525847, "grad_norm": 0.25390625, "learning_rate": 1.865404522968261e-05, "loss": 1.0164, "num_tokens": 9861909549.0, "step": 2257 }, { "epoch": 0.40249554367201423, "grad_norm": 0.2578125, "learning_rate": 1.8652678200875732e-05, "loss": 1.0227, "num_tokens": 9868177641.0, "step": 2258 }, { "epoch": 0.4026737967914438, "grad_norm": 0.26953125, "learning_rate": 1.8651310534369565e-05, "loss": 1.0337, "num_tokens": 9874409989.0, "step": 2259 }, { "epoch": 0.4028520499108734, "grad_norm": 0.251953125, "learning_rate": 1.8649942230278074e-05, "loss": 1.0393, "num_tokens": 9880646990.0, "step": 2260 }, { "epoch": 0.403030303030303, "grad_norm": 0.2412109375, "learning_rate": 1.8648573288715275e-05, "loss": 1.0416, "num_tokens": 9886915273.0, "step": 2261 }, { "epoch": 0.4032085561497326, "grad_norm": 0.2578125, "learning_rate": 1.864720370979525e-05, "loss": 1.0096, "num_tokens": 9893187404.0, "step": 2262 }, { "epoch": 0.4033868092691622, "grad_norm": 0.251953125, "learning_rate": 1.864583349363212e-05, "loss": 1.0477, "num_tokens": 9899441153.0, "step": 2263 }, { "epoch": 0.4035650623885918, "grad_norm": 0.265625, "learning_rate": 1.864446264034007e-05, "loss": 0.9986, "num_tokens": 9905725649.0, "step": 2264 }, { "epoch": 0.4037433155080214, "grad_norm": 0.25, "learning_rate": 1.8643091150033326e-05, "loss": 1.0431, "num_tokens": 9912006169.0, "step": 2265 }, { "epoch": 0.403921568627451, "grad_norm": 0.255859375, "learning_rate": 1.8641719022826186e-05, "loss": 1.0456, "num_tokens": 9918288846.0, "step": 2266 }, { "epoch": 0.4040998217468806, "grad_norm": 0.25390625, "learning_rate": 1.864034625883298e-05, "loss": 1.0434, "num_tokens": 9924573112.0, "step": 2267 }, { "epoch": 0.4042780748663102, "grad_norm": 0.283203125, "learning_rate": 1.8638972858168106e-05, "loss": 1.0571, "num_tokens": 9930857082.0, "step": 2268 }, { "epoch": 0.40445632798573977, "grad_norm": 0.251953125, "learning_rate": 1.863759882094601e-05, "loss": 1.0188, "num_tokens": 9937117068.0, "step": 2269 }, { "epoch": 0.40463458110516937, "grad_norm": 0.26953125, "learning_rate": 1.8636224147281186e-05, "loss": 1.0504, "num_tokens": 9943398328.0, "step": 2270 }, { "epoch": 0.4048128342245989, "grad_norm": 0.26171875, "learning_rate": 1.8634848837288194e-05, "loss": 1.0365, "num_tokens": 9949661588.0, "step": 2271 }, { "epoch": 0.4049910873440285, "grad_norm": 0.267578125, "learning_rate": 1.863347289108163e-05, "loss": 0.9857, "num_tokens": 9955944258.0, "step": 2272 }, { "epoch": 0.4051693404634581, "grad_norm": 0.296875, "learning_rate": 1.8632096308776154e-05, "loss": 1.0729, "num_tokens": 9962222534.0, "step": 2273 }, { "epoch": 0.4053475935828877, "grad_norm": 0.2421875, "learning_rate": 1.863071909048648e-05, "loss": 1.0959, "num_tokens": 9968479879.0, "step": 2274 }, { "epoch": 0.4055258467023173, "grad_norm": 0.279296875, "learning_rate": 1.8629341236327373e-05, "loss": 1.0406, "num_tokens": 9974763843.0, "step": 2275 }, { "epoch": 0.4057040998217469, "grad_norm": 0.255859375, "learning_rate": 1.862796274641364e-05, "loss": 1.048, "num_tokens": 9981020156.0, "step": 2276 }, { "epoch": 0.40588235294117647, "grad_norm": 0.2734375, "learning_rate": 1.8626583620860164e-05, "loss": 1.0587, "num_tokens": 9987270563.0, "step": 2277 }, { "epoch": 0.40606060606060607, "grad_norm": 0.251953125, "learning_rate": 1.862520385978186e-05, "loss": 1.0004, "num_tokens": 9993553152.0, "step": 2278 }, { "epoch": 0.40623885918003566, "grad_norm": 0.25, "learning_rate": 1.8623823463293712e-05, "loss": 0.9994, "num_tokens": 9999814533.0, "step": 2279 }, { "epoch": 0.40641711229946526, "grad_norm": 0.271484375, "learning_rate": 1.8622442431510735e-05, "loss": 1.0355, "num_tokens": 10006036824.0, "step": 2280 }, { "epoch": 0.40659536541889485, "grad_norm": 0.2470703125, "learning_rate": 1.862106076454802e-05, "loss": 1.0408, "num_tokens": 10012292415.0, "step": 2281 }, { "epoch": 0.40677361853832444, "grad_norm": 0.26171875, "learning_rate": 1.86196784625207e-05, "loss": 0.9951, "num_tokens": 10018576464.0, "step": 2282 }, { "epoch": 0.406951871657754, "grad_norm": 0.279296875, "learning_rate": 1.8618295525543964e-05, "loss": 1.0439, "num_tokens": 10024807603.0, "step": 2283 }, { "epoch": 0.4071301247771836, "grad_norm": 0.2578125, "learning_rate": 1.861691195373305e-05, "loss": 1.0318, "num_tokens": 10031060123.0, "step": 2284 }, { "epoch": 0.40730837789661317, "grad_norm": 0.275390625, "learning_rate": 1.861552774720325e-05, "loss": 1.0212, "num_tokens": 10037317671.0, "step": 2285 }, { "epoch": 0.40748663101604277, "grad_norm": 0.2451171875, "learning_rate": 1.861414290606991e-05, "loss": 1.0185, "num_tokens": 10043574888.0, "step": 2286 }, { "epoch": 0.40766488413547236, "grad_norm": 0.271484375, "learning_rate": 1.8612757430448437e-05, "loss": 1.0104, "num_tokens": 10049854812.0, "step": 2287 }, { "epoch": 0.40784313725490196, "grad_norm": 0.251953125, "learning_rate": 1.8611371320454275e-05, "loss": 1.0432, "num_tokens": 10056136915.0, "step": 2288 }, { "epoch": 0.40802139037433155, "grad_norm": 0.2353515625, "learning_rate": 1.8609984576202933e-05, "loss": 1.0582, "num_tokens": 10062422076.0, "step": 2289 }, { "epoch": 0.40819964349376114, "grad_norm": 0.251953125, "learning_rate": 1.8608597197809964e-05, "loss": 1.0319, "num_tokens": 10068675710.0, "step": 2290 }, { "epoch": 0.40837789661319074, "grad_norm": 0.2265625, "learning_rate": 1.860720918539098e-05, "loss": 1.0284, "num_tokens": 10074960539.0, "step": 2291 }, { "epoch": 0.40855614973262033, "grad_norm": 0.259765625, "learning_rate": 1.8605820539061653e-05, "loss": 1.0487, "num_tokens": 10081222086.0, "step": 2292 }, { "epoch": 0.4087344028520499, "grad_norm": 0.236328125, "learning_rate": 1.8604431258937686e-05, "loss": 1.0258, "num_tokens": 10087474762.0, "step": 2293 }, { "epoch": 0.4089126559714795, "grad_norm": 0.271484375, "learning_rate": 1.8603041345134854e-05, "loss": 1.0237, "num_tokens": 10093750785.0, "step": 2294 }, { "epoch": 0.4090909090909091, "grad_norm": 0.248046875, "learning_rate": 1.860165079776898e-05, "loss": 1.0347, "num_tokens": 10100035064.0, "step": 2295 }, { "epoch": 0.40926916221033866, "grad_norm": 0.27734375, "learning_rate": 1.860025961695594e-05, "loss": 1.0505, "num_tokens": 10106302188.0, "step": 2296 }, { "epoch": 0.40944741532976825, "grad_norm": 0.25, "learning_rate": 1.8598867802811653e-05, "loss": 1.026, "num_tokens": 10112568619.0, "step": 2297 }, { "epoch": 0.40962566844919784, "grad_norm": 0.267578125, "learning_rate": 1.859747535545211e-05, "loss": 1.05, "num_tokens": 10118849283.0, "step": 2298 }, { "epoch": 0.40980392156862744, "grad_norm": 0.2373046875, "learning_rate": 1.8596082274993336e-05, "loss": 1.066, "num_tokens": 10125131958.0, "step": 2299 }, { "epoch": 0.40998217468805703, "grad_norm": 0.267578125, "learning_rate": 1.8594688561551423e-05, "loss": 1.0352, "num_tokens": 10131414890.0, "step": 2300 }, { "epoch": 0.4101604278074866, "grad_norm": 0.2734375, "learning_rate": 1.85932942152425e-05, "loss": 1.0401, "num_tokens": 10137697470.0, "step": 2301 }, { "epoch": 0.4103386809269162, "grad_norm": 0.279296875, "learning_rate": 1.859189923618277e-05, "loss": 1.063, "num_tokens": 10143981930.0, "step": 2302 }, { "epoch": 0.4105169340463458, "grad_norm": 0.23046875, "learning_rate": 1.859050362448847e-05, "loss": 1.0431, "num_tokens": 10150232777.0, "step": 2303 }, { "epoch": 0.4106951871657754, "grad_norm": 0.263671875, "learning_rate": 1.8589107380275893e-05, "loss": 1.0664, "num_tokens": 10156496421.0, "step": 2304 }, { "epoch": 0.410873440285205, "grad_norm": 0.2421875, "learning_rate": 1.85877105036614e-05, "loss": 1.024, "num_tokens": 10162778130.0, "step": 2305 }, { "epoch": 0.4110516934046346, "grad_norm": 0.25390625, "learning_rate": 1.858631299476138e-05, "loss": 1.0232, "num_tokens": 10169030668.0, "step": 2306 }, { "epoch": 0.4112299465240642, "grad_norm": 0.26953125, "learning_rate": 1.8584914853692297e-05, "loss": 1.0291, "num_tokens": 10175259341.0, "step": 2307 }, { "epoch": 0.4114081996434938, "grad_norm": 0.26171875, "learning_rate": 1.8583516080570654e-05, "loss": 1.0127, "num_tokens": 10181542708.0, "step": 2308 }, { "epoch": 0.4115864527629233, "grad_norm": 0.2734375, "learning_rate": 1.8582116675513014e-05, "loss": 1.0355, "num_tokens": 10187826740.0, "step": 2309 }, { "epoch": 0.4117647058823529, "grad_norm": 0.251953125, "learning_rate": 1.8580716638635988e-05, "loss": 0.9887, "num_tokens": 10194097099.0, "step": 2310 }, { "epoch": 0.4119429590017825, "grad_norm": 0.271484375, "learning_rate": 1.8579315970056237e-05, "loss": 1.0432, "num_tokens": 10200366171.0, "step": 2311 }, { "epoch": 0.4121212121212121, "grad_norm": 0.265625, "learning_rate": 1.857791466989049e-05, "loss": 1.0248, "num_tokens": 10206650699.0, "step": 2312 }, { "epoch": 0.4122994652406417, "grad_norm": 0.26953125, "learning_rate": 1.8576512738255506e-05, "loss": 1.0337, "num_tokens": 10212919747.0, "step": 2313 }, { "epoch": 0.4124777183600713, "grad_norm": 0.263671875, "learning_rate": 1.8575110175268113e-05, "loss": 1.0502, "num_tokens": 10219179774.0, "step": 2314 }, { "epoch": 0.4126559714795009, "grad_norm": 0.24609375, "learning_rate": 1.857370698104519e-05, "loss": 1.0292, "num_tokens": 10225463181.0, "step": 2315 }, { "epoch": 0.4128342245989305, "grad_norm": 0.2490234375, "learning_rate": 1.857230315570366e-05, "loss": 1.0344, "num_tokens": 10231717849.0, "step": 2316 }, { "epoch": 0.4130124777183601, "grad_norm": 0.240234375, "learning_rate": 1.8570898699360508e-05, "loss": 0.9933, "num_tokens": 10238002589.0, "step": 2317 }, { "epoch": 0.4131907308377897, "grad_norm": 0.25, "learning_rate": 1.8569493612132767e-05, "loss": 1.0427, "num_tokens": 10244285389.0, "step": 2318 }, { "epoch": 0.41336898395721927, "grad_norm": 0.2421875, "learning_rate": 1.856808789413752e-05, "loss": 1.0389, "num_tokens": 10250555779.0, "step": 2319 }, { "epoch": 0.41354723707664887, "grad_norm": 0.25, "learning_rate": 1.8566681545491913e-05, "loss": 1.0315, "num_tokens": 10256840511.0, "step": 2320 }, { "epoch": 0.4137254901960784, "grad_norm": 0.2060546875, "learning_rate": 1.856527456631313e-05, "loss": 1.0204, "num_tokens": 10263046228.0, "step": 2321 }, { "epoch": 0.413903743315508, "grad_norm": 0.2578125, "learning_rate": 1.8563866956718412e-05, "loss": 1.0355, "num_tokens": 10269326029.0, "step": 2322 }, { "epoch": 0.4140819964349376, "grad_norm": 0.2255859375, "learning_rate": 1.8562458716825066e-05, "loss": 1.0272, "num_tokens": 10275550459.0, "step": 2323 }, { "epoch": 0.4142602495543672, "grad_norm": 0.255859375, "learning_rate": 1.8561049846750436e-05, "loss": 1.0443, "num_tokens": 10281822854.0, "step": 2324 }, { "epoch": 0.4144385026737968, "grad_norm": 0.228515625, "learning_rate": 1.855964034661192e-05, "loss": 1.0313, "num_tokens": 10288103828.0, "step": 2325 }, { "epoch": 0.4146167557932264, "grad_norm": 0.248046875, "learning_rate": 1.855823021652698e-05, "loss": 1.0252, "num_tokens": 10294356955.0, "step": 2326 }, { "epoch": 0.41479500891265597, "grad_norm": 0.228515625, "learning_rate": 1.8556819456613114e-05, "loss": 1.029, "num_tokens": 10300641593.0, "step": 2327 }, { "epoch": 0.41497326203208557, "grad_norm": 0.2373046875, "learning_rate": 1.855540806698789e-05, "loss": 1.0253, "num_tokens": 10306860600.0, "step": 2328 }, { "epoch": 0.41515151515151516, "grad_norm": 0.24609375, "learning_rate": 1.855399604776891e-05, "loss": 1.0237, "num_tokens": 10313132329.0, "step": 2329 }, { "epoch": 0.41532976827094475, "grad_norm": 0.2353515625, "learning_rate": 1.8552583399073836e-05, "loss": 1.0248, "num_tokens": 10319404122.0, "step": 2330 }, { "epoch": 0.41550802139037435, "grad_norm": 0.255859375, "learning_rate": 1.85511701210204e-05, "loss": 1.0353, "num_tokens": 10325686880.0, "step": 2331 }, { "epoch": 0.41568627450980394, "grad_norm": 0.2421875, "learning_rate": 1.8549756213726355e-05, "loss": 1.0074, "num_tokens": 10331970591.0, "step": 2332 }, { "epoch": 0.41586452762923354, "grad_norm": 0.25390625, "learning_rate": 1.854834167730953e-05, "loss": 1.0569, "num_tokens": 10338252886.0, "step": 2333 }, { "epoch": 0.4160427807486631, "grad_norm": 0.25, "learning_rate": 1.8546926511887795e-05, "loss": 0.9947, "num_tokens": 10344535491.0, "step": 2334 }, { "epoch": 0.41622103386809267, "grad_norm": 0.244140625, "learning_rate": 1.8545510717579077e-05, "loss": 1.0616, "num_tokens": 10350805066.0, "step": 2335 }, { "epoch": 0.41639928698752227, "grad_norm": 0.2490234375, "learning_rate": 1.854409429450136e-05, "loss": 1.0296, "num_tokens": 10357089404.0, "step": 2336 }, { "epoch": 0.41657754010695186, "grad_norm": 0.2314453125, "learning_rate": 1.8542677242772665e-05, "loss": 1.006, "num_tokens": 10363342483.0, "step": 2337 }, { "epoch": 0.41675579322638145, "grad_norm": 0.25390625, "learning_rate": 1.8541259562511083e-05, "loss": 1.0511, "num_tokens": 10369621619.0, "step": 2338 }, { "epoch": 0.41693404634581105, "grad_norm": 0.25390625, "learning_rate": 1.8539841253834748e-05, "loss": 1.0378, "num_tokens": 10375891124.0, "step": 2339 }, { "epoch": 0.41711229946524064, "grad_norm": 0.255859375, "learning_rate": 1.8538422316861844e-05, "loss": 1.0454, "num_tokens": 10382174177.0, "step": 2340 }, { "epoch": 0.41729055258467024, "grad_norm": 0.24609375, "learning_rate": 1.8537002751710615e-05, "loss": 1.04, "num_tokens": 10388453533.0, "step": 2341 }, { "epoch": 0.41746880570409983, "grad_norm": 0.3046875, "learning_rate": 1.8535582558499353e-05, "loss": 1.0484, "num_tokens": 10394737412.0, "step": 2342 }, { "epoch": 0.4176470588235294, "grad_norm": 0.25, "learning_rate": 1.85341617373464e-05, "loss": 1.0377, "num_tokens": 10400967617.0, "step": 2343 }, { "epoch": 0.417825311942959, "grad_norm": 0.28515625, "learning_rate": 1.853274028837016e-05, "loss": 1.057, "num_tokens": 10407222323.0, "step": 2344 }, { "epoch": 0.4180035650623886, "grad_norm": 0.234375, "learning_rate": 1.8531318211689077e-05, "loss": 1.0262, "num_tokens": 10413506528.0, "step": 2345 }, { "epoch": 0.41818181818181815, "grad_norm": 0.251953125, "learning_rate": 1.8529895507421653e-05, "loss": 1.009, "num_tokens": 10419729342.0, "step": 2346 }, { "epoch": 0.41836007130124775, "grad_norm": 0.2490234375, "learning_rate": 1.852847217568645e-05, "loss": 1.0301, "num_tokens": 10426005127.0, "step": 2347 }, { "epoch": 0.41853832442067734, "grad_norm": 0.26171875, "learning_rate": 1.8527048216602064e-05, "loss": 1.0386, "num_tokens": 10432270559.0, "step": 2348 }, { "epoch": 0.41871657754010694, "grad_norm": 0.2412109375, "learning_rate": 1.8525623630287157e-05, "loss": 1.0473, "num_tokens": 10438556241.0, "step": 2349 }, { "epoch": 0.41889483065953653, "grad_norm": 0.251953125, "learning_rate": 1.852419841686044e-05, "loss": 1.06, "num_tokens": 10444808667.0, "step": 2350 }, { "epoch": 0.4190730837789661, "grad_norm": 0.25, "learning_rate": 1.852277257644068e-05, "loss": 1.0412, "num_tokens": 10451056460.0, "step": 2351 }, { "epoch": 0.4192513368983957, "grad_norm": 0.25390625, "learning_rate": 1.852134610914669e-05, "loss": 1.053, "num_tokens": 10457339427.0, "step": 2352 }, { "epoch": 0.4194295900178253, "grad_norm": 0.271484375, "learning_rate": 1.8519919015097343e-05, "loss": 1.0569, "num_tokens": 10463619324.0, "step": 2353 }, { "epoch": 0.4196078431372549, "grad_norm": 0.296875, "learning_rate": 1.8518491294411547e-05, "loss": 1.0016, "num_tokens": 10469903409.0, "step": 2354 }, { "epoch": 0.4197860962566845, "grad_norm": 0.26953125, "learning_rate": 1.8517062947208287e-05, "loss": 1.0226, "num_tokens": 10476186787.0, "step": 2355 }, { "epoch": 0.4199643493761141, "grad_norm": 0.255859375, "learning_rate": 1.8515633973606578e-05, "loss": 1.0806, "num_tokens": 10482456634.0, "step": 2356 }, { "epoch": 0.4201426024955437, "grad_norm": 0.26953125, "learning_rate": 1.8514204373725503e-05, "loss": 1.0084, "num_tokens": 10488740320.0, "step": 2357 }, { "epoch": 0.4203208556149733, "grad_norm": 0.26171875, "learning_rate": 1.8512774147684188e-05, "loss": 1.0366, "num_tokens": 10495012789.0, "step": 2358 }, { "epoch": 0.4204991087344028, "grad_norm": 0.255859375, "learning_rate": 1.851134329560181e-05, "loss": 1.0204, "num_tokens": 10501297676.0, "step": 2359 }, { "epoch": 0.4206773618538324, "grad_norm": 0.26953125, "learning_rate": 1.850991181759761e-05, "loss": 1.0252, "num_tokens": 10507580917.0, "step": 2360 }, { "epoch": 0.420855614973262, "grad_norm": 0.232421875, "learning_rate": 1.850847971379087e-05, "loss": 1.0172, "num_tokens": 10513857915.0, "step": 2361 }, { "epoch": 0.4210338680926916, "grad_norm": 0.2236328125, "learning_rate": 1.8507046984300928e-05, "loss": 1.0233, "num_tokens": 10520138802.0, "step": 2362 }, { "epoch": 0.4212121212121212, "grad_norm": 0.27734375, "learning_rate": 1.8505613629247173e-05, "loss": 1.0152, "num_tokens": 10526409728.0, "step": 2363 }, { "epoch": 0.4213903743315508, "grad_norm": 0.2236328125, "learning_rate": 1.8504179648749047e-05, "loss": 1.0447, "num_tokens": 10532676447.0, "step": 2364 }, { "epoch": 0.4215686274509804, "grad_norm": 0.279296875, "learning_rate": 1.8502745042926045e-05, "loss": 1.0736, "num_tokens": 10538960447.0, "step": 2365 }, { "epoch": 0.42174688057041, "grad_norm": 0.248046875, "learning_rate": 1.850130981189771e-05, "loss": 1.0079, "num_tokens": 10545245715.0, "step": 2366 }, { "epoch": 0.4219251336898396, "grad_norm": 0.255859375, "learning_rate": 1.849987395578364e-05, "loss": 1.0795, "num_tokens": 10551475093.0, "step": 2367 }, { "epoch": 0.4221033868092692, "grad_norm": 0.2431640625, "learning_rate": 1.849843747470349e-05, "loss": 1.0221, "num_tokens": 10557739025.0, "step": 2368 }, { "epoch": 0.42228163992869877, "grad_norm": 0.3046875, "learning_rate": 1.8497000368776962e-05, "loss": 1.0266, "num_tokens": 10564004086.0, "step": 2369 }, { "epoch": 0.42245989304812837, "grad_norm": 0.255859375, "learning_rate": 1.8495562638123806e-05, "loss": 1.0087, "num_tokens": 10570260697.0, "step": 2370 }, { "epoch": 0.42263814616755796, "grad_norm": 0.291015625, "learning_rate": 1.849412428286383e-05, "loss": 1.0515, "num_tokens": 10576513400.0, "step": 2371 }, { "epoch": 0.4228163992869875, "grad_norm": 0.27734375, "learning_rate": 1.849268530311689e-05, "loss": 1.0196, "num_tokens": 10582776921.0, "step": 2372 }, { "epoch": 0.4229946524064171, "grad_norm": 0.28515625, "learning_rate": 1.84912456990029e-05, "loss": 1.029, "num_tokens": 10589034612.0, "step": 2373 }, { "epoch": 0.4231729055258467, "grad_norm": 0.2451171875, "learning_rate": 1.8489805470641823e-05, "loss": 1.0207, "num_tokens": 10595318863.0, "step": 2374 }, { "epoch": 0.4233511586452763, "grad_norm": 0.259765625, "learning_rate": 1.848836461815367e-05, "loss": 1.0383, "num_tokens": 10601602337.0, "step": 2375 }, { "epoch": 0.4235294117647059, "grad_norm": 0.265625, "learning_rate": 1.848692314165851e-05, "loss": 1.0404, "num_tokens": 10607848926.0, "step": 2376 }, { "epoch": 0.42370766488413547, "grad_norm": 0.2578125, "learning_rate": 1.8485481041276464e-05, "loss": 1.0628, "num_tokens": 10614116249.0, "step": 2377 }, { "epoch": 0.42388591800356507, "grad_norm": 0.275390625, "learning_rate": 1.8484038317127697e-05, "loss": 1.0296, "num_tokens": 10620399819.0, "step": 2378 }, { "epoch": 0.42406417112299466, "grad_norm": 0.240234375, "learning_rate": 1.8482594969332438e-05, "loss": 1.0735, "num_tokens": 10626671770.0, "step": 2379 }, { "epoch": 0.42424242424242425, "grad_norm": 0.24609375, "learning_rate": 1.8481150998010954e-05, "loss": 1.03, "num_tokens": 10632954345.0, "step": 2380 }, { "epoch": 0.42442067736185385, "grad_norm": 0.23828125, "learning_rate": 1.8479706403283576e-05, "loss": 1.043, "num_tokens": 10639211697.0, "step": 2381 }, { "epoch": 0.42459893048128344, "grad_norm": 0.2431640625, "learning_rate": 1.8478261185270682e-05, "loss": 1.0272, "num_tokens": 10645474643.0, "step": 2382 }, { "epoch": 0.42477718360071304, "grad_norm": 0.25, "learning_rate": 1.84768153440927e-05, "loss": 1.035, "num_tokens": 10651758005.0, "step": 2383 }, { "epoch": 0.4249554367201426, "grad_norm": 0.25390625, "learning_rate": 1.8475368879870116e-05, "loss": 1.0144, "num_tokens": 10657998562.0, "step": 2384 }, { "epoch": 0.42513368983957217, "grad_norm": 0.21875, "learning_rate": 1.8473921792723465e-05, "loss": 1.0145, "num_tokens": 10664265919.0, "step": 2385 }, { "epoch": 0.42531194295900177, "grad_norm": 0.28125, "learning_rate": 1.8472474082773325e-05, "loss": 0.9964, "num_tokens": 10670550398.0, "step": 2386 }, { "epoch": 0.42549019607843136, "grad_norm": 0.2275390625, "learning_rate": 1.8471025750140345e-05, "loss": 1.0352, "num_tokens": 10676815443.0, "step": 2387 }, { "epoch": 0.42566844919786095, "grad_norm": 0.234375, "learning_rate": 1.8469576794945206e-05, "loss": 1.0421, "num_tokens": 10683069611.0, "step": 2388 }, { "epoch": 0.42584670231729055, "grad_norm": 0.2275390625, "learning_rate": 1.8468127217308655e-05, "loss": 1.0292, "num_tokens": 10689310536.0, "step": 2389 }, { "epoch": 0.42602495543672014, "grad_norm": 0.2236328125, "learning_rate": 1.846667701735148e-05, "loss": 1.0491, "num_tokens": 10695571728.0, "step": 2390 }, { "epoch": 0.42620320855614974, "grad_norm": 0.26171875, "learning_rate": 1.8465226195194533e-05, "loss": 1.0214, "num_tokens": 10701852824.0, "step": 2391 }, { "epoch": 0.42638146167557933, "grad_norm": 0.26171875, "learning_rate": 1.8463774750958703e-05, "loss": 1.0265, "num_tokens": 10708117005.0, "step": 2392 }, { "epoch": 0.4265597147950089, "grad_norm": 0.25, "learning_rate": 1.846232268476495e-05, "loss": 1.0478, "num_tokens": 10714400997.0, "step": 2393 }, { "epoch": 0.4267379679144385, "grad_norm": 0.251953125, "learning_rate": 1.846086999673427e-05, "loss": 1.0305, "num_tokens": 10720666094.0, "step": 2394 }, { "epoch": 0.4269162210338681, "grad_norm": 0.2236328125, "learning_rate": 1.8459416686987714e-05, "loss": 1.0136, "num_tokens": 10726952829.0, "step": 2395 }, { "epoch": 0.4270944741532977, "grad_norm": 0.240234375, "learning_rate": 1.8457962755646384e-05, "loss": 1.0457, "num_tokens": 10733218556.0, "step": 2396 }, { "epoch": 0.42727272727272725, "grad_norm": 0.28515625, "learning_rate": 1.845650820283145e-05, "loss": 1.0192, "num_tokens": 10739476137.0, "step": 2397 }, { "epoch": 0.42745098039215684, "grad_norm": 0.2578125, "learning_rate": 1.84550530286641e-05, "loss": 1.0113, "num_tokens": 10745759950.0, "step": 2398 }, { "epoch": 0.42762923351158644, "grad_norm": 0.251953125, "learning_rate": 1.845359723326561e-05, "loss": 1.0411, "num_tokens": 10752027248.0, "step": 2399 }, { "epoch": 0.42780748663101603, "grad_norm": 0.255859375, "learning_rate": 1.8452140816757284e-05, "loss": 1.0491, "num_tokens": 10758300565.0, "step": 2400 }, { "epoch": 0.4279857397504456, "grad_norm": 0.255859375, "learning_rate": 1.845068377926049e-05, "loss": 1.0397, "num_tokens": 10764532223.0, "step": 2401 }, { "epoch": 0.4281639928698752, "grad_norm": 0.271484375, "learning_rate": 1.844922612089664e-05, "loss": 0.9983, "num_tokens": 10770788713.0, "step": 2402 }, { "epoch": 0.4283422459893048, "grad_norm": 0.2412109375, "learning_rate": 1.84477678417872e-05, "loss": 1.0576, "num_tokens": 10777070363.0, "step": 2403 }, { "epoch": 0.4285204991087344, "grad_norm": 0.24609375, "learning_rate": 1.8446308942053695e-05, "loss": 1.0136, "num_tokens": 10783353962.0, "step": 2404 }, { "epoch": 0.428698752228164, "grad_norm": 0.2490234375, "learning_rate": 1.844484942181769e-05, "loss": 1.0083, "num_tokens": 10789635581.0, "step": 2405 }, { "epoch": 0.4288770053475936, "grad_norm": 0.23828125, "learning_rate": 1.8443389281200803e-05, "loss": 1.0417, "num_tokens": 10795918457.0, "step": 2406 }, { "epoch": 0.4290552584670232, "grad_norm": 0.2490234375, "learning_rate": 1.8441928520324717e-05, "loss": 1.0228, "num_tokens": 10802201344.0, "step": 2407 }, { "epoch": 0.4292335115864528, "grad_norm": 0.24609375, "learning_rate": 1.8440467139311157e-05, "loss": 1.057, "num_tokens": 10808453575.0, "step": 2408 }, { "epoch": 0.4294117647058823, "grad_norm": 0.255859375, "learning_rate": 1.843900513828189e-05, "loss": 1.0547, "num_tokens": 10814717489.0, "step": 2409 }, { "epoch": 0.4295900178253119, "grad_norm": 0.248046875, "learning_rate": 1.8437542517358755e-05, "loss": 1.0443, "num_tokens": 10820997498.0, "step": 2410 }, { "epoch": 0.4297682709447415, "grad_norm": 0.255859375, "learning_rate": 1.843607927666363e-05, "loss": 1.0347, "num_tokens": 10827234850.0, "step": 2411 }, { "epoch": 0.4299465240641711, "grad_norm": 0.23046875, "learning_rate": 1.8434615416318444e-05, "loss": 1.022, "num_tokens": 10833518360.0, "step": 2412 }, { "epoch": 0.4301247771836007, "grad_norm": 0.224609375, "learning_rate": 1.8433150936445184e-05, "loss": 1.0391, "num_tokens": 10839801483.0, "step": 2413 }, { "epoch": 0.4303030303030303, "grad_norm": 0.255859375, "learning_rate": 1.8431685837165885e-05, "loss": 1.0449, "num_tokens": 10846080587.0, "step": 2414 }, { "epoch": 0.4304812834224599, "grad_norm": 0.25390625, "learning_rate": 1.8430220118602634e-05, "loss": 1.0381, "num_tokens": 10852364842.0, "step": 2415 }, { "epoch": 0.4306595365418895, "grad_norm": 0.2392578125, "learning_rate": 1.8428753780877567e-05, "loss": 1.0337, "num_tokens": 10858650018.0, "step": 2416 }, { "epoch": 0.4308377896613191, "grad_norm": 0.24609375, "learning_rate": 1.8427286824112877e-05, "loss": 1.0294, "num_tokens": 10864914490.0, "step": 2417 }, { "epoch": 0.4310160427807487, "grad_norm": 0.25390625, "learning_rate": 1.8425819248430804e-05, "loss": 1.0073, "num_tokens": 10871180942.0, "step": 2418 }, { "epoch": 0.43119429590017827, "grad_norm": 0.2392578125, "learning_rate": 1.8424351053953646e-05, "loss": 1.0452, "num_tokens": 10877427833.0, "step": 2419 }, { "epoch": 0.43137254901960786, "grad_norm": 0.2734375, "learning_rate": 1.8422882240803742e-05, "loss": 1.0496, "num_tokens": 10883684989.0, "step": 2420 }, { "epoch": 0.43155080213903746, "grad_norm": 0.25390625, "learning_rate": 1.8421412809103495e-05, "loss": 1.0311, "num_tokens": 10889967822.0, "step": 2421 }, { "epoch": 0.431729055258467, "grad_norm": 0.25, "learning_rate": 1.8419942758975346e-05, "loss": 1.0058, "num_tokens": 10896243170.0, "step": 2422 }, { "epoch": 0.4319073083778966, "grad_norm": 0.26171875, "learning_rate": 1.8418472090541798e-05, "loss": 1.038, "num_tokens": 10902526581.0, "step": 2423 }, { "epoch": 0.4320855614973262, "grad_norm": 0.2490234375, "learning_rate": 1.8417000803925398e-05, "loss": 1.0412, "num_tokens": 10908791671.0, "step": 2424 }, { "epoch": 0.4322638146167558, "grad_norm": 0.265625, "learning_rate": 1.8415528899248754e-05, "loss": 1.0218, "num_tokens": 10915052153.0, "step": 2425 }, { "epoch": 0.4324420677361854, "grad_norm": 0.255859375, "learning_rate": 1.8414056376634525e-05, "loss": 1.0649, "num_tokens": 10921308036.0, "step": 2426 }, { "epoch": 0.43262032085561497, "grad_norm": 0.271484375, "learning_rate": 1.8412583236205404e-05, "loss": 1.0388, "num_tokens": 10927574748.0, "step": 2427 }, { "epoch": 0.43279857397504456, "grad_norm": 0.255859375, "learning_rate": 1.8411109478084154e-05, "loss": 1.0635, "num_tokens": 10933799149.0, "step": 2428 }, { "epoch": 0.43297682709447416, "grad_norm": 0.271484375, "learning_rate": 1.8409635102393584e-05, "loss": 1.0738, "num_tokens": 10940079010.0, "step": 2429 }, { "epoch": 0.43315508021390375, "grad_norm": 0.2578125, "learning_rate": 1.8408160109256562e-05, "loss": 1.0565, "num_tokens": 10946361731.0, "step": 2430 }, { "epoch": 0.43333333333333335, "grad_norm": 0.23828125, "learning_rate": 1.8406684498795982e-05, "loss": 1.0283, "num_tokens": 10952632771.0, "step": 2431 }, { "epoch": 0.43351158645276294, "grad_norm": 0.2333984375, "learning_rate": 1.840520827113482e-05, "loss": 1.0143, "num_tokens": 10958915084.0, "step": 2432 }, { "epoch": 0.43368983957219254, "grad_norm": 0.234375, "learning_rate": 1.8403731426396087e-05, "loss": 1.0359, "num_tokens": 10965175891.0, "step": 2433 }, { "epoch": 0.4338680926916221, "grad_norm": 0.236328125, "learning_rate": 1.8402253964702845e-05, "loss": 1.0444, "num_tokens": 10971459856.0, "step": 2434 }, { "epoch": 0.43404634581105167, "grad_norm": 0.2578125, "learning_rate": 1.8400775886178216e-05, "loss": 1.0545, "num_tokens": 10977736628.0, "step": 2435 }, { "epoch": 0.43422459893048126, "grad_norm": 0.240234375, "learning_rate": 1.839929719094537e-05, "loss": 1.0494, "num_tokens": 10984010859.0, "step": 2436 }, { "epoch": 0.43440285204991086, "grad_norm": 0.2431640625, "learning_rate": 1.8397817879127525e-05, "loss": 1.0036, "num_tokens": 10990248548.0, "step": 2437 }, { "epoch": 0.43458110516934045, "grad_norm": 0.236328125, "learning_rate": 1.8396337950847948e-05, "loss": 1.057, "num_tokens": 10996529122.0, "step": 2438 }, { "epoch": 0.43475935828877005, "grad_norm": 0.2421875, "learning_rate": 1.8394857406229966e-05, "loss": 1.0403, "num_tokens": 11002780628.0, "step": 2439 }, { "epoch": 0.43493761140819964, "grad_norm": 0.251953125, "learning_rate": 1.839337624539695e-05, "loss": 1.0247, "num_tokens": 11009037479.0, "step": 2440 }, { "epoch": 0.43511586452762924, "grad_norm": 0.275390625, "learning_rate": 1.8391894468472333e-05, "loss": 1.0194, "num_tokens": 11015309128.0, "step": 2441 }, { "epoch": 0.43529411764705883, "grad_norm": 0.265625, "learning_rate": 1.8390412075579583e-05, "loss": 1.031, "num_tokens": 11021593643.0, "step": 2442 }, { "epoch": 0.4354723707664884, "grad_norm": 0.24609375, "learning_rate": 1.8388929066842235e-05, "loss": 1.0176, "num_tokens": 11027878938.0, "step": 2443 }, { "epoch": 0.435650623885918, "grad_norm": 0.244140625, "learning_rate": 1.8387445442383864e-05, "loss": 1.0247, "num_tokens": 11034162247.0, "step": 2444 }, { "epoch": 0.4358288770053476, "grad_norm": 0.244140625, "learning_rate": 1.8385961202328104e-05, "loss": 1.0569, "num_tokens": 11040385090.0, "step": 2445 }, { "epoch": 0.4360071301247772, "grad_norm": 0.244140625, "learning_rate": 1.838447634679863e-05, "loss": 1.0506, "num_tokens": 11046654492.0, "step": 2446 }, { "epoch": 0.43618538324420675, "grad_norm": 0.2490234375, "learning_rate": 1.8382990875919182e-05, "loss": 1.0066, "num_tokens": 11052888440.0, "step": 2447 }, { "epoch": 0.43636363636363634, "grad_norm": 0.244140625, "learning_rate": 1.8381504789813545e-05, "loss": 1.0071, "num_tokens": 11059140796.0, "step": 2448 }, { "epoch": 0.43654188948306594, "grad_norm": 0.240234375, "learning_rate": 1.838001808860555e-05, "loss": 1.0387, "num_tokens": 11065426356.0, "step": 2449 }, { "epoch": 0.43672014260249553, "grad_norm": 0.2421875, "learning_rate": 1.837853077241909e-05, "loss": 1.0443, "num_tokens": 11071710439.0, "step": 2450 }, { "epoch": 0.4368983957219251, "grad_norm": 0.263671875, "learning_rate": 1.83770428413781e-05, "loss": 1.0374, "num_tokens": 11077994629.0, "step": 2451 }, { "epoch": 0.4370766488413547, "grad_norm": 0.2255859375, "learning_rate": 1.837555429560657e-05, "loss": 1.0433, "num_tokens": 11084252905.0, "step": 2452 }, { "epoch": 0.4372549019607843, "grad_norm": 0.265625, "learning_rate": 1.8374065135228537e-05, "loss": 1.0679, "num_tokens": 11090538262.0, "step": 2453 }, { "epoch": 0.4374331550802139, "grad_norm": 0.2236328125, "learning_rate": 1.83725753603681e-05, "loss": 1.0162, "num_tokens": 11096794952.0, "step": 2454 }, { "epoch": 0.4376114081996435, "grad_norm": 0.267578125, "learning_rate": 1.8371084971149397e-05, "loss": 1.0162, "num_tokens": 11103050279.0, "step": 2455 }, { "epoch": 0.4377896613190731, "grad_norm": 0.2314453125, "learning_rate": 1.8369593967696625e-05, "loss": 1.022, "num_tokens": 11109333715.0, "step": 2456 }, { "epoch": 0.4379679144385027, "grad_norm": 0.2890625, "learning_rate": 1.836810235013403e-05, "loss": 1.0477, "num_tokens": 11115617809.0, "step": 2457 }, { "epoch": 0.4381461675579323, "grad_norm": 0.2265625, "learning_rate": 1.836661011858591e-05, "loss": 1.0353, "num_tokens": 11121900923.0, "step": 2458 }, { "epoch": 0.4383244206773619, "grad_norm": 0.279296875, "learning_rate": 1.8365117273176607e-05, "loss": 1.0193, "num_tokens": 11128182473.0, "step": 2459 }, { "epoch": 0.4385026737967914, "grad_norm": 0.23828125, "learning_rate": 1.8363623814030525e-05, "loss": 1.0494, "num_tokens": 11134468003.0, "step": 2460 }, { "epoch": 0.438680926916221, "grad_norm": 0.271484375, "learning_rate": 1.8362129741272116e-05, "loss": 1.022, "num_tokens": 11140738152.0, "step": 2461 }, { "epoch": 0.4388591800356506, "grad_norm": 0.2734375, "learning_rate": 1.8360635055025873e-05, "loss": 1.0145, "num_tokens": 11146975560.0, "step": 2462 }, { "epoch": 0.4390374331550802, "grad_norm": 0.29296875, "learning_rate": 1.835913975541636e-05, "loss": 1.0308, "num_tokens": 11153260391.0, "step": 2463 }, { "epoch": 0.4392156862745098, "grad_norm": 0.3046875, "learning_rate": 1.8357643842568175e-05, "loss": 1.0275, "num_tokens": 11159490645.0, "step": 2464 }, { "epoch": 0.4393939393939394, "grad_norm": 0.2451171875, "learning_rate": 1.8356147316605968e-05, "loss": 1.0301, "num_tokens": 11165735205.0, "step": 2465 }, { "epoch": 0.439572192513369, "grad_norm": 0.28515625, "learning_rate": 1.8354650177654453e-05, "loss": 1.0509, "num_tokens": 11171991756.0, "step": 2466 }, { "epoch": 0.4397504456327986, "grad_norm": 0.251953125, "learning_rate": 1.8353152425838382e-05, "loss": 1.0351, "num_tokens": 11178276906.0, "step": 2467 }, { "epoch": 0.4399286987522282, "grad_norm": 0.275390625, "learning_rate": 1.8351654061282563e-05, "loss": 1.0455, "num_tokens": 11184560557.0, "step": 2468 }, { "epoch": 0.44010695187165777, "grad_norm": 0.263671875, "learning_rate": 1.8350155084111855e-05, "loss": 1.0174, "num_tokens": 11190818380.0, "step": 2469 }, { "epoch": 0.44028520499108736, "grad_norm": 0.2734375, "learning_rate": 1.8348655494451176e-05, "loss": 1.0185, "num_tokens": 11197098040.0, "step": 2470 }, { "epoch": 0.44046345811051696, "grad_norm": 0.2216796875, "learning_rate": 1.8347155292425473e-05, "loss": 1.0435, "num_tokens": 11203381521.0, "step": 2471 }, { "epoch": 0.4406417112299465, "grad_norm": 0.275390625, "learning_rate": 1.834565447815977e-05, "loss": 1.0053, "num_tokens": 11209666325.0, "step": 2472 }, { "epoch": 0.4408199643493761, "grad_norm": 0.234375, "learning_rate": 1.8344153051779125e-05, "loss": 1.0383, "num_tokens": 11215944315.0, "step": 2473 }, { "epoch": 0.4409982174688057, "grad_norm": 0.2490234375, "learning_rate": 1.8342651013408653e-05, "loss": 1.0282, "num_tokens": 11222228909.0, "step": 2474 }, { "epoch": 0.4411764705882353, "grad_norm": 0.2451171875, "learning_rate": 1.834114836317352e-05, "loss": 1.0386, "num_tokens": 11228486790.0, "step": 2475 }, { "epoch": 0.4413547237076649, "grad_norm": 0.240234375, "learning_rate": 1.833964510119894e-05, "loss": 1.0287, "num_tokens": 11234770926.0, "step": 2476 }, { "epoch": 0.44153297682709447, "grad_norm": 0.251953125, "learning_rate": 1.8338141227610183e-05, "loss": 1.0272, "num_tokens": 11241057389.0, "step": 2477 }, { "epoch": 0.44171122994652406, "grad_norm": 0.232421875, "learning_rate": 1.8336636742532563e-05, "loss": 1.0207, "num_tokens": 11247311677.0, "step": 2478 }, { "epoch": 0.44188948306595366, "grad_norm": 0.2314453125, "learning_rate": 1.833513164609145e-05, "loss": 1.032, "num_tokens": 11253595426.0, "step": 2479 }, { "epoch": 0.44206773618538325, "grad_norm": 0.248046875, "learning_rate": 1.8333625938412273e-05, "loss": 1.0391, "num_tokens": 11259856329.0, "step": 2480 }, { "epoch": 0.44224598930481285, "grad_norm": 0.25, "learning_rate": 1.833211961962049e-05, "loss": 1.0389, "num_tokens": 11266139037.0, "step": 2481 }, { "epoch": 0.44242424242424244, "grad_norm": 0.2421875, "learning_rate": 1.833061268984163e-05, "loss": 1.0029, "num_tokens": 11272390169.0, "step": 2482 }, { "epoch": 0.44260249554367204, "grad_norm": 0.224609375, "learning_rate": 1.8329105149201263e-05, "loss": 1.0084, "num_tokens": 11278674261.0, "step": 2483 }, { "epoch": 0.44278074866310163, "grad_norm": 0.2255859375, "learning_rate": 1.8327596997825018e-05, "loss": 1.0292, "num_tokens": 11284957982.0, "step": 2484 }, { "epoch": 0.44295900178253117, "grad_norm": 0.2314453125, "learning_rate": 1.832608823583856e-05, "loss": 1.0499, "num_tokens": 11291210653.0, "step": 2485 }, { "epoch": 0.44313725490196076, "grad_norm": 0.220703125, "learning_rate": 1.8324578863367622e-05, "loss": 1.0246, "num_tokens": 11297495498.0, "step": 2486 }, { "epoch": 0.44331550802139036, "grad_norm": 0.2109375, "learning_rate": 1.8323068880537983e-05, "loss": 1.0099, "num_tokens": 11303779870.0, "step": 2487 }, { "epoch": 0.44349376114081995, "grad_norm": 0.2255859375, "learning_rate": 1.832155828747546e-05, "loss": 1.0532, "num_tokens": 11310065038.0, "step": 2488 }, { "epoch": 0.44367201426024955, "grad_norm": 0.2353515625, "learning_rate": 1.8320047084305937e-05, "loss": 1.0226, "num_tokens": 11316347657.0, "step": 2489 }, { "epoch": 0.44385026737967914, "grad_norm": 0.2275390625, "learning_rate": 1.831853527115534e-05, "loss": 1.0341, "num_tokens": 11322602724.0, "step": 2490 }, { "epoch": 0.44402852049910874, "grad_norm": 0.244140625, "learning_rate": 1.831702284814966e-05, "loss": 1.0377, "num_tokens": 11328864257.0, "step": 2491 }, { "epoch": 0.44420677361853833, "grad_norm": 0.2412109375, "learning_rate": 1.831550981541491e-05, "loss": 1.0411, "num_tokens": 11335149064.0, "step": 2492 }, { "epoch": 0.4443850267379679, "grad_norm": 0.251953125, "learning_rate": 1.8313996173077183e-05, "loss": 0.9876, "num_tokens": 11341385657.0, "step": 2493 }, { "epoch": 0.4445632798573975, "grad_norm": 0.2431640625, "learning_rate": 1.831248192126261e-05, "loss": 1.0258, "num_tokens": 11347649701.0, "step": 2494 }, { "epoch": 0.4447415329768271, "grad_norm": 0.2314453125, "learning_rate": 1.831096706009737e-05, "loss": 1.0354, "num_tokens": 11353907758.0, "step": 2495 }, { "epoch": 0.4449197860962567, "grad_norm": 0.259765625, "learning_rate": 1.83094515897077e-05, "loss": 1.0244, "num_tokens": 11360192300.0, "step": 2496 }, { "epoch": 0.44509803921568625, "grad_norm": 0.2373046875, "learning_rate": 1.830793551021988e-05, "loss": 1.0755, "num_tokens": 11366459659.0, "step": 2497 }, { "epoch": 0.44527629233511584, "grad_norm": 0.26171875, "learning_rate": 1.830641882176025e-05, "loss": 1.0095, "num_tokens": 11372734611.0, "step": 2498 }, { "epoch": 0.44545454545454544, "grad_norm": 0.240234375, "learning_rate": 1.8304901524455195e-05, "loss": 1.0608, "num_tokens": 11379000790.0, "step": 2499 }, { "epoch": 0.44563279857397503, "grad_norm": 0.25, "learning_rate": 1.830338361843115e-05, "loss": 1.0717, "num_tokens": 11385285530.0, "step": 2500 }, { "epoch": 0.4458110516934046, "grad_norm": 0.24609375, "learning_rate": 1.8301865103814607e-05, "loss": 1.0522, "num_tokens": 11391569682.0, "step": 2501 }, { "epoch": 0.4459893048128342, "grad_norm": 0.267578125, "learning_rate": 1.8300345980732095e-05, "loss": 1.0515, "num_tokens": 11397832581.0, "step": 2502 }, { "epoch": 0.4461675579322638, "grad_norm": 0.25, "learning_rate": 1.8298826249310212e-05, "loss": 1.0214, "num_tokens": 11404074719.0, "step": 2503 }, { "epoch": 0.4463458110516934, "grad_norm": 0.2470703125, "learning_rate": 1.8297305909675596e-05, "loss": 1.0338, "num_tokens": 11410359538.0, "step": 2504 }, { "epoch": 0.446524064171123, "grad_norm": 0.244140625, "learning_rate": 1.829578496195493e-05, "loss": 1.0167, "num_tokens": 11416644630.0, "step": 2505 }, { "epoch": 0.4467023172905526, "grad_norm": 0.25, "learning_rate": 1.8294263406274963e-05, "loss": 1.0298, "num_tokens": 11422896514.0, "step": 2506 }, { "epoch": 0.4468805704099822, "grad_norm": 0.255859375, "learning_rate": 1.8292741242762483e-05, "loss": 1.0677, "num_tokens": 11429154704.0, "step": 2507 }, { "epoch": 0.4470588235294118, "grad_norm": 0.2392578125, "learning_rate": 1.8291218471544337e-05, "loss": 1.027, "num_tokens": 11435395255.0, "step": 2508 }, { "epoch": 0.4472370766488414, "grad_norm": 0.2578125, "learning_rate": 1.828969509274741e-05, "loss": 1.0427, "num_tokens": 11441655290.0, "step": 2509 }, { "epoch": 0.4474153297682709, "grad_norm": 0.2431640625, "learning_rate": 1.8288171106498648e-05, "loss": 1.0769, "num_tokens": 11447915429.0, "step": 2510 }, { "epoch": 0.4475935828877005, "grad_norm": 0.240234375, "learning_rate": 1.8286646512925048e-05, "loss": 1.0555, "num_tokens": 11454199034.0, "step": 2511 }, { "epoch": 0.4477718360071301, "grad_norm": 0.25390625, "learning_rate": 1.8285121312153653e-05, "loss": 0.9937, "num_tokens": 11460476443.0, "step": 2512 }, { "epoch": 0.4479500891265597, "grad_norm": 0.2255859375, "learning_rate": 1.8283595504311557e-05, "loss": 1.0269, "num_tokens": 11466729655.0, "step": 2513 }, { "epoch": 0.4481283422459893, "grad_norm": 0.275390625, "learning_rate": 1.8282069089525908e-05, "loss": 1.0437, "num_tokens": 11473013125.0, "step": 2514 }, { "epoch": 0.4483065953654189, "grad_norm": 0.2431640625, "learning_rate": 1.8280542067923905e-05, "loss": 1.0196, "num_tokens": 11479287977.0, "step": 2515 }, { "epoch": 0.4484848484848485, "grad_norm": 0.251953125, "learning_rate": 1.827901443963279e-05, "loss": 1.048, "num_tokens": 11485529290.0, "step": 2516 }, { "epoch": 0.4486631016042781, "grad_norm": 0.2490234375, "learning_rate": 1.8277486204779862e-05, "loss": 1.0435, "num_tokens": 11491812512.0, "step": 2517 }, { "epoch": 0.4488413547237077, "grad_norm": 0.2333984375, "learning_rate": 1.8275957363492468e-05, "loss": 1.058, "num_tokens": 11498092639.0, "step": 2518 }, { "epoch": 0.44901960784313727, "grad_norm": 0.255859375, "learning_rate": 1.827442791589801e-05, "loss": 1.0583, "num_tokens": 11504377270.0, "step": 2519 }, { "epoch": 0.44919786096256686, "grad_norm": 0.2412109375, "learning_rate": 1.827289786212393e-05, "loss": 1.0399, "num_tokens": 11510653158.0, "step": 2520 }, { "epoch": 0.44937611408199646, "grad_norm": 0.25, "learning_rate": 1.8271367202297742e-05, "loss": 1.0246, "num_tokens": 11516904820.0, "step": 2521 }, { "epoch": 0.449554367201426, "grad_norm": 0.26953125, "learning_rate": 1.8269835936546984e-05, "loss": 1.0142, "num_tokens": 11523157083.0, "step": 2522 }, { "epoch": 0.4497326203208556, "grad_norm": 0.25, "learning_rate": 1.8268304064999255e-05, "loss": 1.0233, "num_tokens": 11529429690.0, "step": 2523 }, { "epoch": 0.4499108734402852, "grad_norm": 0.283203125, "learning_rate": 1.826677158778222e-05, "loss": 1.0485, "num_tokens": 11535678816.0, "step": 2524 }, { "epoch": 0.4500891265597148, "grad_norm": 0.2255859375, "learning_rate": 1.8265238505023567e-05, "loss": 1.0609, "num_tokens": 11541961784.0, "step": 2525 }, { "epoch": 0.4502673796791444, "grad_norm": 0.25, "learning_rate": 1.8263704816851053e-05, "loss": 1.044, "num_tokens": 11548245168.0, "step": 2526 }, { "epoch": 0.45044563279857397, "grad_norm": 0.2421875, "learning_rate": 1.8262170523392477e-05, "loss": 1.0216, "num_tokens": 11554475471.0, "step": 2527 }, { "epoch": 0.45062388591800356, "grad_norm": 0.23046875, "learning_rate": 1.8260635624775698e-05, "loss": 1.0253, "num_tokens": 11560755745.0, "step": 2528 }, { "epoch": 0.45080213903743316, "grad_norm": 0.2216796875, "learning_rate": 1.8259100121128618e-05, "loss": 1.0283, "num_tokens": 11567039849.0, "step": 2529 }, { "epoch": 0.45098039215686275, "grad_norm": 0.228515625, "learning_rate": 1.825756401257919e-05, "loss": 1.0303, "num_tokens": 11573297639.0, "step": 2530 }, { "epoch": 0.45115864527629235, "grad_norm": 0.2314453125, "learning_rate": 1.8256027299255415e-05, "loss": 1.0635, "num_tokens": 11579579456.0, "step": 2531 }, { "epoch": 0.45133689839572194, "grad_norm": 0.22265625, "learning_rate": 1.8254489981285355e-05, "loss": 1.0229, "num_tokens": 11585842275.0, "step": 2532 }, { "epoch": 0.45151515151515154, "grad_norm": 0.2314453125, "learning_rate": 1.825295205879711e-05, "loss": 1.0194, "num_tokens": 11592095207.0, "step": 2533 }, { "epoch": 0.45169340463458113, "grad_norm": 0.28515625, "learning_rate": 1.825141353191883e-05, "loss": 1.0526, "num_tokens": 11598379805.0, "step": 2534 }, { "epoch": 0.45187165775401067, "grad_norm": 0.2333984375, "learning_rate": 1.8249874400778733e-05, "loss": 1.0297, "num_tokens": 11604662591.0, "step": 2535 }, { "epoch": 0.45204991087344026, "grad_norm": 0.271484375, "learning_rate": 1.8248334665505068e-05, "loss": 1.0459, "num_tokens": 11610947134.0, "step": 2536 }, { "epoch": 0.45222816399286986, "grad_norm": 0.2314453125, "learning_rate": 1.824679432622614e-05, "loss": 1.0518, "num_tokens": 11617214489.0, "step": 2537 }, { "epoch": 0.45240641711229945, "grad_norm": 0.267578125, "learning_rate": 1.824525338307031e-05, "loss": 1.0403, "num_tokens": 11623457924.0, "step": 2538 }, { "epoch": 0.45258467023172905, "grad_norm": 0.244140625, "learning_rate": 1.824371183616598e-05, "loss": 1.0257, "num_tokens": 11629741846.0, "step": 2539 }, { "epoch": 0.45276292335115864, "grad_norm": 0.267578125, "learning_rate": 1.824216968564162e-05, "loss": 1.0343, "num_tokens": 11635996867.0, "step": 2540 }, { "epoch": 0.45294117647058824, "grad_norm": 0.25390625, "learning_rate": 1.8240626931625722e-05, "loss": 1.0137, "num_tokens": 11642246579.0, "step": 2541 }, { "epoch": 0.45311942959001783, "grad_norm": 0.302734375, "learning_rate": 1.823908357424685e-05, "loss": 1.0508, "num_tokens": 11648478298.0, "step": 2542 }, { "epoch": 0.4532976827094474, "grad_norm": 0.28515625, "learning_rate": 1.823753961363361e-05, "loss": 1.0145, "num_tokens": 11654725408.0, "step": 2543 }, { "epoch": 0.453475935828877, "grad_norm": 0.283203125, "learning_rate": 1.8235995049914668e-05, "loss": 1.0147, "num_tokens": 11661010560.0, "step": 2544 }, { "epoch": 0.4536541889483066, "grad_norm": 0.251953125, "learning_rate": 1.823444988321873e-05, "loss": 1.0256, "num_tokens": 11667288320.0, "step": 2545 }, { "epoch": 0.4538324420677362, "grad_norm": 0.251953125, "learning_rate": 1.8232904113674546e-05, "loss": 1.0365, "num_tokens": 11673530998.0, "step": 2546 }, { "epoch": 0.4540106951871658, "grad_norm": 0.2734375, "learning_rate": 1.8231357741410943e-05, "loss": 1.0393, "num_tokens": 11679814065.0, "step": 2547 }, { "epoch": 0.45418894830659534, "grad_norm": 0.21875, "learning_rate": 1.822981076655676e-05, "loss": 1.0245, "num_tokens": 11686097819.0, "step": 2548 }, { "epoch": 0.45436720142602494, "grad_norm": 0.267578125, "learning_rate": 1.8228263189240925e-05, "loss": 1.0063, "num_tokens": 11692376006.0, "step": 2549 }, { "epoch": 0.45454545454545453, "grad_norm": 0.24609375, "learning_rate": 1.8226715009592384e-05, "loss": 1.0496, "num_tokens": 11698634273.0, "step": 2550 }, { "epoch": 0.4547237076648841, "grad_norm": 0.263671875, "learning_rate": 1.8225166227740153e-05, "loss": 1.0358, "num_tokens": 11704901387.0, "step": 2551 }, { "epoch": 0.4549019607843137, "grad_norm": 0.25, "learning_rate": 1.8223616843813294e-05, "loss": 1.0065, "num_tokens": 11711148009.0, "step": 2552 }, { "epoch": 0.4550802139037433, "grad_norm": 0.2412109375, "learning_rate": 1.8222066857940915e-05, "loss": 1.0619, "num_tokens": 11717431912.0, "step": 2553 }, { "epoch": 0.4552584670231729, "grad_norm": 0.228515625, "learning_rate": 1.822051627025218e-05, "loss": 1.0384, "num_tokens": 11723683549.0, "step": 2554 }, { "epoch": 0.4554367201426025, "grad_norm": 0.2470703125, "learning_rate": 1.8218965080876295e-05, "loss": 1.0031, "num_tokens": 11729966646.0, "step": 2555 }, { "epoch": 0.4556149732620321, "grad_norm": 0.21484375, "learning_rate": 1.8217413289942524e-05, "loss": 1.0573, "num_tokens": 11736249870.0, "step": 2556 }, { "epoch": 0.4557932263814617, "grad_norm": 0.263671875, "learning_rate": 1.8215860897580174e-05, "loss": 1.0293, "num_tokens": 11742520705.0, "step": 2557 }, { "epoch": 0.4559714795008913, "grad_norm": 0.2177734375, "learning_rate": 1.8214307903918615e-05, "loss": 1.0273, "num_tokens": 11748804058.0, "step": 2558 }, { "epoch": 0.4561497326203209, "grad_norm": 0.244140625, "learning_rate": 1.8212754309087246e-05, "loss": 1.0157, "num_tokens": 11755042666.0, "step": 2559 }, { "epoch": 0.4563279857397504, "grad_norm": 0.2431640625, "learning_rate": 1.8211200113215538e-05, "loss": 1.0613, "num_tokens": 11761327732.0, "step": 2560 }, { "epoch": 0.45650623885918, "grad_norm": 0.283203125, "learning_rate": 1.8209645316433e-05, "loss": 1.0348, "num_tokens": 11767558049.0, "step": 2561 }, { "epoch": 0.4566844919786096, "grad_norm": 0.251953125, "learning_rate": 1.8208089918869194e-05, "loss": 1.0532, "num_tokens": 11773818580.0, "step": 2562 }, { "epoch": 0.4568627450980392, "grad_norm": 0.28125, "learning_rate": 1.820653392065373e-05, "loss": 1.0597, "num_tokens": 11780103393.0, "step": 2563 }, { "epoch": 0.4570409982174688, "grad_norm": 0.2578125, "learning_rate": 1.820497732191627e-05, "loss": 1.0271, "num_tokens": 11786372862.0, "step": 2564 }, { "epoch": 0.4572192513368984, "grad_norm": 0.267578125, "learning_rate": 1.8203420122786527e-05, "loss": 1.0142, "num_tokens": 11792626202.0, "step": 2565 }, { "epoch": 0.457397504456328, "grad_norm": 0.2314453125, "learning_rate": 1.820186232339426e-05, "loss": 1.023, "num_tokens": 11798888279.0, "step": 2566 }, { "epoch": 0.4575757575757576, "grad_norm": 0.25390625, "learning_rate": 1.8200303923869282e-05, "loss": 1.0213, "num_tokens": 11805173326.0, "step": 2567 }, { "epoch": 0.4577540106951872, "grad_norm": 0.2392578125, "learning_rate": 1.8198744924341457e-05, "loss": 1.0422, "num_tokens": 11811456706.0, "step": 2568 }, { "epoch": 0.45793226381461677, "grad_norm": 0.2353515625, "learning_rate": 1.8197185324940695e-05, "loss": 1.0653, "num_tokens": 11817717273.0, "step": 2569 }, { "epoch": 0.45811051693404636, "grad_norm": 0.2490234375, "learning_rate": 1.819562512579696e-05, "loss": 1.0358, "num_tokens": 11824000391.0, "step": 2570 }, { "epoch": 0.45828877005347596, "grad_norm": 0.248046875, "learning_rate": 1.819406432704026e-05, "loss": 1.0112, "num_tokens": 11830285333.0, "step": 2571 }, { "epoch": 0.45846702317290555, "grad_norm": 0.244140625, "learning_rate": 1.8192502928800654e-05, "loss": 1.0385, "num_tokens": 11836547282.0, "step": 2572 }, { "epoch": 0.4586452762923351, "grad_norm": 0.22265625, "learning_rate": 1.819094093120826e-05, "loss": 1.0654, "num_tokens": 11842831150.0, "step": 2573 }, { "epoch": 0.4588235294117647, "grad_norm": 0.267578125, "learning_rate": 1.8189378334393238e-05, "loss": 1.0339, "num_tokens": 11849063265.0, "step": 2574 }, { "epoch": 0.4590017825311943, "grad_norm": 0.2177734375, "learning_rate": 1.81878151384858e-05, "loss": 1.0276, "num_tokens": 11855328562.0, "step": 2575 }, { "epoch": 0.4591800356506239, "grad_norm": 0.2314453125, "learning_rate": 1.8186251343616206e-05, "loss": 1.0471, "num_tokens": 11861612423.0, "step": 2576 }, { "epoch": 0.45935828877005347, "grad_norm": 0.20703125, "learning_rate": 1.8184686949914766e-05, "loss": 1.0495, "num_tokens": 11867896800.0, "step": 2577 }, { "epoch": 0.45953654188948306, "grad_norm": 0.2275390625, "learning_rate": 1.8183121957511847e-05, "loss": 1.0184, "num_tokens": 11874180520.0, "step": 2578 }, { "epoch": 0.45971479500891266, "grad_norm": 0.2275390625, "learning_rate": 1.818155636653785e-05, "loss": 1.0063, "num_tokens": 11880450832.0, "step": 2579 }, { "epoch": 0.45989304812834225, "grad_norm": 0.25390625, "learning_rate": 1.8179990177123247e-05, "loss": 1.0277, "num_tokens": 11886720251.0, "step": 2580 }, { "epoch": 0.46007130124777185, "grad_norm": 0.234375, "learning_rate": 1.8178423389398544e-05, "loss": 1.0367, "num_tokens": 11893002121.0, "step": 2581 }, { "epoch": 0.46024955436720144, "grad_norm": 0.236328125, "learning_rate": 1.81768560034943e-05, "loss": 1.0712, "num_tokens": 11899267136.0, "step": 2582 }, { "epoch": 0.46042780748663104, "grad_norm": 0.2392578125, "learning_rate": 1.8175288019541127e-05, "loss": 1.0576, "num_tokens": 11905550788.0, "step": 2583 }, { "epoch": 0.46060606060606063, "grad_norm": 0.2353515625, "learning_rate": 1.817371943766969e-05, "loss": 1.0043, "num_tokens": 11911789285.0, "step": 2584 }, { "epoch": 0.46078431372549017, "grad_norm": 0.2490234375, "learning_rate": 1.8172150258010693e-05, "loss": 1.0139, "num_tokens": 11918044828.0, "step": 2585 }, { "epoch": 0.46096256684491976, "grad_norm": 0.2197265625, "learning_rate": 1.8170580480694894e-05, "loss": 1.0199, "num_tokens": 11924329784.0, "step": 2586 }, { "epoch": 0.46114081996434936, "grad_norm": 0.2490234375, "learning_rate": 1.816901010585311e-05, "loss": 1.0688, "num_tokens": 11930614212.0, "step": 2587 }, { "epoch": 0.46131907308377895, "grad_norm": 0.2333984375, "learning_rate": 1.8167439133616203e-05, "loss": 1.0352, "num_tokens": 11936895976.0, "step": 2588 }, { "epoch": 0.46149732620320855, "grad_norm": 0.2451171875, "learning_rate": 1.816586756411507e-05, "loss": 1.0338, "num_tokens": 11943180416.0, "step": 2589 }, { "epoch": 0.46167557932263814, "grad_norm": 0.232421875, "learning_rate": 1.816429539748068e-05, "loss": 1.0154, "num_tokens": 11949444035.0, "step": 2590 }, { "epoch": 0.46185383244206774, "grad_norm": 0.244140625, "learning_rate": 1.816272263384404e-05, "loss": 1.0463, "num_tokens": 11955725161.0, "step": 2591 }, { "epoch": 0.46203208556149733, "grad_norm": 0.228515625, "learning_rate": 1.8161149273336206e-05, "loss": 1.0225, "num_tokens": 11962009196.0, "step": 2592 }, { "epoch": 0.4622103386809269, "grad_norm": 0.2412109375, "learning_rate": 1.815957531608829e-05, "loss": 0.9982, "num_tokens": 11968292401.0, "step": 2593 }, { "epoch": 0.4623885918003565, "grad_norm": 0.24609375, "learning_rate": 1.815800076223145e-05, "loss": 1.0186, "num_tokens": 11974577400.0, "step": 2594 }, { "epoch": 0.4625668449197861, "grad_norm": 0.2255859375, "learning_rate": 1.815642561189689e-05, "loss": 1.0163, "num_tokens": 11980860850.0, "step": 2595 }, { "epoch": 0.4627450980392157, "grad_norm": 0.23828125, "learning_rate": 1.815484986521587e-05, "loss": 1.0686, "num_tokens": 11987145084.0, "step": 2596 }, { "epoch": 0.4629233511586453, "grad_norm": 0.2421875, "learning_rate": 1.81532735223197e-05, "loss": 1.0402, "num_tokens": 11993417950.0, "step": 2597 }, { "epoch": 0.46310160427807484, "grad_norm": 0.236328125, "learning_rate": 1.8151696583339736e-05, "loss": 1.0597, "num_tokens": 11999700719.0, "step": 2598 }, { "epoch": 0.46327985739750444, "grad_norm": 0.23046875, "learning_rate": 1.815011904840738e-05, "loss": 1.0132, "num_tokens": 12005957360.0, "step": 2599 }, { "epoch": 0.46345811051693403, "grad_norm": 0.224609375, "learning_rate": 1.8148540917654092e-05, "loss": 1.0317, "num_tokens": 12012210763.0, "step": 2600 }, { "epoch": 0.4636363636363636, "grad_norm": 0.2294921875, "learning_rate": 1.814696219121138e-05, "loss": 1.0471, "num_tokens": 12018489744.0, "step": 2601 }, { "epoch": 0.4638146167557932, "grad_norm": 0.21875, "learning_rate": 1.8145382869210792e-05, "loss": 1.0344, "num_tokens": 12024773937.0, "step": 2602 }, { "epoch": 0.4639928698752228, "grad_norm": 0.2333984375, "learning_rate": 1.8143802951783943e-05, "loss": 1.0324, "num_tokens": 12031038616.0, "step": 2603 }, { "epoch": 0.4641711229946524, "grad_norm": 0.220703125, "learning_rate": 1.814222243906248e-05, "loss": 1.0292, "num_tokens": 12037293330.0, "step": 2604 }, { "epoch": 0.464349376114082, "grad_norm": 0.26171875, "learning_rate": 1.8140641331178115e-05, "loss": 1.0378, "num_tokens": 12043547070.0, "step": 2605 }, { "epoch": 0.4645276292335116, "grad_norm": 0.234375, "learning_rate": 1.8139059628262597e-05, "loss": 1.0405, "num_tokens": 12049774330.0, "step": 2606 }, { "epoch": 0.4647058823529412, "grad_norm": 0.25390625, "learning_rate": 1.8137477330447726e-05, "loss": 1.0669, "num_tokens": 12056056966.0, "step": 2607 }, { "epoch": 0.4648841354723708, "grad_norm": 0.2451171875, "learning_rate": 1.813589443786536e-05, "loss": 1.0455, "num_tokens": 12062340283.0, "step": 2608 }, { "epoch": 0.4650623885918004, "grad_norm": 0.2412109375, "learning_rate": 1.8134310950647405e-05, "loss": 1.0255, "num_tokens": 12068624418.0, "step": 2609 }, { "epoch": 0.46524064171123, "grad_norm": 0.255859375, "learning_rate": 1.813272686892581e-05, "loss": 1.0557, "num_tokens": 12074909027.0, "step": 2610 }, { "epoch": 0.4654188948306595, "grad_norm": 0.248046875, "learning_rate": 1.8131142192832575e-05, "loss": 1.0366, "num_tokens": 12081171188.0, "step": 2611 }, { "epoch": 0.4655971479500891, "grad_norm": 0.25390625, "learning_rate": 1.812955692249975e-05, "loss": 1.0219, "num_tokens": 12087454733.0, "step": 2612 }, { "epoch": 0.4657754010695187, "grad_norm": 0.216796875, "learning_rate": 1.8127971058059444e-05, "loss": 1.0665, "num_tokens": 12093730794.0, "step": 2613 }, { "epoch": 0.4659536541889483, "grad_norm": 0.248046875, "learning_rate": 1.8126384599643798e-05, "loss": 1.018, "num_tokens": 12099979117.0, "step": 2614 }, { "epoch": 0.4661319073083779, "grad_norm": 0.2451171875, "learning_rate": 1.812479754738502e-05, "loss": 1.0425, "num_tokens": 12106213731.0, "step": 2615 }, { "epoch": 0.4663101604278075, "grad_norm": 0.234375, "learning_rate": 1.8123209901415353e-05, "loss": 1.0288, "num_tokens": 12112497762.0, "step": 2616 }, { "epoch": 0.4664884135472371, "grad_norm": 0.2451171875, "learning_rate": 1.8121621661867097e-05, "loss": 1.0511, "num_tokens": 12118758261.0, "step": 2617 }, { "epoch": 0.4666666666666667, "grad_norm": 0.2470703125, "learning_rate": 1.8120032828872605e-05, "loss": 1.0568, "num_tokens": 12125042660.0, "step": 2618 }, { "epoch": 0.46684491978609627, "grad_norm": 0.234375, "learning_rate": 1.8118443402564274e-05, "loss": 1.0287, "num_tokens": 12131326150.0, "step": 2619 }, { "epoch": 0.46702317290552586, "grad_norm": 0.2255859375, "learning_rate": 1.8116853383074544e-05, "loss": 1.0605, "num_tokens": 12137609060.0, "step": 2620 }, { "epoch": 0.46720142602495546, "grad_norm": 0.244140625, "learning_rate": 1.8115262770535916e-05, "loss": 1.0412, "num_tokens": 12143878314.0, "step": 2621 }, { "epoch": 0.46737967914438505, "grad_norm": 0.22265625, "learning_rate": 1.811367156508094e-05, "loss": 1.0521, "num_tokens": 12150162125.0, "step": 2622 }, { "epoch": 0.4675579322638146, "grad_norm": 0.2392578125, "learning_rate": 1.8112079766842202e-05, "loss": 1.0428, "num_tokens": 12156444575.0, "step": 2623 }, { "epoch": 0.4677361853832442, "grad_norm": 0.265625, "learning_rate": 1.811048737595236e-05, "loss": 1.0505, "num_tokens": 12162718825.0, "step": 2624 }, { "epoch": 0.4679144385026738, "grad_norm": 0.2255859375, "learning_rate": 1.81088943925441e-05, "loss": 1.0216, "num_tokens": 12169001088.0, "step": 2625 }, { "epoch": 0.4680926916221034, "grad_norm": 0.26171875, "learning_rate": 1.8107300816750163e-05, "loss": 0.9993, "num_tokens": 12175286464.0, "step": 2626 }, { "epoch": 0.46827094474153297, "grad_norm": 0.234375, "learning_rate": 1.810570664870335e-05, "loss": 1.01, "num_tokens": 12181560545.0, "step": 2627 }, { "epoch": 0.46844919786096256, "grad_norm": 0.271484375, "learning_rate": 1.8104111888536496e-05, "loss": 1.0482, "num_tokens": 12187842606.0, "step": 2628 }, { "epoch": 0.46862745098039216, "grad_norm": 0.2421875, "learning_rate": 1.8102516536382498e-05, "loss": 1.0395, "num_tokens": 12194125287.0, "step": 2629 }, { "epoch": 0.46880570409982175, "grad_norm": 0.2734375, "learning_rate": 1.8100920592374297e-05, "loss": 1.0317, "num_tokens": 12200383001.0, "step": 2630 }, { "epoch": 0.46898395721925135, "grad_norm": 0.25390625, "learning_rate": 1.8099324056644875e-05, "loss": 1.0149, "num_tokens": 12206666594.0, "step": 2631 }, { "epoch": 0.46916221033868094, "grad_norm": 0.24609375, "learning_rate": 1.8097726929327283e-05, "loss": 1.0338, "num_tokens": 12212934200.0, "step": 2632 }, { "epoch": 0.46934046345811054, "grad_norm": 0.259765625, "learning_rate": 1.8096129210554603e-05, "loss": 1.0578, "num_tokens": 12219217802.0, "step": 2633 }, { "epoch": 0.46951871657754013, "grad_norm": 0.2265625, "learning_rate": 1.8094530900459974e-05, "loss": 1.0299, "num_tokens": 12225500038.0, "step": 2634 }, { "epoch": 0.4696969696969697, "grad_norm": 0.2216796875, "learning_rate": 1.8092931999176587e-05, "loss": 1.0279, "num_tokens": 12231783026.0, "step": 2635 }, { "epoch": 0.46987522281639926, "grad_norm": 0.2431640625, "learning_rate": 1.8091332506837676e-05, "loss": 1.0334, "num_tokens": 12238068146.0, "step": 2636 }, { "epoch": 0.47005347593582886, "grad_norm": 0.23828125, "learning_rate": 1.808973242357653e-05, "loss": 1.0347, "num_tokens": 12244350874.0, "step": 2637 }, { "epoch": 0.47023172905525845, "grad_norm": 0.2265625, "learning_rate": 1.8088131749526477e-05, "loss": 1.0379, "num_tokens": 12250633561.0, "step": 2638 }, { "epoch": 0.47040998217468805, "grad_norm": 0.259765625, "learning_rate": 1.808653048482091e-05, "loss": 1.0344, "num_tokens": 12256914669.0, "step": 2639 }, { "epoch": 0.47058823529411764, "grad_norm": 0.259765625, "learning_rate": 1.8084928629593257e-05, "loss": 1.0634, "num_tokens": 12263198653.0, "step": 2640 }, { "epoch": 0.47076648841354723, "grad_norm": 0.271484375, "learning_rate": 1.8083326183977e-05, "loss": 1.0343, "num_tokens": 12269469688.0, "step": 2641 }, { "epoch": 0.47094474153297683, "grad_norm": 0.25390625, "learning_rate": 1.8081723148105682e-05, "loss": 1.0462, "num_tokens": 12275740111.0, "step": 2642 }, { "epoch": 0.4711229946524064, "grad_norm": 0.248046875, "learning_rate": 1.808011952211287e-05, "loss": 1.0158, "num_tokens": 12282024431.0, "step": 2643 }, { "epoch": 0.471301247771836, "grad_norm": 0.259765625, "learning_rate": 1.80785153061322e-05, "loss": 1.0389, "num_tokens": 12288271352.0, "step": 2644 }, { "epoch": 0.4714795008912656, "grad_norm": 0.2578125, "learning_rate": 1.8076910500297354e-05, "loss": 1.0246, "num_tokens": 12294525419.0, "step": 2645 }, { "epoch": 0.4716577540106952, "grad_norm": 0.291015625, "learning_rate": 1.807530510474206e-05, "loss": 1.0351, "num_tokens": 12300809796.0, "step": 2646 }, { "epoch": 0.4718360071301248, "grad_norm": 0.2333984375, "learning_rate": 1.80736991196001e-05, "loss": 1.0438, "num_tokens": 12307084013.0, "step": 2647 }, { "epoch": 0.47201426024955434, "grad_norm": 0.2392578125, "learning_rate": 1.8072092545005292e-05, "loss": 1.0283, "num_tokens": 12313368009.0, "step": 2648 }, { "epoch": 0.47219251336898393, "grad_norm": 0.236328125, "learning_rate": 1.8070485381091515e-05, "loss": 1.0076, "num_tokens": 12319651989.0, "step": 2649 }, { "epoch": 0.47237076648841353, "grad_norm": 0.232421875, "learning_rate": 1.8068877627992697e-05, "loss": 1.0412, "num_tokens": 12325936705.0, "step": 2650 }, { "epoch": 0.4725490196078431, "grad_norm": 0.26171875, "learning_rate": 1.8067269285842816e-05, "loss": 1.0309, "num_tokens": 12332216872.0, "step": 2651 }, { "epoch": 0.4727272727272727, "grad_norm": 0.24609375, "learning_rate": 1.8065660354775884e-05, "loss": 1.0133, "num_tokens": 12338500339.0, "step": 2652 }, { "epoch": 0.4729055258467023, "grad_norm": 0.248046875, "learning_rate": 1.8064050834925985e-05, "loss": 1.064, "num_tokens": 12344761733.0, "step": 2653 }, { "epoch": 0.4730837789661319, "grad_norm": 0.26171875, "learning_rate": 1.8062440726427234e-05, "loss": 1.026, "num_tokens": 12351044092.0, "step": 2654 }, { "epoch": 0.4732620320855615, "grad_norm": 0.255859375, "learning_rate": 1.8060830029413802e-05, "loss": 1.03, "num_tokens": 12357268529.0, "step": 2655 }, { "epoch": 0.4734402852049911, "grad_norm": 0.2451171875, "learning_rate": 1.805921874401991e-05, "loss": 0.9948, "num_tokens": 12363550357.0, "step": 2656 }, { "epoch": 0.4736185383244207, "grad_norm": 0.27734375, "learning_rate": 1.8057606870379833e-05, "loss": 1.0347, "num_tokens": 12369820174.0, "step": 2657 }, { "epoch": 0.4737967914438503, "grad_norm": 0.2490234375, "learning_rate": 1.805599440862788e-05, "loss": 1.022, "num_tokens": 12376103207.0, "step": 2658 }, { "epoch": 0.4739750445632799, "grad_norm": 0.26953125, "learning_rate": 1.805438135889842e-05, "loss": 1.0619, "num_tokens": 12382373990.0, "step": 2659 }, { "epoch": 0.4741532976827095, "grad_norm": 0.2451171875, "learning_rate": 1.805276772132587e-05, "loss": 1.0156, "num_tokens": 12388658364.0, "step": 2660 }, { "epoch": 0.474331550802139, "grad_norm": 0.287109375, "learning_rate": 1.8051153496044692e-05, "loss": 1.0326, "num_tokens": 12394929361.0, "step": 2661 }, { "epoch": 0.4745098039215686, "grad_norm": 0.26171875, "learning_rate": 1.8049538683189404e-05, "loss": 1.0371, "num_tokens": 12401213224.0, "step": 2662 }, { "epoch": 0.4746880570409982, "grad_norm": 0.26953125, "learning_rate": 1.8047923282894568e-05, "loss": 1.019, "num_tokens": 12407497088.0, "step": 2663 }, { "epoch": 0.4748663101604278, "grad_norm": 0.26171875, "learning_rate": 1.804630729529479e-05, "loss": 1.0265, "num_tokens": 12413781016.0, "step": 2664 }, { "epoch": 0.4750445632798574, "grad_norm": 0.25, "learning_rate": 1.8044690720524737e-05, "loss": 1.0129, "num_tokens": 12420032770.0, "step": 2665 }, { "epoch": 0.475222816399287, "grad_norm": 0.26171875, "learning_rate": 1.8043073558719117e-05, "loss": 1.023, "num_tokens": 12426302251.0, "step": 2666 }, { "epoch": 0.4754010695187166, "grad_norm": 0.29296875, "learning_rate": 1.8041455810012688e-05, "loss": 1.0335, "num_tokens": 12432552580.0, "step": 2667 }, { "epoch": 0.4755793226381462, "grad_norm": 0.265625, "learning_rate": 1.8039837474540255e-05, "loss": 1.0299, "num_tokens": 12438803073.0, "step": 2668 }, { "epoch": 0.47575757575757577, "grad_norm": 0.263671875, "learning_rate": 1.8038218552436683e-05, "loss": 1.0125, "num_tokens": 12445087551.0, "step": 2669 }, { "epoch": 0.47593582887700536, "grad_norm": 0.263671875, "learning_rate": 1.8036599043836865e-05, "loss": 1.0189, "num_tokens": 12451370906.0, "step": 2670 }, { "epoch": 0.47611408199643496, "grad_norm": 0.25390625, "learning_rate": 1.803497894887576e-05, "loss": 1.0144, "num_tokens": 12457651274.0, "step": 2671 }, { "epoch": 0.47629233511586455, "grad_norm": 0.259765625, "learning_rate": 1.8033358267688372e-05, "loss": 1.0272, "num_tokens": 12463934963.0, "step": 2672 }, { "epoch": 0.4764705882352941, "grad_norm": 0.2412109375, "learning_rate": 1.8031737000409754e-05, "loss": 1.0296, "num_tokens": 12470192376.0, "step": 2673 }, { "epoch": 0.4766488413547237, "grad_norm": 0.298828125, "learning_rate": 1.8030115147175003e-05, "loss": 1.0277, "num_tokens": 12476450234.0, "step": 2674 }, { "epoch": 0.4768270944741533, "grad_norm": 0.2421875, "learning_rate": 1.8028492708119273e-05, "loss": 1.0362, "num_tokens": 12482733271.0, "step": 2675 }, { "epoch": 0.4770053475935829, "grad_norm": 0.28515625, "learning_rate": 1.8026869683377756e-05, "loss": 1.0305, "num_tokens": 12489017843.0, "step": 2676 }, { "epoch": 0.47718360071301247, "grad_norm": 0.2734375, "learning_rate": 1.8025246073085703e-05, "loss": 1.0396, "num_tokens": 12495283440.0, "step": 2677 }, { "epoch": 0.47736185383244206, "grad_norm": 0.25, "learning_rate": 1.8023621877378406e-05, "loss": 1.0227, "num_tokens": 12501567884.0, "step": 2678 }, { "epoch": 0.47754010695187166, "grad_norm": 0.306640625, "learning_rate": 1.802199709639122e-05, "loss": 1.0532, "num_tokens": 12507818424.0, "step": 2679 }, { "epoch": 0.47771836007130125, "grad_norm": 0.234375, "learning_rate": 1.8020371730259523e-05, "loss": 1.0399, "num_tokens": 12514078387.0, "step": 2680 }, { "epoch": 0.47789661319073085, "grad_norm": 0.287109375, "learning_rate": 1.801874577911877e-05, "loss": 1.0383, "num_tokens": 12520363427.0, "step": 2681 }, { "epoch": 0.47807486631016044, "grad_norm": 0.2275390625, "learning_rate": 1.8017119243104445e-05, "loss": 0.9959, "num_tokens": 12526648902.0, "step": 2682 }, { "epoch": 0.47825311942959003, "grad_norm": 0.255859375, "learning_rate": 1.801549212235209e-05, "loss": 1.0229, "num_tokens": 12532911304.0, "step": 2683 }, { "epoch": 0.47843137254901963, "grad_norm": 0.2353515625, "learning_rate": 1.8013864416997293e-05, "loss": 1.0379, "num_tokens": 12539166645.0, "step": 2684 }, { "epoch": 0.4786096256684492, "grad_norm": 0.265625, "learning_rate": 1.801223612717569e-05, "loss": 1.0451, "num_tokens": 12545432289.0, "step": 2685 }, { "epoch": 0.47878787878787876, "grad_norm": 0.251953125, "learning_rate": 1.801060725302297e-05, "loss": 1.0509, "num_tokens": 12551705992.0, "step": 2686 }, { "epoch": 0.47896613190730836, "grad_norm": 0.27734375, "learning_rate": 1.800897779467486e-05, "loss": 1.0198, "num_tokens": 12557991030.0, "step": 2687 }, { "epoch": 0.47914438502673795, "grad_norm": 0.2734375, "learning_rate": 1.8007347752267153e-05, "loss": 1.0377, "num_tokens": 12564266551.0, "step": 2688 }, { "epoch": 0.47932263814616755, "grad_norm": 0.28125, "learning_rate": 1.8005717125935672e-05, "loss": 1.0164, "num_tokens": 12570550582.0, "step": 2689 }, { "epoch": 0.47950089126559714, "grad_norm": 0.24609375, "learning_rate": 1.8004085915816303e-05, "loss": 1.0147, "num_tokens": 12576834151.0, "step": 2690 }, { "epoch": 0.47967914438502673, "grad_norm": 0.24609375, "learning_rate": 1.8002454122044976e-05, "loss": 1.0048, "num_tokens": 12583115198.0, "step": 2691 }, { "epoch": 0.47985739750445633, "grad_norm": 0.23046875, "learning_rate": 1.8000821744757662e-05, "loss": 1.0554, "num_tokens": 12589398412.0, "step": 2692 }, { "epoch": 0.4800356506238859, "grad_norm": 0.2353515625, "learning_rate": 1.7999188784090392e-05, "loss": 1.0255, "num_tokens": 12595683576.0, "step": 2693 }, { "epoch": 0.4802139037433155, "grad_norm": 0.25, "learning_rate": 1.7997555240179244e-05, "loss": 0.9979, "num_tokens": 12601967935.0, "step": 2694 }, { "epoch": 0.4803921568627451, "grad_norm": 0.2353515625, "learning_rate": 1.7995921113160335e-05, "loss": 1.0245, "num_tokens": 12608209305.0, "step": 2695 }, { "epoch": 0.4805704099821747, "grad_norm": 0.2431640625, "learning_rate": 1.799428640316984e-05, "loss": 1.0368, "num_tokens": 12614492555.0, "step": 2696 }, { "epoch": 0.4807486631016043, "grad_norm": 0.2333984375, "learning_rate": 1.7992651110343982e-05, "loss": 1.0615, "num_tokens": 12620776686.0, "step": 2697 }, { "epoch": 0.4809269162210339, "grad_norm": 0.2275390625, "learning_rate": 1.7991015234819026e-05, "loss": 1.0348, "num_tokens": 12627057278.0, "step": 2698 }, { "epoch": 0.48110516934046343, "grad_norm": 0.2333984375, "learning_rate": 1.7989378776731294e-05, "loss": 0.9984, "num_tokens": 12633342774.0, "step": 2699 }, { "epoch": 0.48128342245989303, "grad_norm": 0.25390625, "learning_rate": 1.7987741736217147e-05, "loss": 1.0436, "num_tokens": 12639609263.0, "step": 2700 }, { "epoch": 0.4814616755793226, "grad_norm": 0.240234375, "learning_rate": 1.7986104113413007e-05, "loss": 1.0492, "num_tokens": 12645857114.0, "step": 2701 }, { "epoch": 0.4816399286987522, "grad_norm": 0.2392578125, "learning_rate": 1.798446590845533e-05, "loss": 1.0384, "num_tokens": 12652126539.0, "step": 2702 }, { "epoch": 0.4818181818181818, "grad_norm": 0.26171875, "learning_rate": 1.7982827121480636e-05, "loss": 1.01, "num_tokens": 12658383087.0, "step": 2703 }, { "epoch": 0.4819964349376114, "grad_norm": 0.25, "learning_rate": 1.7981187752625478e-05, "loss": 1.0644, "num_tokens": 12664637590.0, "step": 2704 }, { "epoch": 0.482174688057041, "grad_norm": 0.232421875, "learning_rate": 1.797954780202647e-05, "loss": 1.026, "num_tokens": 12670921014.0, "step": 2705 }, { "epoch": 0.4823529411764706, "grad_norm": 0.251953125, "learning_rate": 1.7977907269820264e-05, "loss": 1.0114, "num_tokens": 12677203906.0, "step": 2706 }, { "epoch": 0.4825311942959002, "grad_norm": 0.255859375, "learning_rate": 1.7976266156143574e-05, "loss": 1.0543, "num_tokens": 12683486653.0, "step": 2707 }, { "epoch": 0.4827094474153298, "grad_norm": 0.2421875, "learning_rate": 1.7974624461133147e-05, "loss": 1.0205, "num_tokens": 12689770693.0, "step": 2708 }, { "epoch": 0.4828877005347594, "grad_norm": 0.240234375, "learning_rate": 1.797298218492579e-05, "loss": 1.0417, "num_tokens": 12696036222.0, "step": 2709 }, { "epoch": 0.483065953654189, "grad_norm": 0.2158203125, "learning_rate": 1.7971339327658353e-05, "loss": 1.0133, "num_tokens": 12702298567.0, "step": 2710 }, { "epoch": 0.4832442067736185, "grad_norm": 0.251953125, "learning_rate": 1.7969695889467734e-05, "loss": 1.0392, "num_tokens": 12708551599.0, "step": 2711 }, { "epoch": 0.4834224598930481, "grad_norm": 0.2431640625, "learning_rate": 1.7968051870490885e-05, "loss": 1.0077, "num_tokens": 12714835852.0, "step": 2712 }, { "epoch": 0.4836007130124777, "grad_norm": 0.2412109375, "learning_rate": 1.79664072708648e-05, "loss": 1.0511, "num_tokens": 12721119796.0, "step": 2713 }, { "epoch": 0.4837789661319073, "grad_norm": 0.25, "learning_rate": 1.7964762090726523e-05, "loss": 1.0486, "num_tokens": 12727403280.0, "step": 2714 }, { "epoch": 0.4839572192513369, "grad_norm": 0.2255859375, "learning_rate": 1.7963116330213143e-05, "loss": 1.0003, "num_tokens": 12733651521.0, "step": 2715 }, { "epoch": 0.4841354723707665, "grad_norm": 0.2421875, "learning_rate": 1.7961469989461814e-05, "loss": 1.0145, "num_tokens": 12739936569.0, "step": 2716 }, { "epoch": 0.4843137254901961, "grad_norm": 0.2392578125, "learning_rate": 1.7959823068609716e-05, "loss": 1.04, "num_tokens": 12746167647.0, "step": 2717 }, { "epoch": 0.4844919786096257, "grad_norm": 0.26171875, "learning_rate": 1.7958175567794088e-05, "loss": 1.0142, "num_tokens": 12752442943.0, "step": 2718 }, { "epoch": 0.48467023172905527, "grad_norm": 0.265625, "learning_rate": 1.795652748715222e-05, "loss": 1.0538, "num_tokens": 12758680560.0, "step": 2719 }, { "epoch": 0.48484848484848486, "grad_norm": 0.2734375, "learning_rate": 1.7954878826821447e-05, "loss": 1.0416, "num_tokens": 12764963428.0, "step": 2720 }, { "epoch": 0.48502673796791446, "grad_norm": 0.2421875, "learning_rate": 1.7953229586939147e-05, "loss": 1.0299, "num_tokens": 12771246236.0, "step": 2721 }, { "epoch": 0.48520499108734405, "grad_norm": 0.244140625, "learning_rate": 1.7951579767642753e-05, "loss": 1.0586, "num_tokens": 12777531668.0, "step": 2722 }, { "epoch": 0.48538324420677365, "grad_norm": 0.2353515625, "learning_rate": 1.7949929369069753e-05, "loss": 1.0345, "num_tokens": 12783792616.0, "step": 2723 }, { "epoch": 0.4855614973262032, "grad_norm": 0.23046875, "learning_rate": 1.7948278391357664e-05, "loss": 1.0183, "num_tokens": 12790045976.0, "step": 2724 }, { "epoch": 0.4857397504456328, "grad_norm": 0.2353515625, "learning_rate": 1.7946626834644074e-05, "loss": 1.0546, "num_tokens": 12796329608.0, "step": 2725 }, { "epoch": 0.4859180035650624, "grad_norm": 0.220703125, "learning_rate": 1.7944974699066595e-05, "loss": 1.0454, "num_tokens": 12802613558.0, "step": 2726 }, { "epoch": 0.48609625668449197, "grad_norm": 0.2275390625, "learning_rate": 1.7943321984762906e-05, "loss": 1.0374, "num_tokens": 12808878343.0, "step": 2727 }, { "epoch": 0.48627450980392156, "grad_norm": 0.2265625, "learning_rate": 1.794166869187073e-05, "loss": 1.0249, "num_tokens": 12815152304.0, "step": 2728 }, { "epoch": 0.48645276292335116, "grad_norm": 0.2265625, "learning_rate": 1.7940014820527832e-05, "loss": 1.0127, "num_tokens": 12821436885.0, "step": 2729 }, { "epoch": 0.48663101604278075, "grad_norm": 0.2490234375, "learning_rate": 1.793836037087203e-05, "loss": 1.0221, "num_tokens": 12827718393.0, "step": 2730 }, { "epoch": 0.48680926916221035, "grad_norm": 0.220703125, "learning_rate": 1.7936705343041192e-05, "loss": 1.0425, "num_tokens": 12834000079.0, "step": 2731 }, { "epoch": 0.48698752228163994, "grad_norm": 0.234375, "learning_rate": 1.7935049737173232e-05, "loss": 1.0212, "num_tokens": 12840264895.0, "step": 2732 }, { "epoch": 0.48716577540106953, "grad_norm": 0.236328125, "learning_rate": 1.7933393553406107e-05, "loss": 1.0599, "num_tokens": 12846525861.0, "step": 2733 }, { "epoch": 0.48734402852049913, "grad_norm": 0.2451171875, "learning_rate": 1.7931736791877836e-05, "loss": 1.0054, "num_tokens": 12852761346.0, "step": 2734 }, { "epoch": 0.4875222816399287, "grad_norm": 0.23046875, "learning_rate": 1.793007945272647e-05, "loss": 1.0412, "num_tokens": 12859044444.0, "step": 2735 }, { "epoch": 0.48770053475935826, "grad_norm": 0.2412109375, "learning_rate": 1.792842153609012e-05, "loss": 1.0463, "num_tokens": 12865326871.0, "step": 2736 }, { "epoch": 0.48787878787878786, "grad_norm": 0.24609375, "learning_rate": 1.7926763042106934e-05, "loss": 1.0413, "num_tokens": 12871590249.0, "step": 2737 }, { "epoch": 0.48805704099821745, "grad_norm": 0.2421875, "learning_rate": 1.7925103970915125e-05, "loss": 1.0233, "num_tokens": 12877874774.0, "step": 2738 }, { "epoch": 0.48823529411764705, "grad_norm": 0.25, "learning_rate": 1.792344432265293e-05, "loss": 1.0484, "num_tokens": 12884158814.0, "step": 2739 }, { "epoch": 0.48841354723707664, "grad_norm": 0.2451171875, "learning_rate": 1.792178409745866e-05, "loss": 1.0267, "num_tokens": 12890424249.0, "step": 2740 }, { "epoch": 0.48859180035650623, "grad_norm": 0.2275390625, "learning_rate": 1.7920123295470653e-05, "loss": 1.0735, "num_tokens": 12896680596.0, "step": 2741 }, { "epoch": 0.48877005347593583, "grad_norm": 0.24609375, "learning_rate": 1.791846191682731e-05, "loss": 1.0366, "num_tokens": 12902909304.0, "step": 2742 }, { "epoch": 0.4889483065953654, "grad_norm": 0.2421875, "learning_rate": 1.7916799961667074e-05, "loss": 1.0527, "num_tokens": 12909166474.0, "step": 2743 }, { "epoch": 0.489126559714795, "grad_norm": 0.2265625, "learning_rate": 1.791513743012844e-05, "loss": 1.0481, "num_tokens": 12915425834.0, "step": 2744 }, { "epoch": 0.4893048128342246, "grad_norm": 0.2490234375, "learning_rate": 1.791347432234993e-05, "loss": 1.0386, "num_tokens": 12921666320.0, "step": 2745 }, { "epoch": 0.4894830659536542, "grad_norm": 0.236328125, "learning_rate": 1.791181063847015e-05, "loss": 1.0101, "num_tokens": 12927950794.0, "step": 2746 }, { "epoch": 0.4896613190730838, "grad_norm": 0.2421875, "learning_rate": 1.7910146378627728e-05, "loss": 1.035, "num_tokens": 12934235366.0, "step": 2747 }, { "epoch": 0.4898395721925134, "grad_norm": 0.2392578125, "learning_rate": 1.790848154296134e-05, "loss": 1.0045, "num_tokens": 12940518530.0, "step": 2748 }, { "epoch": 0.49001782531194293, "grad_norm": 0.244140625, "learning_rate": 1.790681613160973e-05, "loss": 1.0284, "num_tokens": 12946778093.0, "step": 2749 }, { "epoch": 0.49019607843137253, "grad_norm": 0.26171875, "learning_rate": 1.7905150144711668e-05, "loss": 1.0264, "num_tokens": 12953035008.0, "step": 2750 }, { "epoch": 0.4903743315508021, "grad_norm": 0.2451171875, "learning_rate": 1.7903483582405986e-05, "loss": 1.0267, "num_tokens": 12959317309.0, "step": 2751 }, { "epoch": 0.4905525846702317, "grad_norm": 0.25390625, "learning_rate": 1.7901816444831555e-05, "loss": 1.0199, "num_tokens": 12965601774.0, "step": 2752 }, { "epoch": 0.4907308377896613, "grad_norm": 0.26953125, "learning_rate": 1.7900148732127303e-05, "loss": 1.0008, "num_tokens": 12971885621.0, "step": 2753 }, { "epoch": 0.4909090909090909, "grad_norm": 0.25390625, "learning_rate": 1.7898480444432197e-05, "loss": 1.0561, "num_tokens": 12978142236.0, "step": 2754 }, { "epoch": 0.4910873440285205, "grad_norm": 0.25390625, "learning_rate": 1.789681158188526e-05, "loss": 1.0344, "num_tokens": 12984417011.0, "step": 2755 }, { "epoch": 0.4912655971479501, "grad_norm": 0.232421875, "learning_rate": 1.789514214462555e-05, "loss": 1.0498, "num_tokens": 12990701538.0, "step": 2756 }, { "epoch": 0.4914438502673797, "grad_norm": 0.232421875, "learning_rate": 1.789347213279219e-05, "loss": 1.0245, "num_tokens": 12996943941.0, "step": 2757 }, { "epoch": 0.4916221033868093, "grad_norm": 0.26953125, "learning_rate": 1.7891801546524336e-05, "loss": 1.0217, "num_tokens": 13003200367.0, "step": 2758 }, { "epoch": 0.4918003565062389, "grad_norm": 0.263671875, "learning_rate": 1.7890130385961203e-05, "loss": 1.0236, "num_tokens": 13009467985.0, "step": 2759 }, { "epoch": 0.4919786096256685, "grad_norm": 0.236328125, "learning_rate": 1.788845865124205e-05, "loss": 1.0467, "num_tokens": 13015731409.0, "step": 2760 }, { "epoch": 0.492156862745098, "grad_norm": 0.240234375, "learning_rate": 1.788678634250618e-05, "loss": 1.0302, "num_tokens": 13022018053.0, "step": 2761 }, { "epoch": 0.4923351158645276, "grad_norm": 0.22265625, "learning_rate": 1.7885113459892945e-05, "loss": 1.0375, "num_tokens": 13028246998.0, "step": 2762 }, { "epoch": 0.4925133689839572, "grad_norm": 0.267578125, "learning_rate": 1.788344000354175e-05, "loss": 1.0499, "num_tokens": 13034531233.0, "step": 2763 }, { "epoch": 0.4926916221033868, "grad_norm": 0.2578125, "learning_rate": 1.7881765973592046e-05, "loss": 1.0188, "num_tokens": 13040729685.0, "step": 2764 }, { "epoch": 0.4928698752228164, "grad_norm": 0.251953125, "learning_rate": 1.7880091370183326e-05, "loss": 1.019, "num_tokens": 13046987433.0, "step": 2765 }, { "epoch": 0.493048128342246, "grad_norm": 0.2431640625, "learning_rate": 1.787841619345514e-05, "loss": 1.0182, "num_tokens": 13053238589.0, "step": 2766 }, { "epoch": 0.4932263814616756, "grad_norm": 0.2392578125, "learning_rate": 1.7876740443547075e-05, "loss": 1.0155, "num_tokens": 13059521891.0, "step": 2767 }, { "epoch": 0.4934046345811052, "grad_norm": 0.2578125, "learning_rate": 1.7875064120598775e-05, "loss": 1.0124, "num_tokens": 13065790931.0, "step": 2768 }, { "epoch": 0.49358288770053477, "grad_norm": 0.2451171875, "learning_rate": 1.787338722474993e-05, "loss": 1.0181, "num_tokens": 13072070398.0, "step": 2769 }, { "epoch": 0.49376114081996436, "grad_norm": 0.2431640625, "learning_rate": 1.7871709756140273e-05, "loss": 1.0508, "num_tokens": 13078323653.0, "step": 2770 }, { "epoch": 0.49393939393939396, "grad_norm": 0.2333984375, "learning_rate": 1.7870031714909585e-05, "loss": 1.0322, "num_tokens": 13084589872.0, "step": 2771 }, { "epoch": 0.49411764705882355, "grad_norm": 0.263671875, "learning_rate": 1.7868353101197704e-05, "loss": 1.0396, "num_tokens": 13090846396.0, "step": 2772 }, { "epoch": 0.49429590017825314, "grad_norm": 0.2294921875, "learning_rate": 1.786667391514451e-05, "loss": 1.016, "num_tokens": 13097071017.0, "step": 2773 }, { "epoch": 0.4944741532976827, "grad_norm": 0.2490234375, "learning_rate": 1.786499415688992e-05, "loss": 1.025, "num_tokens": 13103351660.0, "step": 2774 }, { "epoch": 0.4946524064171123, "grad_norm": 0.22265625, "learning_rate": 1.7863313826573918e-05, "loss": 1.012, "num_tokens": 13109609229.0, "step": 2775 }, { "epoch": 0.4948306595365419, "grad_norm": 0.263671875, "learning_rate": 1.7861632924336524e-05, "loss": 1.0537, "num_tokens": 13115887307.0, "step": 2776 }, { "epoch": 0.49500891265597147, "grad_norm": 0.2353515625, "learning_rate": 1.7859951450317807e-05, "loss": 1.0498, "num_tokens": 13122169877.0, "step": 2777 }, { "epoch": 0.49518716577540106, "grad_norm": 0.27734375, "learning_rate": 1.7858269404657885e-05, "loss": 1.0491, "num_tokens": 13128430613.0, "step": 2778 }, { "epoch": 0.49536541889483066, "grad_norm": 0.23828125, "learning_rate": 1.7856586787496922e-05, "loss": 1.0189, "num_tokens": 13134663197.0, "step": 2779 }, { "epoch": 0.49554367201426025, "grad_norm": 0.29296875, "learning_rate": 1.7854903598975133e-05, "loss": 1.0004, "num_tokens": 13140938056.0, "step": 2780 }, { "epoch": 0.49572192513368984, "grad_norm": 0.248046875, "learning_rate": 1.7853219839232776e-05, "loss": 1.0183, "num_tokens": 13147222389.0, "step": 2781 }, { "epoch": 0.49590017825311944, "grad_norm": 0.275390625, "learning_rate": 1.7851535508410164e-05, "loss": 1.0269, "num_tokens": 13153488178.0, "step": 2782 }, { "epoch": 0.49607843137254903, "grad_norm": 0.26953125, "learning_rate": 1.7849850606647646e-05, "loss": 1.0251, "num_tokens": 13159771079.0, "step": 2783 }, { "epoch": 0.49625668449197863, "grad_norm": 0.27734375, "learning_rate": 1.7848165134085628e-05, "loss": 1.0359, "num_tokens": 13166027724.0, "step": 2784 }, { "epoch": 0.4964349376114082, "grad_norm": 0.265625, "learning_rate": 1.7846479090864563e-05, "loss": 1.0191, "num_tokens": 13172280718.0, "step": 2785 }, { "epoch": 0.4966131907308378, "grad_norm": 0.244140625, "learning_rate": 1.7844792477124945e-05, "loss": 1.0206, "num_tokens": 13178565094.0, "step": 2786 }, { "epoch": 0.49679144385026736, "grad_norm": 0.2431640625, "learning_rate": 1.7843105293007323e-05, "loss": 1.0066, "num_tokens": 13184848764.0, "step": 2787 }, { "epoch": 0.49696969696969695, "grad_norm": 0.232421875, "learning_rate": 1.784141753865229e-05, "loss": 1.0314, "num_tokens": 13191106745.0, "step": 2788 }, { "epoch": 0.49714795008912654, "grad_norm": 0.23828125, "learning_rate": 1.783972921420049e-05, "loss": 0.9992, "num_tokens": 13197392139.0, "step": 2789 }, { "epoch": 0.49732620320855614, "grad_norm": 0.2314453125, "learning_rate": 1.7838040319792604e-05, "loss": 1.048, "num_tokens": 13203650015.0, "step": 2790 }, { "epoch": 0.49750445632798573, "grad_norm": 0.2578125, "learning_rate": 1.7836350855569377e-05, "loss": 1.0332, "num_tokens": 13209932049.0, "step": 2791 }, { "epoch": 0.49768270944741533, "grad_norm": 0.2392578125, "learning_rate": 1.7834660821671583e-05, "loss": 1.0133, "num_tokens": 13216154978.0, "step": 2792 }, { "epoch": 0.4978609625668449, "grad_norm": 0.2734375, "learning_rate": 1.7832970218240056e-05, "loss": 1.0221, "num_tokens": 13222432261.0, "step": 2793 }, { "epoch": 0.4980392156862745, "grad_norm": 0.255859375, "learning_rate": 1.783127904541568e-05, "loss": 1.0294, "num_tokens": 13228714674.0, "step": 2794 }, { "epoch": 0.4982174688057041, "grad_norm": 0.2890625, "learning_rate": 1.7829587303339372e-05, "loss": 1.0209, "num_tokens": 13234989473.0, "step": 2795 }, { "epoch": 0.4983957219251337, "grad_norm": 0.3046875, "learning_rate": 1.7827894992152113e-05, "loss": 1.0117, "num_tokens": 13241273592.0, "step": 2796 }, { "epoch": 0.4985739750445633, "grad_norm": 0.2431640625, "learning_rate": 1.7826202111994918e-05, "loss": 1.0269, "num_tokens": 13247556634.0, "step": 2797 }, { "epoch": 0.4987522281639929, "grad_norm": 0.333984375, "learning_rate": 1.7824508663008857e-05, "loss": 1.0143, "num_tokens": 13253833919.0, "step": 2798 }, { "epoch": 0.49893048128342243, "grad_norm": 0.248046875, "learning_rate": 1.7822814645335046e-05, "loss": 1.056, "num_tokens": 13260098680.0, "step": 2799 }, { "epoch": 0.49910873440285203, "grad_norm": 0.296875, "learning_rate": 1.7821120059114652e-05, "loss": 1.0456, "num_tokens": 13266383253.0, "step": 2800 }, { "epoch": 0.4992869875222816, "grad_norm": 0.25, "learning_rate": 1.781942490448888e-05, "loss": 1.022, "num_tokens": 13272667558.0, "step": 2801 }, { "epoch": 0.4994652406417112, "grad_norm": 0.29296875, "learning_rate": 1.781772918159898e-05, "loss": 1.0554, "num_tokens": 13278927720.0, "step": 2802 }, { "epoch": 0.4996434937611408, "grad_norm": 0.267578125, "learning_rate": 1.7816032890586278e-05, "loss": 1.0152, "num_tokens": 13285194571.0, "step": 2803 }, { "epoch": 0.4998217468805704, "grad_norm": 0.2373046875, "learning_rate": 1.7814336031592105e-05, "loss": 1.0405, "num_tokens": 13291480172.0, "step": 2804 }, { "epoch": 0.5, "grad_norm": 0.265625, "learning_rate": 1.7812638604757872e-05, "loss": 1.0079, "num_tokens": 13297755776.0, "step": 2805 }, { "epoch": 0.5001782531194295, "grad_norm": 0.228515625, "learning_rate": 1.7810940610225024e-05, "loss": 1.0898, "num_tokens": 13304039791.0, "step": 2806 }, { "epoch": 0.5003565062388592, "grad_norm": 0.25390625, "learning_rate": 1.7809242048135053e-05, "loss": 1.0044, "num_tokens": 13310321696.0, "step": 2807 }, { "epoch": 0.5005347593582887, "grad_norm": 0.212890625, "learning_rate": 1.7807542918629505e-05, "loss": 1.067, "num_tokens": 13316604118.0, "step": 2808 }, { "epoch": 0.5007130124777184, "grad_norm": 0.255859375, "learning_rate": 1.7805843221849963e-05, "loss": 1.0334, "num_tokens": 13322870111.0, "step": 2809 }, { "epoch": 0.5008912655971479, "grad_norm": 0.2138671875, "learning_rate": 1.7804142957938064e-05, "loss": 1.024, "num_tokens": 13329152990.0, "step": 2810 }, { "epoch": 0.5010695187165776, "grad_norm": 0.240234375, "learning_rate": 1.7802442127035497e-05, "loss": 1.0413, "num_tokens": 13335419576.0, "step": 2811 }, { "epoch": 0.5012477718360071, "grad_norm": 0.23046875, "learning_rate": 1.7800740729283987e-05, "loss": 1.0622, "num_tokens": 13341701428.0, "step": 2812 }, { "epoch": 0.5014260249554368, "grad_norm": 0.265625, "learning_rate": 1.7799038764825317e-05, "loss": 1.031, "num_tokens": 13347960823.0, "step": 2813 }, { "epoch": 0.5016042780748663, "grad_norm": 0.251953125, "learning_rate": 1.779733623380131e-05, "loss": 1.0727, "num_tokens": 13354244669.0, "step": 2814 }, { "epoch": 0.5017825311942959, "grad_norm": 0.25, "learning_rate": 1.779563313635383e-05, "loss": 1.0307, "num_tokens": 13360497388.0, "step": 2815 }, { "epoch": 0.5019607843137255, "grad_norm": 0.2421875, "learning_rate": 1.7793929472624806e-05, "loss": 1.0378, "num_tokens": 13366761997.0, "step": 2816 }, { "epoch": 0.5021390374331551, "grad_norm": 0.263671875, "learning_rate": 1.77922252427562e-05, "loss": 1.0646, "num_tokens": 13373014043.0, "step": 2817 }, { "epoch": 0.5023172905525847, "grad_norm": 0.2412109375, "learning_rate": 1.7790520446890036e-05, "loss": 1.015, "num_tokens": 13379297086.0, "step": 2818 }, { "epoch": 0.5024955436720142, "grad_norm": 0.232421875, "learning_rate": 1.7788815085168362e-05, "loss": 1.0261, "num_tokens": 13385526520.0, "step": 2819 }, { "epoch": 0.5026737967914439, "grad_norm": 0.2373046875, "learning_rate": 1.7787109157733292e-05, "loss": 1.0061, "num_tokens": 13391786674.0, "step": 2820 }, { "epoch": 0.5028520499108734, "grad_norm": 0.234375, "learning_rate": 1.778540266472698e-05, "loss": 0.9976, "num_tokens": 13398061970.0, "step": 2821 }, { "epoch": 0.503030303030303, "grad_norm": 0.24609375, "learning_rate": 1.778369560629163e-05, "loss": 1.0434, "num_tokens": 13404346528.0, "step": 2822 }, { "epoch": 0.5032085561497326, "grad_norm": 0.259765625, "learning_rate": 1.7781987982569492e-05, "loss": 1.0271, "num_tokens": 13410606742.0, "step": 2823 }, { "epoch": 0.5033868092691622, "grad_norm": 0.2890625, "learning_rate": 1.7780279793702857e-05, "loss": 1.067, "num_tokens": 13416889356.0, "step": 2824 }, { "epoch": 0.5035650623885918, "grad_norm": 0.2470703125, "learning_rate": 1.7778571039834076e-05, "loss": 0.9994, "num_tokens": 13423175204.0, "step": 2825 }, { "epoch": 0.5037433155080214, "grad_norm": 0.25390625, "learning_rate": 1.7776861721105537e-05, "loss": 1.0441, "num_tokens": 13429449625.0, "step": 2826 }, { "epoch": 0.503921568627451, "grad_norm": 0.26171875, "learning_rate": 1.7775151837659675e-05, "loss": 1.0351, "num_tokens": 13435733617.0, "step": 2827 }, { "epoch": 0.5040998217468806, "grad_norm": 0.25390625, "learning_rate": 1.7773441389638978e-05, "loss": 1.0109, "num_tokens": 13442009991.0, "step": 2828 }, { "epoch": 0.5042780748663102, "grad_norm": 0.240234375, "learning_rate": 1.7771730377185977e-05, "loss": 1.0254, "num_tokens": 13448293887.0, "step": 2829 }, { "epoch": 0.5044563279857398, "grad_norm": 0.25, "learning_rate": 1.7770018800443252e-05, "loss": 1.0312, "num_tokens": 13454527981.0, "step": 2830 }, { "epoch": 0.5046345811051693, "grad_norm": 0.2392578125, "learning_rate": 1.776830665955343e-05, "loss": 1.0335, "num_tokens": 13460750485.0, "step": 2831 }, { "epoch": 0.5048128342245989, "grad_norm": 0.2451171875, "learning_rate": 1.776659395465918e-05, "loss": 1.0454, "num_tokens": 13467002556.0, "step": 2832 }, { "epoch": 0.5049910873440285, "grad_norm": 0.234375, "learning_rate": 1.776488068590323e-05, "loss": 1.0702, "num_tokens": 13473250192.0, "step": 2833 }, { "epoch": 0.5051693404634581, "grad_norm": 0.2578125, "learning_rate": 1.7763166853428337e-05, "loss": 1.0324, "num_tokens": 13479534196.0, "step": 2834 }, { "epoch": 0.5053475935828877, "grad_norm": 0.2578125, "learning_rate": 1.7761452457377322e-05, "loss": 1.0383, "num_tokens": 13485793328.0, "step": 2835 }, { "epoch": 0.5055258467023173, "grad_norm": 0.287109375, "learning_rate": 1.7759737497893044e-05, "loss": 1.0185, "num_tokens": 13492078760.0, "step": 2836 }, { "epoch": 0.5057040998217469, "grad_norm": 0.251953125, "learning_rate": 1.775802197511841e-05, "loss": 1.0258, "num_tokens": 13498362174.0, "step": 2837 }, { "epoch": 0.5058823529411764, "grad_norm": 0.259765625, "learning_rate": 1.7756305889196372e-05, "loss": 1.0395, "num_tokens": 13504645329.0, "step": 2838 }, { "epoch": 0.5060606060606061, "grad_norm": 0.2431640625, "learning_rate": 1.7754589240269943e-05, "loss": 1.0356, "num_tokens": 13510928967.0, "step": 2839 }, { "epoch": 0.5062388591800356, "grad_norm": 0.224609375, "learning_rate": 1.7752872028482158e-05, "loss": 1.0223, "num_tokens": 13517213547.0, "step": 2840 }, { "epoch": 0.5064171122994653, "grad_norm": 0.240234375, "learning_rate": 1.775115425397612e-05, "loss": 1.0158, "num_tokens": 13523495784.0, "step": 2841 }, { "epoch": 0.5065953654188948, "grad_norm": 0.2353515625, "learning_rate": 1.7749435916894973e-05, "loss": 1.0265, "num_tokens": 13529779868.0, "step": 2842 }, { "epoch": 0.5067736185383244, "grad_norm": 0.26171875, "learning_rate": 1.7747717017381898e-05, "loss": 1.0708, "num_tokens": 13536064895.0, "step": 2843 }, { "epoch": 0.506951871657754, "grad_norm": 0.23046875, "learning_rate": 1.7745997555580143e-05, "loss": 1.0107, "num_tokens": 13542348450.0, "step": 2844 }, { "epoch": 0.5071301247771836, "grad_norm": 0.255859375, "learning_rate": 1.774427753163298e-05, "loss": 1.0384, "num_tokens": 13548572643.0, "step": 2845 }, { "epoch": 0.5073083778966132, "grad_norm": 0.228515625, "learning_rate": 1.774255694568375e-05, "loss": 1.023, "num_tokens": 13554849744.0, "step": 2846 }, { "epoch": 0.5074866310160427, "grad_norm": 0.27734375, "learning_rate": 1.7740835797875817e-05, "loss": 1.0365, "num_tokens": 13561125349.0, "step": 2847 }, { "epoch": 0.5076648841354724, "grad_norm": 0.2470703125, "learning_rate": 1.7739114088352613e-05, "loss": 1.027, "num_tokens": 13567365046.0, "step": 2848 }, { "epoch": 0.5078431372549019, "grad_norm": 0.265625, "learning_rate": 1.773739181725761e-05, "loss": 0.989, "num_tokens": 13573648983.0, "step": 2849 }, { "epoch": 0.5080213903743316, "grad_norm": 0.26171875, "learning_rate": 1.773566898473432e-05, "loss": 1.0559, "num_tokens": 13579905983.0, "step": 2850 }, { "epoch": 0.5081996434937611, "grad_norm": 0.259765625, "learning_rate": 1.7733945590926308e-05, "loss": 1.013, "num_tokens": 13586151923.0, "step": 2851 }, { "epoch": 0.5083778966131908, "grad_norm": 0.25, "learning_rate": 1.7732221635977192e-05, "loss": 1.0172, "num_tokens": 13592435379.0, "step": 2852 }, { "epoch": 0.5085561497326203, "grad_norm": 0.232421875, "learning_rate": 1.7730497120030614e-05, "loss": 1.0203, "num_tokens": 13598688940.0, "step": 2853 }, { "epoch": 0.50873440285205, "grad_norm": 0.25, "learning_rate": 1.7728772043230297e-05, "loss": 1.0323, "num_tokens": 13604972958.0, "step": 2854 }, { "epoch": 0.5089126559714795, "grad_norm": 0.244140625, "learning_rate": 1.7727046405719973e-05, "loss": 0.9993, "num_tokens": 13611246985.0, "step": 2855 }, { "epoch": 0.509090909090909, "grad_norm": 0.26953125, "learning_rate": 1.7725320207643455e-05, "loss": 1.0312, "num_tokens": 13617510162.0, "step": 2856 }, { "epoch": 0.5092691622103387, "grad_norm": 0.240234375, "learning_rate": 1.7723593449144582e-05, "loss": 1.0397, "num_tokens": 13623755007.0, "step": 2857 }, { "epoch": 0.5094474153297682, "grad_norm": 0.2578125, "learning_rate": 1.772186613036724e-05, "loss": 1.0247, "num_tokens": 13630039508.0, "step": 2858 }, { "epoch": 0.5096256684491979, "grad_norm": 0.251953125, "learning_rate": 1.7720138251455382e-05, "loss": 1.0582, "num_tokens": 13636282959.0, "step": 2859 }, { "epoch": 0.5098039215686274, "grad_norm": 0.23046875, "learning_rate": 1.771840981255297e-05, "loss": 1.0183, "num_tokens": 13642548355.0, "step": 2860 }, { "epoch": 0.5099821746880571, "grad_norm": 0.2421875, "learning_rate": 1.7716680813804056e-05, "loss": 1.0395, "num_tokens": 13648829457.0, "step": 2861 }, { "epoch": 0.5101604278074866, "grad_norm": 0.24609375, "learning_rate": 1.7714951255352707e-05, "loss": 1.0118, "num_tokens": 13655113353.0, "step": 2862 }, { "epoch": 0.5103386809269163, "grad_norm": 0.2392578125, "learning_rate": 1.7713221137343047e-05, "loss": 1.0181, "num_tokens": 13661347121.0, "step": 2863 }, { "epoch": 0.5105169340463458, "grad_norm": 0.22265625, "learning_rate": 1.7711490459919257e-05, "loss": 1.0025, "num_tokens": 13667570421.0, "step": 2864 }, { "epoch": 0.5106951871657754, "grad_norm": 0.2216796875, "learning_rate": 1.770975922322554e-05, "loss": 1.0396, "num_tokens": 13673847184.0, "step": 2865 }, { "epoch": 0.510873440285205, "grad_norm": 0.2294921875, "learning_rate": 1.7708027427406172e-05, "loss": 1.044, "num_tokens": 13680109887.0, "step": 2866 }, { "epoch": 0.5110516934046346, "grad_norm": 0.2451171875, "learning_rate": 1.770629507260546e-05, "loss": 1.0185, "num_tokens": 13686393875.0, "step": 2867 }, { "epoch": 0.5112299465240642, "grad_norm": 0.2353515625, "learning_rate": 1.7704562158967758e-05, "loss": 1.0233, "num_tokens": 13692677898.0, "step": 2868 }, { "epoch": 0.5114081996434937, "grad_norm": 0.2431640625, "learning_rate": 1.7702828686637473e-05, "loss": 1.0145, "num_tokens": 13698934334.0, "step": 2869 }, { "epoch": 0.5115864527629234, "grad_norm": 0.251953125, "learning_rate": 1.770109465575906e-05, "loss": 1.0043, "num_tokens": 13705218589.0, "step": 2870 }, { "epoch": 0.5117647058823529, "grad_norm": 0.234375, "learning_rate": 1.769936006647701e-05, "loss": 1.024, "num_tokens": 13711499988.0, "step": 2871 }, { "epoch": 0.5119429590017825, "grad_norm": 0.2392578125, "learning_rate": 1.769762491893587e-05, "loss": 1.0431, "num_tokens": 13717780418.0, "step": 2872 }, { "epoch": 0.5121212121212121, "grad_norm": 0.232421875, "learning_rate": 1.7695889213280227e-05, "loss": 1.0263, "num_tokens": 13724046863.0, "step": 2873 }, { "epoch": 0.5122994652406417, "grad_norm": 0.2392578125, "learning_rate": 1.7694152949654725e-05, "loss": 1.0345, "num_tokens": 13730330999.0, "step": 2874 }, { "epoch": 0.5124777183600713, "grad_norm": 0.2236328125, "learning_rate": 1.7692416128204034e-05, "loss": 1.0286, "num_tokens": 13736604328.0, "step": 2875 }, { "epoch": 0.5126559714795009, "grad_norm": 0.2177734375, "learning_rate": 1.76906787490729e-05, "loss": 1.0633, "num_tokens": 13742886810.0, "step": 2876 }, { "epoch": 0.5128342245989305, "grad_norm": 0.2275390625, "learning_rate": 1.7688940812406083e-05, "loss": 1.0405, "num_tokens": 13749127981.0, "step": 2877 }, { "epoch": 0.5130124777183601, "grad_norm": 0.220703125, "learning_rate": 1.768720231834842e-05, "loss": 1.0326, "num_tokens": 13755409958.0, "step": 2878 }, { "epoch": 0.5131907308377897, "grad_norm": 0.2294921875, "learning_rate": 1.768546326704477e-05, "loss": 1.0409, "num_tokens": 13761666891.0, "step": 2879 }, { "epoch": 0.5133689839572193, "grad_norm": 0.228515625, "learning_rate": 1.7683723658640054e-05, "loss": 1.0196, "num_tokens": 13767951829.0, "step": 2880 }, { "epoch": 0.5135472370766488, "grad_norm": 0.22265625, "learning_rate": 1.768198349327923e-05, "loss": 1.0286, "num_tokens": 13774235484.0, "step": 2881 }, { "epoch": 0.5137254901960784, "grad_norm": 0.2373046875, "learning_rate": 1.7680242771107308e-05, "loss": 1.0465, "num_tokens": 13780520690.0, "step": 2882 }, { "epoch": 0.513903743315508, "grad_norm": 0.2255859375, "learning_rate": 1.7678501492269345e-05, "loss": 1.0341, "num_tokens": 13786802616.0, "step": 2883 }, { "epoch": 0.5140819964349376, "grad_norm": 0.26953125, "learning_rate": 1.767675965691044e-05, "loss": 1.0404, "num_tokens": 13793059274.0, "step": 2884 }, { "epoch": 0.5142602495543672, "grad_norm": 0.2490234375, "learning_rate": 1.767501726517574e-05, "loss": 1.0626, "num_tokens": 13799340711.0, "step": 2885 }, { "epoch": 0.5144385026737968, "grad_norm": 0.240234375, "learning_rate": 1.767327431721044e-05, "loss": 1.0268, "num_tokens": 13805601884.0, "step": 2886 }, { "epoch": 0.5146167557932264, "grad_norm": 0.24609375, "learning_rate": 1.7671530813159782e-05, "loss": 1.0275, "num_tokens": 13811862167.0, "step": 2887 }, { "epoch": 0.514795008912656, "grad_norm": 0.2490234375, "learning_rate": 1.766978675316905e-05, "loss": 1.0652, "num_tokens": 13818136316.0, "step": 2888 }, { "epoch": 0.5149732620320856, "grad_norm": 0.22265625, "learning_rate": 1.7668042137383573e-05, "loss": 1.059, "num_tokens": 13824420396.0, "step": 2889 }, { "epoch": 0.5151515151515151, "grad_norm": 0.232421875, "learning_rate": 1.7666296965948736e-05, "loss": 1.0339, "num_tokens": 13830704786.0, "step": 2890 }, { "epoch": 0.5153297682709448, "grad_norm": 0.26171875, "learning_rate": 1.7664551239009963e-05, "loss": 1.0533, "num_tokens": 13836914252.0, "step": 2891 }, { "epoch": 0.5155080213903743, "grad_norm": 0.2373046875, "learning_rate": 1.7662804956712727e-05, "loss": 1.0015, "num_tokens": 13843196732.0, "step": 2892 }, { "epoch": 0.515686274509804, "grad_norm": 0.23828125, "learning_rate": 1.7661058119202545e-05, "loss": 1.0205, "num_tokens": 13849431197.0, "step": 2893 }, { "epoch": 0.5158645276292335, "grad_norm": 0.23828125, "learning_rate": 1.765931072662498e-05, "loss": 1.042, "num_tokens": 13855715403.0, "step": 2894 }, { "epoch": 0.516042780748663, "grad_norm": 0.25, "learning_rate": 1.7657562779125646e-05, "loss": 1.0374, "num_tokens": 13861999626.0, "step": 2895 }, { "epoch": 0.5162210338680927, "grad_norm": 0.212890625, "learning_rate": 1.7655814276850198e-05, "loss": 1.0454, "num_tokens": 13868283530.0, "step": 2896 }, { "epoch": 0.5163992869875222, "grad_norm": 0.267578125, "learning_rate": 1.7654065219944338e-05, "loss": 1.049, "num_tokens": 13874526665.0, "step": 2897 }, { "epoch": 0.5165775401069519, "grad_norm": 0.2294921875, "learning_rate": 1.765231560855381e-05, "loss": 1.0658, "num_tokens": 13880810860.0, "step": 2898 }, { "epoch": 0.5167557932263814, "grad_norm": 0.27734375, "learning_rate": 1.7650565442824425e-05, "loss": 1.0446, "num_tokens": 13887080577.0, "step": 2899 }, { "epoch": 0.5169340463458111, "grad_norm": 0.244140625, "learning_rate": 1.764881472290201e-05, "loss": 1.0177, "num_tokens": 13893357850.0, "step": 2900 }, { "epoch": 0.5171122994652406, "grad_norm": 0.263671875, "learning_rate": 1.7647063448932457e-05, "loss": 1.0555, "num_tokens": 13899621314.0, "step": 2901 }, { "epoch": 0.5172905525846703, "grad_norm": 0.26171875, "learning_rate": 1.7645311621061706e-05, "loss": 1.0359, "num_tokens": 13905906312.0, "step": 2902 }, { "epoch": 0.5174688057040998, "grad_norm": 0.228515625, "learning_rate": 1.764355923943573e-05, "loss": 1.0316, "num_tokens": 13912177648.0, "step": 2903 }, { "epoch": 0.5176470588235295, "grad_norm": 0.255859375, "learning_rate": 1.7641806304200554e-05, "loss": 1.0283, "num_tokens": 13918431630.0, "step": 2904 }, { "epoch": 0.517825311942959, "grad_norm": 0.234375, "learning_rate": 1.7640052815502257e-05, "loss": 1.0235, "num_tokens": 13924713557.0, "step": 2905 }, { "epoch": 0.5180035650623885, "grad_norm": 0.2890625, "learning_rate": 1.7638298773486955e-05, "loss": 1.054, "num_tokens": 13930941798.0, "step": 2906 }, { "epoch": 0.5181818181818182, "grad_norm": 0.25390625, "learning_rate": 1.763654417830081e-05, "loss": 1.0346, "num_tokens": 13937224523.0, "step": 2907 }, { "epoch": 0.5183600713012477, "grad_norm": 0.283203125, "learning_rate": 1.7634789030090038e-05, "loss": 1.0348, "num_tokens": 13943487668.0, "step": 2908 }, { "epoch": 0.5185383244206774, "grad_norm": 0.2578125, "learning_rate": 1.763303332900089e-05, "loss": 1.0049, "num_tokens": 13949739989.0, "step": 2909 }, { "epoch": 0.5187165775401069, "grad_norm": 0.2578125, "learning_rate": 1.7631277075179675e-05, "loss": 1.0535, "num_tokens": 13955984683.0, "step": 2910 }, { "epoch": 0.5188948306595366, "grad_norm": 0.259765625, "learning_rate": 1.7629520268772735e-05, "loss": 1.0584, "num_tokens": 13962256768.0, "step": 2911 }, { "epoch": 0.5190730837789661, "grad_norm": 0.2412109375, "learning_rate": 1.7627762909926472e-05, "loss": 1.0075, "num_tokens": 13968541936.0, "step": 2912 }, { "epoch": 0.5192513368983958, "grad_norm": 0.25, "learning_rate": 1.7626004998787324e-05, "loss": 1.0132, "num_tokens": 13974826948.0, "step": 2913 }, { "epoch": 0.5194295900178253, "grad_norm": 0.212890625, "learning_rate": 1.7624246535501777e-05, "loss": 1.0104, "num_tokens": 13981090843.0, "step": 2914 }, { "epoch": 0.5196078431372549, "grad_norm": 0.234375, "learning_rate": 1.7622487520216368e-05, "loss": 1.0209, "num_tokens": 13987351910.0, "step": 2915 }, { "epoch": 0.5197860962566845, "grad_norm": 0.224609375, "learning_rate": 1.762072795307767e-05, "loss": 1.0324, "num_tokens": 13993608988.0, "step": 2916 }, { "epoch": 0.5199643493761141, "grad_norm": 0.224609375, "learning_rate": 1.7618967834232313e-05, "loss": 1.0256, "num_tokens": 13999844540.0, "step": 2917 }, { "epoch": 0.5201426024955437, "grad_norm": 0.2265625, "learning_rate": 1.7617207163826967e-05, "loss": 1.0273, "num_tokens": 14006107231.0, "step": 2918 }, { "epoch": 0.5203208556149732, "grad_norm": 0.2294921875, "learning_rate": 1.7615445942008347e-05, "loss": 1.0146, "num_tokens": 14012368567.0, "step": 2919 }, { "epoch": 0.5204991087344029, "grad_norm": 0.212890625, "learning_rate": 1.7613684168923218e-05, "loss": 1.0033, "num_tokens": 14018651663.0, "step": 2920 }, { "epoch": 0.5206773618538324, "grad_norm": 0.244140625, "learning_rate": 1.761192184471839e-05, "loss": 1.034, "num_tokens": 14024908134.0, "step": 2921 }, { "epoch": 0.520855614973262, "grad_norm": 0.2177734375, "learning_rate": 1.7610158969540713e-05, "loss": 1.0281, "num_tokens": 14031183233.0, "step": 2922 }, { "epoch": 0.5210338680926916, "grad_norm": 0.232421875, "learning_rate": 1.7608395543537094e-05, "loss": 1.042, "num_tokens": 14037465767.0, "step": 2923 }, { "epoch": 0.5212121212121212, "grad_norm": 0.2158203125, "learning_rate": 1.760663156685448e-05, "loss": 1.0063, "num_tokens": 14043714250.0, "step": 2924 }, { "epoch": 0.5213903743315508, "grad_norm": 0.259765625, "learning_rate": 1.7604867039639852e-05, "loss": 1.0234, "num_tokens": 14049987541.0, "step": 2925 }, { "epoch": 0.5215686274509804, "grad_norm": 0.232421875, "learning_rate": 1.7603101962040266e-05, "loss": 1.0322, "num_tokens": 14056270183.0, "step": 2926 }, { "epoch": 0.52174688057041, "grad_norm": 0.2421875, "learning_rate": 1.760133633420279e-05, "loss": 1.0364, "num_tokens": 14062555302.0, "step": 2927 }, { "epoch": 0.5219251336898396, "grad_norm": 0.25, "learning_rate": 1.7599570156274565e-05, "loss": 1.0366, "num_tokens": 14068837446.0, "step": 2928 }, { "epoch": 0.5221033868092692, "grad_norm": 0.236328125, "learning_rate": 1.7597803428402764e-05, "loss": 1.0372, "num_tokens": 14075081964.0, "step": 2929 }, { "epoch": 0.5222816399286988, "grad_norm": 0.275390625, "learning_rate": 1.7596036150734603e-05, "loss": 1.0375, "num_tokens": 14081365499.0, "step": 2930 }, { "epoch": 0.5224598930481283, "grad_norm": 0.2294921875, "learning_rate": 1.7594268323417357e-05, "loss": 1.017, "num_tokens": 14087625771.0, "step": 2931 }, { "epoch": 0.5226381461675579, "grad_norm": 0.2294921875, "learning_rate": 1.7592499946598338e-05, "loss": 1.0526, "num_tokens": 14093888447.0, "step": 2932 }, { "epoch": 0.5228163992869875, "grad_norm": 0.224609375, "learning_rate": 1.7590731020424904e-05, "loss": 1.006, "num_tokens": 14100150322.0, "step": 2933 }, { "epoch": 0.5229946524064171, "grad_norm": 0.2216796875, "learning_rate": 1.758896154504446e-05, "loss": 1.0272, "num_tokens": 14106434840.0, "step": 2934 }, { "epoch": 0.5231729055258467, "grad_norm": 0.208984375, "learning_rate": 1.7587191520604456e-05, "loss": 1.015, "num_tokens": 14112717724.0, "step": 2935 }, { "epoch": 0.5233511586452763, "grad_norm": 0.22265625, "learning_rate": 1.758542094725239e-05, "loss": 1.0242, "num_tokens": 14119000327.0, "step": 2936 }, { "epoch": 0.5235294117647059, "grad_norm": 0.2080078125, "learning_rate": 1.7583649825135803e-05, "loss": 1.0128, "num_tokens": 14125286835.0, "step": 2937 }, { "epoch": 0.5237076648841354, "grad_norm": 0.236328125, "learning_rate": 1.7581878154402287e-05, "loss": 1.024, "num_tokens": 14131558236.0, "step": 2938 }, { "epoch": 0.5238859180035651, "grad_norm": 0.2138671875, "learning_rate": 1.758010593519947e-05, "loss": 1.0381, "num_tokens": 14137841666.0, "step": 2939 }, { "epoch": 0.5240641711229946, "grad_norm": 0.25390625, "learning_rate": 1.7578333167675035e-05, "loss": 1.0601, "num_tokens": 14144087232.0, "step": 2940 }, { "epoch": 0.5242424242424243, "grad_norm": 0.236328125, "learning_rate": 1.75765598519767e-05, "loss": 1.0581, "num_tokens": 14150347415.0, "step": 2941 }, { "epoch": 0.5244206773618538, "grad_norm": 0.271484375, "learning_rate": 1.7574785988252248e-05, "loss": 1.0105, "num_tokens": 14156607897.0, "step": 2942 }, { "epoch": 0.5245989304812835, "grad_norm": 0.216796875, "learning_rate": 1.757301157664949e-05, "loss": 1.0042, "num_tokens": 14162870620.0, "step": 2943 }, { "epoch": 0.524777183600713, "grad_norm": 0.265625, "learning_rate": 1.7571236617316284e-05, "loss": 1.0232, "num_tokens": 14169136024.0, "step": 2944 }, { "epoch": 0.5249554367201426, "grad_norm": 0.232421875, "learning_rate": 1.756946111040054e-05, "loss": 1.0168, "num_tokens": 14175379004.0, "step": 2945 }, { "epoch": 0.5251336898395722, "grad_norm": 0.23828125, "learning_rate": 1.7567685056050217e-05, "loss": 1.0018, "num_tokens": 14181664423.0, "step": 2946 }, { "epoch": 0.5253119429590017, "grad_norm": 0.255859375, "learning_rate": 1.7565908454413307e-05, "loss": 1.0116, "num_tokens": 14187918823.0, "step": 2947 }, { "epoch": 0.5254901960784314, "grad_norm": 0.2392578125, "learning_rate": 1.7564131305637858e-05, "loss": 1.0307, "num_tokens": 14194205512.0, "step": 2948 }, { "epoch": 0.5256684491978609, "grad_norm": 0.283203125, "learning_rate": 1.7562353609871956e-05, "loss": 1.0424, "num_tokens": 14200473845.0, "step": 2949 }, { "epoch": 0.5258467023172906, "grad_norm": 0.251953125, "learning_rate": 1.7560575367263743e-05, "loss": 1.0611, "num_tokens": 14206756385.0, "step": 2950 }, { "epoch": 0.5260249554367201, "grad_norm": 0.265625, "learning_rate": 1.7558796577961393e-05, "loss": 0.9986, "num_tokens": 14213038424.0, "step": 2951 }, { "epoch": 0.5262032085561498, "grad_norm": 0.2314453125, "learning_rate": 1.755701724211314e-05, "loss": 1.0473, "num_tokens": 14219321287.0, "step": 2952 }, { "epoch": 0.5263814616755793, "grad_norm": 0.2734375, "learning_rate": 1.7555237359867252e-05, "loss": 1.0732, "num_tokens": 14225567625.0, "step": 2953 }, { "epoch": 0.526559714795009, "grad_norm": 0.2216796875, "learning_rate": 1.7553456931372052e-05, "loss": 1.0451, "num_tokens": 14231849455.0, "step": 2954 }, { "epoch": 0.5267379679144385, "grad_norm": 0.27734375, "learning_rate": 1.7551675956775896e-05, "loss": 0.9925, "num_tokens": 14238114625.0, "step": 2955 }, { "epoch": 0.526916221033868, "grad_norm": 0.228515625, "learning_rate": 1.75498944362272e-05, "loss": 1.0614, "num_tokens": 14244354204.0, "step": 2956 }, { "epoch": 0.5270944741532977, "grad_norm": 0.2255859375, "learning_rate": 1.7548112369874414e-05, "loss": 1.0235, "num_tokens": 14250613652.0, "step": 2957 }, { "epoch": 0.5272727272727272, "grad_norm": 0.2255859375, "learning_rate": 1.7546329757866037e-05, "loss": 1.0031, "num_tokens": 14256884889.0, "step": 2958 }, { "epoch": 0.5274509803921569, "grad_norm": 0.21484375, "learning_rate": 1.754454660035062e-05, "loss": 1.0242, "num_tokens": 14263094937.0, "step": 2959 }, { "epoch": 0.5276292335115864, "grad_norm": 0.2138671875, "learning_rate": 1.754276289747675e-05, "loss": 1.0368, "num_tokens": 14269325204.0, "step": 2960 }, { "epoch": 0.5278074866310161, "grad_norm": 0.234375, "learning_rate": 1.7540978649393066e-05, "loss": 1.0223, "num_tokens": 14275609903.0, "step": 2961 }, { "epoch": 0.5279857397504456, "grad_norm": 0.21484375, "learning_rate": 1.7539193856248243e-05, "loss": 1.0044, "num_tokens": 14281859390.0, "step": 2962 }, { "epoch": 0.5281639928698753, "grad_norm": 0.2275390625, "learning_rate": 1.7537408518191018e-05, "loss": 0.9932, "num_tokens": 14288143283.0, "step": 2963 }, { "epoch": 0.5283422459893048, "grad_norm": 0.21875, "learning_rate": 1.7535622635370156e-05, "loss": 1.0209, "num_tokens": 14294427540.0, "step": 2964 }, { "epoch": 0.5285204991087344, "grad_norm": 0.234375, "learning_rate": 1.753383620793448e-05, "loss": 1.0285, "num_tokens": 14300660957.0, "step": 2965 }, { "epoch": 0.528698752228164, "grad_norm": 0.224609375, "learning_rate": 1.7532049236032847e-05, "loss": 1.0424, "num_tokens": 14306945715.0, "step": 2966 }, { "epoch": 0.5288770053475936, "grad_norm": 0.2431640625, "learning_rate": 1.7530261719814173e-05, "loss": 1.056, "num_tokens": 14313226575.0, "step": 2967 }, { "epoch": 0.5290552584670232, "grad_norm": 0.2177734375, "learning_rate": 1.7528473659427404e-05, "loss": 1.0393, "num_tokens": 14319511331.0, "step": 2968 }, { "epoch": 0.5292335115864527, "grad_norm": 0.21484375, "learning_rate": 1.752668505502155e-05, "loss": 1.046, "num_tokens": 14325794538.0, "step": 2969 }, { "epoch": 0.5294117647058824, "grad_norm": 0.2373046875, "learning_rate": 1.7524895906745648e-05, "loss": 1.0088, "num_tokens": 14332067142.0, "step": 2970 }, { "epoch": 0.5295900178253119, "grad_norm": 0.2060546875, "learning_rate": 1.752310621474879e-05, "loss": 1.0111, "num_tokens": 14338335667.0, "step": 2971 }, { "epoch": 0.5297682709447415, "grad_norm": 0.2275390625, "learning_rate": 1.7521315979180115e-05, "loss": 1.0488, "num_tokens": 14344580515.0, "step": 2972 }, { "epoch": 0.5299465240641711, "grad_norm": 0.2138671875, "learning_rate": 1.7519525200188792e-05, "loss": 1.0178, "num_tokens": 14350865332.0, "step": 2973 }, { "epoch": 0.5301247771836007, "grad_norm": 0.2216796875, "learning_rate": 1.7517733877924063e-05, "loss": 1.0436, "num_tokens": 14357118148.0, "step": 2974 }, { "epoch": 0.5303030303030303, "grad_norm": 0.2236328125, "learning_rate": 1.751594201253519e-05, "loss": 1.0236, "num_tokens": 14363397353.0, "step": 2975 }, { "epoch": 0.5304812834224599, "grad_norm": 0.2197265625, "learning_rate": 1.7514149604171487e-05, "loss": 1.0211, "num_tokens": 14369644414.0, "step": 2976 }, { "epoch": 0.5306595365418895, "grad_norm": 0.25390625, "learning_rate": 1.7512356652982325e-05, "loss": 1.0623, "num_tokens": 14375903712.0, "step": 2977 }, { "epoch": 0.5308377896613191, "grad_norm": 0.23046875, "learning_rate": 1.7510563159117104e-05, "loss": 1.0368, "num_tokens": 14382121036.0, "step": 2978 }, { "epoch": 0.5310160427807487, "grad_norm": 0.2470703125, "learning_rate": 1.7508769122725278e-05, "loss": 1.0274, "num_tokens": 14388400464.0, "step": 2979 }, { "epoch": 0.5311942959001783, "grad_norm": 0.23046875, "learning_rate": 1.750697454395634e-05, "loss": 1.0483, "num_tokens": 14394651546.0, "step": 2980 }, { "epoch": 0.5313725490196078, "grad_norm": 0.265625, "learning_rate": 1.7505179422959843e-05, "loss": 1.0669, "num_tokens": 14400907470.0, "step": 2981 }, { "epoch": 0.5315508021390374, "grad_norm": 0.2265625, "learning_rate": 1.750338375988537e-05, "loss": 1.0654, "num_tokens": 14407190616.0, "step": 2982 }, { "epoch": 0.531729055258467, "grad_norm": 0.27734375, "learning_rate": 1.7501587554882552e-05, "loss": 1.0599, "num_tokens": 14413470447.0, "step": 2983 }, { "epoch": 0.5319073083778966, "grad_norm": 0.255859375, "learning_rate": 1.7499790808101064e-05, "loss": 1.0051, "num_tokens": 14419756264.0, "step": 2984 }, { "epoch": 0.5320855614973262, "grad_norm": 0.234375, "learning_rate": 1.7497993519690636e-05, "loss": 1.0448, "num_tokens": 14426008859.0, "step": 2985 }, { "epoch": 0.5322638146167558, "grad_norm": 0.275390625, "learning_rate": 1.7496195689801033e-05, "loss": 1.0116, "num_tokens": 14432260588.0, "step": 2986 }, { "epoch": 0.5324420677361854, "grad_norm": 0.22265625, "learning_rate": 1.749439731858207e-05, "loss": 1.0369, "num_tokens": 14438467863.0, "step": 2987 }, { "epoch": 0.532620320855615, "grad_norm": 0.259765625, "learning_rate": 1.7492598406183605e-05, "loss": 1.0067, "num_tokens": 14444735912.0, "step": 2988 }, { "epoch": 0.5327985739750446, "grad_norm": 0.2412109375, "learning_rate": 1.749079895275554e-05, "loss": 1.0402, "num_tokens": 14451004642.0, "step": 2989 }, { "epoch": 0.5329768270944741, "grad_norm": 0.23046875, "learning_rate": 1.748899895844783e-05, "loss": 1.0097, "num_tokens": 14457288138.0, "step": 2990 }, { "epoch": 0.5331550802139038, "grad_norm": 0.25, "learning_rate": 1.7487198423410458e-05, "loss": 1.0207, "num_tokens": 14463515849.0, "step": 2991 }, { "epoch": 0.5333333333333333, "grad_norm": 0.28515625, "learning_rate": 1.7485397347793475e-05, "loss": 1.0442, "num_tokens": 14469782675.0, "step": 2992 }, { "epoch": 0.533511586452763, "grad_norm": 0.23828125, "learning_rate": 1.7483595731746958e-05, "loss": 1.0106, "num_tokens": 14476058254.0, "step": 2993 }, { "epoch": 0.5336898395721925, "grad_norm": 0.2294921875, "learning_rate": 1.7481793575421035e-05, "loss": 0.9994, "num_tokens": 14482310799.0, "step": 2994 }, { "epoch": 0.533868092691622, "grad_norm": 0.220703125, "learning_rate": 1.7479990878965887e-05, "loss": 1.0535, "num_tokens": 14488562551.0, "step": 2995 }, { "epoch": 0.5340463458110517, "grad_norm": 0.21484375, "learning_rate": 1.7478187642531725e-05, "loss": 1.0182, "num_tokens": 14494847151.0, "step": 2996 }, { "epoch": 0.5342245989304812, "grad_norm": 0.26953125, "learning_rate": 1.7476383866268816e-05, "loss": 1.0144, "num_tokens": 14501131705.0, "step": 2997 }, { "epoch": 0.5344028520499109, "grad_norm": 0.2265625, "learning_rate": 1.7474579550327473e-05, "loss": 1.0529, "num_tokens": 14507387705.0, "step": 2998 }, { "epoch": 0.5345811051693404, "grad_norm": 0.27734375, "learning_rate": 1.7472774694858046e-05, "loss": 1.0233, "num_tokens": 14513671825.0, "step": 2999 }, { "epoch": 0.5347593582887701, "grad_norm": 0.2392578125, "learning_rate": 1.7470969300010933e-05, "loss": 1.0548, "num_tokens": 14519947927.0, "step": 3000 }, { "epoch": 0.5349376114081996, "grad_norm": 0.291015625, "learning_rate": 1.746916336593658e-05, "loss": 1.0271, "num_tokens": 14526229704.0, "step": 3001 }, { "epoch": 0.5351158645276293, "grad_norm": 0.2734375, "learning_rate": 1.7467356892785477e-05, "loss": 1.0614, "num_tokens": 14532513966.0, "step": 3002 }, { "epoch": 0.5352941176470588, "grad_norm": 0.279296875, "learning_rate": 1.7465549880708156e-05, "loss": 1.0397, "num_tokens": 14538778642.0, "step": 3003 }, { "epoch": 0.5354723707664885, "grad_norm": 0.296875, "learning_rate": 1.7463742329855197e-05, "loss": 1.0238, "num_tokens": 14545030506.0, "step": 3004 }, { "epoch": 0.535650623885918, "grad_norm": 0.2216796875, "learning_rate": 1.746193424037722e-05, "loss": 1.0129, "num_tokens": 14551314285.0, "step": 3005 }, { "epoch": 0.5358288770053476, "grad_norm": 0.275390625, "learning_rate": 1.74601256124249e-05, "loss": 1.0303, "num_tokens": 14557574518.0, "step": 3006 }, { "epoch": 0.5360071301247772, "grad_norm": 0.2392578125, "learning_rate": 1.7458316446148946e-05, "loss": 1.0406, "num_tokens": 14563829705.0, "step": 3007 }, { "epoch": 0.5361853832442067, "grad_norm": 0.259765625, "learning_rate": 1.7456506741700112e-05, "loss": 1.02, "num_tokens": 14570087599.0, "step": 3008 }, { "epoch": 0.5363636363636364, "grad_norm": 0.27734375, "learning_rate": 1.7454696499229208e-05, "loss": 1.0341, "num_tokens": 14576371750.0, "step": 3009 }, { "epoch": 0.5365418894830659, "grad_norm": 0.2109375, "learning_rate": 1.745288571888708e-05, "loss": 1.0517, "num_tokens": 14582656330.0, "step": 3010 }, { "epoch": 0.5367201426024956, "grad_norm": 0.232421875, "learning_rate": 1.7451074400824622e-05, "loss": 1.0416, "num_tokens": 14588940659.0, "step": 3011 }, { "epoch": 0.5368983957219251, "grad_norm": 0.2216796875, "learning_rate": 1.744926254519277e-05, "loss": 1.0059, "num_tokens": 14595222292.0, "step": 3012 }, { "epoch": 0.5370766488413548, "grad_norm": 0.3046875, "learning_rate": 1.74474501521425e-05, "loss": 1.0355, "num_tokens": 14601472727.0, "step": 3013 }, { "epoch": 0.5372549019607843, "grad_norm": 0.236328125, "learning_rate": 1.7445637221824853e-05, "loss": 1.0006, "num_tokens": 14607756078.0, "step": 3014 }, { "epoch": 0.5374331550802139, "grad_norm": 0.25390625, "learning_rate": 1.7443823754390887e-05, "loss": 1.056, "num_tokens": 14614025072.0, "step": 3015 }, { "epoch": 0.5376114081996435, "grad_norm": 0.265625, "learning_rate": 1.744200974999173e-05, "loss": 1.0517, "num_tokens": 14620277062.0, "step": 3016 }, { "epoch": 0.5377896613190731, "grad_norm": 0.2333984375, "learning_rate": 1.7440195208778537e-05, "loss": 1.0412, "num_tokens": 14626552235.0, "step": 3017 }, { "epoch": 0.5379679144385027, "grad_norm": 0.23046875, "learning_rate": 1.7438380130902516e-05, "loss": 1.033, "num_tokens": 14632835976.0, "step": 3018 }, { "epoch": 0.5381461675579322, "grad_norm": 0.224609375, "learning_rate": 1.7436564516514914e-05, "loss": 1.024, "num_tokens": 14639117918.0, "step": 3019 }, { "epoch": 0.5383244206773619, "grad_norm": 0.2216796875, "learning_rate": 1.743474836576703e-05, "loss": 1.05, "num_tokens": 14645393336.0, "step": 3020 }, { "epoch": 0.5385026737967914, "grad_norm": 0.2216796875, "learning_rate": 1.7432931678810207e-05, "loss": 1.0139, "num_tokens": 14651676278.0, "step": 3021 }, { "epoch": 0.538680926916221, "grad_norm": 0.24609375, "learning_rate": 1.7431114455795823e-05, "loss": 1.0088, "num_tokens": 14657943150.0, "step": 3022 }, { "epoch": 0.5388591800356506, "grad_norm": 0.212890625, "learning_rate": 1.7429296696875312e-05, "loss": 1.0277, "num_tokens": 14664220926.0, "step": 3023 }, { "epoch": 0.5390374331550802, "grad_norm": 0.2294921875, "learning_rate": 1.742747840220015e-05, "loss": 1.0278, "num_tokens": 14670477325.0, "step": 3024 }, { "epoch": 0.5392156862745098, "grad_norm": 0.2236328125, "learning_rate": 1.742565957192185e-05, "loss": 1.0201, "num_tokens": 14676752840.0, "step": 3025 }, { "epoch": 0.5393939393939394, "grad_norm": 0.216796875, "learning_rate": 1.742384020619198e-05, "loss": 1.0543, "num_tokens": 14683036726.0, "step": 3026 }, { "epoch": 0.539572192513369, "grad_norm": 0.2373046875, "learning_rate": 1.742202030516215e-05, "loss": 1.0476, "num_tokens": 14689316480.0, "step": 3027 }, { "epoch": 0.5397504456327986, "grad_norm": 0.216796875, "learning_rate": 1.742019986898401e-05, "loss": 1.0488, "num_tokens": 14695585262.0, "step": 3028 }, { "epoch": 0.5399286987522282, "grad_norm": 0.2265625, "learning_rate": 1.741837889780925e-05, "loss": 1.0129, "num_tokens": 14701839097.0, "step": 3029 }, { "epoch": 0.5401069518716578, "grad_norm": 0.208984375, "learning_rate": 1.741655739178963e-05, "loss": 1.0332, "num_tokens": 14708098699.0, "step": 3030 }, { "epoch": 0.5402852049910873, "grad_norm": 0.2138671875, "learning_rate": 1.7414735351076915e-05, "loss": 1.0227, "num_tokens": 14714383302.0, "step": 3031 }, { "epoch": 0.5404634581105169, "grad_norm": 0.251953125, "learning_rate": 1.741291277582295e-05, "loss": 1.0209, "num_tokens": 14720643323.0, "step": 3032 }, { "epoch": 0.5406417112299465, "grad_norm": 0.23046875, "learning_rate": 1.7411089666179612e-05, "loss": 1.0251, "num_tokens": 14726897047.0, "step": 3033 }, { "epoch": 0.5408199643493761, "grad_norm": 0.2412109375, "learning_rate": 1.7409266022298813e-05, "loss": 1.0106, "num_tokens": 14733181070.0, "step": 3034 }, { "epoch": 0.5409982174688057, "grad_norm": 0.251953125, "learning_rate": 1.740744184433252e-05, "loss": 1.0507, "num_tokens": 14739433918.0, "step": 3035 }, { "epoch": 0.5411764705882353, "grad_norm": 0.216796875, "learning_rate": 1.7405617132432742e-05, "loss": 1.0172, "num_tokens": 14745719802.0, "step": 3036 }, { "epoch": 0.5413547237076649, "grad_norm": 0.2265625, "learning_rate": 1.7403791886751535e-05, "loss": 1.0385, "num_tokens": 14752002835.0, "step": 3037 }, { "epoch": 0.5415329768270944, "grad_norm": 0.2421875, "learning_rate": 1.7401966107440997e-05, "loss": 1.0255, "num_tokens": 14758286483.0, "step": 3038 }, { "epoch": 0.5417112299465241, "grad_norm": 0.244140625, "learning_rate": 1.7400139794653266e-05, "loss": 1.0213, "num_tokens": 14764554280.0, "step": 3039 }, { "epoch": 0.5418894830659536, "grad_norm": 0.265625, "learning_rate": 1.7398312948540534e-05, "loss": 1.0214, "num_tokens": 14770811818.0, "step": 3040 }, { "epoch": 0.5420677361853833, "grad_norm": 0.22265625, "learning_rate": 1.7396485569255032e-05, "loss": 1.0643, "num_tokens": 14777064278.0, "step": 3041 }, { "epoch": 0.5422459893048128, "grad_norm": 0.25390625, "learning_rate": 1.7394657656949034e-05, "loss": 1.0105, "num_tokens": 14783346441.0, "step": 3042 }, { "epoch": 0.5424242424242425, "grad_norm": 0.2578125, "learning_rate": 1.7392829211774862e-05, "loss": 1.0164, "num_tokens": 14789629168.0, "step": 3043 }, { "epoch": 0.542602495543672, "grad_norm": 0.23046875, "learning_rate": 1.739100023388488e-05, "loss": 1.0052, "num_tokens": 14795874509.0, "step": 3044 }, { "epoch": 0.5427807486631016, "grad_norm": 0.2431640625, "learning_rate": 1.7389170723431495e-05, "loss": 1.0212, "num_tokens": 14802134595.0, "step": 3045 }, { "epoch": 0.5429590017825312, "grad_norm": 0.2421875, "learning_rate": 1.7387340680567163e-05, "loss": 1.0695, "num_tokens": 14808402734.0, "step": 3046 }, { "epoch": 0.5431372549019607, "grad_norm": 0.23046875, "learning_rate": 1.738551010544438e-05, "loss": 1.0606, "num_tokens": 14814668211.0, "step": 3047 }, { "epoch": 0.5433155080213904, "grad_norm": 0.2392578125, "learning_rate": 1.738367899821569e-05, "loss": 1.0284, "num_tokens": 14820953586.0, "step": 3048 }, { "epoch": 0.5434937611408199, "grad_norm": 0.228515625, "learning_rate": 1.738184735903368e-05, "loss": 1.0507, "num_tokens": 14827207858.0, "step": 3049 }, { "epoch": 0.5436720142602496, "grad_norm": 0.2431640625, "learning_rate": 1.7380015188050985e-05, "loss": 1.0632, "num_tokens": 14833491992.0, "step": 3050 }, { "epoch": 0.5438502673796791, "grad_norm": 0.2197265625, "learning_rate": 1.737818248542027e-05, "loss": 1.0307, "num_tokens": 14839750547.0, "step": 3051 }, { "epoch": 0.5440285204991088, "grad_norm": 0.234375, "learning_rate": 1.7376349251294258e-05, "loss": 1.0123, "num_tokens": 14846034492.0, "step": 3052 }, { "epoch": 0.5442067736185383, "grad_norm": 0.2373046875, "learning_rate": 1.7374515485825717e-05, "loss": 1.0204, "num_tokens": 14852316909.0, "step": 3053 }, { "epoch": 0.544385026737968, "grad_norm": 0.2412109375, "learning_rate": 1.7372681189167453e-05, "loss": 1.0628, "num_tokens": 14858560308.0, "step": 3054 }, { "epoch": 0.5445632798573975, "grad_norm": 0.244140625, "learning_rate": 1.7370846361472312e-05, "loss": 1.0391, "num_tokens": 14864839982.0, "step": 3055 }, { "epoch": 0.5447415329768271, "grad_norm": 0.259765625, "learning_rate": 1.7369011002893203e-05, "loss": 1.0173, "num_tokens": 14871123119.0, "step": 3056 }, { "epoch": 0.5449197860962567, "grad_norm": 0.2431640625, "learning_rate": 1.736717511358306e-05, "loss": 1.0641, "num_tokens": 14877386464.0, "step": 3057 }, { "epoch": 0.5450980392156862, "grad_norm": 0.255859375, "learning_rate": 1.7365338693694863e-05, "loss": 1.0072, "num_tokens": 14883668390.0, "step": 3058 }, { "epoch": 0.5452762923351159, "grad_norm": 0.2265625, "learning_rate": 1.736350174338165e-05, "loss": 1.0324, "num_tokens": 14889953379.0, "step": 3059 }, { "epoch": 0.5454545454545454, "grad_norm": 0.26953125, "learning_rate": 1.736166426279649e-05, "loss": 1.0377, "num_tokens": 14896238914.0, "step": 3060 }, { "epoch": 0.5456327985739751, "grad_norm": 0.2314453125, "learning_rate": 1.7359826252092503e-05, "loss": 1.0551, "num_tokens": 14902517880.0, "step": 3061 }, { "epoch": 0.5458110516934046, "grad_norm": 0.2177734375, "learning_rate": 1.735798771142285e-05, "loss": 1.0248, "num_tokens": 14908802036.0, "step": 3062 }, { "epoch": 0.5459893048128343, "grad_norm": 0.2421875, "learning_rate": 1.7356148640940733e-05, "loss": 1.036, "num_tokens": 14915086146.0, "step": 3063 }, { "epoch": 0.5461675579322638, "grad_norm": 0.21484375, "learning_rate": 1.7354309040799407e-05, "loss": 1.0292, "num_tokens": 14921351358.0, "step": 3064 }, { "epoch": 0.5463458110516934, "grad_norm": 0.2314453125, "learning_rate": 1.735246891115217e-05, "loss": 1.0429, "num_tokens": 14927591734.0, "step": 3065 }, { "epoch": 0.546524064171123, "grad_norm": 0.21484375, "learning_rate": 1.735062825215235e-05, "loss": 1.027, "num_tokens": 14933852122.0, "step": 3066 }, { "epoch": 0.5467023172905526, "grad_norm": 0.228515625, "learning_rate": 1.7348787063953336e-05, "loss": 1.0315, "num_tokens": 14940136321.0, "step": 3067 }, { "epoch": 0.5468805704099822, "grad_norm": 0.2080078125, "learning_rate": 1.7346945346708547e-05, "loss": 1.0169, "num_tokens": 14946388736.0, "step": 3068 }, { "epoch": 0.5470588235294118, "grad_norm": 0.263671875, "learning_rate": 1.7345103100571467e-05, "loss": 1.0651, "num_tokens": 14952663882.0, "step": 3069 }, { "epoch": 0.5472370766488414, "grad_norm": 0.2294921875, "learning_rate": 1.73432603256956e-05, "loss": 1.0319, "num_tokens": 14958947791.0, "step": 3070 }, { "epoch": 0.5474153297682709, "grad_norm": 0.2333984375, "learning_rate": 1.7341417022234513e-05, "loss": 1.0451, "num_tokens": 14965232906.0, "step": 3071 }, { "epoch": 0.5475935828877005, "grad_norm": 0.228515625, "learning_rate": 1.73395731903418e-05, "loss": 1.0575, "num_tokens": 14971517388.0, "step": 3072 }, { "epoch": 0.5477718360071301, "grad_norm": 0.228515625, "learning_rate": 1.7337728830171118e-05, "loss": 1.0401, "num_tokens": 14977777803.0, "step": 3073 }, { "epoch": 0.5479500891265597, "grad_norm": 0.23828125, "learning_rate": 1.733588394187615e-05, "loss": 1.0196, "num_tokens": 14984060948.0, "step": 3074 }, { "epoch": 0.5481283422459893, "grad_norm": 0.2353515625, "learning_rate": 1.733403852561063e-05, "loss": 1.0089, "num_tokens": 14990346317.0, "step": 3075 }, { "epoch": 0.5483065953654189, "grad_norm": 0.232421875, "learning_rate": 1.7332192581528344e-05, "loss": 1.0692, "num_tokens": 14996631189.0, "step": 3076 }, { "epoch": 0.5484848484848485, "grad_norm": 0.220703125, "learning_rate": 1.7330346109783105e-05, "loss": 1.0213, "num_tokens": 15002911033.0, "step": 3077 }, { "epoch": 0.5486631016042781, "grad_norm": 0.2119140625, "learning_rate": 1.7328499110528793e-05, "loss": 1.0726, "num_tokens": 15009162081.0, "step": 3078 }, { "epoch": 0.5488413547237077, "grad_norm": 0.2373046875, "learning_rate": 1.7326651583919303e-05, "loss": 1.0002, "num_tokens": 15015427224.0, "step": 3079 }, { "epoch": 0.5490196078431373, "grad_norm": 0.2255859375, "learning_rate": 1.7324803530108606e-05, "loss": 1.0157, "num_tokens": 15021697230.0, "step": 3080 }, { "epoch": 0.5491978609625668, "grad_norm": 0.2470703125, "learning_rate": 1.7322954949250695e-05, "loss": 1.0081, "num_tokens": 15027954995.0, "step": 3081 }, { "epoch": 0.5493761140819964, "grad_norm": 0.2392578125, "learning_rate": 1.7321105841499606e-05, "loss": 1.0174, "num_tokens": 15034211294.0, "step": 3082 }, { "epoch": 0.549554367201426, "grad_norm": 0.2353515625, "learning_rate": 1.7319256207009433e-05, "loss": 1.0297, "num_tokens": 15040495702.0, "step": 3083 }, { "epoch": 0.5497326203208556, "grad_norm": 0.2373046875, "learning_rate": 1.7317406045934303e-05, "loss": 1.015, "num_tokens": 15046753474.0, "step": 3084 }, { "epoch": 0.5499108734402852, "grad_norm": 0.232421875, "learning_rate": 1.731555535842839e-05, "loss": 1.0326, "num_tokens": 15053014360.0, "step": 3085 }, { "epoch": 0.5500891265597148, "grad_norm": 0.22265625, "learning_rate": 1.731370414464591e-05, "loss": 1.0162, "num_tokens": 15059267546.0, "step": 3086 }, { "epoch": 0.5502673796791444, "grad_norm": 0.2578125, "learning_rate": 1.7311852404741133e-05, "loss": 1.0166, "num_tokens": 15065540326.0, "step": 3087 }, { "epoch": 0.550445632798574, "grad_norm": 0.2265625, "learning_rate": 1.731000013886836e-05, "loss": 1.0326, "num_tokens": 15071823961.0, "step": 3088 }, { "epoch": 0.5506238859180036, "grad_norm": 0.25390625, "learning_rate": 1.7308147347181942e-05, "loss": 1.0392, "num_tokens": 15078081259.0, "step": 3089 }, { "epoch": 0.5508021390374331, "grad_norm": 0.26171875, "learning_rate": 1.730629402983627e-05, "loss": 1.0377, "num_tokens": 15084364761.0, "step": 3090 }, { "epoch": 0.5509803921568628, "grad_norm": 0.248046875, "learning_rate": 1.7304440186985782e-05, "loss": 1.0149, "num_tokens": 15090647236.0, "step": 3091 }, { "epoch": 0.5511586452762923, "grad_norm": 0.255859375, "learning_rate": 1.7302585818784963e-05, "loss": 1.0135, "num_tokens": 15096913416.0, "step": 3092 }, { "epoch": 0.551336898395722, "grad_norm": 0.2578125, "learning_rate": 1.730073092538833e-05, "loss": 1.0224, "num_tokens": 15103198236.0, "step": 3093 }, { "epoch": 0.5515151515151515, "grad_norm": 0.251953125, "learning_rate": 1.729887550695046e-05, "loss": 1.0042, "num_tokens": 15109432970.0, "step": 3094 }, { "epoch": 0.551693404634581, "grad_norm": 0.2451171875, "learning_rate": 1.729701956362596e-05, "loss": 1.0132, "num_tokens": 15115716052.0, "step": 3095 }, { "epoch": 0.5518716577540107, "grad_norm": 0.2734375, "learning_rate": 1.7295163095569493e-05, "loss": 1.0342, "num_tokens": 15121999171.0, "step": 3096 }, { "epoch": 0.5520499108734402, "grad_norm": 0.23828125, "learning_rate": 1.729330610293575e-05, "loss": 1.0379, "num_tokens": 15128239887.0, "step": 3097 }, { "epoch": 0.5522281639928699, "grad_norm": 0.263671875, "learning_rate": 1.7291448585879476e-05, "loss": 1.0419, "num_tokens": 15134524386.0, "step": 3098 }, { "epoch": 0.5524064171122994, "grad_norm": 0.232421875, "learning_rate": 1.7289590544555463e-05, "loss": 1.0058, "num_tokens": 15140807157.0, "step": 3099 }, { "epoch": 0.5525846702317291, "grad_norm": 0.23828125, "learning_rate": 1.7287731979118542e-05, "loss": 1.0264, "num_tokens": 15147055783.0, "step": 3100 }, { "epoch": 0.5527629233511586, "grad_norm": 0.2265625, "learning_rate": 1.7285872889723584e-05, "loss": 1.0437, "num_tokens": 15153283295.0, "step": 3101 }, { "epoch": 0.5529411764705883, "grad_norm": 0.240234375, "learning_rate": 1.7284013276525505e-05, "loss": 1.0301, "num_tokens": 15159550169.0, "step": 3102 }, { "epoch": 0.5531194295900178, "grad_norm": 0.2265625, "learning_rate": 1.7282153139679274e-05, "loss": 1.0401, "num_tokens": 15165814466.0, "step": 3103 }, { "epoch": 0.5532976827094475, "grad_norm": 0.224609375, "learning_rate": 1.7280292479339895e-05, "loss": 1.0442, "num_tokens": 15172097387.0, "step": 3104 }, { "epoch": 0.553475935828877, "grad_norm": 0.2333984375, "learning_rate": 1.7278431295662412e-05, "loss": 1.0173, "num_tokens": 15178380112.0, "step": 3105 }, { "epoch": 0.5536541889483066, "grad_norm": 0.2294921875, "learning_rate": 1.7276569588801924e-05, "loss": 1.0121, "num_tokens": 15184636180.0, "step": 3106 }, { "epoch": 0.5538324420677362, "grad_norm": 0.26171875, "learning_rate": 1.7274707358913564e-05, "loss": 1.0738, "num_tokens": 15190897909.0, "step": 3107 }, { "epoch": 0.5540106951871657, "grad_norm": 0.224609375, "learning_rate": 1.7272844606152514e-05, "loss": 1.0233, "num_tokens": 15197109101.0, "step": 3108 }, { "epoch": 0.5541889483065954, "grad_norm": 0.224609375, "learning_rate": 1.727098133067399e-05, "loss": 1.0123, "num_tokens": 15203366047.0, "step": 3109 }, { "epoch": 0.5543672014260249, "grad_norm": 0.2392578125, "learning_rate": 1.7269117532633274e-05, "loss": 1.0368, "num_tokens": 15209633754.0, "step": 3110 }, { "epoch": 0.5545454545454546, "grad_norm": 0.2333984375, "learning_rate": 1.7267253212185668e-05, "loss": 1.0148, "num_tokens": 15215898623.0, "step": 3111 }, { "epoch": 0.5547237076648841, "grad_norm": 0.2373046875, "learning_rate": 1.726538836948652e-05, "loss": 1.0, "num_tokens": 15222184828.0, "step": 3112 }, { "epoch": 0.5549019607843138, "grad_norm": 0.2265625, "learning_rate": 1.7263523004691236e-05, "loss": 1.0344, "num_tokens": 15228445394.0, "step": 3113 }, { "epoch": 0.5550802139037433, "grad_norm": 0.251953125, "learning_rate": 1.7261657117955257e-05, "loss": 1.0065, "num_tokens": 15234728892.0, "step": 3114 }, { "epoch": 0.5552584670231729, "grad_norm": 0.234375, "learning_rate": 1.7259790709434068e-05, "loss": 1.0169, "num_tokens": 15241000335.0, "step": 3115 }, { "epoch": 0.5554367201426025, "grad_norm": 0.236328125, "learning_rate": 1.725792377928319e-05, "loss": 1.0089, "num_tokens": 15247248689.0, "step": 3116 }, { "epoch": 0.5556149732620321, "grad_norm": 0.2099609375, "learning_rate": 1.7256056327658204e-05, "loss": 1.0168, "num_tokens": 15253503736.0, "step": 3117 }, { "epoch": 0.5557932263814617, "grad_norm": 0.2138671875, "learning_rate": 1.7254188354714717e-05, "loss": 0.992, "num_tokens": 15259775352.0, "step": 3118 }, { "epoch": 0.5559714795008913, "grad_norm": 0.25390625, "learning_rate": 1.7252319860608398e-05, "loss": 1.0403, "num_tokens": 15266031280.0, "step": 3119 }, { "epoch": 0.5561497326203209, "grad_norm": 0.236328125, "learning_rate": 1.725045084549494e-05, "loss": 1.0347, "num_tokens": 15272293074.0, "step": 3120 }, { "epoch": 0.5563279857397504, "grad_norm": 0.2236328125, "learning_rate": 1.724858130953009e-05, "loss": 1.0206, "num_tokens": 15278531634.0, "step": 3121 }, { "epoch": 0.55650623885918, "grad_norm": 0.2216796875, "learning_rate": 1.7246711252869637e-05, "loss": 1.021, "num_tokens": 15284801729.0, "step": 3122 }, { "epoch": 0.5566844919786096, "grad_norm": 0.2353515625, "learning_rate": 1.7244840675669418e-05, "loss": 1.0141, "num_tokens": 15291081496.0, "step": 3123 }, { "epoch": 0.5568627450980392, "grad_norm": 0.236328125, "learning_rate": 1.72429695780853e-05, "loss": 1.0449, "num_tokens": 15297366126.0, "step": 3124 }, { "epoch": 0.5570409982174688, "grad_norm": 0.21484375, "learning_rate": 1.7241097960273214e-05, "loss": 1.0351, "num_tokens": 15303649962.0, "step": 3125 }, { "epoch": 0.5572192513368984, "grad_norm": 0.2265625, "learning_rate": 1.7239225822389107e-05, "loss": 1.0549, "num_tokens": 15309924517.0, "step": 3126 }, { "epoch": 0.557397504456328, "grad_norm": 0.26171875, "learning_rate": 1.7237353164588998e-05, "loss": 1.0233, "num_tokens": 15316164455.0, "step": 3127 }, { "epoch": 0.5575757575757576, "grad_norm": 0.2265625, "learning_rate": 1.7235479987028927e-05, "loss": 1.0152, "num_tokens": 15322447241.0, "step": 3128 }, { "epoch": 0.5577540106951872, "grad_norm": 0.2431640625, "learning_rate": 1.723360628986499e-05, "loss": 1.0133, "num_tokens": 15328712583.0, "step": 3129 }, { "epoch": 0.5579322638146168, "grad_norm": 0.240234375, "learning_rate": 1.7231732073253328e-05, "loss": 0.9849, "num_tokens": 15334980912.0, "step": 3130 }, { "epoch": 0.5581105169340463, "grad_norm": 0.2216796875, "learning_rate": 1.7229857337350114e-05, "loss": 1.0549, "num_tokens": 15341240968.0, "step": 3131 }, { "epoch": 0.558288770053476, "grad_norm": 0.25390625, "learning_rate": 1.7227982082311564e-05, "loss": 1.016, "num_tokens": 15347516324.0, "step": 3132 }, { "epoch": 0.5584670231729055, "grad_norm": 0.2177734375, "learning_rate": 1.7226106308293952e-05, "loss": 1.0285, "num_tokens": 15353799687.0, "step": 3133 }, { "epoch": 0.5586452762923351, "grad_norm": 0.2216796875, "learning_rate": 1.722423001545359e-05, "loss": 1.0122, "num_tokens": 15360070136.0, "step": 3134 }, { "epoch": 0.5588235294117647, "grad_norm": 0.2353515625, "learning_rate": 1.722235320394682e-05, "loss": 1.0329, "num_tokens": 15366334175.0, "step": 3135 }, { "epoch": 0.5590017825311943, "grad_norm": 0.2412109375, "learning_rate": 1.7220475873930042e-05, "loss": 1.0308, "num_tokens": 15372619634.0, "step": 3136 }, { "epoch": 0.5591800356506239, "grad_norm": 0.2294921875, "learning_rate": 1.7218598025559693e-05, "loss": 1.0486, "num_tokens": 15378902546.0, "step": 3137 }, { "epoch": 0.5593582887700534, "grad_norm": 0.255859375, "learning_rate": 1.721671965899226e-05, "loss": 1.0264, "num_tokens": 15385155773.0, "step": 3138 }, { "epoch": 0.5595365418894831, "grad_norm": 0.232421875, "learning_rate": 1.721484077438426e-05, "loss": 1.0136, "num_tokens": 15391422326.0, "step": 3139 }, { "epoch": 0.5597147950089126, "grad_norm": 0.26171875, "learning_rate": 1.721296137189226e-05, "loss": 1.0188, "num_tokens": 15397659795.0, "step": 3140 }, { "epoch": 0.5598930481283423, "grad_norm": 0.2236328125, "learning_rate": 1.721108145167288e-05, "loss": 1.0167, "num_tokens": 15403932638.0, "step": 3141 }, { "epoch": 0.5600713012477718, "grad_norm": 0.240234375, "learning_rate": 1.7209201013882766e-05, "loss": 1.0092, "num_tokens": 15410217113.0, "step": 3142 }, { "epoch": 0.5602495543672015, "grad_norm": 0.228515625, "learning_rate": 1.720732005867862e-05, "loss": 1.0147, "num_tokens": 15416500632.0, "step": 3143 }, { "epoch": 0.560427807486631, "grad_norm": 0.234375, "learning_rate": 1.720543858621718e-05, "loss": 1.0486, "num_tokens": 15422753694.0, "step": 3144 }, { "epoch": 0.5606060606060606, "grad_norm": 0.23046875, "learning_rate": 1.720355659665523e-05, "loss": 1.0099, "num_tokens": 15429038594.0, "step": 3145 }, { "epoch": 0.5607843137254902, "grad_norm": 0.2177734375, "learning_rate": 1.7201674090149602e-05, "loss": 1.002, "num_tokens": 15435297436.0, "step": 3146 }, { "epoch": 0.5609625668449197, "grad_norm": 0.224609375, "learning_rate": 1.7199791066857154e-05, "loss": 1.0093, "num_tokens": 15441579193.0, "step": 3147 }, { "epoch": 0.5611408199643494, "grad_norm": 0.2080078125, "learning_rate": 1.7197907526934808e-05, "loss": 1.0152, "num_tokens": 15447863224.0, "step": 3148 }, { "epoch": 0.5613190730837789, "grad_norm": 0.23046875, "learning_rate": 1.7196023470539517e-05, "loss": 1.0165, "num_tokens": 15454125829.0, "step": 3149 }, { "epoch": 0.5614973262032086, "grad_norm": 0.2236328125, "learning_rate": 1.719413889782828e-05, "loss": 1.0157, "num_tokens": 15460408467.0, "step": 3150 }, { "epoch": 0.5616755793226381, "grad_norm": 0.2236328125, "learning_rate": 1.7192253808958135e-05, "loss": 1.0243, "num_tokens": 15466678271.0, "step": 3151 }, { "epoch": 0.5618538324420678, "grad_norm": 0.2236328125, "learning_rate": 1.7190368204086175e-05, "loss": 1.0163, "num_tokens": 15472899613.0, "step": 3152 }, { "epoch": 0.5620320855614973, "grad_norm": 0.26171875, "learning_rate": 1.718848208336952e-05, "loss": 1.0331, "num_tokens": 15479183393.0, "step": 3153 }, { "epoch": 0.562210338680927, "grad_norm": 0.251953125, "learning_rate": 1.718659544696535e-05, "loss": 1.0095, "num_tokens": 15485445111.0, "step": 3154 }, { "epoch": 0.5623885918003565, "grad_norm": 0.232421875, "learning_rate": 1.7184708295030866e-05, "loss": 1.0143, "num_tokens": 15491730341.0, "step": 3155 }, { "epoch": 0.5625668449197861, "grad_norm": 0.2421875, "learning_rate": 1.7182820627723336e-05, "loss": 0.9953, "num_tokens": 15498014545.0, "step": 3156 }, { "epoch": 0.5627450980392157, "grad_norm": 0.2216796875, "learning_rate": 1.7180932445200054e-05, "loss": 1.0357, "num_tokens": 15504297527.0, "step": 3157 }, { "epoch": 0.5629233511586452, "grad_norm": 0.26171875, "learning_rate": 1.7179043747618364e-05, "loss": 1.0233, "num_tokens": 15510574752.0, "step": 3158 }, { "epoch": 0.5631016042780749, "grad_norm": 0.216796875, "learning_rate": 1.7177154535135653e-05, "loss": 1.029, "num_tokens": 15516858643.0, "step": 3159 }, { "epoch": 0.5632798573975044, "grad_norm": 0.294921875, "learning_rate": 1.717526480790935e-05, "loss": 0.9991, "num_tokens": 15523133688.0, "step": 3160 }, { "epoch": 0.5634581105169341, "grad_norm": 0.251953125, "learning_rate": 1.717337456609692e-05, "loss": 1.0143, "num_tokens": 15529372438.0, "step": 3161 }, { "epoch": 0.5636363636363636, "grad_norm": 0.2470703125, "learning_rate": 1.7171483809855882e-05, "loss": 1.0229, "num_tokens": 15535641457.0, "step": 3162 }, { "epoch": 0.5638146167557933, "grad_norm": 0.26953125, "learning_rate": 1.7169592539343798e-05, "loss": 0.9986, "num_tokens": 15541912292.0, "step": 3163 }, { "epoch": 0.5639928698752228, "grad_norm": 0.208984375, "learning_rate": 1.716770075471826e-05, "loss": 1.0307, "num_tokens": 15548195028.0, "step": 3164 }, { "epoch": 0.5641711229946524, "grad_norm": 0.2578125, "learning_rate": 1.7165808456136915e-05, "loss": 1.0415, "num_tokens": 15554478215.0, "step": 3165 }, { "epoch": 0.564349376114082, "grad_norm": 0.2197265625, "learning_rate": 1.7163915643757445e-05, "loss": 1.0019, "num_tokens": 15560718508.0, "step": 3166 }, { "epoch": 0.5645276292335116, "grad_norm": 0.259765625, "learning_rate": 1.7162022317737582e-05, "loss": 1.0302, "num_tokens": 15567002836.0, "step": 3167 }, { "epoch": 0.5647058823529412, "grad_norm": 0.2333984375, "learning_rate": 1.7160128478235095e-05, "loss": 1.054, "num_tokens": 15573263267.0, "step": 3168 }, { "epoch": 0.5648841354723708, "grad_norm": 0.2578125, "learning_rate": 1.71582341254078e-05, "loss": 1.0717, "num_tokens": 15579546066.0, "step": 3169 }, { "epoch": 0.5650623885918004, "grad_norm": 0.248046875, "learning_rate": 1.715633925941355e-05, "loss": 1.0536, "num_tokens": 15585830078.0, "step": 3170 }, { "epoch": 0.5652406417112299, "grad_norm": 0.244140625, "learning_rate": 1.7154443880410254e-05, "loss": 1.0386, "num_tokens": 15592107010.0, "step": 3171 }, { "epoch": 0.5654188948306595, "grad_norm": 0.251953125, "learning_rate": 1.7152547988555843e-05, "loss": 1.0273, "num_tokens": 15598391989.0, "step": 3172 }, { "epoch": 0.5655971479500891, "grad_norm": 0.2060546875, "learning_rate": 1.715065158400831e-05, "loss": 1.017, "num_tokens": 15604650289.0, "step": 3173 }, { "epoch": 0.5657754010695187, "grad_norm": 0.271484375, "learning_rate": 1.7148754666925677e-05, "loss": 1.0429, "num_tokens": 15610935263.0, "step": 3174 }, { "epoch": 0.5659536541889483, "grad_norm": 0.208984375, "learning_rate": 1.7146857237466018e-05, "loss": 1.039, "num_tokens": 15617221363.0, "step": 3175 }, { "epoch": 0.5661319073083779, "grad_norm": 0.234375, "learning_rate": 1.7144959295787446e-05, "loss": 1.0558, "num_tokens": 15623490292.0, "step": 3176 }, { "epoch": 0.5663101604278075, "grad_norm": 0.2353515625, "learning_rate": 1.7143060842048117e-05, "loss": 1.0184, "num_tokens": 15629747775.0, "step": 3177 }, { "epoch": 0.5664884135472371, "grad_norm": 0.2236328125, "learning_rate": 1.7141161876406228e-05, "loss": 1.0496, "num_tokens": 15636015092.0, "step": 3178 }, { "epoch": 0.5666666666666667, "grad_norm": 0.2392578125, "learning_rate": 1.713926239902002e-05, "loss": 0.9943, "num_tokens": 15642276778.0, "step": 3179 }, { "epoch": 0.5668449197860963, "grad_norm": 0.208984375, "learning_rate": 1.713736241004778e-05, "loss": 1.0232, "num_tokens": 15648549068.0, "step": 3180 }, { "epoch": 0.5670231729055258, "grad_norm": 0.2333984375, "learning_rate": 1.7135461909647834e-05, "loss": 1.0268, "num_tokens": 15654801839.0, "step": 3181 }, { "epoch": 0.5672014260249555, "grad_norm": 0.2060546875, "learning_rate": 1.7133560897978542e-05, "loss": 1.0406, "num_tokens": 15661063796.0, "step": 3182 }, { "epoch": 0.567379679144385, "grad_norm": 0.25390625, "learning_rate": 1.7131659375198328e-05, "loss": 1.0568, "num_tokens": 15667347628.0, "step": 3183 }, { "epoch": 0.5675579322638146, "grad_norm": 0.23828125, "learning_rate": 1.7129757341465645e-05, "loss": 1.0684, "num_tokens": 15673575912.0, "step": 3184 }, { "epoch": 0.5677361853832442, "grad_norm": 0.24609375, "learning_rate": 1.712785479693898e-05, "loss": 1.0325, "num_tokens": 15679848884.0, "step": 3185 }, { "epoch": 0.5679144385026738, "grad_norm": 0.23828125, "learning_rate": 1.7125951741776884e-05, "loss": 1.0152, "num_tokens": 15686131269.0, "step": 3186 }, { "epoch": 0.5680926916221034, "grad_norm": 0.255859375, "learning_rate": 1.7124048176137932e-05, "loss": 0.9872, "num_tokens": 15692390288.0, "step": 3187 }, { "epoch": 0.568270944741533, "grad_norm": 0.255859375, "learning_rate": 1.712214410018075e-05, "loss": 1.0227, "num_tokens": 15698644549.0, "step": 3188 }, { "epoch": 0.5684491978609626, "grad_norm": 0.23046875, "learning_rate": 1.7120239514064007e-05, "loss": 1.0262, "num_tokens": 15704896656.0, "step": 3189 }, { "epoch": 0.5686274509803921, "grad_norm": 0.232421875, "learning_rate": 1.7118334417946407e-05, "loss": 0.9835, "num_tokens": 15711172062.0, "step": 3190 }, { "epoch": 0.5688057040998218, "grad_norm": 0.2197265625, "learning_rate": 1.7116428811986706e-05, "loss": 1.0245, "num_tokens": 15717455540.0, "step": 3191 }, { "epoch": 0.5689839572192513, "grad_norm": 0.2236328125, "learning_rate": 1.7114522696343703e-05, "loss": 1.0318, "num_tokens": 15723740090.0, "step": 3192 }, { "epoch": 0.569162210338681, "grad_norm": 0.23046875, "learning_rate": 1.711261607117623e-05, "loss": 1.0244, "num_tokens": 15730019306.0, "step": 3193 }, { "epoch": 0.5693404634581105, "grad_norm": 0.2236328125, "learning_rate": 1.7110708936643162e-05, "loss": 1.0326, "num_tokens": 15736303249.0, "step": 3194 }, { "epoch": 0.56951871657754, "grad_norm": 0.2294921875, "learning_rate": 1.710880129290343e-05, "loss": 1.0146, "num_tokens": 15742587822.0, "step": 3195 }, { "epoch": 0.5696969696969697, "grad_norm": 0.2265625, "learning_rate": 1.7106893140115995e-05, "loss": 1.0253, "num_tokens": 15748848240.0, "step": 3196 }, { "epoch": 0.5698752228163992, "grad_norm": 0.2255859375, "learning_rate": 1.7104984478439865e-05, "loss": 1.054, "num_tokens": 15755125828.0, "step": 3197 }, { "epoch": 0.5700534759358289, "grad_norm": 0.2138671875, "learning_rate": 1.7103075308034082e-05, "loss": 1.0166, "num_tokens": 15761408418.0, "step": 3198 }, { "epoch": 0.5702317290552584, "grad_norm": 0.2177734375, "learning_rate": 1.7101165629057745e-05, "loss": 1.0623, "num_tokens": 15767689735.0, "step": 3199 }, { "epoch": 0.5704099821746881, "grad_norm": 0.216796875, "learning_rate": 1.7099255441669986e-05, "loss": 1.035, "num_tokens": 15773973123.0, "step": 3200 }, { "epoch": 0.5705882352941176, "grad_norm": 0.2216796875, "learning_rate": 1.709734474602998e-05, "loss": 1.0322, "num_tokens": 15780241084.0, "step": 3201 }, { "epoch": 0.5707664884135473, "grad_norm": 0.2216796875, "learning_rate": 1.709543354229695e-05, "loss": 1.0408, "num_tokens": 15786525375.0, "step": 3202 }, { "epoch": 0.5709447415329768, "grad_norm": 0.21484375, "learning_rate": 1.7093521830630153e-05, "loss": 1.0239, "num_tokens": 15792806711.0, "step": 3203 }, { "epoch": 0.5711229946524065, "grad_norm": 0.2236328125, "learning_rate": 1.709160961118889e-05, "loss": 1.025, "num_tokens": 15799088184.0, "step": 3204 }, { "epoch": 0.571301247771836, "grad_norm": 0.21484375, "learning_rate": 1.7089696884132512e-05, "loss": 1.0462, "num_tokens": 15805369298.0, "step": 3205 }, { "epoch": 0.5714795008912656, "grad_norm": 0.216796875, "learning_rate": 1.7087783649620405e-05, "loss": 1.0234, "num_tokens": 15811652085.0, "step": 3206 }, { "epoch": 0.5716577540106952, "grad_norm": 0.228515625, "learning_rate": 1.7085869907811997e-05, "loss": 1.0387, "num_tokens": 15817936716.0, "step": 3207 }, { "epoch": 0.5718360071301247, "grad_norm": 0.20703125, "learning_rate": 1.7083955658866768e-05, "loss": 1.0415, "num_tokens": 15824221417.0, "step": 3208 }, { "epoch": 0.5720142602495544, "grad_norm": 0.220703125, "learning_rate": 1.7082040902944223e-05, "loss": 1.0239, "num_tokens": 15830503614.0, "step": 3209 }, { "epoch": 0.5721925133689839, "grad_norm": 0.212890625, "learning_rate": 1.708012564020392e-05, "loss": 1.031, "num_tokens": 15836788030.0, "step": 3210 }, { "epoch": 0.5723707664884136, "grad_norm": 0.220703125, "learning_rate": 1.7078209870805465e-05, "loss": 1.0381, "num_tokens": 15843056692.0, "step": 3211 }, { "epoch": 0.5725490196078431, "grad_norm": 0.234375, "learning_rate": 1.70762935949085e-05, "loss": 1.0363, "num_tokens": 15849340965.0, "step": 3212 }, { "epoch": 0.5727272727272728, "grad_norm": 0.2353515625, "learning_rate": 1.7074376812672698e-05, "loss": 1.0241, "num_tokens": 15855625744.0, "step": 3213 }, { "epoch": 0.5729055258467023, "grad_norm": 0.2294921875, "learning_rate": 1.7072459524257796e-05, "loss": 1.043, "num_tokens": 15861909768.0, "step": 3214 }, { "epoch": 0.5730837789661319, "grad_norm": 0.23828125, "learning_rate": 1.7070541729823554e-05, "loss": 1.0325, "num_tokens": 15868193815.0, "step": 3215 }, { "epoch": 0.5732620320855615, "grad_norm": 0.23046875, "learning_rate": 1.706862342952979e-05, "loss": 1.0483, "num_tokens": 15874477676.0, "step": 3216 }, { "epoch": 0.5734402852049911, "grad_norm": 0.2275390625, "learning_rate": 1.706670462353635e-05, "loss": 1.0294, "num_tokens": 15880755173.0, "step": 3217 }, { "epoch": 0.5736185383244207, "grad_norm": 0.2216796875, "learning_rate": 1.7064785312003128e-05, "loss": 1.0316, "num_tokens": 15887038538.0, "step": 3218 }, { "epoch": 0.5737967914438503, "grad_norm": 0.2265625, "learning_rate": 1.706286549509007e-05, "loss": 1.0144, "num_tokens": 15893323915.0, "step": 3219 }, { "epoch": 0.5739750445632799, "grad_norm": 0.25, "learning_rate": 1.7060945172957143e-05, "loss": 0.996, "num_tokens": 15899600439.0, "step": 3220 }, { "epoch": 0.5741532976827094, "grad_norm": 0.2353515625, "learning_rate": 1.7059024345764375e-05, "loss": 1.0295, "num_tokens": 15905883170.0, "step": 3221 }, { "epoch": 0.574331550802139, "grad_norm": 0.22265625, "learning_rate": 1.7057103013671828e-05, "loss": 1.0102, "num_tokens": 15912166905.0, "step": 3222 }, { "epoch": 0.5745098039215686, "grad_norm": 0.22265625, "learning_rate": 1.7055181176839602e-05, "loss": 0.9834, "num_tokens": 15918448914.0, "step": 3223 }, { "epoch": 0.5746880570409982, "grad_norm": 0.2333984375, "learning_rate": 1.705325883542785e-05, "loss": 1.0346, "num_tokens": 15924685664.0, "step": 3224 }, { "epoch": 0.5748663101604278, "grad_norm": 0.23046875, "learning_rate": 1.705133598959676e-05, "loss": 1.0063, "num_tokens": 15930966891.0, "step": 3225 }, { "epoch": 0.5750445632798574, "grad_norm": 0.2294921875, "learning_rate": 1.704941263950656e-05, "loss": 1.0214, "num_tokens": 15937219091.0, "step": 3226 }, { "epoch": 0.575222816399287, "grad_norm": 0.23046875, "learning_rate": 1.7047488785317527e-05, "loss": 1.0039, "num_tokens": 15943486251.0, "step": 3227 }, { "epoch": 0.5754010695187166, "grad_norm": 0.2353515625, "learning_rate": 1.7045564427189974e-05, "loss": 1.0592, "num_tokens": 15949771070.0, "step": 3228 }, { "epoch": 0.5755793226381462, "grad_norm": 0.23046875, "learning_rate": 1.7043639565284257e-05, "loss": 1.0257, "num_tokens": 15956054309.0, "step": 3229 }, { "epoch": 0.5757575757575758, "grad_norm": 0.2431640625, "learning_rate": 1.7041714199760782e-05, "loss": 1.0428, "num_tokens": 15962337377.0, "step": 3230 }, { "epoch": 0.5759358288770053, "grad_norm": 0.212890625, "learning_rate": 1.703978833077998e-05, "loss": 1.0146, "num_tokens": 15968597788.0, "step": 3231 }, { "epoch": 0.576114081996435, "grad_norm": 0.2353515625, "learning_rate": 1.703786195850234e-05, "loss": 1.0302, "num_tokens": 15974881316.0, "step": 3232 }, { "epoch": 0.5762923351158645, "grad_norm": 0.2099609375, "learning_rate": 1.703593508308839e-05, "loss": 1.0469, "num_tokens": 15981163938.0, "step": 3233 }, { "epoch": 0.5764705882352941, "grad_norm": 0.244140625, "learning_rate": 1.703400770469869e-05, "loss": 1.0397, "num_tokens": 15987389554.0, "step": 3234 }, { "epoch": 0.5766488413547237, "grad_norm": 0.2314453125, "learning_rate": 1.703207982349385e-05, "loss": 1.0172, "num_tokens": 15993674404.0, "step": 3235 }, { "epoch": 0.5768270944741533, "grad_norm": 0.240234375, "learning_rate": 1.7030151439634526e-05, "loss": 1.053, "num_tokens": 15999934793.0, "step": 3236 }, { "epoch": 0.5770053475935829, "grad_norm": 0.220703125, "learning_rate": 1.7028222553281404e-05, "loss": 1.0168, "num_tokens": 16006211096.0, "step": 3237 }, { "epoch": 0.5771836007130124, "grad_norm": 0.255859375, "learning_rate": 1.7026293164595223e-05, "loss": 1.0127, "num_tokens": 16012457725.0, "step": 3238 }, { "epoch": 0.5773618538324421, "grad_norm": 0.2373046875, "learning_rate": 1.702436327373676e-05, "loss": 1.0164, "num_tokens": 16018741100.0, "step": 3239 }, { "epoch": 0.5775401069518716, "grad_norm": 0.2265625, "learning_rate": 1.7022432880866827e-05, "loss": 1.0026, "num_tokens": 16024982182.0, "step": 3240 }, { "epoch": 0.5777183600713013, "grad_norm": 0.25, "learning_rate": 1.7020501986146295e-05, "loss": 1.0122, "num_tokens": 16031265744.0, "step": 3241 }, { "epoch": 0.5778966131907308, "grad_norm": 0.228515625, "learning_rate": 1.7018570589736055e-05, "loss": 1.0459, "num_tokens": 16037500620.0, "step": 3242 }, { "epoch": 0.5780748663101605, "grad_norm": 0.26953125, "learning_rate": 1.7016638691797056e-05, "loss": 1.0073, "num_tokens": 16043786014.0, "step": 3243 }, { "epoch": 0.57825311942959, "grad_norm": 0.23828125, "learning_rate": 1.7014706292490283e-05, "loss": 1.0318, "num_tokens": 16050071641.0, "step": 3244 }, { "epoch": 0.5784313725490197, "grad_norm": 0.232421875, "learning_rate": 1.701277339197676e-05, "loss": 1.0144, "num_tokens": 16056302708.0, "step": 3245 }, { "epoch": 0.5786096256684492, "grad_norm": 0.2578125, "learning_rate": 1.701083999041756e-05, "loss": 0.9931, "num_tokens": 16062553611.0, "step": 3246 }, { "epoch": 0.5787878787878787, "grad_norm": 0.23828125, "learning_rate": 1.700890608797379e-05, "loss": 1.0122, "num_tokens": 16068831571.0, "step": 3247 }, { "epoch": 0.5789661319073084, "grad_norm": 0.2392578125, "learning_rate": 1.7006971684806608e-05, "loss": 0.9995, "num_tokens": 16075092146.0, "step": 3248 }, { "epoch": 0.5791443850267379, "grad_norm": 0.2275390625, "learning_rate": 1.7005036781077208e-05, "loss": 1.0359, "num_tokens": 16081362020.0, "step": 3249 }, { "epoch": 0.5793226381461676, "grad_norm": 0.23828125, "learning_rate": 1.700310137694682e-05, "loss": 0.9942, "num_tokens": 16087595516.0, "step": 3250 }, { "epoch": 0.5795008912655971, "grad_norm": 0.2119140625, "learning_rate": 1.700116547257672e-05, "loss": 1.044, "num_tokens": 16093860420.0, "step": 3251 }, { "epoch": 0.5796791443850268, "grad_norm": 0.2578125, "learning_rate": 1.699922906812824e-05, "loss": 1.0162, "num_tokens": 16100121548.0, "step": 3252 }, { "epoch": 0.5798573975044563, "grad_norm": 0.2177734375, "learning_rate": 1.6997292163762726e-05, "loss": 1.0468, "num_tokens": 16106401268.0, "step": 3253 }, { "epoch": 0.580035650623886, "grad_norm": 0.2236328125, "learning_rate": 1.699535475964159e-05, "loss": 1.0208, "num_tokens": 16112685875.0, "step": 3254 }, { "epoch": 0.5802139037433155, "grad_norm": 0.2216796875, "learning_rate": 1.6993416855926274e-05, "loss": 1.0287, "num_tokens": 16118937999.0, "step": 3255 }, { "epoch": 0.5803921568627451, "grad_norm": 0.224609375, "learning_rate": 1.699147845277827e-05, "loss": 1.0104, "num_tokens": 16125222397.0, "step": 3256 }, { "epoch": 0.5805704099821747, "grad_norm": 0.2275390625, "learning_rate": 1.6989539550359092e-05, "loss": 1.0021, "num_tokens": 16131480195.0, "step": 3257 }, { "epoch": 0.5807486631016042, "grad_norm": 0.2099609375, "learning_rate": 1.698760014883032e-05, "loss": 1.0145, "num_tokens": 16137741263.0, "step": 3258 }, { "epoch": 0.5809269162210339, "grad_norm": 0.2265625, "learning_rate": 1.698566024835356e-05, "loss": 1.0444, "num_tokens": 16144024587.0, "step": 3259 }, { "epoch": 0.5811051693404634, "grad_norm": 0.20703125, "learning_rate": 1.6983719849090465e-05, "loss": 1.0191, "num_tokens": 16150302526.0, "step": 3260 }, { "epoch": 0.5812834224598931, "grad_norm": 0.228515625, "learning_rate": 1.6981778951202733e-05, "loss": 1.0677, "num_tokens": 16156558898.0, "step": 3261 }, { "epoch": 0.5814616755793226, "grad_norm": 0.2265625, "learning_rate": 1.697983755485209e-05, "loss": 1.0148, "num_tokens": 16162814247.0, "step": 3262 }, { "epoch": 0.5816399286987523, "grad_norm": 0.228515625, "learning_rate": 1.6977895660200324e-05, "loss": 1.0546, "num_tokens": 16169098359.0, "step": 3263 }, { "epoch": 0.5818181818181818, "grad_norm": 0.23828125, "learning_rate": 1.6975953267409246e-05, "loss": 1.0166, "num_tokens": 16175367303.0, "step": 3264 }, { "epoch": 0.5819964349376114, "grad_norm": 0.2275390625, "learning_rate": 1.697401037664072e-05, "loss": 1.0247, "num_tokens": 16181649340.0, "step": 3265 }, { "epoch": 0.582174688057041, "grad_norm": 0.2236328125, "learning_rate": 1.6972066988056642e-05, "loss": 1.0147, "num_tokens": 16187933014.0, "step": 3266 }, { "epoch": 0.5823529411764706, "grad_norm": 0.2216796875, "learning_rate": 1.6970123101818962e-05, "loss": 1.002, "num_tokens": 16194148057.0, "step": 3267 }, { "epoch": 0.5825311942959002, "grad_norm": 0.2158203125, "learning_rate": 1.696817871808966e-05, "loss": 0.9966, "num_tokens": 16200327460.0, "step": 3268 }, { "epoch": 0.5827094474153298, "grad_norm": 0.2255859375, "learning_rate": 1.696623383703077e-05, "loss": 1.0247, "num_tokens": 16206584813.0, "step": 3269 }, { "epoch": 0.5828877005347594, "grad_norm": 0.2197265625, "learning_rate": 1.6964288458804344e-05, "loss": 1.032, "num_tokens": 16212839433.0, "step": 3270 }, { "epoch": 0.5830659536541889, "grad_norm": 0.212890625, "learning_rate": 1.69623425835725e-05, "loss": 1.0079, "num_tokens": 16219092109.0, "step": 3271 }, { "epoch": 0.5832442067736185, "grad_norm": 0.2314453125, "learning_rate": 1.6960396211497388e-05, "loss": 0.9893, "num_tokens": 16225375203.0, "step": 3272 }, { "epoch": 0.5834224598930481, "grad_norm": 0.2373046875, "learning_rate": 1.6958449342741204e-05, "loss": 1.0303, "num_tokens": 16231656922.0, "step": 3273 }, { "epoch": 0.5836007130124777, "grad_norm": 0.228515625, "learning_rate": 1.6956501977466174e-05, "loss": 1.0262, "num_tokens": 16237933126.0, "step": 3274 }, { "epoch": 0.5837789661319073, "grad_norm": 0.240234375, "learning_rate": 1.6954554115834574e-05, "loss": 1.0129, "num_tokens": 16244192865.0, "step": 3275 }, { "epoch": 0.5839572192513369, "grad_norm": 0.20703125, "learning_rate": 1.695260575800872e-05, "loss": 1.0365, "num_tokens": 16250475362.0, "step": 3276 }, { "epoch": 0.5841354723707665, "grad_norm": 0.2138671875, "learning_rate": 1.6950656904150967e-05, "loss": 1.02, "num_tokens": 16256746343.0, "step": 3277 }, { "epoch": 0.5843137254901961, "grad_norm": 0.22265625, "learning_rate": 1.6948707554423716e-05, "loss": 1.0137, "num_tokens": 16263003992.0, "step": 3278 }, { "epoch": 0.5844919786096257, "grad_norm": 0.224609375, "learning_rate": 1.694675770898941e-05, "loss": 1.0184, "num_tokens": 16269259220.0, "step": 3279 }, { "epoch": 0.5846702317290553, "grad_norm": 0.228515625, "learning_rate": 1.6944807368010527e-05, "loss": 0.9966, "num_tokens": 16275541777.0, "step": 3280 }, { "epoch": 0.5848484848484848, "grad_norm": 0.21484375, "learning_rate": 1.6942856531649587e-05, "loss": 1.0022, "num_tokens": 16281807157.0, "step": 3281 }, { "epoch": 0.5850267379679145, "grad_norm": 0.22265625, "learning_rate": 1.6940905200069154e-05, "loss": 1.0288, "num_tokens": 16288091778.0, "step": 3282 }, { "epoch": 0.585204991087344, "grad_norm": 0.2099609375, "learning_rate": 1.6938953373431835e-05, "loss": 1.0116, "num_tokens": 16294376891.0, "step": 3283 }, { "epoch": 0.5853832442067736, "grad_norm": 0.240234375, "learning_rate": 1.6937001051900277e-05, "loss": 1.0475, "num_tokens": 16300619616.0, "step": 3284 }, { "epoch": 0.5855614973262032, "grad_norm": 0.2236328125, "learning_rate": 1.693504823563716e-05, "loss": 1.0384, "num_tokens": 16306866832.0, "step": 3285 }, { "epoch": 0.5857397504456328, "grad_norm": 0.25390625, "learning_rate": 1.6933094924805225e-05, "loss": 1.0021, "num_tokens": 16313150720.0, "step": 3286 }, { "epoch": 0.5859180035650624, "grad_norm": 0.236328125, "learning_rate": 1.6931141119567235e-05, "loss": 1.0248, "num_tokens": 16319410008.0, "step": 3287 }, { "epoch": 0.586096256684492, "grad_norm": 0.2451171875, "learning_rate": 1.6929186820086e-05, "loss": 1.024, "num_tokens": 16325691667.0, "step": 3288 }, { "epoch": 0.5862745098039216, "grad_norm": 0.251953125, "learning_rate": 1.6927232026524373e-05, "loss": 1.0394, "num_tokens": 16331961582.0, "step": 3289 }, { "epoch": 0.5864527629233511, "grad_norm": 0.244140625, "learning_rate": 1.6925276739045245e-05, "loss": 1.0101, "num_tokens": 16338246285.0, "step": 3290 }, { "epoch": 0.5866310160427808, "grad_norm": 0.2412109375, "learning_rate": 1.6923320957811554e-05, "loss": 1.0436, "num_tokens": 16344530261.0, "step": 3291 }, { "epoch": 0.5868092691622103, "grad_norm": 0.279296875, "learning_rate": 1.692136468298628e-05, "loss": 1.0415, "num_tokens": 16350811699.0, "step": 3292 }, { "epoch": 0.58698752228164, "grad_norm": 0.2275390625, "learning_rate": 1.691940791473243e-05, "loss": 1.0166, "num_tokens": 16357079220.0, "step": 3293 }, { "epoch": 0.5871657754010695, "grad_norm": 0.2890625, "learning_rate": 1.6917450653213064e-05, "loss": 1.0448, "num_tokens": 16363362585.0, "step": 3294 }, { "epoch": 0.5873440285204992, "grad_norm": 0.28125, "learning_rate": 1.691549289859129e-05, "loss": 1.0171, "num_tokens": 16369644852.0, "step": 3295 }, { "epoch": 0.5875222816399287, "grad_norm": 0.251953125, "learning_rate": 1.6913534651030238e-05, "loss": 1.0285, "num_tokens": 16375908330.0, "step": 3296 }, { "epoch": 0.5877005347593582, "grad_norm": 0.279296875, "learning_rate": 1.691157591069309e-05, "loss": 0.9916, "num_tokens": 16382153533.0, "step": 3297 }, { "epoch": 0.5878787878787879, "grad_norm": 0.2392578125, "learning_rate": 1.6909616677743078e-05, "loss": 1.0387, "num_tokens": 16388374094.0, "step": 3298 }, { "epoch": 0.5880570409982174, "grad_norm": 0.26171875, "learning_rate": 1.6907656952343456e-05, "loss": 1.046, "num_tokens": 16394651134.0, "step": 3299 }, { "epoch": 0.5882352941176471, "grad_norm": 0.2412109375, "learning_rate": 1.690569673465753e-05, "loss": 1.0358, "num_tokens": 16400908471.0, "step": 3300 }, { "epoch": 0.5884135472370766, "grad_norm": 0.2451171875, "learning_rate": 1.6903736024848647e-05, "loss": 1.059, "num_tokens": 16407190472.0, "step": 3301 }, { "epoch": 0.5885918003565063, "grad_norm": 0.240234375, "learning_rate": 1.690177482308019e-05, "loss": 0.9989, "num_tokens": 16413441672.0, "step": 3302 }, { "epoch": 0.5887700534759358, "grad_norm": 0.205078125, "learning_rate": 1.6899813129515592e-05, "loss": 1.0276, "num_tokens": 16419725670.0, "step": 3303 }, { "epoch": 0.5889483065953655, "grad_norm": 0.2490234375, "learning_rate": 1.6897850944318316e-05, "loss": 1.0301, "num_tokens": 16426010427.0, "step": 3304 }, { "epoch": 0.589126559714795, "grad_norm": 0.2294921875, "learning_rate": 1.6895888267651873e-05, "loss": 0.999, "num_tokens": 16432248652.0, "step": 3305 }, { "epoch": 0.5893048128342246, "grad_norm": 0.2578125, "learning_rate": 1.689392509967982e-05, "loss": 1.0371, "num_tokens": 16438515423.0, "step": 3306 }, { "epoch": 0.5894830659536542, "grad_norm": 0.2490234375, "learning_rate": 1.6891961440565733e-05, "loss": 1.0365, "num_tokens": 16444772968.0, "step": 3307 }, { "epoch": 0.5896613190730838, "grad_norm": 0.248046875, "learning_rate": 1.688999729047326e-05, "loss": 1.034, "num_tokens": 16451055396.0, "step": 3308 }, { "epoch": 0.5898395721925134, "grad_norm": 0.2373046875, "learning_rate": 1.6888032649566063e-05, "loss": 1.0652, "num_tokens": 16457339743.0, "step": 3309 }, { "epoch": 0.5900178253119429, "grad_norm": 0.251953125, "learning_rate": 1.688606751800786e-05, "loss": 1.019, "num_tokens": 16463595844.0, "step": 3310 }, { "epoch": 0.5901960784313726, "grad_norm": 0.2353515625, "learning_rate": 1.6884101895962408e-05, "loss": 0.9921, "num_tokens": 16469879686.0, "step": 3311 }, { "epoch": 0.5903743315508021, "grad_norm": 0.2265625, "learning_rate": 1.6882135783593498e-05, "loss": 1.0238, "num_tokens": 16476163090.0, "step": 3312 }, { "epoch": 0.5905525846702318, "grad_norm": 0.220703125, "learning_rate": 1.6880169181064975e-05, "loss": 1.0265, "num_tokens": 16482445615.0, "step": 3313 }, { "epoch": 0.5907308377896613, "grad_norm": 0.2431640625, "learning_rate": 1.6878202088540706e-05, "loss": 1.0227, "num_tokens": 16488729397.0, "step": 3314 }, { "epoch": 0.5909090909090909, "grad_norm": 0.2119140625, "learning_rate": 1.6876234506184615e-05, "loss": 1.0268, "num_tokens": 16495013158.0, "step": 3315 }, { "epoch": 0.5910873440285205, "grad_norm": 0.2275390625, "learning_rate": 1.687426643416066e-05, "loss": 1.041, "num_tokens": 16501271708.0, "step": 3316 }, { "epoch": 0.5912655971479501, "grad_norm": 0.2431640625, "learning_rate": 1.687229787263284e-05, "loss": 1.0336, "num_tokens": 16507554794.0, "step": 3317 }, { "epoch": 0.5914438502673797, "grad_norm": 0.2138671875, "learning_rate": 1.6870328821765195e-05, "loss": 0.9929, "num_tokens": 16513839171.0, "step": 3318 }, { "epoch": 0.5916221033868093, "grad_norm": 0.251953125, "learning_rate": 1.686835928172181e-05, "loss": 1.025, "num_tokens": 16520098948.0, "step": 3319 }, { "epoch": 0.5918003565062389, "grad_norm": 0.212890625, "learning_rate": 1.6866389252666803e-05, "loss": 1.0171, "num_tokens": 16526384688.0, "step": 3320 }, { "epoch": 0.5919786096256684, "grad_norm": 0.216796875, "learning_rate": 1.6864418734764337e-05, "loss": 1.0123, "num_tokens": 16532667971.0, "step": 3321 }, { "epoch": 0.592156862745098, "grad_norm": 0.251953125, "learning_rate": 1.6862447728178627e-05, "loss": 0.999, "num_tokens": 16538921562.0, "step": 3322 }, { "epoch": 0.5923351158645276, "grad_norm": 0.23046875, "learning_rate": 1.68604762330739e-05, "loss": 1.0524, "num_tokens": 16545195487.0, "step": 3323 }, { "epoch": 0.5925133689839572, "grad_norm": 0.263671875, "learning_rate": 1.6858504249614452e-05, "loss": 1.0222, "num_tokens": 16551480705.0, "step": 3324 }, { "epoch": 0.5926916221033868, "grad_norm": 0.24609375, "learning_rate": 1.6856531777964602e-05, "loss": 1.0441, "num_tokens": 16557739930.0, "step": 3325 }, { "epoch": 0.5928698752228164, "grad_norm": 0.25390625, "learning_rate": 1.6854558818288726e-05, "loss": 1.0638, "num_tokens": 16563997506.0, "step": 3326 }, { "epoch": 0.593048128342246, "grad_norm": 0.2490234375, "learning_rate": 1.685258537075122e-05, "loss": 1.0122, "num_tokens": 16570233470.0, "step": 3327 }, { "epoch": 0.5932263814616756, "grad_norm": 0.23828125, "learning_rate": 1.685061143551654e-05, "loss": 1.0301, "num_tokens": 16576511003.0, "step": 3328 }, { "epoch": 0.5934046345811052, "grad_norm": 0.24609375, "learning_rate": 1.6848637012749175e-05, "loss": 1.0202, "num_tokens": 16582793986.0, "step": 3329 }, { "epoch": 0.5935828877005348, "grad_norm": 0.2392578125, "learning_rate": 1.6846662102613646e-05, "loss": 1.0133, "num_tokens": 16589078079.0, "step": 3330 }, { "epoch": 0.5937611408199643, "grad_norm": 0.267578125, "learning_rate": 1.684468670527453e-05, "loss": 1.0269, "num_tokens": 16595360144.0, "step": 3331 }, { "epoch": 0.593939393939394, "grad_norm": 0.2236328125, "learning_rate": 1.6842710820896432e-05, "loss": 1.0295, "num_tokens": 16601620579.0, "step": 3332 }, { "epoch": 0.5941176470588235, "grad_norm": 0.263671875, "learning_rate": 1.6840734449644007e-05, "loss": 1.0386, "num_tokens": 16607893165.0, "step": 3333 }, { "epoch": 0.5942959001782531, "grad_norm": 0.2392578125, "learning_rate": 1.6838757591681948e-05, "loss": 1.0306, "num_tokens": 16614143547.0, "step": 3334 }, { "epoch": 0.5944741532976827, "grad_norm": 0.216796875, "learning_rate": 1.6836780247174978e-05, "loss": 1.017, "num_tokens": 16620424302.0, "step": 3335 }, { "epoch": 0.5946524064171123, "grad_norm": 0.255859375, "learning_rate": 1.683480241628788e-05, "loss": 1.0198, "num_tokens": 16626691070.0, "step": 3336 }, { "epoch": 0.5948306595365419, "grad_norm": 0.2060546875, "learning_rate": 1.6832824099185462e-05, "loss": 1.0493, "num_tokens": 16632956706.0, "step": 3337 }, { "epoch": 0.5950089126559714, "grad_norm": 0.287109375, "learning_rate": 1.683084529603258e-05, "loss": 1.0079, "num_tokens": 16639195527.0, "step": 3338 }, { "epoch": 0.5951871657754011, "grad_norm": 0.2255859375, "learning_rate": 1.6828866006994125e-05, "loss": 1.0311, "num_tokens": 16645477573.0, "step": 3339 }, { "epoch": 0.5953654188948306, "grad_norm": 0.2490234375, "learning_rate": 1.6826886232235033e-05, "loss": 1.0324, "num_tokens": 16651746310.0, "step": 3340 }, { "epoch": 0.5955436720142603, "grad_norm": 0.2353515625, "learning_rate": 1.6824905971920277e-05, "loss": 1.0417, "num_tokens": 16658029779.0, "step": 3341 }, { "epoch": 0.5957219251336898, "grad_norm": 0.2275390625, "learning_rate": 1.6822925226214876e-05, "loss": 1.0176, "num_tokens": 16664312858.0, "step": 3342 }, { "epoch": 0.5959001782531195, "grad_norm": 0.2373046875, "learning_rate": 1.6820943995283884e-05, "loss": 1.0235, "num_tokens": 16670593683.0, "step": 3343 }, { "epoch": 0.596078431372549, "grad_norm": 0.234375, "learning_rate": 1.6818962279292396e-05, "loss": 1.019, "num_tokens": 16676854770.0, "step": 3344 }, { "epoch": 0.5962566844919787, "grad_norm": 0.220703125, "learning_rate": 1.6816980078405554e-05, "loss": 1.0231, "num_tokens": 16683084275.0, "step": 3345 }, { "epoch": 0.5964349376114082, "grad_norm": 0.23828125, "learning_rate": 1.6814997392788533e-05, "loss": 1.0447, "num_tokens": 16689337794.0, "step": 3346 }, { "epoch": 0.5966131907308377, "grad_norm": 0.2412109375, "learning_rate": 1.681301422260655e-05, "loss": 1.0254, "num_tokens": 16695619028.0, "step": 3347 }, { "epoch": 0.5967914438502674, "grad_norm": 0.2197265625, "learning_rate": 1.6811030568024856e-05, "loss": 0.9906, "num_tokens": 16701865238.0, "step": 3348 }, { "epoch": 0.5969696969696969, "grad_norm": 0.2353515625, "learning_rate": 1.6809046429208762e-05, "loss": 1.0059, "num_tokens": 16708137306.0, "step": 3349 }, { "epoch": 0.5971479500891266, "grad_norm": 0.2158203125, "learning_rate": 1.6807061806323596e-05, "loss": 1.0424, "num_tokens": 16714356418.0, "step": 3350 }, { "epoch": 0.5973262032085561, "grad_norm": 0.267578125, "learning_rate": 1.6805076699534747e-05, "loss": 0.9829, "num_tokens": 16720640472.0, "step": 3351 }, { "epoch": 0.5975044563279858, "grad_norm": 0.255859375, "learning_rate": 1.680309110900762e-05, "loss": 1.0086, "num_tokens": 16726904073.0, "step": 3352 }, { "epoch": 0.5976827094474153, "grad_norm": 0.2392578125, "learning_rate": 1.680110503490769e-05, "loss": 1.0305, "num_tokens": 16733187355.0, "step": 3353 }, { "epoch": 0.597860962566845, "grad_norm": 0.259765625, "learning_rate": 1.679911847740045e-05, "loss": 1.0259, "num_tokens": 16739432688.0, "step": 3354 }, { "epoch": 0.5980392156862745, "grad_norm": 0.2373046875, "learning_rate": 1.679713143665144e-05, "loss": 0.9934, "num_tokens": 16745691097.0, "step": 3355 }, { "epoch": 0.5982174688057041, "grad_norm": 0.2275390625, "learning_rate": 1.6795143912826244e-05, "loss": 1.0633, "num_tokens": 16751975258.0, "step": 3356 }, { "epoch": 0.5983957219251337, "grad_norm": 0.2314453125, "learning_rate": 1.679315590609048e-05, "loss": 1.0264, "num_tokens": 16758259322.0, "step": 3357 }, { "epoch": 0.5985739750445633, "grad_norm": 0.2490234375, "learning_rate": 1.6791167416609807e-05, "loss": 1.0354, "num_tokens": 16764544117.0, "step": 3358 }, { "epoch": 0.5987522281639929, "grad_norm": 0.240234375, "learning_rate": 1.6789178444549926e-05, "loss": 1.0286, "num_tokens": 16770802772.0, "step": 3359 }, { "epoch": 0.5989304812834224, "grad_norm": 0.2431640625, "learning_rate": 1.6787188990076583e-05, "loss": 1.0585, "num_tokens": 16777086567.0, "step": 3360 }, { "epoch": 0.5991087344028521, "grad_norm": 0.240234375, "learning_rate": 1.678519905335556e-05, "loss": 1.0379, "num_tokens": 16783342589.0, "step": 3361 }, { "epoch": 0.5992869875222816, "grad_norm": 0.2275390625, "learning_rate": 1.6783208634552673e-05, "loss": 1.0203, "num_tokens": 16789574392.0, "step": 3362 }, { "epoch": 0.5994652406417113, "grad_norm": 0.2177734375, "learning_rate": 1.678121773383379e-05, "loss": 1.0421, "num_tokens": 16795851639.0, "step": 3363 }, { "epoch": 0.5996434937611408, "grad_norm": 0.2373046875, "learning_rate": 1.677922635136481e-05, "loss": 1.0004, "num_tokens": 16802131371.0, "step": 3364 }, { "epoch": 0.5998217468805704, "grad_norm": 0.2578125, "learning_rate": 1.6777234487311677e-05, "loss": 0.9904, "num_tokens": 16808389568.0, "step": 3365 }, { "epoch": 0.6, "grad_norm": 0.2265625, "learning_rate": 1.677524214184037e-05, "loss": 1.0228, "num_tokens": 16814644775.0, "step": 3366 }, { "epoch": 0.6001782531194296, "grad_norm": 0.2890625, "learning_rate": 1.6773249315116915e-05, "loss": 1.0167, "num_tokens": 16820928658.0, "step": 3367 }, { "epoch": 0.6003565062388592, "grad_norm": 0.248046875, "learning_rate": 1.6771256007307373e-05, "loss": 1.0265, "num_tokens": 16827210909.0, "step": 3368 }, { "epoch": 0.6005347593582888, "grad_norm": 0.2255859375, "learning_rate": 1.676926221857785e-05, "loss": 1.0314, "num_tokens": 16833490920.0, "step": 3369 }, { "epoch": 0.6007130124777184, "grad_norm": 0.25, "learning_rate": 1.6767267949094482e-05, "loss": 1.0278, "num_tokens": 16839769199.0, "step": 3370 }, { "epoch": 0.600891265597148, "grad_norm": 0.2265625, "learning_rate": 1.676527319902346e-05, "loss": 1.0307, "num_tokens": 16846055043.0, "step": 3371 }, { "epoch": 0.6010695187165775, "grad_norm": 0.267578125, "learning_rate": 1.6763277968531e-05, "loss": 1.0259, "num_tokens": 16852337664.0, "step": 3372 }, { "epoch": 0.6012477718360071, "grad_norm": 0.2490234375, "learning_rate": 1.676128225778337e-05, "loss": 1.0177, "num_tokens": 16858599833.0, "step": 3373 }, { "epoch": 0.6014260249554367, "grad_norm": 0.2216796875, "learning_rate": 1.675928606694687e-05, "loss": 1.0262, "num_tokens": 16864882524.0, "step": 3374 }, { "epoch": 0.6016042780748663, "grad_norm": 0.2314453125, "learning_rate": 1.6757289396187843e-05, "loss": 1.0201, "num_tokens": 16871164531.0, "step": 3375 }, { "epoch": 0.6017825311942959, "grad_norm": 0.2216796875, "learning_rate": 1.6755292245672677e-05, "loss": 1.045, "num_tokens": 16877446837.0, "step": 3376 }, { "epoch": 0.6019607843137255, "grad_norm": 0.255859375, "learning_rate": 1.6753294615567785e-05, "loss": 1.0133, "num_tokens": 16883730009.0, "step": 3377 }, { "epoch": 0.6021390374331551, "grad_norm": 0.236328125, "learning_rate": 1.675129650603964e-05, "loss": 1.0462, "num_tokens": 16890011979.0, "step": 3378 }, { "epoch": 0.6023172905525846, "grad_norm": 0.2421875, "learning_rate": 1.674929791725474e-05, "loss": 1.0211, "num_tokens": 16896293979.0, "step": 3379 }, { "epoch": 0.6024955436720143, "grad_norm": 0.2421875, "learning_rate": 1.674729884937963e-05, "loss": 1.0442, "num_tokens": 16902512477.0, "step": 3380 }, { "epoch": 0.6026737967914438, "grad_norm": 0.2158203125, "learning_rate": 1.674529930258089e-05, "loss": 1.0264, "num_tokens": 16908796118.0, "step": 3381 }, { "epoch": 0.6028520499108735, "grad_norm": 0.240234375, "learning_rate": 1.6743299277025144e-05, "loss": 1.0084, "num_tokens": 16915080026.0, "step": 3382 }, { "epoch": 0.603030303030303, "grad_norm": 0.234375, "learning_rate": 1.6741298772879055e-05, "loss": 1.0192, "num_tokens": 16921363471.0, "step": 3383 }, { "epoch": 0.6032085561497326, "grad_norm": 0.232421875, "learning_rate": 1.6739297790309325e-05, "loss": 1.014, "num_tokens": 16927630043.0, "step": 3384 }, { "epoch": 0.6033868092691622, "grad_norm": 0.2353515625, "learning_rate": 1.6737296329482697e-05, "loss": 1.0319, "num_tokens": 16933914164.0, "step": 3385 }, { "epoch": 0.6035650623885918, "grad_norm": 0.212890625, "learning_rate": 1.673529439056595e-05, "loss": 1.0362, "num_tokens": 16940177586.0, "step": 3386 }, { "epoch": 0.6037433155080214, "grad_norm": 0.2197265625, "learning_rate": 1.673329197372591e-05, "loss": 1.0153, "num_tokens": 16946460514.0, "step": 3387 }, { "epoch": 0.6039215686274509, "grad_norm": 0.21875, "learning_rate": 1.673128907912944e-05, "loss": 1.0368, "num_tokens": 16952703484.0, "step": 3388 }, { "epoch": 0.6040998217468806, "grad_norm": 0.2431640625, "learning_rate": 1.6729285706943437e-05, "loss": 1.0233, "num_tokens": 16958988498.0, "step": 3389 }, { "epoch": 0.6042780748663101, "grad_norm": 0.23046875, "learning_rate": 1.6727281857334843e-05, "loss": 1.0184, "num_tokens": 16965268117.0, "step": 3390 }, { "epoch": 0.6044563279857398, "grad_norm": 0.2265625, "learning_rate": 1.6725277530470644e-05, "loss": 0.9948, "num_tokens": 16971551710.0, "step": 3391 }, { "epoch": 0.6046345811051693, "grad_norm": 0.2158203125, "learning_rate": 1.6723272726517855e-05, "loss": 1.0333, "num_tokens": 16977834170.0, "step": 3392 }, { "epoch": 0.604812834224599, "grad_norm": 0.2158203125, "learning_rate": 1.6721267445643537e-05, "loss": 0.9826, "num_tokens": 16984120545.0, "step": 3393 }, { "epoch": 0.6049910873440285, "grad_norm": 0.2236328125, "learning_rate": 1.6719261688014798e-05, "loss": 1.011, "num_tokens": 16990405538.0, "step": 3394 }, { "epoch": 0.6051693404634582, "grad_norm": 0.2177734375, "learning_rate": 1.671725545379877e-05, "loss": 1.0229, "num_tokens": 16996689363.0, "step": 3395 }, { "epoch": 0.6053475935828877, "grad_norm": 0.2265625, "learning_rate": 1.6715248743162635e-05, "loss": 1.0648, "num_tokens": 17002974849.0, "step": 3396 }, { "epoch": 0.6055258467023172, "grad_norm": 0.2216796875, "learning_rate": 1.6713241556273618e-05, "loss": 1.0081, "num_tokens": 17009257479.0, "step": 3397 }, { "epoch": 0.6057040998217469, "grad_norm": 0.220703125, "learning_rate": 1.6711233893298968e-05, "loss": 1.0474, "num_tokens": 17015525911.0, "step": 3398 }, { "epoch": 0.6058823529411764, "grad_norm": 0.2109375, "learning_rate": 1.6709225754405992e-05, "loss": 1.0193, "num_tokens": 17021810118.0, "step": 3399 }, { "epoch": 0.6060606060606061, "grad_norm": 0.22265625, "learning_rate": 1.670721713976203e-05, "loss": 1.0291, "num_tokens": 17028068133.0, "step": 3400 }, { "epoch": 0.6062388591800356, "grad_norm": 0.2158203125, "learning_rate": 1.670520804953445e-05, "loss": 1.0272, "num_tokens": 17034351477.0, "step": 3401 }, { "epoch": 0.6064171122994653, "grad_norm": 0.19921875, "learning_rate": 1.6703198483890682e-05, "loss": 1.021, "num_tokens": 17040630009.0, "step": 3402 }, { "epoch": 0.6065953654188948, "grad_norm": 0.228515625, "learning_rate": 1.6701188442998176e-05, "loss": 1.0374, "num_tokens": 17046894336.0, "step": 3403 }, { "epoch": 0.6067736185383245, "grad_norm": 0.21484375, "learning_rate": 1.6699177927024433e-05, "loss": 1.0306, "num_tokens": 17053152249.0, "step": 3404 }, { "epoch": 0.606951871657754, "grad_norm": 0.2001953125, "learning_rate": 1.669716693613699e-05, "loss": 1.0167, "num_tokens": 17059435227.0, "step": 3405 }, { "epoch": 0.6071301247771836, "grad_norm": 0.2255859375, "learning_rate": 1.6695155470503413e-05, "loss": 1.0589, "num_tokens": 17065690203.0, "step": 3406 }, { "epoch": 0.6073083778966132, "grad_norm": 0.208984375, "learning_rate": 1.6693143530291335e-05, "loss": 1.0122, "num_tokens": 17071963867.0, "step": 3407 }, { "epoch": 0.6074866310160428, "grad_norm": 0.22265625, "learning_rate": 1.66911311156684e-05, "loss": 1.0525, "num_tokens": 17078239634.0, "step": 3408 }, { "epoch": 0.6076648841354724, "grad_norm": 0.2216796875, "learning_rate": 1.6689118226802303e-05, "loss": 1.0275, "num_tokens": 17084454656.0, "step": 3409 }, { "epoch": 0.6078431372549019, "grad_norm": 0.22265625, "learning_rate": 1.6687104863860784e-05, "loss": 1.0165, "num_tokens": 17090736868.0, "step": 3410 }, { "epoch": 0.6080213903743316, "grad_norm": 0.2294921875, "learning_rate": 1.6685091027011612e-05, "loss": 1.0347, "num_tokens": 17097020035.0, "step": 3411 }, { "epoch": 0.6081996434937611, "grad_norm": 0.2119140625, "learning_rate": 1.6683076716422602e-05, "loss": 1.0012, "num_tokens": 17103213595.0, "step": 3412 }, { "epoch": 0.6083778966131907, "grad_norm": 0.228515625, "learning_rate": 1.6681061932261608e-05, "loss": 1.0249, "num_tokens": 17109475947.0, "step": 3413 }, { "epoch": 0.6085561497326203, "grad_norm": 0.2216796875, "learning_rate": 1.6679046674696525e-05, "loss": 0.9798, "num_tokens": 17115713032.0, "step": 3414 }, { "epoch": 0.6087344028520499, "grad_norm": 0.2080078125, "learning_rate": 1.6677030943895276e-05, "loss": 1.0191, "num_tokens": 17121996567.0, "step": 3415 }, { "epoch": 0.6089126559714795, "grad_norm": 0.2255859375, "learning_rate": 1.6675014740025842e-05, "loss": 1.0447, "num_tokens": 17128242231.0, "step": 3416 }, { "epoch": 0.6090909090909091, "grad_norm": 0.23046875, "learning_rate": 1.6672998063256226e-05, "loss": 1.0376, "num_tokens": 17134526348.0, "step": 3417 }, { "epoch": 0.6092691622103387, "grad_norm": 0.2373046875, "learning_rate": 1.6670980913754483e-05, "loss": 1.0415, "num_tokens": 17140809643.0, "step": 3418 }, { "epoch": 0.6094474153297683, "grad_norm": 0.2158203125, "learning_rate": 1.6668963291688702e-05, "loss": 1.03, "num_tokens": 17147093352.0, "step": 3419 }, { "epoch": 0.6096256684491979, "grad_norm": 0.2197265625, "learning_rate": 1.6666945197227015e-05, "loss": 1.0268, "num_tokens": 17153377215.0, "step": 3420 }, { "epoch": 0.6098039215686275, "grad_norm": 0.2333984375, "learning_rate": 1.666492663053758e-05, "loss": 1.0266, "num_tokens": 17159622170.0, "step": 3421 }, { "epoch": 0.609982174688057, "grad_norm": 0.21875, "learning_rate": 1.6662907591788613e-05, "loss": 1.0266, "num_tokens": 17165907945.0, "step": 3422 }, { "epoch": 0.6101604278074866, "grad_norm": 0.232421875, "learning_rate": 1.6660888081148357e-05, "loss": 1.0598, "num_tokens": 17172178583.0, "step": 3423 }, { "epoch": 0.6103386809269162, "grad_norm": 0.2431640625, "learning_rate": 1.6658868098785102e-05, "loss": 1.0242, "num_tokens": 17178458862.0, "step": 3424 }, { "epoch": 0.6105169340463458, "grad_norm": 0.2275390625, "learning_rate": 1.665684764486717e-05, "loss": 1.0326, "num_tokens": 17184742690.0, "step": 3425 }, { "epoch": 0.6106951871657754, "grad_norm": 0.2490234375, "learning_rate": 1.665482671956293e-05, "loss": 1.0065, "num_tokens": 17190995561.0, "step": 3426 }, { "epoch": 0.610873440285205, "grad_norm": 0.2431640625, "learning_rate": 1.6652805323040783e-05, "loss": 1.0522, "num_tokens": 17197234622.0, "step": 3427 }, { "epoch": 0.6110516934046346, "grad_norm": 0.232421875, "learning_rate": 1.6650783455469172e-05, "loss": 1.0173, "num_tokens": 17203501665.0, "step": 3428 }, { "epoch": 0.6112299465240641, "grad_norm": 0.2314453125, "learning_rate": 1.664876111701658e-05, "loss": 1.0602, "num_tokens": 17209737760.0, "step": 3429 }, { "epoch": 0.6114081996434938, "grad_norm": 0.2255859375, "learning_rate": 1.6646738307851534e-05, "loss": 1.0353, "num_tokens": 17216021166.0, "step": 3430 }, { "epoch": 0.6115864527629233, "grad_norm": 0.224609375, "learning_rate": 1.6644715028142583e-05, "loss": 1.0205, "num_tokens": 17222297460.0, "step": 3431 }, { "epoch": 0.611764705882353, "grad_norm": 0.2353515625, "learning_rate": 1.664269127805834e-05, "loss": 1.0079, "num_tokens": 17228555252.0, "step": 3432 }, { "epoch": 0.6119429590017825, "grad_norm": 0.23046875, "learning_rate": 1.6640667057767438e-05, "loss": 1.0382, "num_tokens": 17234837384.0, "step": 3433 }, { "epoch": 0.6121212121212121, "grad_norm": 0.236328125, "learning_rate": 1.663864236743856e-05, "loss": 1.0441, "num_tokens": 17241121107.0, "step": 3434 }, { "epoch": 0.6122994652406417, "grad_norm": 0.2392578125, "learning_rate": 1.6636617207240416e-05, "loss": 1.0307, "num_tokens": 17247406447.0, "step": 3435 }, { "epoch": 0.6124777183600713, "grad_norm": 0.2041015625, "learning_rate": 1.6634591577341774e-05, "loss": 1.0334, "num_tokens": 17253669045.0, "step": 3436 }, { "epoch": 0.6126559714795009, "grad_norm": 0.232421875, "learning_rate": 1.6632565477911417e-05, "loss": 1.0332, "num_tokens": 17259952867.0, "step": 3437 }, { "epoch": 0.6128342245989304, "grad_norm": 0.2333984375, "learning_rate": 1.6630538909118192e-05, "loss": 1.0291, "num_tokens": 17266235946.0, "step": 3438 }, { "epoch": 0.6130124777183601, "grad_norm": 0.23046875, "learning_rate": 1.662851187113097e-05, "loss": 1.0521, "num_tokens": 17272518744.0, "step": 3439 }, { "epoch": 0.6131907308377896, "grad_norm": 0.240234375, "learning_rate": 1.662648436411866e-05, "loss": 1.0159, "num_tokens": 17278801524.0, "step": 3440 }, { "epoch": 0.6133689839572193, "grad_norm": 0.236328125, "learning_rate": 1.662445638825022e-05, "loss": 1.0235, "num_tokens": 17285084826.0, "step": 3441 }, { "epoch": 0.6135472370766488, "grad_norm": 0.244140625, "learning_rate": 1.662242794369464e-05, "loss": 1.0006, "num_tokens": 17291361221.0, "step": 3442 }, { "epoch": 0.6137254901960785, "grad_norm": 0.2451171875, "learning_rate": 1.662039903062095e-05, "loss": 1.0258, "num_tokens": 17297645508.0, "step": 3443 }, { "epoch": 0.613903743315508, "grad_norm": 0.275390625, "learning_rate": 1.661836964919822e-05, "loss": 1.0149, "num_tokens": 17303927847.0, "step": 3444 }, { "epoch": 0.6140819964349377, "grad_norm": 0.21875, "learning_rate": 1.661633979959556e-05, "loss": 1.0421, "num_tokens": 17310208891.0, "step": 3445 }, { "epoch": 0.6142602495543672, "grad_norm": 0.2451171875, "learning_rate": 1.6614309481982112e-05, "loss": 1.028, "num_tokens": 17316490993.0, "step": 3446 }, { "epoch": 0.6144385026737967, "grad_norm": 0.244140625, "learning_rate": 1.6612278696527075e-05, "loss": 1.0117, "num_tokens": 17322775113.0, "step": 3447 }, { "epoch": 0.6146167557932264, "grad_norm": 0.234375, "learning_rate": 1.661024744339966e-05, "loss": 1.0649, "num_tokens": 17329057567.0, "step": 3448 }, { "epoch": 0.6147950089126559, "grad_norm": 0.259765625, "learning_rate": 1.6608215722769144e-05, "loss": 1.0504, "num_tokens": 17335321471.0, "step": 3449 }, { "epoch": 0.6149732620320856, "grad_norm": 0.25, "learning_rate": 1.6606183534804823e-05, "loss": 0.9895, "num_tokens": 17341605139.0, "step": 3450 }, { "epoch": 0.6151515151515151, "grad_norm": 0.2490234375, "learning_rate": 1.6604150879676045e-05, "loss": 1.0256, "num_tokens": 17347858220.0, "step": 3451 }, { "epoch": 0.6153297682709448, "grad_norm": 0.236328125, "learning_rate": 1.6602117757552188e-05, "loss": 1.0194, "num_tokens": 17354116724.0, "step": 3452 }, { "epoch": 0.6155080213903743, "grad_norm": 0.2275390625, "learning_rate": 1.6600084168602674e-05, "loss": 1.0129, "num_tokens": 17360401879.0, "step": 3453 }, { "epoch": 0.615686274509804, "grad_norm": 0.251953125, "learning_rate": 1.659805011299696e-05, "loss": 1.0582, "num_tokens": 17366635921.0, "step": 3454 }, { "epoch": 0.6158645276292335, "grad_norm": 0.2490234375, "learning_rate": 1.6596015590904546e-05, "loss": 1.0252, "num_tokens": 17372916886.0, "step": 3455 }, { "epoch": 0.6160427807486631, "grad_norm": 0.2294921875, "learning_rate": 1.6593980602494972e-05, "loss": 1.0183, "num_tokens": 17379184714.0, "step": 3456 }, { "epoch": 0.6162210338680927, "grad_norm": 0.248046875, "learning_rate": 1.659194514793781e-05, "loss": 1.0366, "num_tokens": 17385398923.0, "step": 3457 }, { "epoch": 0.6163992869875223, "grad_norm": 0.220703125, "learning_rate": 1.6589909227402674e-05, "loss": 1.0149, "num_tokens": 17391667220.0, "step": 3458 }, { "epoch": 0.6165775401069519, "grad_norm": 0.2421875, "learning_rate": 1.658787284105922e-05, "loss": 1.0328, "num_tokens": 17397924100.0, "step": 3459 }, { "epoch": 0.6167557932263814, "grad_norm": 0.2255859375, "learning_rate": 1.6585835989077142e-05, "loss": 1.0066, "num_tokens": 17404208048.0, "step": 3460 }, { "epoch": 0.6169340463458111, "grad_norm": 0.2216796875, "learning_rate": 1.6583798671626176e-05, "loss": 1.0254, "num_tokens": 17410479821.0, "step": 3461 }, { "epoch": 0.6171122994652406, "grad_norm": 0.2294921875, "learning_rate": 1.658176088887608e-05, "loss": 1.0112, "num_tokens": 17416749877.0, "step": 3462 }, { "epoch": 0.6172905525846702, "grad_norm": 0.25390625, "learning_rate": 1.6579722640996665e-05, "loss": 1.0375, "num_tokens": 17423034605.0, "step": 3463 }, { "epoch": 0.6174688057040998, "grad_norm": 0.22265625, "learning_rate": 1.6577683928157788e-05, "loss": 0.9957, "num_tokens": 17429284221.0, "step": 3464 }, { "epoch": 0.6176470588235294, "grad_norm": 0.2431640625, "learning_rate": 1.6575644750529326e-05, "loss": 1.0281, "num_tokens": 17435567301.0, "step": 3465 }, { "epoch": 0.617825311942959, "grad_norm": 0.2138671875, "learning_rate": 1.657360510828121e-05, "loss": 1.0163, "num_tokens": 17441829201.0, "step": 3466 }, { "epoch": 0.6180035650623886, "grad_norm": 0.25390625, "learning_rate": 1.65715650015834e-05, "loss": 1.0357, "num_tokens": 17448114273.0, "step": 3467 }, { "epoch": 0.6181818181818182, "grad_norm": 0.2294921875, "learning_rate": 1.6569524430605902e-05, "loss": 1.0267, "num_tokens": 17454397708.0, "step": 3468 }, { "epoch": 0.6183600713012478, "grad_norm": 0.212890625, "learning_rate": 1.6567483395518755e-05, "loss": 1.0579, "num_tokens": 17460681059.0, "step": 3469 }, { "epoch": 0.6185383244206774, "grad_norm": 0.2392578125, "learning_rate": 1.656544189649204e-05, "loss": 1.0264, "num_tokens": 17466956192.0, "step": 3470 }, { "epoch": 0.618716577540107, "grad_norm": 0.255859375, "learning_rate": 1.656339993369587e-05, "loss": 1.0165, "num_tokens": 17473234365.0, "step": 3471 }, { "epoch": 0.6188948306595365, "grad_norm": 0.244140625, "learning_rate": 1.6561357507300412e-05, "loss": 1.0314, "num_tokens": 17479489755.0, "step": 3472 }, { "epoch": 0.6190730837789661, "grad_norm": 0.2294921875, "learning_rate": 1.6559314617475855e-05, "loss": 1.038, "num_tokens": 17485729804.0, "step": 3473 }, { "epoch": 0.6192513368983957, "grad_norm": 0.220703125, "learning_rate": 1.6557271264392436e-05, "loss": 1.0236, "num_tokens": 17492013487.0, "step": 3474 }, { "epoch": 0.6194295900178253, "grad_norm": 0.23046875, "learning_rate": 1.6555227448220424e-05, "loss": 1.0021, "num_tokens": 17498211033.0, "step": 3475 }, { "epoch": 0.6196078431372549, "grad_norm": 0.2109375, "learning_rate": 1.655318316913014e-05, "loss": 1.0624, "num_tokens": 17504485781.0, "step": 3476 }, { "epoch": 0.6197860962566845, "grad_norm": 0.220703125, "learning_rate": 1.6551138427291925e-05, "loss": 1.0347, "num_tokens": 17510718736.0, "step": 3477 }, { "epoch": 0.6199643493761141, "grad_norm": 0.216796875, "learning_rate": 1.654909322287617e-05, "loss": 1.008, "num_tokens": 17516978360.0, "step": 3478 }, { "epoch": 0.6201426024955436, "grad_norm": 0.2275390625, "learning_rate": 1.6547047556053302e-05, "loss": 1.0451, "num_tokens": 17523237495.0, "step": 3479 }, { "epoch": 0.6203208556149733, "grad_norm": 0.2314453125, "learning_rate": 1.6545001426993792e-05, "loss": 1.0227, "num_tokens": 17529499530.0, "step": 3480 }, { "epoch": 0.6204991087344028, "grad_norm": 0.208984375, "learning_rate": 1.6542954835868136e-05, "loss": 1.0534, "num_tokens": 17535781750.0, "step": 3481 }, { "epoch": 0.6206773618538325, "grad_norm": 0.2333984375, "learning_rate": 1.6540907782846883e-05, "loss": 1.0121, "num_tokens": 17542064018.0, "step": 3482 }, { "epoch": 0.620855614973262, "grad_norm": 0.2177734375, "learning_rate": 1.6538860268100614e-05, "loss": 1.0211, "num_tokens": 17548346524.0, "step": 3483 }, { "epoch": 0.6210338680926917, "grad_norm": 0.21875, "learning_rate": 1.6536812291799947e-05, "loss": 1.0361, "num_tokens": 17554614462.0, "step": 3484 }, { "epoch": 0.6212121212121212, "grad_norm": 0.228515625, "learning_rate": 1.653476385411554e-05, "loss": 1.0074, "num_tokens": 17560899161.0, "step": 3485 }, { "epoch": 0.6213903743315508, "grad_norm": 0.2236328125, "learning_rate": 1.653271495521809e-05, "loss": 1.0244, "num_tokens": 17567154280.0, "step": 3486 }, { "epoch": 0.6215686274509804, "grad_norm": 0.2314453125, "learning_rate": 1.653066559527833e-05, "loss": 1.0473, "num_tokens": 17573408448.0, "step": 3487 }, { "epoch": 0.6217468805704099, "grad_norm": 0.220703125, "learning_rate": 1.6528615774467042e-05, "loss": 1.0246, "num_tokens": 17579677308.0, "step": 3488 }, { "epoch": 0.6219251336898396, "grad_norm": 0.2275390625, "learning_rate": 1.652656549295503e-05, "loss": 1.0431, "num_tokens": 17585939879.0, "step": 3489 }, { "epoch": 0.6221033868092691, "grad_norm": 0.21875, "learning_rate": 1.652451475091315e-05, "loss": 1.0349, "num_tokens": 17592211381.0, "step": 3490 }, { "epoch": 0.6222816399286988, "grad_norm": 0.232421875, "learning_rate": 1.652246354851228e-05, "loss": 1.017, "num_tokens": 17598483448.0, "step": 3491 }, { "epoch": 0.6224598930481283, "grad_norm": 0.2158203125, "learning_rate": 1.6520411885923363e-05, "loss": 1.0233, "num_tokens": 17604767193.0, "step": 3492 }, { "epoch": 0.622638146167558, "grad_norm": 0.2490234375, "learning_rate": 1.651835976331735e-05, "loss": 1.008, "num_tokens": 17611053127.0, "step": 3493 }, { "epoch": 0.6228163992869875, "grad_norm": 0.220703125, "learning_rate": 1.6516307180865253e-05, "loss": 1.009, "num_tokens": 17617275491.0, "step": 3494 }, { "epoch": 0.6229946524064172, "grad_norm": 0.23828125, "learning_rate": 1.651425413873811e-05, "loss": 1.0295, "num_tokens": 17623551923.0, "step": 3495 }, { "epoch": 0.6231729055258467, "grad_norm": 0.244140625, "learning_rate": 1.6512200637107006e-05, "loss": 1.0202, "num_tokens": 17629799651.0, "step": 3496 }, { "epoch": 0.6233511586452762, "grad_norm": 0.23046875, "learning_rate": 1.6510146676143056e-05, "loss": 1.0347, "num_tokens": 17636077197.0, "step": 3497 }, { "epoch": 0.6235294117647059, "grad_norm": 0.263671875, "learning_rate": 1.650809225601742e-05, "loss": 1.0786, "num_tokens": 17642344047.0, "step": 3498 }, { "epoch": 0.6237076648841354, "grad_norm": 0.2353515625, "learning_rate": 1.650603737690129e-05, "loss": 1.0352, "num_tokens": 17648628855.0, "step": 3499 }, { "epoch": 0.6238859180035651, "grad_norm": 0.265625, "learning_rate": 1.65039820389659e-05, "loss": 1.0152, "num_tokens": 17654895333.0, "step": 3500 }, { "epoch": 0.6240641711229946, "grad_norm": 0.2265625, "learning_rate": 1.6501926242382523e-05, "loss": 1.0349, "num_tokens": 17661150475.0, "step": 3501 }, { "epoch": 0.6242424242424243, "grad_norm": 0.244140625, "learning_rate": 1.649986998732247e-05, "loss": 1.0037, "num_tokens": 17667433101.0, "step": 3502 }, { "epoch": 0.6244206773618538, "grad_norm": 0.259765625, "learning_rate": 1.6497813273957088e-05, "loss": 1.0417, "num_tokens": 17673695786.0, "step": 3503 }, { "epoch": 0.6245989304812835, "grad_norm": 0.2265625, "learning_rate": 1.649575610245776e-05, "loss": 1.0058, "num_tokens": 17679960084.0, "step": 3504 }, { "epoch": 0.624777183600713, "grad_norm": 0.28125, "learning_rate": 1.649369847299592e-05, "loss": 1.0336, "num_tokens": 17686210583.0, "step": 3505 }, { "epoch": 0.6249554367201426, "grad_norm": 0.265625, "learning_rate": 1.6491640385743018e-05, "loss": 1.0435, "num_tokens": 17692485245.0, "step": 3506 }, { "epoch": 0.6251336898395722, "grad_norm": 0.2216796875, "learning_rate": 1.6489581840870566e-05, "loss": 1.022, "num_tokens": 17698759326.0, "step": 3507 }, { "epoch": 0.6253119429590018, "grad_norm": 0.251953125, "learning_rate": 1.6487522838550093e-05, "loss": 1.026, "num_tokens": 17705044706.0, "step": 3508 }, { "epoch": 0.6254901960784314, "grad_norm": 0.25, "learning_rate": 1.6485463378953185e-05, "loss": 1.0121, "num_tokens": 17711280903.0, "step": 3509 }, { "epoch": 0.6256684491978609, "grad_norm": 0.248046875, "learning_rate": 1.648340346225145e-05, "loss": 0.9916, "num_tokens": 17717565937.0, "step": 3510 }, { "epoch": 0.6258467023172906, "grad_norm": 0.25390625, "learning_rate": 1.648134308861655e-05, "loss": 1.0264, "num_tokens": 17723850501.0, "step": 3511 }, { "epoch": 0.6260249554367201, "grad_norm": 0.236328125, "learning_rate": 1.647928225822017e-05, "loss": 1.008, "num_tokens": 17730111247.0, "step": 3512 }, { "epoch": 0.6262032085561497, "grad_norm": 0.23828125, "learning_rate": 1.647722097123404e-05, "loss": 1.033, "num_tokens": 17736395264.0, "step": 3513 }, { "epoch": 0.6263814616755793, "grad_norm": 0.23828125, "learning_rate": 1.6475159227829924e-05, "loss": 1.0196, "num_tokens": 17742679187.0, "step": 3514 }, { "epoch": 0.6265597147950089, "grad_norm": 0.236328125, "learning_rate": 1.6473097028179636e-05, "loss": 1.0489, "num_tokens": 17748945486.0, "step": 3515 }, { "epoch": 0.6267379679144385, "grad_norm": 0.2138671875, "learning_rate": 1.6471034372455015e-05, "loss": 0.9661, "num_tokens": 17755230291.0, "step": 3516 }, { "epoch": 0.6269162210338681, "grad_norm": 0.2333984375, "learning_rate": 1.6468971260827938e-05, "loss": 1.0438, "num_tokens": 17761514487.0, "step": 3517 }, { "epoch": 0.6270944741532977, "grad_norm": 0.232421875, "learning_rate": 1.646690769347034e-05, "loss": 1.0261, "num_tokens": 17767732085.0, "step": 3518 }, { "epoch": 0.6272727272727273, "grad_norm": 0.2236328125, "learning_rate": 1.646484367055416e-05, "loss": 1.0241, "num_tokens": 17774016642.0, "step": 3519 }, { "epoch": 0.6274509803921569, "grad_norm": 0.2373046875, "learning_rate": 1.64627791922514e-05, "loss": 1.0112, "num_tokens": 17780299364.0, "step": 3520 }, { "epoch": 0.6276292335115865, "grad_norm": 0.2236328125, "learning_rate": 1.64607142587341e-05, "loss": 1.0078, "num_tokens": 17786582856.0, "step": 3521 }, { "epoch": 0.627807486631016, "grad_norm": 0.234375, "learning_rate": 1.645864887017432e-05, "loss": 1.0352, "num_tokens": 17792866060.0, "step": 3522 }, { "epoch": 0.6279857397504456, "grad_norm": 0.21484375, "learning_rate": 1.645658302674418e-05, "loss": 1.004, "num_tokens": 17799150717.0, "step": 3523 }, { "epoch": 0.6281639928698752, "grad_norm": 0.21875, "learning_rate": 1.6454516728615817e-05, "loss": 1.0155, "num_tokens": 17805424476.0, "step": 3524 }, { "epoch": 0.6283422459893048, "grad_norm": 0.2265625, "learning_rate": 1.6452449975961427e-05, "loss": 1.0224, "num_tokens": 17811708815.0, "step": 3525 }, { "epoch": 0.6285204991087344, "grad_norm": 0.2265625, "learning_rate": 1.6450382768953225e-05, "loss": 1.0111, "num_tokens": 17817969675.0, "step": 3526 }, { "epoch": 0.628698752228164, "grad_norm": 0.220703125, "learning_rate": 1.6448315107763476e-05, "loss": 1.0634, "num_tokens": 17824251714.0, "step": 3527 }, { "epoch": 0.6288770053475936, "grad_norm": 0.240234375, "learning_rate": 1.644624699256447e-05, "loss": 1.0527, "num_tokens": 17830518743.0, "step": 3528 }, { "epoch": 0.6290552584670231, "grad_norm": 0.2255859375, "learning_rate": 1.644417842352856e-05, "loss": 1.0377, "num_tokens": 17836801664.0, "step": 3529 }, { "epoch": 0.6292335115864528, "grad_norm": 0.24609375, "learning_rate": 1.6442109400828104e-05, "loss": 1.0299, "num_tokens": 17843086112.0, "step": 3530 }, { "epoch": 0.6294117647058823, "grad_norm": 0.2177734375, "learning_rate": 1.6440039924635526e-05, "loss": 1.0222, "num_tokens": 17849366858.0, "step": 3531 }, { "epoch": 0.629590017825312, "grad_norm": 0.234375, "learning_rate": 1.6437969995123264e-05, "loss": 1.0487, "num_tokens": 17855650627.0, "step": 3532 }, { "epoch": 0.6297682709447415, "grad_norm": 0.2236328125, "learning_rate": 1.643589961246382e-05, "loss": 1.0213, "num_tokens": 17861927829.0, "step": 3533 }, { "epoch": 0.6299465240641712, "grad_norm": 0.232421875, "learning_rate": 1.6433828776829706e-05, "loss": 1.0261, "num_tokens": 17868199930.0, "step": 3534 }, { "epoch": 0.6301247771836007, "grad_norm": 0.2265625, "learning_rate": 1.643175748839349e-05, "loss": 1.0133, "num_tokens": 17874479971.0, "step": 3535 }, { "epoch": 0.6303030303030303, "grad_norm": 0.2451171875, "learning_rate": 1.6429685747327776e-05, "loss": 1.026, "num_tokens": 17880703133.0, "step": 3536 }, { "epoch": 0.6304812834224599, "grad_norm": 0.2353515625, "learning_rate": 1.6427613553805202e-05, "loss": 1.033, "num_tokens": 17886986258.0, "step": 3537 }, { "epoch": 0.6306595365418894, "grad_norm": 0.2109375, "learning_rate": 1.6425540907998442e-05, "loss": 1.0238, "num_tokens": 17893268795.0, "step": 3538 }, { "epoch": 0.6308377896613191, "grad_norm": 0.22265625, "learning_rate": 1.642346781008021e-05, "loss": 1.0116, "num_tokens": 17899521962.0, "step": 3539 }, { "epoch": 0.6310160427807486, "grad_norm": 0.2392578125, "learning_rate": 1.6421394260223255e-05, "loss": 1.0429, "num_tokens": 17905804397.0, "step": 3540 }, { "epoch": 0.6311942959001783, "grad_norm": 0.22265625, "learning_rate": 1.6419320258600374e-05, "loss": 1.0187, "num_tokens": 17912089170.0, "step": 3541 }, { "epoch": 0.6313725490196078, "grad_norm": 0.2197265625, "learning_rate": 1.6417245805384387e-05, "loss": 1.0103, "num_tokens": 17918323981.0, "step": 3542 }, { "epoch": 0.6315508021390375, "grad_norm": 0.2197265625, "learning_rate": 1.6415170900748165e-05, "loss": 1.0423, "num_tokens": 17924607635.0, "step": 3543 }, { "epoch": 0.631729055258467, "grad_norm": 0.236328125, "learning_rate": 1.6413095544864602e-05, "loss": 0.9813, "num_tokens": 17930880239.0, "step": 3544 }, { "epoch": 0.6319073083778967, "grad_norm": 0.2490234375, "learning_rate": 1.6411019737906648e-05, "loss": 1.0252, "num_tokens": 17937148700.0, "step": 3545 }, { "epoch": 0.6320855614973262, "grad_norm": 0.2216796875, "learning_rate": 1.640894348004727e-05, "loss": 1.0064, "num_tokens": 17943413321.0, "step": 3546 }, { "epoch": 0.6322638146167558, "grad_norm": 0.2392578125, "learning_rate": 1.6406866771459493e-05, "loss": 1.0218, "num_tokens": 17949662006.0, "step": 3547 }, { "epoch": 0.6324420677361854, "grad_norm": 0.2138671875, "learning_rate": 1.640478961231636e-05, "loss": 1.0063, "num_tokens": 17955916540.0, "step": 3548 }, { "epoch": 0.6326203208556149, "grad_norm": 0.23046875, "learning_rate": 1.6402712002790968e-05, "loss": 1.0231, "num_tokens": 17962202255.0, "step": 3549 }, { "epoch": 0.6327985739750446, "grad_norm": 0.224609375, "learning_rate": 1.640063394305644e-05, "loss": 1.0504, "num_tokens": 17968485216.0, "step": 3550 }, { "epoch": 0.6329768270944741, "grad_norm": 0.224609375, "learning_rate": 1.6398555433285944e-05, "loss": 1.0545, "num_tokens": 17974724596.0, "step": 3551 }, { "epoch": 0.6331550802139038, "grad_norm": 0.248046875, "learning_rate": 1.6396476473652685e-05, "loss": 0.9988, "num_tokens": 17981008122.0, "step": 3552 }, { "epoch": 0.6333333333333333, "grad_norm": 0.2353515625, "learning_rate": 1.6394397064329898e-05, "loss": 1.0059, "num_tokens": 17987292098.0, "step": 3553 }, { "epoch": 0.633511586452763, "grad_norm": 0.2451171875, "learning_rate": 1.6392317205490863e-05, "loss": 1.0139, "num_tokens": 17993577194.0, "step": 3554 }, { "epoch": 0.6336898395721925, "grad_norm": 0.2421875, "learning_rate": 1.6390236897308892e-05, "loss": 1.0399, "num_tokens": 17999829847.0, "step": 3555 }, { "epoch": 0.6338680926916221, "grad_norm": 0.212890625, "learning_rate": 1.6388156139957344e-05, "loss": 1.0414, "num_tokens": 18006087087.0, "step": 3556 }, { "epoch": 0.6340463458110517, "grad_norm": 0.2412109375, "learning_rate": 1.6386074933609602e-05, "loss": 1.0267, "num_tokens": 18012358968.0, "step": 3557 }, { "epoch": 0.6342245989304813, "grad_norm": 0.216796875, "learning_rate": 1.63839932784391e-05, "loss": 1.0225, "num_tokens": 18018637495.0, "step": 3558 }, { "epoch": 0.6344028520499109, "grad_norm": 0.2373046875, "learning_rate": 1.6381911174619298e-05, "loss": 1.001, "num_tokens": 18024921599.0, "step": 3559 }, { "epoch": 0.6345811051693404, "grad_norm": 0.2373046875, "learning_rate": 1.63798286223237e-05, "loss": 1.0392, "num_tokens": 18031172357.0, "step": 3560 }, { "epoch": 0.6347593582887701, "grad_norm": 0.2236328125, "learning_rate": 1.6377745621725846e-05, "loss": 1.0098, "num_tokens": 18037438718.0, "step": 3561 }, { "epoch": 0.6349376114081996, "grad_norm": 0.244140625, "learning_rate": 1.6375662172999313e-05, "loss": 1.0621, "num_tokens": 18043687577.0, "step": 3562 }, { "epoch": 0.6351158645276292, "grad_norm": 0.236328125, "learning_rate": 1.637357827631771e-05, "loss": 1.0333, "num_tokens": 18049921438.0, "step": 3563 }, { "epoch": 0.6352941176470588, "grad_norm": 0.2236328125, "learning_rate": 1.6371493931854696e-05, "loss": 1.0133, "num_tokens": 18056206904.0, "step": 3564 }, { "epoch": 0.6354723707664884, "grad_norm": 0.2421875, "learning_rate": 1.6369409139783958e-05, "loss": 1.0286, "num_tokens": 18062467335.0, "step": 3565 }, { "epoch": 0.635650623885918, "grad_norm": 0.267578125, "learning_rate": 1.636732390027922e-05, "loss": 1.0339, "num_tokens": 18068752356.0, "step": 3566 }, { "epoch": 0.6358288770053476, "grad_norm": 0.263671875, "learning_rate": 1.6365238213514242e-05, "loss": 1.0229, "num_tokens": 18074992993.0, "step": 3567 }, { "epoch": 0.6360071301247772, "grad_norm": 0.2177734375, "learning_rate": 1.6363152079662834e-05, "loss": 1.0557, "num_tokens": 18081275459.0, "step": 3568 }, { "epoch": 0.6361853832442068, "grad_norm": 0.24609375, "learning_rate": 1.6361065498898827e-05, "loss": 1.037, "num_tokens": 18087558588.0, "step": 3569 }, { "epoch": 0.6363636363636364, "grad_norm": 0.212890625, "learning_rate": 1.63589784713961e-05, "loss": 1.046, "num_tokens": 18093827546.0, "step": 3570 }, { "epoch": 0.636541889483066, "grad_norm": 0.240234375, "learning_rate": 1.6356890997328558e-05, "loss": 1.0022, "num_tokens": 18100111735.0, "step": 3571 }, { "epoch": 0.6367201426024955, "grad_norm": 0.2294921875, "learning_rate": 1.635480307687016e-05, "loss": 1.0068, "num_tokens": 18106383007.0, "step": 3572 }, { "epoch": 0.6368983957219251, "grad_norm": 0.2265625, "learning_rate": 1.635271471019489e-05, "loss": 1.0202, "num_tokens": 18112630326.0, "step": 3573 }, { "epoch": 0.6370766488413547, "grad_norm": 0.236328125, "learning_rate": 1.6350625897476772e-05, "loss": 1.0186, "num_tokens": 18118890223.0, "step": 3574 }, { "epoch": 0.6372549019607843, "grad_norm": 0.220703125, "learning_rate": 1.6348536638889864e-05, "loss": 1.0363, "num_tokens": 18125173048.0, "step": 3575 }, { "epoch": 0.6374331550802139, "grad_norm": 0.22265625, "learning_rate": 1.6346446934608265e-05, "loss": 1.0167, "num_tokens": 18131373853.0, "step": 3576 }, { "epoch": 0.6376114081996435, "grad_norm": 0.2109375, "learning_rate": 1.6344356784806118e-05, "loss": 0.9984, "num_tokens": 18137629264.0, "step": 3577 }, { "epoch": 0.6377896613190731, "grad_norm": 0.228515625, "learning_rate": 1.6342266189657587e-05, "loss": 1.0143, "num_tokens": 18143913363.0, "step": 3578 }, { "epoch": 0.6379679144385026, "grad_norm": 0.2255859375, "learning_rate": 1.6340175149336884e-05, "loss": 1.0468, "num_tokens": 18150196681.0, "step": 3579 }, { "epoch": 0.6381461675579323, "grad_norm": 0.2333984375, "learning_rate": 1.6338083664018257e-05, "loss": 1.0169, "num_tokens": 18156480254.0, "step": 3580 }, { "epoch": 0.6383244206773618, "grad_norm": 0.2412109375, "learning_rate": 1.633599173387599e-05, "loss": 1.0516, "num_tokens": 18162765317.0, "step": 3581 }, { "epoch": 0.6385026737967915, "grad_norm": 0.2333984375, "learning_rate": 1.63338993590844e-05, "loss": 1.0308, "num_tokens": 18169049218.0, "step": 3582 }, { "epoch": 0.638680926916221, "grad_norm": 0.24609375, "learning_rate": 1.633180653981785e-05, "loss": 1.0434, "num_tokens": 18175306123.0, "step": 3583 }, { "epoch": 0.6388591800356507, "grad_norm": 0.2197265625, "learning_rate": 1.6329713276250735e-05, "loss": 1.005, "num_tokens": 18181557968.0, "step": 3584 }, { "epoch": 0.6390374331550802, "grad_norm": 0.240234375, "learning_rate": 1.6327619568557487e-05, "loss": 1.0163, "num_tokens": 18187842552.0, "step": 3585 }, { "epoch": 0.6392156862745098, "grad_norm": 0.21484375, "learning_rate": 1.6325525416912568e-05, "loss": 1.0001, "num_tokens": 18194106270.0, "step": 3586 }, { "epoch": 0.6393939393939394, "grad_norm": 0.2177734375, "learning_rate": 1.6323430821490495e-05, "loss": 1.044, "num_tokens": 18200364188.0, "step": 3587 }, { "epoch": 0.6395721925133689, "grad_norm": 0.203125, "learning_rate": 1.6321335782465802e-05, "loss": 1.0503, "num_tokens": 18206616959.0, "step": 3588 }, { "epoch": 0.6397504456327986, "grad_norm": 0.212890625, "learning_rate": 1.631924030001308e-05, "loss": 0.9956, "num_tokens": 18212889653.0, "step": 3589 }, { "epoch": 0.6399286987522281, "grad_norm": 0.2099609375, "learning_rate": 1.6317144374306933e-05, "loss": 1.0291, "num_tokens": 18219137324.0, "step": 3590 }, { "epoch": 0.6401069518716578, "grad_norm": 0.212890625, "learning_rate": 1.631504800552202e-05, "loss": 1.0234, "num_tokens": 18225419996.0, "step": 3591 }, { "epoch": 0.6402852049910873, "grad_norm": 0.2060546875, "learning_rate": 1.6312951193833035e-05, "loss": 1.0252, "num_tokens": 18231672089.0, "step": 3592 }, { "epoch": 0.640463458110517, "grad_norm": 0.21484375, "learning_rate": 1.6310853939414702e-05, "loss": 0.9932, "num_tokens": 18237945074.0, "step": 3593 }, { "epoch": 0.6406417112299465, "grad_norm": 0.197265625, "learning_rate": 1.6308756242441787e-05, "loss": 1.0101, "num_tokens": 18244200891.0, "step": 3594 }, { "epoch": 0.6408199643493762, "grad_norm": 0.205078125, "learning_rate": 1.6306658103089096e-05, "loss": 1.0236, "num_tokens": 18250467889.0, "step": 3595 }, { "epoch": 0.6409982174688057, "grad_norm": 0.2080078125, "learning_rate": 1.6304559521531454e-05, "loss": 1.0072, "num_tokens": 18256751384.0, "step": 3596 }, { "epoch": 0.6411764705882353, "grad_norm": 0.20703125, "learning_rate": 1.6302460497943753e-05, "loss": 1.0161, "num_tokens": 18263019183.0, "step": 3597 }, { "epoch": 0.6413547237076649, "grad_norm": 0.2080078125, "learning_rate": 1.6300361032500892e-05, "loss": 1.0406, "num_tokens": 18269304330.0, "step": 3598 }, { "epoch": 0.6415329768270944, "grad_norm": 0.2236328125, "learning_rate": 1.629826112537783e-05, "loss": 1.0406, "num_tokens": 18275558084.0, "step": 3599 }, { "epoch": 0.6417112299465241, "grad_norm": 0.216796875, "learning_rate": 1.6296160776749547e-05, "loss": 1.0311, "num_tokens": 18281824164.0, "step": 3600 }, { "epoch": 0.6418894830659536, "grad_norm": 0.216796875, "learning_rate": 1.6294059986791068e-05, "loss": 1.0218, "num_tokens": 18288073981.0, "step": 3601 }, { "epoch": 0.6420677361853833, "grad_norm": 0.2275390625, "learning_rate": 1.6291958755677443e-05, "loss": 1.0004, "num_tokens": 18294330847.0, "step": 3602 }, { "epoch": 0.6422459893048128, "grad_norm": 0.234375, "learning_rate": 1.6289857083583785e-05, "loss": 1.0017, "num_tokens": 18300614182.0, "step": 3603 }, { "epoch": 0.6424242424242425, "grad_norm": 0.2421875, "learning_rate": 1.628775497068521e-05, "loss": 1.0225, "num_tokens": 18306897086.0, "step": 3604 }, { "epoch": 0.642602495543672, "grad_norm": 0.2333984375, "learning_rate": 1.6285652417156897e-05, "loss": 1.0259, "num_tokens": 18313182316.0, "step": 3605 }, { "epoch": 0.6427807486631016, "grad_norm": 0.234375, "learning_rate": 1.6283549423174054e-05, "loss": 1.0487, "num_tokens": 18319465648.0, "step": 3606 }, { "epoch": 0.6429590017825312, "grad_norm": 0.259765625, "learning_rate": 1.628144598891192e-05, "loss": 0.9986, "num_tokens": 18325749486.0, "step": 3607 }, { "epoch": 0.6431372549019608, "grad_norm": 0.2265625, "learning_rate": 1.627934211454577e-05, "loss": 1.0133, "num_tokens": 18331971664.0, "step": 3608 }, { "epoch": 0.6433155080213904, "grad_norm": 0.2294921875, "learning_rate": 1.6277237800250926e-05, "loss": 1.0278, "num_tokens": 18338254097.0, "step": 3609 }, { "epoch": 0.64349376114082, "grad_norm": 0.2373046875, "learning_rate": 1.6275133046202743e-05, "loss": 1.0442, "num_tokens": 18344538377.0, "step": 3610 }, { "epoch": 0.6436720142602496, "grad_norm": 0.248046875, "learning_rate": 1.6273027852576607e-05, "loss": 1.024, "num_tokens": 18350780047.0, "step": 3611 }, { "epoch": 0.6438502673796791, "grad_norm": 0.2080078125, "learning_rate": 1.6270922219547945e-05, "loss": 1.0021, "num_tokens": 18357065354.0, "step": 3612 }, { "epoch": 0.6440285204991087, "grad_norm": 0.2158203125, "learning_rate": 1.6268816147292216e-05, "loss": 1.0333, "num_tokens": 18363320096.0, "step": 3613 }, { "epoch": 0.6442067736185383, "grad_norm": 0.212890625, "learning_rate": 1.6266709635984926e-05, "loss": 1.017, "num_tokens": 18369603014.0, "step": 3614 }, { "epoch": 0.6443850267379679, "grad_norm": 0.21484375, "learning_rate": 1.626460268580161e-05, "loss": 1.0285, "num_tokens": 18375870515.0, "step": 3615 }, { "epoch": 0.6445632798573975, "grad_norm": 0.212890625, "learning_rate": 1.6262495296917837e-05, "loss": 1.0215, "num_tokens": 18382127378.0, "step": 3616 }, { "epoch": 0.6447415329768271, "grad_norm": 0.203125, "learning_rate": 1.6260387469509216e-05, "loss": 1.0502, "num_tokens": 18388382386.0, "step": 3617 }, { "epoch": 0.6449197860962567, "grad_norm": 0.2080078125, "learning_rate": 1.62582792037514e-05, "loss": 1.006, "num_tokens": 18394622871.0, "step": 3618 }, { "epoch": 0.6450980392156863, "grad_norm": 0.2138671875, "learning_rate": 1.6256170499820064e-05, "loss": 1.0147, "num_tokens": 18400883948.0, "step": 3619 }, { "epoch": 0.6452762923351159, "grad_norm": 0.2294921875, "learning_rate": 1.6254061357890923e-05, "loss": 1.0198, "num_tokens": 18407134831.0, "step": 3620 }, { "epoch": 0.6454545454545455, "grad_norm": 0.2158203125, "learning_rate": 1.625195177813974e-05, "loss": 1.0408, "num_tokens": 18413394895.0, "step": 3621 }, { "epoch": 0.645632798573975, "grad_norm": 0.2158203125, "learning_rate": 1.624984176074231e-05, "loss": 1.0251, "num_tokens": 18419675048.0, "step": 3622 }, { "epoch": 0.6458110516934046, "grad_norm": 0.23046875, "learning_rate": 1.6247731305874455e-05, "loss": 1.0197, "num_tokens": 18425932347.0, "step": 3623 }, { "epoch": 0.6459893048128342, "grad_norm": 0.212890625, "learning_rate": 1.6245620413712037e-05, "loss": 1.0188, "num_tokens": 18432213742.0, "step": 3624 }, { "epoch": 0.6461675579322638, "grad_norm": 0.2099609375, "learning_rate": 1.6243509084430963e-05, "loss": 0.9889, "num_tokens": 18438497515.0, "step": 3625 }, { "epoch": 0.6463458110516934, "grad_norm": 0.2265625, "learning_rate": 1.6241397318207166e-05, "loss": 0.9973, "num_tokens": 18444756340.0, "step": 3626 }, { "epoch": 0.646524064171123, "grad_norm": 0.224609375, "learning_rate": 1.6239285115216624e-05, "loss": 1.0017, "num_tokens": 18451039232.0, "step": 3627 }, { "epoch": 0.6467023172905526, "grad_norm": 0.205078125, "learning_rate": 1.6237172475635344e-05, "loss": 0.9866, "num_tokens": 18457286677.0, "step": 3628 }, { "epoch": 0.6468805704099821, "grad_norm": 0.251953125, "learning_rate": 1.623505939963937e-05, "loss": 1.0104, "num_tokens": 18463561776.0, "step": 3629 }, { "epoch": 0.6470588235294118, "grad_norm": 0.20703125, "learning_rate": 1.6232945887404796e-05, "loss": 1.0525, "num_tokens": 18469844699.0, "step": 3630 }, { "epoch": 0.6472370766488413, "grad_norm": 0.216796875, "learning_rate": 1.623083193910773e-05, "loss": 1.0416, "num_tokens": 18476126173.0, "step": 3631 }, { "epoch": 0.647415329768271, "grad_norm": 0.2421875, "learning_rate": 1.6228717554924337e-05, "loss": 1.0313, "num_tokens": 18482405314.0, "step": 3632 }, { "epoch": 0.6475935828877005, "grad_norm": 0.2197265625, "learning_rate": 1.6226602735030797e-05, "loss": 1.001, "num_tokens": 18488662748.0, "step": 3633 }, { "epoch": 0.6477718360071302, "grad_norm": 0.259765625, "learning_rate": 1.622448747960335e-05, "loss": 1.0668, "num_tokens": 18494902761.0, "step": 3634 }, { "epoch": 0.6479500891265597, "grad_norm": 0.22265625, "learning_rate": 1.622237178881826e-05, "loss": 1.0569, "num_tokens": 18501187107.0, "step": 3635 }, { "epoch": 0.6481283422459893, "grad_norm": 0.2294921875, "learning_rate": 1.6220255662851818e-05, "loss": 0.9963, "num_tokens": 18507470737.0, "step": 3636 }, { "epoch": 0.6483065953654189, "grad_norm": 0.234375, "learning_rate": 1.621813910188037e-05, "loss": 1.0338, "num_tokens": 18513728604.0, "step": 3637 }, { "epoch": 0.6484848484848484, "grad_norm": 0.2275390625, "learning_rate": 1.6216022106080287e-05, "loss": 0.9989, "num_tokens": 18520012921.0, "step": 3638 }, { "epoch": 0.6486631016042781, "grad_norm": 0.236328125, "learning_rate": 1.6213904675627982e-05, "loss": 1.0141, "num_tokens": 18526296623.0, "step": 3639 }, { "epoch": 0.6488413547237076, "grad_norm": 0.220703125, "learning_rate": 1.6211786810699897e-05, "loss": 1.0164, "num_tokens": 18532566709.0, "step": 3640 }, { "epoch": 0.6490196078431373, "grad_norm": 0.2431640625, "learning_rate": 1.6209668511472515e-05, "loss": 1.0117, "num_tokens": 18538849755.0, "step": 3641 }, { "epoch": 0.6491978609625668, "grad_norm": 0.228515625, "learning_rate": 1.620754977812235e-05, "loss": 0.9847, "num_tokens": 18545133735.0, "step": 3642 }, { "epoch": 0.6493761140819965, "grad_norm": 0.2236328125, "learning_rate": 1.620543061082596e-05, "loss": 1.0153, "num_tokens": 18551404287.0, "step": 3643 }, { "epoch": 0.649554367201426, "grad_norm": 0.2333984375, "learning_rate": 1.6203311009759942e-05, "loss": 1.0196, "num_tokens": 18557663182.0, "step": 3644 }, { "epoch": 0.6497326203208557, "grad_norm": 0.23046875, "learning_rate": 1.6201190975100916e-05, "loss": 1.0592, "num_tokens": 18563945594.0, "step": 3645 }, { "epoch": 0.6499108734402852, "grad_norm": 0.232421875, "learning_rate": 1.6199070507025543e-05, "loss": 0.9954, "num_tokens": 18570219903.0, "step": 3646 }, { "epoch": 0.6500891265597148, "grad_norm": 0.2177734375, "learning_rate": 1.6196949605710524e-05, "loss": 1.0296, "num_tokens": 18576504483.0, "step": 3647 }, { "epoch": 0.6502673796791444, "grad_norm": 0.236328125, "learning_rate": 1.6194828271332598e-05, "loss": 1.0205, "num_tokens": 18582756759.0, "step": 3648 }, { "epoch": 0.6504456327985739, "grad_norm": 0.2236328125, "learning_rate": 1.6192706504068532e-05, "loss": 1.0193, "num_tokens": 18589041012.0, "step": 3649 }, { "epoch": 0.6506238859180036, "grad_norm": 0.2353515625, "learning_rate": 1.6190584304095138e-05, "loss": 1.0248, "num_tokens": 18595323147.0, "step": 3650 }, { "epoch": 0.6508021390374331, "grad_norm": 0.2314453125, "learning_rate": 1.6188461671589255e-05, "loss": 1.0084, "num_tokens": 18601589945.0, "step": 3651 }, { "epoch": 0.6509803921568628, "grad_norm": 0.244140625, "learning_rate": 1.618633860672776e-05, "loss": 1.0415, "num_tokens": 18607874787.0, "step": 3652 }, { "epoch": 0.6511586452762923, "grad_norm": 0.2294921875, "learning_rate": 1.6184215109687576e-05, "loss": 1.0459, "num_tokens": 18614131079.0, "step": 3653 }, { "epoch": 0.651336898395722, "grad_norm": 0.234375, "learning_rate": 1.618209118064565e-05, "loss": 1.0216, "num_tokens": 18620411535.0, "step": 3654 }, { "epoch": 0.6515151515151515, "grad_norm": 0.251953125, "learning_rate": 1.617996681977897e-05, "loss": 1.0368, "num_tokens": 18626694718.0, "step": 3655 }, { "epoch": 0.6516934046345811, "grad_norm": 0.216796875, "learning_rate": 1.617784202726456e-05, "loss": 0.9947, "num_tokens": 18632963538.0, "step": 3656 }, { "epoch": 0.6518716577540107, "grad_norm": 0.2314453125, "learning_rate": 1.6175716803279476e-05, "loss": 1.0307, "num_tokens": 18639241034.0, "step": 3657 }, { "epoch": 0.6520499108734403, "grad_norm": 0.2451171875, "learning_rate": 1.617359114800082e-05, "loss": 1.0193, "num_tokens": 18645526068.0, "step": 3658 }, { "epoch": 0.6522281639928699, "grad_norm": 0.234375, "learning_rate": 1.6171465061605713e-05, "loss": 1.0517, "num_tokens": 18651809876.0, "step": 3659 }, { "epoch": 0.6524064171122995, "grad_norm": 0.23046875, "learning_rate": 1.6169338544271336e-05, "loss": 1.0337, "num_tokens": 18658081989.0, "step": 3660 }, { "epoch": 0.6525846702317291, "grad_norm": 0.2431640625, "learning_rate": 1.616721159617488e-05, "loss": 1.0127, "num_tokens": 18664367231.0, "step": 3661 }, { "epoch": 0.6527629233511586, "grad_norm": 0.2265625, "learning_rate": 1.6165084217493593e-05, "loss": 1.0275, "num_tokens": 18670605013.0, "step": 3662 }, { "epoch": 0.6529411764705882, "grad_norm": 0.2265625, "learning_rate": 1.6162956408404742e-05, "loss": 1.0087, "num_tokens": 18676885246.0, "step": 3663 }, { "epoch": 0.6531194295900178, "grad_norm": 0.2451171875, "learning_rate": 1.6160828169085645e-05, "loss": 1.0218, "num_tokens": 18683169034.0, "step": 3664 }, { "epoch": 0.6532976827094474, "grad_norm": 0.21484375, "learning_rate": 1.6158699499713642e-05, "loss": 1.0198, "num_tokens": 18689422428.0, "step": 3665 }, { "epoch": 0.653475935828877, "grad_norm": 0.25390625, "learning_rate": 1.6156570400466124e-05, "loss": 1.0017, "num_tokens": 18695682022.0, "step": 3666 }, { "epoch": 0.6536541889483066, "grad_norm": 0.2392578125, "learning_rate": 1.61544408715205e-05, "loss": 1.0264, "num_tokens": 18701965240.0, "step": 3667 }, { "epoch": 0.6538324420677362, "grad_norm": 0.2265625, "learning_rate": 1.6152310913054224e-05, "loss": 1.047, "num_tokens": 18708243926.0, "step": 3668 }, { "epoch": 0.6540106951871658, "grad_norm": 0.234375, "learning_rate": 1.6150180525244797e-05, "loss": 1.0457, "num_tokens": 18714522749.0, "step": 3669 }, { "epoch": 0.6541889483065954, "grad_norm": 0.2392578125, "learning_rate": 1.6148049708269736e-05, "loss": 1.0227, "num_tokens": 18720775416.0, "step": 3670 }, { "epoch": 0.654367201426025, "grad_norm": 0.2431640625, "learning_rate": 1.61459184623066e-05, "loss": 1.045, "num_tokens": 18727016445.0, "step": 3671 }, { "epoch": 0.6545454545454545, "grad_norm": 0.267578125, "learning_rate": 1.6143786787532997e-05, "loss": 1.025, "num_tokens": 18733283956.0, "step": 3672 }, { "epoch": 0.6547237076648841, "grad_norm": 0.2275390625, "learning_rate": 1.614165468412655e-05, "loss": 1.0207, "num_tokens": 18739569472.0, "step": 3673 }, { "epoch": 0.6549019607843137, "grad_norm": 0.251953125, "learning_rate": 1.6139522152264927e-05, "loss": 1.012, "num_tokens": 18745853140.0, "step": 3674 }, { "epoch": 0.6550802139037433, "grad_norm": 0.2158203125, "learning_rate": 1.613738919212584e-05, "loss": 1.0205, "num_tokens": 18752137542.0, "step": 3675 }, { "epoch": 0.6552584670231729, "grad_norm": 0.2236328125, "learning_rate": 1.6135255803887025e-05, "loss": 1.0251, "num_tokens": 18758421526.0, "step": 3676 }, { "epoch": 0.6554367201426025, "grad_norm": 0.2080078125, "learning_rate": 1.6133121987726255e-05, "loss": 1.0564, "num_tokens": 18764704081.0, "step": 3677 }, { "epoch": 0.6556149732620321, "grad_norm": 0.2373046875, "learning_rate": 1.6130987743821352e-05, "loss": 1.0287, "num_tokens": 18770985881.0, "step": 3678 }, { "epoch": 0.6557932263814616, "grad_norm": 0.2080078125, "learning_rate": 1.6128853072350145e-05, "loss": 1.0218, "num_tokens": 18777268967.0, "step": 3679 }, { "epoch": 0.6559714795008913, "grad_norm": 0.2236328125, "learning_rate": 1.6126717973490534e-05, "loss": 1.0277, "num_tokens": 18783554248.0, "step": 3680 }, { "epoch": 0.6561497326203208, "grad_norm": 0.2275390625, "learning_rate": 1.6124582447420427e-05, "loss": 1.0552, "num_tokens": 18789783844.0, "step": 3681 }, { "epoch": 0.6563279857397505, "grad_norm": 0.244140625, "learning_rate": 1.612244649431778e-05, "loss": 0.9906, "num_tokens": 18796067822.0, "step": 3682 }, { "epoch": 0.65650623885918, "grad_norm": 0.23046875, "learning_rate": 1.6120310114360584e-05, "loss": 1.0619, "num_tokens": 18802319601.0, "step": 3683 }, { "epoch": 0.6566844919786097, "grad_norm": 0.216796875, "learning_rate": 1.6118173307726867e-05, "loss": 1.0241, "num_tokens": 18808601655.0, "step": 3684 }, { "epoch": 0.6568627450980392, "grad_norm": 0.234375, "learning_rate": 1.6116036074594683e-05, "loss": 1.0285, "num_tokens": 18814855748.0, "step": 3685 }, { "epoch": 0.6570409982174688, "grad_norm": 0.2001953125, "learning_rate": 1.611389841514213e-05, "loss": 1.0139, "num_tokens": 18821138244.0, "step": 3686 }, { "epoch": 0.6572192513368984, "grad_norm": 0.2216796875, "learning_rate": 1.611176032954734e-05, "loss": 1.0366, "num_tokens": 18827387277.0, "step": 3687 }, { "epoch": 0.6573975044563279, "grad_norm": 0.2158203125, "learning_rate": 1.6109621817988485e-05, "loss": 1.0272, "num_tokens": 18833671114.0, "step": 3688 }, { "epoch": 0.6575757575757576, "grad_norm": 0.2177734375, "learning_rate": 1.610748288064376e-05, "loss": 1.0173, "num_tokens": 18839930703.0, "step": 3689 }, { "epoch": 0.6577540106951871, "grad_norm": 0.224609375, "learning_rate": 1.6105343517691407e-05, "loss": 1.0302, "num_tokens": 18846215118.0, "step": 3690 }, { "epoch": 0.6579322638146168, "grad_norm": 0.20703125, "learning_rate": 1.61032037293097e-05, "loss": 1.0242, "num_tokens": 18852499783.0, "step": 3691 }, { "epoch": 0.6581105169340463, "grad_norm": 0.234375, "learning_rate": 1.6101063515676944e-05, "loss": 1.0131, "num_tokens": 18858766490.0, "step": 3692 }, { "epoch": 0.658288770053476, "grad_norm": 0.234375, "learning_rate": 1.609892287697149e-05, "loss": 1.0127, "num_tokens": 18865050474.0, "step": 3693 }, { "epoch": 0.6584670231729055, "grad_norm": 0.2294921875, "learning_rate": 1.6096781813371712e-05, "loss": 1.0153, "num_tokens": 18871335741.0, "step": 3694 }, { "epoch": 0.6586452762923352, "grad_norm": 0.2314453125, "learning_rate": 1.609464032505603e-05, "loss": 1.0265, "num_tokens": 18877580522.0, "step": 3695 }, { "epoch": 0.6588235294117647, "grad_norm": 0.2138671875, "learning_rate": 1.609249841220289e-05, "loss": 1.0091, "num_tokens": 18883864161.0, "step": 3696 }, { "epoch": 0.6590017825311943, "grad_norm": 0.2392578125, "learning_rate": 1.609035607499078e-05, "loss": 0.9997, "num_tokens": 18890147762.0, "step": 3697 }, { "epoch": 0.6591800356506239, "grad_norm": 0.224609375, "learning_rate": 1.6088213313598223e-05, "loss": 1.0527, "num_tokens": 18896415597.0, "step": 3698 }, { "epoch": 0.6593582887700534, "grad_norm": 0.232421875, "learning_rate": 1.6086070128203772e-05, "loss": 1.0439, "num_tokens": 18902699523.0, "step": 3699 }, { "epoch": 0.6595365418894831, "grad_norm": 0.2109375, "learning_rate": 1.6083926518986023e-05, "loss": 1.0199, "num_tokens": 18908932059.0, "step": 3700 }, { "epoch": 0.6597147950089126, "grad_norm": 0.234375, "learning_rate": 1.6081782486123603e-05, "loss": 1.0445, "num_tokens": 18915215013.0, "step": 3701 }, { "epoch": 0.6598930481283423, "grad_norm": 0.24609375, "learning_rate": 1.6079638029795175e-05, "loss": 1.0166, "num_tokens": 18921483935.0, "step": 3702 }, { "epoch": 0.6600713012477718, "grad_norm": 0.2177734375, "learning_rate": 1.6077493150179433e-05, "loss": 1.0287, "num_tokens": 18927767960.0, "step": 3703 }, { "epoch": 0.6602495543672015, "grad_norm": 0.2333984375, "learning_rate": 1.6075347847455116e-05, "loss": 1.0305, "num_tokens": 18934002783.0, "step": 3704 }, { "epoch": 0.660427807486631, "grad_norm": 0.2275390625, "learning_rate": 1.6073202121800986e-05, "loss": 1.042, "num_tokens": 18940283498.0, "step": 3705 }, { "epoch": 0.6606060606060606, "grad_norm": 0.2353515625, "learning_rate": 1.607105597339585e-05, "loss": 1.0288, "num_tokens": 18946561562.0, "step": 3706 }, { "epoch": 0.6607843137254902, "grad_norm": 0.2578125, "learning_rate": 1.6068909402418552e-05, "loss": 1.0211, "num_tokens": 18952844155.0, "step": 3707 }, { "epoch": 0.6609625668449198, "grad_norm": 0.220703125, "learning_rate": 1.6066762409047954e-05, "loss": 1.0055, "num_tokens": 18959129009.0, "step": 3708 }, { "epoch": 0.6611408199643494, "grad_norm": 0.2353515625, "learning_rate": 1.606461499346298e-05, "loss": 1.0366, "num_tokens": 18965412784.0, "step": 3709 }, { "epoch": 0.661319073083779, "grad_norm": 0.2451171875, "learning_rate": 1.6062467155842563e-05, "loss": 1.0332, "num_tokens": 18971662822.0, "step": 3710 }, { "epoch": 0.6614973262032086, "grad_norm": 0.2255859375, "learning_rate": 1.6060318896365693e-05, "loss": 1.0121, "num_tokens": 18977946095.0, "step": 3711 }, { "epoch": 0.6616755793226381, "grad_norm": 0.2470703125, "learning_rate": 1.6058170215211373e-05, "loss": 1.0377, "num_tokens": 18984138807.0, "step": 3712 }, { "epoch": 0.6618538324420677, "grad_norm": 0.208984375, "learning_rate": 1.6056021112558665e-05, "loss": 1.0014, "num_tokens": 18990400444.0, "step": 3713 }, { "epoch": 0.6620320855614973, "grad_norm": 0.25390625, "learning_rate": 1.6053871588586644e-05, "loss": 1.0177, "num_tokens": 18996685363.0, "step": 3714 }, { "epoch": 0.6622103386809269, "grad_norm": 0.2119140625, "learning_rate": 1.6051721643474435e-05, "loss": 0.9984, "num_tokens": 19002968783.0, "step": 3715 }, { "epoch": 0.6623885918003565, "grad_norm": 0.2197265625, "learning_rate": 1.6049571277401193e-05, "loss": 1.0187, "num_tokens": 19009205705.0, "step": 3716 }, { "epoch": 0.6625668449197861, "grad_norm": 0.2197265625, "learning_rate": 1.604742049054611e-05, "loss": 1.0415, "num_tokens": 19015489382.0, "step": 3717 }, { "epoch": 0.6627450980392157, "grad_norm": 0.216796875, "learning_rate": 1.6045269283088408e-05, "loss": 1.0337, "num_tokens": 19021727241.0, "step": 3718 }, { "epoch": 0.6629233511586453, "grad_norm": 0.236328125, "learning_rate": 1.6043117655207353e-05, "loss": 0.9889, "num_tokens": 19027998441.0, "step": 3719 }, { "epoch": 0.6631016042780749, "grad_norm": 0.2080078125, "learning_rate": 1.6040965607082234e-05, "loss": 1.0185, "num_tokens": 19034224654.0, "step": 3720 }, { "epoch": 0.6632798573975045, "grad_norm": 0.2412109375, "learning_rate": 1.603881313889239e-05, "loss": 1.026, "num_tokens": 19040509697.0, "step": 3721 }, { "epoch": 0.663458110516934, "grad_norm": 0.2294921875, "learning_rate": 1.6036660250817175e-05, "loss": 1.0328, "num_tokens": 19046795442.0, "step": 3722 }, { "epoch": 0.6636363636363637, "grad_norm": 0.232421875, "learning_rate": 1.6034506943036e-05, "loss": 1.0082, "num_tokens": 19053064424.0, "step": 3723 }, { "epoch": 0.6638146167557932, "grad_norm": 0.21875, "learning_rate": 1.6032353215728294e-05, "loss": 1.0151, "num_tokens": 19059334538.0, "step": 3724 }, { "epoch": 0.6639928698752228, "grad_norm": 0.2041015625, "learning_rate": 1.603019906907353e-05, "loss": 0.9733, "num_tokens": 19065619545.0, "step": 3725 }, { "epoch": 0.6641711229946524, "grad_norm": 0.265625, "learning_rate": 1.602804450325122e-05, "loss": 1.0335, "num_tokens": 19071899876.0, "step": 3726 }, { "epoch": 0.664349376114082, "grad_norm": 0.220703125, "learning_rate": 1.6025889518440893e-05, "loss": 1.0385, "num_tokens": 19078182784.0, "step": 3727 }, { "epoch": 0.6645276292335116, "grad_norm": 0.2275390625, "learning_rate": 1.602373411482213e-05, "loss": 0.9966, "num_tokens": 19084441377.0, "step": 3728 }, { "epoch": 0.6647058823529411, "grad_norm": 0.212890625, "learning_rate": 1.602157829257454e-05, "loss": 1.0242, "num_tokens": 19090696142.0, "step": 3729 }, { "epoch": 0.6648841354723708, "grad_norm": 0.2021484375, "learning_rate": 1.601942205187777e-05, "loss": 1.032, "num_tokens": 19096954817.0, "step": 3730 }, { "epoch": 0.6650623885918003, "grad_norm": 0.2236328125, "learning_rate": 1.60172653929115e-05, "loss": 1.0173, "num_tokens": 19103232956.0, "step": 3731 }, { "epoch": 0.66524064171123, "grad_norm": 0.212890625, "learning_rate": 1.6015108315855442e-05, "loss": 0.9957, "num_tokens": 19109517670.0, "step": 3732 }, { "epoch": 0.6654188948306595, "grad_norm": 0.224609375, "learning_rate": 1.6012950820889347e-05, "loss": 1.0213, "num_tokens": 19115766911.0, "step": 3733 }, { "epoch": 0.6655971479500892, "grad_norm": 0.2275390625, "learning_rate": 1.6010792908193006e-05, "loss": 1.0368, "num_tokens": 19122050223.0, "step": 3734 }, { "epoch": 0.6657754010695187, "grad_norm": 0.2314453125, "learning_rate": 1.6008634577946224e-05, "loss": 1.0475, "num_tokens": 19128332022.0, "step": 3735 }, { "epoch": 0.6659536541889483, "grad_norm": 0.2119140625, "learning_rate": 1.6006475830328866e-05, "loss": 1.0096, "num_tokens": 19134616783.0, "step": 3736 }, { "epoch": 0.6661319073083779, "grad_norm": 0.201171875, "learning_rate": 1.600431666552082e-05, "loss": 1.0104, "num_tokens": 19140899976.0, "step": 3737 }, { "epoch": 0.6663101604278074, "grad_norm": 0.23046875, "learning_rate": 1.6002157083702007e-05, "loss": 1.025, "num_tokens": 19147184548.0, "step": 3738 }, { "epoch": 0.6664884135472371, "grad_norm": 0.2255859375, "learning_rate": 1.5999997085052382e-05, "loss": 1.0238, "num_tokens": 19153448913.0, "step": 3739 }, { "epoch": 0.6666666666666666, "grad_norm": 0.2197265625, "learning_rate": 1.599783666975195e-05, "loss": 1.023, "num_tokens": 19159733989.0, "step": 3740 }, { "epoch": 0.6668449197860963, "grad_norm": 0.234375, "learning_rate": 1.599567583798073e-05, "loss": 1.0187, "num_tokens": 19166015391.0, "step": 3741 }, { "epoch": 0.6670231729055258, "grad_norm": 0.2265625, "learning_rate": 1.5993514589918783e-05, "loss": 1.0328, "num_tokens": 19172275304.0, "step": 3742 }, { "epoch": 0.6672014260249555, "grad_norm": 0.2421875, "learning_rate": 1.5991352925746207e-05, "loss": 1.0006, "num_tokens": 19178559502.0, "step": 3743 }, { "epoch": 0.667379679144385, "grad_norm": 0.234375, "learning_rate": 1.598919084564314e-05, "loss": 1.0588, "num_tokens": 19184818986.0, "step": 3744 }, { "epoch": 0.6675579322638147, "grad_norm": 0.212890625, "learning_rate": 1.5987028349789745e-05, "loss": 1.0004, "num_tokens": 19191102881.0, "step": 3745 }, { "epoch": 0.6677361853832442, "grad_norm": 0.2236328125, "learning_rate": 1.5984865438366223e-05, "loss": 1.0382, "num_tokens": 19197385954.0, "step": 3746 }, { "epoch": 0.6679144385026738, "grad_norm": 0.2412109375, "learning_rate": 1.5982702111552807e-05, "loss": 1.0406, "num_tokens": 19203667790.0, "step": 3747 }, { "epoch": 0.6680926916221034, "grad_norm": 0.21875, "learning_rate": 1.5980538369529773e-05, "loss": 0.9912, "num_tokens": 19209942315.0, "step": 3748 }, { "epoch": 0.6682709447415329, "grad_norm": 0.2294921875, "learning_rate": 1.5978374212477424e-05, "loss": 1.0081, "num_tokens": 19216201170.0, "step": 3749 }, { "epoch": 0.6684491978609626, "grad_norm": 0.236328125, "learning_rate": 1.5976209640576096e-05, "loss": 0.9892, "num_tokens": 19222475323.0, "step": 3750 }, { "epoch": 0.6686274509803921, "grad_norm": 0.2236328125, "learning_rate": 1.5974044654006167e-05, "loss": 0.9961, "num_tokens": 19228758615.0, "step": 3751 }, { "epoch": 0.6688057040998218, "grad_norm": 0.232421875, "learning_rate": 1.5971879252948044e-05, "loss": 1.0287, "num_tokens": 19235020347.0, "step": 3752 }, { "epoch": 0.6689839572192513, "grad_norm": 0.20703125, "learning_rate": 1.5969713437582173e-05, "loss": 1.002, "num_tokens": 19241290172.0, "step": 3753 }, { "epoch": 0.669162210338681, "grad_norm": 0.2265625, "learning_rate": 1.596754720808903e-05, "loss": 1.0408, "num_tokens": 19247572436.0, "step": 3754 }, { "epoch": 0.6693404634581105, "grad_norm": 0.228515625, "learning_rate": 1.596538056464913e-05, "loss": 1.0274, "num_tokens": 19253828188.0, "step": 3755 }, { "epoch": 0.6695187165775401, "grad_norm": 0.2138671875, "learning_rate": 1.5963213507443014e-05, "loss": 1.001, "num_tokens": 19260081563.0, "step": 3756 }, { "epoch": 0.6696969696969697, "grad_norm": 0.2353515625, "learning_rate": 1.596104603665127e-05, "loss": 1.0249, "num_tokens": 19266363341.0, "step": 3757 }, { "epoch": 0.6698752228163993, "grad_norm": 0.2109375, "learning_rate": 1.5958878152454506e-05, "loss": 1.0121, "num_tokens": 19272624317.0, "step": 3758 }, { "epoch": 0.6700534759358289, "grad_norm": 0.2080078125, "learning_rate": 1.595670985503338e-05, "loss": 1.0281, "num_tokens": 19278908060.0, "step": 3759 }, { "epoch": 0.6702317290552585, "grad_norm": 0.228515625, "learning_rate": 1.5954541144568575e-05, "loss": 1.021, "num_tokens": 19285173247.0, "step": 3760 }, { "epoch": 0.6704099821746881, "grad_norm": 0.2236328125, "learning_rate": 1.5952372021240805e-05, "loss": 1.0112, "num_tokens": 19291456500.0, "step": 3761 }, { "epoch": 0.6705882352941176, "grad_norm": 0.2236328125, "learning_rate": 1.595020248523083e-05, "loss": 1.0371, "num_tokens": 19297712922.0, "step": 3762 }, { "epoch": 0.6707664884135472, "grad_norm": 0.2119140625, "learning_rate": 1.5948032536719433e-05, "loss": 1.0473, "num_tokens": 19303995625.0, "step": 3763 }, { "epoch": 0.6709447415329768, "grad_norm": 0.23828125, "learning_rate": 1.594586217588744e-05, "loss": 1.0336, "num_tokens": 19310279247.0, "step": 3764 }, { "epoch": 0.6711229946524064, "grad_norm": 0.2470703125, "learning_rate": 1.5943691402915704e-05, "loss": 1.0247, "num_tokens": 19316535850.0, "step": 3765 }, { "epoch": 0.671301247771836, "grad_norm": 0.22265625, "learning_rate": 1.594152021798512e-05, "loss": 1.0865, "num_tokens": 19322786629.0, "step": 3766 }, { "epoch": 0.6714795008912656, "grad_norm": 0.2099609375, "learning_rate": 1.5939348621276606e-05, "loss": 1.0142, "num_tokens": 19329030846.0, "step": 3767 }, { "epoch": 0.6716577540106952, "grad_norm": 0.2216796875, "learning_rate": 1.5937176612971135e-05, "loss": 1.01, "num_tokens": 19335314197.0, "step": 3768 }, { "epoch": 0.6718360071301248, "grad_norm": 0.22265625, "learning_rate": 1.5935004193249686e-05, "loss": 1.0241, "num_tokens": 19341597554.0, "step": 3769 }, { "epoch": 0.6720142602495544, "grad_norm": 0.22265625, "learning_rate": 1.5932831362293297e-05, "loss": 0.9978, "num_tokens": 19347850190.0, "step": 3770 }, { "epoch": 0.672192513368984, "grad_norm": 0.21484375, "learning_rate": 1.5930658120283027e-05, "loss": 1.0168, "num_tokens": 19354134034.0, "step": 3771 }, { "epoch": 0.6723707664884135, "grad_norm": 0.2109375, "learning_rate": 1.592848446739997e-05, "loss": 1.0411, "num_tokens": 19360397988.0, "step": 3772 }, { "epoch": 0.6725490196078432, "grad_norm": 0.240234375, "learning_rate": 1.5926310403825266e-05, "loss": 1.004, "num_tokens": 19366680221.0, "step": 3773 }, { "epoch": 0.6727272727272727, "grad_norm": 0.2255859375, "learning_rate": 1.592413592974007e-05, "loss": 1.0061, "num_tokens": 19372953420.0, "step": 3774 }, { "epoch": 0.6729055258467023, "grad_norm": 0.232421875, "learning_rate": 1.5921961045325588e-05, "loss": 1.0281, "num_tokens": 19379231603.0, "step": 3775 }, { "epoch": 0.6730837789661319, "grad_norm": 0.2216796875, "learning_rate": 1.5919785750763047e-05, "loss": 1.0457, "num_tokens": 19385514858.0, "step": 3776 }, { "epoch": 0.6732620320855615, "grad_norm": 0.205078125, "learning_rate": 1.5917610046233724e-05, "loss": 1.0149, "num_tokens": 19391797850.0, "step": 3777 }, { "epoch": 0.6734402852049911, "grad_norm": 0.2421875, "learning_rate": 1.591543393191891e-05, "loss": 1.0088, "num_tokens": 19398053158.0, "step": 3778 }, { "epoch": 0.6736185383244206, "grad_norm": 0.20703125, "learning_rate": 1.5913257407999953e-05, "loss": 1.029, "num_tokens": 19404307870.0, "step": 3779 }, { "epoch": 0.6737967914438503, "grad_norm": 0.2275390625, "learning_rate": 1.5911080474658215e-05, "loss": 1.0018, "num_tokens": 19410561788.0, "step": 3780 }, { "epoch": 0.6739750445632798, "grad_norm": 0.22265625, "learning_rate": 1.59089031320751e-05, "loss": 1.039, "num_tokens": 19416843801.0, "step": 3781 }, { "epoch": 0.6741532976827095, "grad_norm": 0.2138671875, "learning_rate": 1.590672538043205e-05, "loss": 1.0176, "num_tokens": 19423128698.0, "step": 3782 }, { "epoch": 0.674331550802139, "grad_norm": 0.228515625, "learning_rate": 1.590454721991054e-05, "loss": 1.065, "num_tokens": 19429412506.0, "step": 3783 }, { "epoch": 0.6745098039215687, "grad_norm": 0.216796875, "learning_rate": 1.590236865069207e-05, "loss": 1.0339, "num_tokens": 19435678841.0, "step": 3784 }, { "epoch": 0.6746880570409982, "grad_norm": 0.2265625, "learning_rate": 1.5900189672958184e-05, "loss": 1.0478, "num_tokens": 19441926267.0, "step": 3785 }, { "epoch": 0.6748663101604279, "grad_norm": 0.220703125, "learning_rate": 1.5898010286890456e-05, "loss": 1.0219, "num_tokens": 19448196766.0, "step": 3786 }, { "epoch": 0.6750445632798574, "grad_norm": 0.2431640625, "learning_rate": 1.5895830492670493e-05, "loss": 0.999, "num_tokens": 19454481911.0, "step": 3787 }, { "epoch": 0.6752228163992869, "grad_norm": 0.2099609375, "learning_rate": 1.5893650290479944e-05, "loss": 1.0255, "num_tokens": 19460757442.0, "step": 3788 }, { "epoch": 0.6754010695187166, "grad_norm": 0.228515625, "learning_rate": 1.5891469680500477e-05, "loss": 1.0145, "num_tokens": 19467008675.0, "step": 3789 }, { "epoch": 0.6755793226381461, "grad_norm": 0.2353515625, "learning_rate": 1.588928866291381e-05, "loss": 1.0118, "num_tokens": 19473292125.0, "step": 3790 }, { "epoch": 0.6757575757575758, "grad_norm": 0.21484375, "learning_rate": 1.5887107237901683e-05, "loss": 1.0405, "num_tokens": 19479573355.0, "step": 3791 }, { "epoch": 0.6759358288770053, "grad_norm": 0.220703125, "learning_rate": 1.5884925405645877e-05, "loss": 1.021, "num_tokens": 19485859344.0, "step": 3792 }, { "epoch": 0.676114081996435, "grad_norm": 0.2236328125, "learning_rate": 1.58827431663282e-05, "loss": 1.0411, "num_tokens": 19492098710.0, "step": 3793 }, { "epoch": 0.6762923351158645, "grad_norm": 0.22265625, "learning_rate": 1.5880560520130507e-05, "loss": 0.9716, "num_tokens": 19498376507.0, "step": 3794 }, { "epoch": 0.6764705882352942, "grad_norm": 0.240234375, "learning_rate": 1.5878377467234677e-05, "loss": 1.0133, "num_tokens": 19504661250.0, "step": 3795 }, { "epoch": 0.6766488413547237, "grad_norm": 0.2177734375, "learning_rate": 1.5876194007822616e-05, "loss": 1.0283, "num_tokens": 19510908711.0, "step": 3796 }, { "epoch": 0.6768270944741533, "grad_norm": 0.2373046875, "learning_rate": 1.5874010142076277e-05, "loss": 1.0091, "num_tokens": 19517168397.0, "step": 3797 }, { "epoch": 0.6770053475935829, "grad_norm": 0.2333984375, "learning_rate": 1.5871825870177643e-05, "loss": 1.0099, "num_tokens": 19523430573.0, "step": 3798 }, { "epoch": 0.6771836007130124, "grad_norm": 0.2138671875, "learning_rate": 1.5869641192308727e-05, "loss": 1.0135, "num_tokens": 19529698735.0, "step": 3799 }, { "epoch": 0.6773618538324421, "grad_norm": 0.2470703125, "learning_rate": 1.5867456108651586e-05, "loss": 1.0237, "num_tokens": 19535953442.0, "step": 3800 }, { "epoch": 0.6775401069518716, "grad_norm": 0.248046875, "learning_rate": 1.5865270619388292e-05, "loss": 1.0279, "num_tokens": 19542237947.0, "step": 3801 }, { "epoch": 0.6777183600713013, "grad_norm": 0.2373046875, "learning_rate": 1.5863084724700973e-05, "loss": 1.0415, "num_tokens": 19548514109.0, "step": 3802 }, { "epoch": 0.6778966131907308, "grad_norm": 0.2421875, "learning_rate": 1.5860898424771773e-05, "loss": 0.9894, "num_tokens": 19554784937.0, "step": 3803 }, { "epoch": 0.6780748663101605, "grad_norm": 0.2373046875, "learning_rate": 1.585871171978288e-05, "loss": 1.0476, "num_tokens": 19561069625.0, "step": 3804 }, { "epoch": 0.67825311942959, "grad_norm": 0.2265625, "learning_rate": 1.585652460991651e-05, "loss": 1.0365, "num_tokens": 19567354001.0, "step": 3805 }, { "epoch": 0.6784313725490196, "grad_norm": 0.2177734375, "learning_rate": 1.5854337095354917e-05, "loss": 1.0381, "num_tokens": 19573607088.0, "step": 3806 }, { "epoch": 0.6786096256684492, "grad_norm": 0.2197265625, "learning_rate": 1.5852149176280388e-05, "loss": 1.0194, "num_tokens": 19579830493.0, "step": 3807 }, { "epoch": 0.6787878787878788, "grad_norm": 0.2138671875, "learning_rate": 1.5849960852875245e-05, "loss": 1.0116, "num_tokens": 19586113151.0, "step": 3808 }, { "epoch": 0.6789661319073084, "grad_norm": 0.2197265625, "learning_rate": 1.5847772125321836e-05, "loss": 1.0146, "num_tokens": 19592394200.0, "step": 3809 }, { "epoch": 0.679144385026738, "grad_norm": 0.2080078125, "learning_rate": 1.584558299380255e-05, "loss": 1.0598, "num_tokens": 19598669495.0, "step": 3810 }, { "epoch": 0.6793226381461676, "grad_norm": 0.2275390625, "learning_rate": 1.584339345849981e-05, "loss": 1.0432, "num_tokens": 19604934444.0, "step": 3811 }, { "epoch": 0.6795008912655971, "grad_norm": 0.220703125, "learning_rate": 1.584120351959607e-05, "loss": 0.9964, "num_tokens": 19611199435.0, "step": 3812 }, { "epoch": 0.6796791443850267, "grad_norm": 0.208984375, "learning_rate": 1.5839013177273812e-05, "loss": 0.977, "num_tokens": 19617451936.0, "step": 3813 }, { "epoch": 0.6798573975044563, "grad_norm": 0.2177734375, "learning_rate": 1.5836822431715567e-05, "loss": 1.0354, "num_tokens": 19623724837.0, "step": 3814 }, { "epoch": 0.6800356506238859, "grad_norm": 0.2216796875, "learning_rate": 1.5834631283103884e-05, "loss": 1.0123, "num_tokens": 19629981550.0, "step": 3815 }, { "epoch": 0.6802139037433155, "grad_norm": 0.2109375, "learning_rate": 1.5832439731621355e-05, "loss": 1.0397, "num_tokens": 19636237267.0, "step": 3816 }, { "epoch": 0.6803921568627451, "grad_norm": 0.2236328125, "learning_rate": 1.5830247777450598e-05, "loss": 1.0533, "num_tokens": 19642505254.0, "step": 3817 }, { "epoch": 0.6805704099821747, "grad_norm": 0.2275390625, "learning_rate": 1.5828055420774278e-05, "loss": 1.0631, "num_tokens": 19648760074.0, "step": 3818 }, { "epoch": 0.6807486631016043, "grad_norm": 0.21484375, "learning_rate": 1.5825862661775074e-05, "loss": 1.0371, "num_tokens": 19655041435.0, "step": 3819 }, { "epoch": 0.6809269162210339, "grad_norm": 0.21484375, "learning_rate": 1.5823669500635714e-05, "loss": 1.0315, "num_tokens": 19661244729.0, "step": 3820 }, { "epoch": 0.6811051693404635, "grad_norm": 0.224609375, "learning_rate": 1.5821475937538956e-05, "loss": 1.0249, "num_tokens": 19667498127.0, "step": 3821 }, { "epoch": 0.681283422459893, "grad_norm": 0.216796875, "learning_rate": 1.581928197266759e-05, "loss": 1.0105, "num_tokens": 19673747362.0, "step": 3822 }, { "epoch": 0.6814616755793227, "grad_norm": 0.21875, "learning_rate": 1.581708760620443e-05, "loss": 1.0411, "num_tokens": 19680001053.0, "step": 3823 }, { "epoch": 0.6816399286987522, "grad_norm": 0.2119140625, "learning_rate": 1.5814892838332348e-05, "loss": 1.0097, "num_tokens": 19686257673.0, "step": 3824 }, { "epoch": 0.6818181818181818, "grad_norm": 0.220703125, "learning_rate": 1.5812697669234227e-05, "loss": 1.0582, "num_tokens": 19692541811.0, "step": 3825 }, { "epoch": 0.6819964349376114, "grad_norm": 0.2080078125, "learning_rate": 1.581050209909299e-05, "loss": 0.9953, "num_tokens": 19698827462.0, "step": 3826 }, { "epoch": 0.682174688057041, "grad_norm": 0.2197265625, "learning_rate": 1.5808306128091597e-05, "loss": 1.0254, "num_tokens": 19705083173.0, "step": 3827 }, { "epoch": 0.6823529411764706, "grad_norm": 0.21484375, "learning_rate": 1.5806109756413038e-05, "loss": 1.0423, "num_tokens": 19711334002.0, "step": 3828 }, { "epoch": 0.6825311942959001, "grad_norm": 0.220703125, "learning_rate": 1.5803912984240334e-05, "loss": 1.0249, "num_tokens": 19717594754.0, "step": 3829 }, { "epoch": 0.6827094474153298, "grad_norm": 0.2236328125, "learning_rate": 1.580171581175655e-05, "loss": 0.9986, "num_tokens": 19723854877.0, "step": 3830 }, { "epoch": 0.6828877005347593, "grad_norm": 0.22265625, "learning_rate": 1.5799518239144766e-05, "loss": 1.042, "num_tokens": 19730136733.0, "step": 3831 }, { "epoch": 0.683065953654189, "grad_norm": 0.2412109375, "learning_rate": 1.5797320266588113e-05, "loss": 1.0257, "num_tokens": 19736422034.0, "step": 3832 }, { "epoch": 0.6832442067736185, "grad_norm": 0.22265625, "learning_rate": 1.579512189426975e-05, "loss": 1.0336, "num_tokens": 19742691121.0, "step": 3833 }, { "epoch": 0.6834224598930482, "grad_norm": 0.21875, "learning_rate": 1.5792923122372865e-05, "loss": 1.0314, "num_tokens": 19748975061.0, "step": 3834 }, { "epoch": 0.6836007130124777, "grad_norm": 0.2294921875, "learning_rate": 1.5790723951080683e-05, "loss": 0.9996, "num_tokens": 19755257432.0, "step": 3835 }, { "epoch": 0.6837789661319074, "grad_norm": 0.2021484375, "learning_rate": 1.578852438057646e-05, "loss": 1.0023, "num_tokens": 19761541211.0, "step": 3836 }, { "epoch": 0.6839572192513369, "grad_norm": 0.22265625, "learning_rate": 1.578632441104349e-05, "loss": 1.0053, "num_tokens": 19767826587.0, "step": 3837 }, { "epoch": 0.6841354723707664, "grad_norm": 0.2314453125, "learning_rate": 1.578412404266509e-05, "loss": 1.0152, "num_tokens": 19774109266.0, "step": 3838 }, { "epoch": 0.6843137254901961, "grad_norm": 0.216796875, "learning_rate": 1.5781923275624624e-05, "loss": 1.0146, "num_tokens": 19780368749.0, "step": 3839 }, { "epoch": 0.6844919786096256, "grad_norm": 0.2216796875, "learning_rate": 1.577972211010548e-05, "loss": 0.9925, "num_tokens": 19786640589.0, "step": 3840 }, { "epoch": 0.6846702317290553, "grad_norm": 0.2333984375, "learning_rate": 1.5777520546291084e-05, "loss": 1.0193, "num_tokens": 19792901621.0, "step": 3841 }, { "epoch": 0.6848484848484848, "grad_norm": 0.22265625, "learning_rate": 1.577531858436489e-05, "loss": 1.0431, "num_tokens": 19799180204.0, "step": 3842 }, { "epoch": 0.6850267379679145, "grad_norm": 0.2158203125, "learning_rate": 1.5773116224510382e-05, "loss": 1.0219, "num_tokens": 19805453570.0, "step": 3843 }, { "epoch": 0.685204991087344, "grad_norm": 0.2197265625, "learning_rate": 1.5770913466911094e-05, "loss": 0.9957, "num_tokens": 19811730654.0, "step": 3844 }, { "epoch": 0.6853832442067737, "grad_norm": 0.2109375, "learning_rate": 1.5768710311750582e-05, "loss": 1.0297, "num_tokens": 19817975146.0, "step": 3845 }, { "epoch": 0.6855614973262032, "grad_norm": 0.2197265625, "learning_rate": 1.5766506759212428e-05, "loss": 1.0231, "num_tokens": 19824259111.0, "step": 3846 }, { "epoch": 0.6857397504456328, "grad_norm": 0.220703125, "learning_rate": 1.5764302809480256e-05, "loss": 1.009, "num_tokens": 19830514351.0, "step": 3847 }, { "epoch": 0.6859180035650624, "grad_norm": 0.2255859375, "learning_rate": 1.576209846273773e-05, "loss": 1.0548, "num_tokens": 19836743542.0, "step": 3848 }, { "epoch": 0.686096256684492, "grad_norm": 0.208984375, "learning_rate": 1.5759893719168522e-05, "loss": 0.9891, "num_tokens": 19843028404.0, "step": 3849 }, { "epoch": 0.6862745098039216, "grad_norm": 0.2001953125, "learning_rate": 1.5757688578956368e-05, "loss": 1.0153, "num_tokens": 19849292386.0, "step": 3850 }, { "epoch": 0.6864527629233511, "grad_norm": 0.2265625, "learning_rate": 1.5755483042285022e-05, "loss": 1.0018, "num_tokens": 19855562850.0, "step": 3851 }, { "epoch": 0.6866310160427808, "grad_norm": 0.2138671875, "learning_rate": 1.575327710933826e-05, "loss": 1.0425, "num_tokens": 19861822509.0, "step": 3852 }, { "epoch": 0.6868092691622103, "grad_norm": 0.2099609375, "learning_rate": 1.575107078029992e-05, "loss": 1.0407, "num_tokens": 19868090779.0, "step": 3853 }, { "epoch": 0.68698752228164, "grad_norm": 0.2236328125, "learning_rate": 1.574886405535384e-05, "loss": 1.0262, "num_tokens": 19874343089.0, "step": 3854 }, { "epoch": 0.6871657754010695, "grad_norm": 0.201171875, "learning_rate": 1.5746656934683923e-05, "loss": 1.0272, "num_tokens": 19880627990.0, "step": 3855 }, { "epoch": 0.6873440285204991, "grad_norm": 0.2314453125, "learning_rate": 1.574444941847407e-05, "loss": 1.0239, "num_tokens": 19886869860.0, "step": 3856 }, { "epoch": 0.6875222816399287, "grad_norm": 0.2216796875, "learning_rate": 1.5742241506908248e-05, "loss": 1.0301, "num_tokens": 19893154958.0, "step": 3857 }, { "epoch": 0.6877005347593583, "grad_norm": 0.2216796875, "learning_rate": 1.574003320017044e-05, "loss": 0.997, "num_tokens": 19899440499.0, "step": 3858 }, { "epoch": 0.6878787878787879, "grad_norm": 0.21484375, "learning_rate": 1.5737824498444662e-05, "loss": 1.0044, "num_tokens": 19905698100.0, "step": 3859 }, { "epoch": 0.6880570409982175, "grad_norm": 0.232421875, "learning_rate": 1.5735615401914965e-05, "loss": 1.0264, "num_tokens": 19911951707.0, "step": 3860 }, { "epoch": 0.6882352941176471, "grad_norm": 0.2197265625, "learning_rate": 1.5733405910765435e-05, "loss": 1.0092, "num_tokens": 19918234943.0, "step": 3861 }, { "epoch": 0.6884135472370766, "grad_norm": 0.2177734375, "learning_rate": 1.573119602518019e-05, "loss": 1.0265, "num_tokens": 19924485303.0, "step": 3862 }, { "epoch": 0.6885918003565062, "grad_norm": 0.2197265625, "learning_rate": 1.572898574534338e-05, "loss": 1.0132, "num_tokens": 19930742864.0, "step": 3863 }, { "epoch": 0.6887700534759358, "grad_norm": 0.22265625, "learning_rate": 1.5726775071439187e-05, "loss": 1.0216, "num_tokens": 19937004018.0, "step": 3864 }, { "epoch": 0.6889483065953654, "grad_norm": 0.2236328125, "learning_rate": 1.5724564003651834e-05, "loss": 1.0142, "num_tokens": 19943289886.0, "step": 3865 }, { "epoch": 0.689126559714795, "grad_norm": 0.2177734375, "learning_rate": 1.5722352542165555e-05, "loss": 1.0319, "num_tokens": 19949542285.0, "step": 3866 }, { "epoch": 0.6893048128342246, "grad_norm": 0.21484375, "learning_rate": 1.572014068716464e-05, "loss": 1.0085, "num_tokens": 19955825253.0, "step": 3867 }, { "epoch": 0.6894830659536542, "grad_norm": 0.216796875, "learning_rate": 1.571792843883341e-05, "loss": 0.9971, "num_tokens": 19962108616.0, "step": 3868 }, { "epoch": 0.6896613190730838, "grad_norm": 0.2275390625, "learning_rate": 1.57157157973562e-05, "loss": 1.0414, "num_tokens": 19968364307.0, "step": 3869 }, { "epoch": 0.6898395721925134, "grad_norm": 0.2421875, "learning_rate": 1.5713502762917395e-05, "loss": 1.0384, "num_tokens": 19974627540.0, "step": 3870 }, { "epoch": 0.690017825311943, "grad_norm": 0.2177734375, "learning_rate": 1.571128933570141e-05, "loss": 1.0016, "num_tokens": 19980905318.0, "step": 3871 }, { "epoch": 0.6901960784313725, "grad_norm": 0.22265625, "learning_rate": 1.5709075515892687e-05, "loss": 1.0547, "num_tokens": 19987188247.0, "step": 3872 }, { "epoch": 0.6903743315508022, "grad_norm": 0.2197265625, "learning_rate": 1.5706861303675708e-05, "loss": 1.0216, "num_tokens": 19993467828.0, "step": 3873 }, { "epoch": 0.6905525846702317, "grad_norm": 0.2080078125, "learning_rate": 1.570464669923498e-05, "loss": 1.0242, "num_tokens": 19999750870.0, "step": 3874 }, { "epoch": 0.6907308377896613, "grad_norm": 0.2158203125, "learning_rate": 1.5702431702755046e-05, "loss": 1.0096, "num_tokens": 20006034838.0, "step": 3875 }, { "epoch": 0.6909090909090909, "grad_norm": 0.216796875, "learning_rate": 1.5700216314420483e-05, "loss": 1.0319, "num_tokens": 20012317674.0, "step": 3876 }, { "epoch": 0.6910873440285205, "grad_norm": 0.236328125, "learning_rate": 1.5698000534415903e-05, "loss": 1.0145, "num_tokens": 20018601952.0, "step": 3877 }, { "epoch": 0.6912655971479501, "grad_norm": 0.2099609375, "learning_rate": 1.5695784362925946e-05, "loss": 1.0076, "num_tokens": 20024887560.0, "step": 3878 }, { "epoch": 0.6914438502673796, "grad_norm": 0.2001953125, "learning_rate": 1.5693567800135282e-05, "loss": 1.0361, "num_tokens": 20031172078.0, "step": 3879 }, { "epoch": 0.6916221033868093, "grad_norm": 0.2119140625, "learning_rate": 1.5691350846228622e-05, "loss": 1.0194, "num_tokens": 20037441828.0, "step": 3880 }, { "epoch": 0.6918003565062388, "grad_norm": 0.2138671875, "learning_rate": 1.5689133501390703e-05, "loss": 1.0222, "num_tokens": 20043706046.0, "step": 3881 }, { "epoch": 0.6919786096256685, "grad_norm": 0.205078125, "learning_rate": 1.5686915765806297e-05, "loss": 1.0415, "num_tokens": 20049986510.0, "step": 3882 }, { "epoch": 0.692156862745098, "grad_norm": 0.224609375, "learning_rate": 1.5684697639660214e-05, "loss": 1.0085, "num_tokens": 20056246866.0, "step": 3883 }, { "epoch": 0.6923351158645277, "grad_norm": 0.212890625, "learning_rate": 1.568247912313728e-05, "loss": 1.0156, "num_tokens": 20062473088.0, "step": 3884 }, { "epoch": 0.6925133689839572, "grad_norm": 0.2080078125, "learning_rate": 1.5680260216422373e-05, "loss": 1.0156, "num_tokens": 20068757931.0, "step": 3885 }, { "epoch": 0.6926916221033869, "grad_norm": 0.23046875, "learning_rate": 1.5678040919700392e-05, "loss": 1.0475, "num_tokens": 20075016885.0, "step": 3886 }, { "epoch": 0.6928698752228164, "grad_norm": 0.2216796875, "learning_rate": 1.5675821233156272e-05, "loss": 1.021, "num_tokens": 20081303047.0, "step": 3887 }, { "epoch": 0.6930481283422459, "grad_norm": 0.22265625, "learning_rate": 1.567360115697498e-05, "loss": 0.9992, "num_tokens": 20087581344.0, "step": 3888 }, { "epoch": 0.6932263814616756, "grad_norm": 0.2119140625, "learning_rate": 1.5671380691341514e-05, "loss": 1.0425, "num_tokens": 20093866196.0, "step": 3889 }, { "epoch": 0.6934046345811051, "grad_norm": 0.22265625, "learning_rate": 1.566915983644091e-05, "loss": 0.9942, "num_tokens": 20100121092.0, "step": 3890 }, { "epoch": 0.6935828877005348, "grad_norm": 0.22265625, "learning_rate": 1.5666938592458226e-05, "loss": 1.0247, "num_tokens": 20106388585.0, "step": 3891 }, { "epoch": 0.6937611408199643, "grad_norm": 0.21484375, "learning_rate": 1.5664716959578564e-05, "loss": 1.0576, "num_tokens": 20112641063.0, "step": 3892 }, { "epoch": 0.693939393939394, "grad_norm": 0.2294921875, "learning_rate": 1.566249493798705e-05, "loss": 1.0256, "num_tokens": 20118919523.0, "step": 3893 }, { "epoch": 0.6941176470588235, "grad_norm": 0.2138671875, "learning_rate": 1.566027252786885e-05, "loss": 1.0424, "num_tokens": 20125203150.0, "step": 3894 }, { "epoch": 0.6942959001782532, "grad_norm": 0.2138671875, "learning_rate": 1.5658049729409147e-05, "loss": 1.0393, "num_tokens": 20131485914.0, "step": 3895 }, { "epoch": 0.6944741532976827, "grad_norm": 0.2177734375, "learning_rate": 1.565582654279318e-05, "loss": 1.0326, "num_tokens": 20137734312.0, "step": 3896 }, { "epoch": 0.6946524064171123, "grad_norm": 0.2138671875, "learning_rate": 1.5653602968206202e-05, "loss": 1.0433, "num_tokens": 20144019094.0, "step": 3897 }, { "epoch": 0.6948306595365419, "grad_norm": 0.1982421875, "learning_rate": 1.5651379005833506e-05, "loss": 1.0322, "num_tokens": 20150279096.0, "step": 3898 }, { "epoch": 0.6950089126559715, "grad_norm": 0.2099609375, "learning_rate": 1.5649154655860413e-05, "loss": 1.0331, "num_tokens": 20156549159.0, "step": 3899 }, { "epoch": 0.6951871657754011, "grad_norm": 0.216796875, "learning_rate": 1.564692991847228e-05, "loss": 1.0535, "num_tokens": 20162773784.0, "step": 3900 }, { "epoch": 0.6953654188948306, "grad_norm": 0.21484375, "learning_rate": 1.564470479385449e-05, "loss": 1.0729, "num_tokens": 20169058484.0, "step": 3901 }, { "epoch": 0.6955436720142603, "grad_norm": 0.201171875, "learning_rate": 1.564247928219247e-05, "loss": 1.0347, "num_tokens": 20175287717.0, "step": 3902 }, { "epoch": 0.6957219251336898, "grad_norm": 0.2158203125, "learning_rate": 1.564025338367167e-05, "loss": 1.0334, "num_tokens": 20181567257.0, "step": 3903 }, { "epoch": 0.6959001782531195, "grad_norm": 0.2265625, "learning_rate": 1.5638027098477574e-05, "loss": 1.0539, "num_tokens": 20187833065.0, "step": 3904 }, { "epoch": 0.696078431372549, "grad_norm": 0.2099609375, "learning_rate": 1.56358004267957e-05, "loss": 1.019, "num_tokens": 20194114311.0, "step": 3905 }, { "epoch": 0.6962566844919786, "grad_norm": 0.21484375, "learning_rate": 1.5633573368811594e-05, "loss": 1.0117, "num_tokens": 20200398025.0, "step": 3906 }, { "epoch": 0.6964349376114082, "grad_norm": 0.236328125, "learning_rate": 1.5631345924710843e-05, "loss": 1.0316, "num_tokens": 20206661962.0, "step": 3907 }, { "epoch": 0.6966131907308378, "grad_norm": 0.21484375, "learning_rate": 1.5629118094679054e-05, "loss": 1.0284, "num_tokens": 20212943606.0, "step": 3908 }, { "epoch": 0.6967914438502674, "grad_norm": 0.2255859375, "learning_rate": 1.5626889878901875e-05, "loss": 1.0196, "num_tokens": 20219207964.0, "step": 3909 }, { "epoch": 0.696969696969697, "grad_norm": 0.203125, "learning_rate": 1.5624661277564985e-05, "loss": 1.0301, "num_tokens": 20225465177.0, "step": 3910 }, { "epoch": 0.6971479500891266, "grad_norm": 0.21875, "learning_rate": 1.562243229085409e-05, "loss": 0.9851, "num_tokens": 20231749611.0, "step": 3911 }, { "epoch": 0.6973262032085561, "grad_norm": 0.234375, "learning_rate": 1.5620202918954937e-05, "loss": 1.0375, "num_tokens": 20238025749.0, "step": 3912 }, { "epoch": 0.6975044563279857, "grad_norm": 0.2314453125, "learning_rate": 1.5617973162053298e-05, "loss": 1.0535, "num_tokens": 20244279371.0, "step": 3913 }, { "epoch": 0.6976827094474153, "grad_norm": 0.2451171875, "learning_rate": 1.561574302033498e-05, "loss": 1.0178, "num_tokens": 20250546198.0, "step": 3914 }, { "epoch": 0.6978609625668449, "grad_norm": 0.2373046875, "learning_rate": 1.5613512493985815e-05, "loss": 1.0209, "num_tokens": 20256812007.0, "step": 3915 }, { "epoch": 0.6980392156862745, "grad_norm": 0.2314453125, "learning_rate": 1.561128158319168e-05, "loss": 1.0194, "num_tokens": 20263097389.0, "step": 3916 }, { "epoch": 0.6982174688057041, "grad_norm": 0.2255859375, "learning_rate": 1.5609050288138477e-05, "loss": 1.0036, "num_tokens": 20269380743.0, "step": 3917 }, { "epoch": 0.6983957219251337, "grad_norm": 0.2275390625, "learning_rate": 1.560681860901214e-05, "loss": 1.0249, "num_tokens": 20275664648.0, "step": 3918 }, { "epoch": 0.6985739750445633, "grad_norm": 0.220703125, "learning_rate": 1.5604586545998635e-05, "loss": 1.0082, "num_tokens": 20281948544.0, "step": 3919 }, { "epoch": 0.6987522281639929, "grad_norm": 0.2216796875, "learning_rate": 1.5602354099283953e-05, "loss": 1.0364, "num_tokens": 20288223665.0, "step": 3920 }, { "epoch": 0.6989304812834225, "grad_norm": 0.2158203125, "learning_rate": 1.5600121269054135e-05, "loss": 1.0145, "num_tokens": 20294507116.0, "step": 3921 }, { "epoch": 0.699108734402852, "grad_norm": 0.2353515625, "learning_rate": 1.559788805549524e-05, "loss": 1.059, "num_tokens": 20300789203.0, "step": 3922 }, { "epoch": 0.6992869875222817, "grad_norm": 0.2080078125, "learning_rate": 1.5595654458793354e-05, "loss": 1.0152, "num_tokens": 20307074375.0, "step": 3923 }, { "epoch": 0.6994652406417112, "grad_norm": 0.2451171875, "learning_rate": 1.559342047913462e-05, "loss": 1.0595, "num_tokens": 20313357998.0, "step": 3924 }, { "epoch": 0.6996434937611408, "grad_norm": 0.212890625, "learning_rate": 1.5591186116705178e-05, "loss": 1.0513, "num_tokens": 20319596227.0, "step": 3925 }, { "epoch": 0.6998217468805704, "grad_norm": 0.2138671875, "learning_rate": 1.558895137169123e-05, "loss": 1.0202, "num_tokens": 20325862118.0, "step": 3926 }, { "epoch": 0.7, "grad_norm": 0.2255859375, "learning_rate": 1.558671624427899e-05, "loss": 0.9973, "num_tokens": 20332132149.0, "step": 3927 }, { "epoch": 0.7001782531194296, "grad_norm": 0.203125, "learning_rate": 1.558448073465472e-05, "loss": 1.0207, "num_tokens": 20338400236.0, "step": 3928 }, { "epoch": 0.7003565062388591, "grad_norm": 0.212890625, "learning_rate": 1.5582244843004697e-05, "loss": 1.0268, "num_tokens": 20344646324.0, "step": 3929 }, { "epoch": 0.7005347593582888, "grad_norm": 0.2353515625, "learning_rate": 1.558000856951525e-05, "loss": 1.0511, "num_tokens": 20350903971.0, "step": 3930 }, { "epoch": 0.7007130124777183, "grad_norm": 0.2119140625, "learning_rate": 1.5577771914372713e-05, "loss": 1.014, "num_tokens": 20357157167.0, "step": 3931 }, { "epoch": 0.700891265597148, "grad_norm": 0.2216796875, "learning_rate": 1.5575534877763475e-05, "loss": 1.0225, "num_tokens": 20363440752.0, "step": 3932 }, { "epoch": 0.7010695187165775, "grad_norm": 0.212890625, "learning_rate": 1.5573297459873947e-05, "loss": 1.027, "num_tokens": 20369725514.0, "step": 3933 }, { "epoch": 0.7012477718360072, "grad_norm": 0.2373046875, "learning_rate": 1.5571059660890574e-05, "loss": 1.0051, "num_tokens": 20375982245.0, "step": 3934 }, { "epoch": 0.7014260249554367, "grad_norm": 0.2216796875, "learning_rate": 1.5568821480999836e-05, "loss": 1.0353, "num_tokens": 20382265039.0, "step": 3935 }, { "epoch": 0.7016042780748664, "grad_norm": 0.244140625, "learning_rate": 1.556658292038824e-05, "loss": 1.0009, "num_tokens": 20388526206.0, "step": 3936 }, { "epoch": 0.7017825311942959, "grad_norm": 0.2236328125, "learning_rate": 1.5564343979242316e-05, "loss": 0.9986, "num_tokens": 20394810159.0, "step": 3937 }, { "epoch": 0.7019607843137254, "grad_norm": 0.2333984375, "learning_rate": 1.5562104657748648e-05, "loss": 1.0463, "num_tokens": 20401093202.0, "step": 3938 }, { "epoch": 0.7021390374331551, "grad_norm": 0.26953125, "learning_rate": 1.5559864956093835e-05, "loss": 1.0068, "num_tokens": 20407377562.0, "step": 3939 }, { "epoch": 0.7023172905525846, "grad_norm": 0.1904296875, "learning_rate": 1.5557624874464506e-05, "loss": 0.994, "num_tokens": 20413659196.0, "step": 3940 }, { "epoch": 0.7024955436720143, "grad_norm": 0.2265625, "learning_rate": 1.5555384413047336e-05, "loss": 1.0395, "num_tokens": 20419941232.0, "step": 3941 }, { "epoch": 0.7026737967914438, "grad_norm": 0.2255859375, "learning_rate": 1.555314357202902e-05, "loss": 0.9936, "num_tokens": 20426211734.0, "step": 3942 }, { "epoch": 0.7028520499108735, "grad_norm": 0.21875, "learning_rate": 1.5550902351596284e-05, "loss": 1.0002, "num_tokens": 20432469653.0, "step": 3943 }, { "epoch": 0.703030303030303, "grad_norm": 0.2138671875, "learning_rate": 1.5548660751935894e-05, "loss": 1.0012, "num_tokens": 20438718004.0, "step": 3944 }, { "epoch": 0.7032085561497327, "grad_norm": 0.208984375, "learning_rate": 1.5546418773234643e-05, "loss": 1.0111, "num_tokens": 20444993493.0, "step": 3945 }, { "epoch": 0.7033868092691622, "grad_norm": 0.20703125, "learning_rate": 1.5544176415679352e-05, "loss": 1.0449, "num_tokens": 20451277509.0, "step": 3946 }, { "epoch": 0.7035650623885918, "grad_norm": 0.205078125, "learning_rate": 1.5541933679456882e-05, "loss": 0.9895, "num_tokens": 20457562063.0, "step": 3947 }, { "epoch": 0.7037433155080214, "grad_norm": 0.20703125, "learning_rate": 1.5539690564754114e-05, "loss": 1.0218, "num_tokens": 20463847949.0, "step": 3948 }, { "epoch": 0.703921568627451, "grad_norm": 0.2236328125, "learning_rate": 1.5537447071757975e-05, "loss": 1.0075, "num_tokens": 20470130526.0, "step": 3949 }, { "epoch": 0.7040998217468806, "grad_norm": 0.21484375, "learning_rate": 1.5535203200655412e-05, "loss": 1.0069, "num_tokens": 20476411222.0, "step": 3950 }, { "epoch": 0.7042780748663101, "grad_norm": 0.220703125, "learning_rate": 1.5532958951633402e-05, "loss": 0.993, "num_tokens": 20482674044.0, "step": 3951 }, { "epoch": 0.7044563279857398, "grad_norm": 0.2109375, "learning_rate": 1.553071432487897e-05, "loss": 1.0158, "num_tokens": 20488944100.0, "step": 3952 }, { "epoch": 0.7046345811051693, "grad_norm": 0.21484375, "learning_rate": 1.5528469320579152e-05, "loss": 0.9937, "num_tokens": 20495199984.0, "step": 3953 }, { "epoch": 0.704812834224599, "grad_norm": 0.21875, "learning_rate": 1.5526223938921026e-05, "loss": 0.9964, "num_tokens": 20501454605.0, "step": 3954 }, { "epoch": 0.7049910873440285, "grad_norm": 0.2080078125, "learning_rate": 1.5523978180091706e-05, "loss": 1.0221, "num_tokens": 20507717942.0, "step": 3955 }, { "epoch": 0.7051693404634581, "grad_norm": 0.2392578125, "learning_rate": 1.5521732044278327e-05, "loss": 1.0023, "num_tokens": 20514002242.0, "step": 3956 }, { "epoch": 0.7053475935828877, "grad_norm": 0.234375, "learning_rate": 1.551948553166806e-05, "loss": 1.0585, "num_tokens": 20520284344.0, "step": 3957 }, { "epoch": 0.7055258467023173, "grad_norm": 0.21875, "learning_rate": 1.5517238642448105e-05, "loss": 1.0068, "num_tokens": 20526555154.0, "step": 3958 }, { "epoch": 0.7057040998217469, "grad_norm": 0.2255859375, "learning_rate": 1.5514991376805702e-05, "loss": 1.0017, "num_tokens": 20532822511.0, "step": 3959 }, { "epoch": 0.7058823529411765, "grad_norm": 0.2099609375, "learning_rate": 1.5512743734928112e-05, "loss": 1.0192, "num_tokens": 20539106055.0, "step": 3960 }, { "epoch": 0.706060606060606, "grad_norm": 0.2255859375, "learning_rate": 1.5510495717002632e-05, "loss": 1.024, "num_tokens": 20545365728.0, "step": 3961 }, { "epoch": 0.7062388591800357, "grad_norm": 0.22265625, "learning_rate": 1.550824732321659e-05, "loss": 1.0333, "num_tokens": 20551639481.0, "step": 3962 }, { "epoch": 0.7064171122994652, "grad_norm": 0.2314453125, "learning_rate": 1.5505998553757342e-05, "loss": 1.0028, "num_tokens": 20557896010.0, "step": 3963 }, { "epoch": 0.7065953654188948, "grad_norm": 0.2255859375, "learning_rate": 1.5503749408812286e-05, "loss": 1.0276, "num_tokens": 20564179944.0, "step": 3964 }, { "epoch": 0.7067736185383244, "grad_norm": 0.22265625, "learning_rate": 1.550149988856884e-05, "loss": 1.0467, "num_tokens": 20570462064.0, "step": 3965 }, { "epoch": 0.706951871657754, "grad_norm": 0.2333984375, "learning_rate": 1.5499249993214452e-05, "loss": 1.0554, "num_tokens": 20576745328.0, "step": 3966 }, { "epoch": 0.7071301247771836, "grad_norm": 0.212890625, "learning_rate": 1.5496999722936615e-05, "loss": 1.0128, "num_tokens": 20583017206.0, "step": 3967 }, { "epoch": 0.7073083778966132, "grad_norm": 0.2109375, "learning_rate": 1.5494749077922837e-05, "loss": 1.0486, "num_tokens": 20589301980.0, "step": 3968 }, { "epoch": 0.7074866310160428, "grad_norm": 0.2119140625, "learning_rate": 1.5492498058360667e-05, "loss": 1.0526, "num_tokens": 20595573200.0, "step": 3969 }, { "epoch": 0.7076648841354723, "grad_norm": 0.2177734375, "learning_rate": 1.5490246664437684e-05, "loss": 1.0118, "num_tokens": 20601846137.0, "step": 3970 }, { "epoch": 0.707843137254902, "grad_norm": 0.205078125, "learning_rate": 1.5487994896341495e-05, "loss": 1.047, "num_tokens": 20608128214.0, "step": 3971 }, { "epoch": 0.7080213903743315, "grad_norm": 0.2060546875, "learning_rate": 1.548574275425975e-05, "loss": 1.027, "num_tokens": 20614412383.0, "step": 3972 }, { "epoch": 0.7081996434937612, "grad_norm": 0.2138671875, "learning_rate": 1.5483490238380104e-05, "loss": 1.0059, "num_tokens": 20620673704.0, "step": 3973 }, { "epoch": 0.7083778966131907, "grad_norm": 0.224609375, "learning_rate": 1.5481237348890274e-05, "loss": 1.008, "num_tokens": 20626953603.0, "step": 3974 }, { "epoch": 0.7085561497326203, "grad_norm": 0.2392578125, "learning_rate": 1.5478984085977986e-05, "loss": 1.0174, "num_tokens": 20633238653.0, "step": 3975 }, { "epoch": 0.7087344028520499, "grad_norm": 0.2265625, "learning_rate": 1.5476730449831008e-05, "loss": 1.0199, "num_tokens": 20639522795.0, "step": 3976 }, { "epoch": 0.7089126559714795, "grad_norm": 0.2041015625, "learning_rate": 1.5474476440637134e-05, "loss": 1.0376, "num_tokens": 20645808332.0, "step": 3977 }, { "epoch": 0.7090909090909091, "grad_norm": 0.2109375, "learning_rate": 1.547222205858419e-05, "loss": 1.0161, "num_tokens": 20652062832.0, "step": 3978 }, { "epoch": 0.7092691622103386, "grad_norm": 0.208984375, "learning_rate": 1.5469967303860035e-05, "loss": 1.0062, "num_tokens": 20658347199.0, "step": 3979 }, { "epoch": 0.7094474153297683, "grad_norm": 0.2021484375, "learning_rate": 1.5467712176652564e-05, "loss": 1.0092, "num_tokens": 20664631431.0, "step": 3980 }, { "epoch": 0.7096256684491978, "grad_norm": 0.2216796875, "learning_rate": 1.5465456677149694e-05, "loss": 1.0379, "num_tokens": 20670903431.0, "step": 3981 }, { "epoch": 0.7098039215686275, "grad_norm": 0.21484375, "learning_rate": 1.5463200805539366e-05, "loss": 1.0484, "num_tokens": 20677161800.0, "step": 3982 }, { "epoch": 0.709982174688057, "grad_norm": 0.23046875, "learning_rate": 1.5460944562009576e-05, "loss": 1.0174, "num_tokens": 20683397000.0, "step": 3983 }, { "epoch": 0.7101604278074867, "grad_norm": 0.2294921875, "learning_rate": 1.5458687946748335e-05, "loss": 1.0235, "num_tokens": 20689675186.0, "step": 3984 }, { "epoch": 0.7103386809269162, "grad_norm": 0.2158203125, "learning_rate": 1.545643095994368e-05, "loss": 1.0238, "num_tokens": 20695958920.0, "step": 3985 }, { "epoch": 0.7105169340463459, "grad_norm": 0.240234375, "learning_rate": 1.5454173601783693e-05, "loss": 1.0272, "num_tokens": 20702204733.0, "step": 3986 }, { "epoch": 0.7106951871657754, "grad_norm": 0.220703125, "learning_rate": 1.5451915872456476e-05, "loss": 1.0435, "num_tokens": 20708480538.0, "step": 3987 }, { "epoch": 0.7108734402852049, "grad_norm": 0.2236328125, "learning_rate": 1.544965777215017e-05, "loss": 1.0439, "num_tokens": 20714743093.0, "step": 3988 }, { "epoch": 0.7110516934046346, "grad_norm": 0.21875, "learning_rate": 1.5447399301052943e-05, "loss": 1.0563, "num_tokens": 20721023703.0, "step": 3989 }, { "epoch": 0.7112299465240641, "grad_norm": 0.220703125, "learning_rate": 1.5445140459352986e-05, "loss": 1.0125, "num_tokens": 20727276988.0, "step": 3990 }, { "epoch": 0.7114081996434938, "grad_norm": 0.2109375, "learning_rate": 1.5442881247238538e-05, "loss": 1.0131, "num_tokens": 20733549318.0, "step": 3991 }, { "epoch": 0.7115864527629233, "grad_norm": 0.2236328125, "learning_rate": 1.544062166489785e-05, "loss": 0.993, "num_tokens": 20739807649.0, "step": 3992 }, { "epoch": 0.711764705882353, "grad_norm": 0.2451171875, "learning_rate": 1.5438361712519227e-05, "loss": 1.0596, "num_tokens": 20746066057.0, "step": 3993 }, { "epoch": 0.7119429590017825, "grad_norm": 0.25390625, "learning_rate": 1.5436101390290982e-05, "loss": 1.0437, "num_tokens": 20752349063.0, "step": 3994 }, { "epoch": 0.7121212121212122, "grad_norm": 0.2265625, "learning_rate": 1.543384069840147e-05, "loss": 1.0024, "num_tokens": 20758633459.0, "step": 3995 }, { "epoch": 0.7122994652406417, "grad_norm": 0.267578125, "learning_rate": 1.5431579637039072e-05, "loss": 1.0173, "num_tokens": 20764916318.0, "step": 3996 }, { "epoch": 0.7124777183600713, "grad_norm": 0.2294921875, "learning_rate": 1.542931820639221e-05, "loss": 0.9818, "num_tokens": 20771156605.0, "step": 3997 }, { "epoch": 0.7126559714795009, "grad_norm": 0.25, "learning_rate": 1.542705640664932e-05, "loss": 1.0064, "num_tokens": 20777431065.0, "step": 3998 }, { "epoch": 0.7128342245989305, "grad_norm": 0.220703125, "learning_rate": 1.542479423799889e-05, "loss": 1.0194, "num_tokens": 20783715348.0, "step": 3999 }, { "epoch": 0.7130124777183601, "grad_norm": 0.224609375, "learning_rate": 1.5422531700629416e-05, "loss": 1.0535, "num_tokens": 20789982801.0, "step": 4000 }, { "epoch": 0.7131907308377896, "grad_norm": 0.259765625, "learning_rate": 1.542026879472944e-05, "loss": 1.0145, "num_tokens": 20796266902.0, "step": 4001 }, { "epoch": 0.7133689839572193, "grad_norm": 0.208984375, "learning_rate": 1.541800552048753e-05, "loss": 1.0264, "num_tokens": 20802543872.0, "step": 4002 }, { "epoch": 0.7135472370766488, "grad_norm": 0.2294921875, "learning_rate": 1.5415741878092288e-05, "loss": 1.0039, "num_tokens": 20808803767.0, "step": 4003 }, { "epoch": 0.7137254901960784, "grad_norm": 0.2412109375, "learning_rate": 1.541347786773234e-05, "loss": 1.0336, "num_tokens": 20815075972.0, "step": 4004 }, { "epoch": 0.713903743315508, "grad_norm": 0.2177734375, "learning_rate": 1.5411213489596348e-05, "loss": 1.0289, "num_tokens": 20821346604.0, "step": 4005 }, { "epoch": 0.7140819964349376, "grad_norm": 0.2158203125, "learning_rate": 1.5408948743873003e-05, "loss": 1.0392, "num_tokens": 20827561348.0, "step": 4006 }, { "epoch": 0.7142602495543672, "grad_norm": 0.220703125, "learning_rate": 1.540668363075103e-05, "loss": 1.0632, "num_tokens": 20833825212.0, "step": 4007 }, { "epoch": 0.7144385026737968, "grad_norm": 0.2216796875, "learning_rate": 1.5404418150419174e-05, "loss": 1.0335, "num_tokens": 20840109020.0, "step": 4008 }, { "epoch": 0.7146167557932264, "grad_norm": 0.2353515625, "learning_rate": 1.5402152303066222e-05, "loss": 1.0368, "num_tokens": 20846391025.0, "step": 4009 }, { "epoch": 0.714795008912656, "grad_norm": 0.2265625, "learning_rate": 1.539988608888099e-05, "loss": 1.0302, "num_tokens": 20852650493.0, "step": 4010 }, { "epoch": 0.7149732620320856, "grad_norm": 0.2392578125, "learning_rate": 1.539761950805232e-05, "loss": 1.0405, "num_tokens": 20858874704.0, "step": 4011 }, { "epoch": 0.7151515151515152, "grad_norm": 0.228515625, "learning_rate": 1.5395352560769083e-05, "loss": 1.0122, "num_tokens": 20865137414.0, "step": 4012 }, { "epoch": 0.7153297682709447, "grad_norm": 0.21875, "learning_rate": 1.539308524722019e-05, "loss": 1.029, "num_tokens": 20871422212.0, "step": 4013 }, { "epoch": 0.7155080213903743, "grad_norm": 0.2197265625, "learning_rate": 1.5390817567594574e-05, "loss": 1.0341, "num_tokens": 20877707043.0, "step": 4014 }, { "epoch": 0.7156862745098039, "grad_norm": 0.2197265625, "learning_rate": 1.5388549522081202e-05, "loss": 1.0504, "num_tokens": 20883990823.0, "step": 4015 }, { "epoch": 0.7158645276292335, "grad_norm": 0.234375, "learning_rate": 1.5386281110869068e-05, "loss": 1.0444, "num_tokens": 20890212107.0, "step": 4016 }, { "epoch": 0.7160427807486631, "grad_norm": 0.2255859375, "learning_rate": 1.5384012334147206e-05, "loss": 1.036, "num_tokens": 20896494742.0, "step": 4017 }, { "epoch": 0.7162210338680927, "grad_norm": 0.224609375, "learning_rate": 1.5381743192104663e-05, "loss": 1.0265, "num_tokens": 20902778342.0, "step": 4018 }, { "epoch": 0.7163992869875223, "grad_norm": 0.236328125, "learning_rate": 1.5379473684930533e-05, "loss": 1.033, "num_tokens": 20909064381.0, "step": 4019 }, { "epoch": 0.7165775401069518, "grad_norm": 0.2431640625, "learning_rate": 1.537720381281394e-05, "loss": 1.0315, "num_tokens": 20915344934.0, "step": 4020 }, { "epoch": 0.7167557932263815, "grad_norm": 0.2138671875, "learning_rate": 1.5374933575944024e-05, "loss": 1.0043, "num_tokens": 20921582896.0, "step": 4021 }, { "epoch": 0.716934046345811, "grad_norm": 0.263671875, "learning_rate": 1.5372662974509975e-05, "loss": 1.0281, "num_tokens": 20927845356.0, "step": 4022 }, { "epoch": 0.7171122994652407, "grad_norm": 0.255859375, "learning_rate": 1.5370392008700986e-05, "loss": 1.0576, "num_tokens": 20934109133.0, "step": 4023 }, { "epoch": 0.7172905525846702, "grad_norm": 0.2109375, "learning_rate": 1.536812067870631e-05, "loss": 1.0299, "num_tokens": 20940367091.0, "step": 4024 }, { "epoch": 0.7174688057040999, "grad_norm": 0.2470703125, "learning_rate": 1.536584898471521e-05, "loss": 1.0313, "num_tokens": 20946617212.0, "step": 4025 }, { "epoch": 0.7176470588235294, "grad_norm": 0.224609375, "learning_rate": 1.5363576926916998e-05, "loss": 1.0217, "num_tokens": 20952884487.0, "step": 4026 }, { "epoch": 0.717825311942959, "grad_norm": 0.228515625, "learning_rate": 1.5361304505500995e-05, "loss": 1.0435, "num_tokens": 20959168873.0, "step": 4027 }, { "epoch": 0.7180035650623886, "grad_norm": 0.2451171875, "learning_rate": 1.5359031720656565e-05, "loss": 1.0232, "num_tokens": 20965442796.0, "step": 4028 }, { "epoch": 0.7181818181818181, "grad_norm": 0.2021484375, "learning_rate": 1.53567585725731e-05, "loss": 1.0526, "num_tokens": 20971725722.0, "step": 4029 }, { "epoch": 0.7183600713012478, "grad_norm": 0.244140625, "learning_rate": 1.5354485061440022e-05, "loss": 1.0413, "num_tokens": 20978010225.0, "step": 4030 }, { "epoch": 0.7185383244206773, "grad_norm": 0.2421875, "learning_rate": 1.5352211187446787e-05, "loss": 1.0293, "num_tokens": 20984293514.0, "step": 4031 }, { "epoch": 0.718716577540107, "grad_norm": 0.2109375, "learning_rate": 1.5349936950782867e-05, "loss": 1.0327, "num_tokens": 20990559510.0, "step": 4032 }, { "epoch": 0.7188948306595365, "grad_norm": 0.2275390625, "learning_rate": 1.534766235163778e-05, "loss": 1.0555, "num_tokens": 20996841725.0, "step": 4033 }, { "epoch": 0.7190730837789662, "grad_norm": 0.224609375, "learning_rate": 1.5345387390201077e-05, "loss": 1.0162, "num_tokens": 21003126050.0, "step": 4034 }, { "epoch": 0.7192513368983957, "grad_norm": 0.251953125, "learning_rate": 1.5343112066662318e-05, "loss": 1.0265, "num_tokens": 21009410266.0, "step": 4035 }, { "epoch": 0.7194295900178254, "grad_norm": 0.271484375, "learning_rate": 1.5340836381211114e-05, "loss": 1.0365, "num_tokens": 21015693800.0, "step": 4036 }, { "epoch": 0.7196078431372549, "grad_norm": 0.2138671875, "learning_rate": 1.5338560334037095e-05, "loss": 0.9923, "num_tokens": 21021972814.0, "step": 4037 }, { "epoch": 0.7197860962566844, "grad_norm": 0.26171875, "learning_rate": 1.533628392532993e-05, "loss": 1.0542, "num_tokens": 21028235052.0, "step": 4038 }, { "epoch": 0.7199643493761141, "grad_norm": 0.232421875, "learning_rate": 1.53340071552793e-05, "loss": 1.032, "num_tokens": 21034518843.0, "step": 4039 }, { "epoch": 0.7201426024955436, "grad_norm": 0.212890625, "learning_rate": 1.5331730024074946e-05, "loss": 1.0302, "num_tokens": 21040803969.0, "step": 4040 }, { "epoch": 0.7203208556149733, "grad_norm": 0.26953125, "learning_rate": 1.532945253190661e-05, "loss": 1.0201, "num_tokens": 21047079420.0, "step": 4041 }, { "epoch": 0.7204991087344028, "grad_norm": 0.2138671875, "learning_rate": 1.5327174678964077e-05, "loss": 1.0425, "num_tokens": 21053345387.0, "step": 4042 }, { "epoch": 0.7206773618538325, "grad_norm": 0.25, "learning_rate": 1.5324896465437163e-05, "loss": 1.0734, "num_tokens": 21059601514.0, "step": 4043 }, { "epoch": 0.720855614973262, "grad_norm": 0.26953125, "learning_rate": 1.532261789151571e-05, "loss": 1.0173, "num_tokens": 21065859458.0, "step": 4044 }, { "epoch": 0.7210338680926917, "grad_norm": 0.2109375, "learning_rate": 1.53203389573896e-05, "loss": 1.0108, "num_tokens": 21072144210.0, "step": 4045 }, { "epoch": 0.7212121212121212, "grad_norm": 0.23046875, "learning_rate": 1.531805966324873e-05, "loss": 1.02, "num_tokens": 21078402337.0, "step": 4046 }, { "epoch": 0.7213903743315508, "grad_norm": 0.2353515625, "learning_rate": 1.5315780009283032e-05, "loss": 1.0399, "num_tokens": 21084684342.0, "step": 4047 }, { "epoch": 0.7215686274509804, "grad_norm": 0.2080078125, "learning_rate": 1.531349999568247e-05, "loss": 1.0226, "num_tokens": 21090953734.0, "step": 4048 }, { "epoch": 0.72174688057041, "grad_norm": 0.24609375, "learning_rate": 1.531121962263705e-05, "loss": 1.005, "num_tokens": 21097208650.0, "step": 4049 }, { "epoch": 0.7219251336898396, "grad_norm": 0.2158203125, "learning_rate": 1.530893889033678e-05, "loss": 1.0302, "num_tokens": 21103488144.0, "step": 4050 }, { "epoch": 0.7221033868092691, "grad_norm": 0.22265625, "learning_rate": 1.5306657798971725e-05, "loss": 1.0169, "num_tokens": 21109738101.0, "step": 4051 }, { "epoch": 0.7222816399286988, "grad_norm": 0.20703125, "learning_rate": 1.530437634873196e-05, "loss": 1.0135, "num_tokens": 21115975215.0, "step": 4052 }, { "epoch": 0.7224598930481283, "grad_norm": 0.2275390625, "learning_rate": 1.530209453980761e-05, "loss": 1.0172, "num_tokens": 21122259992.0, "step": 4053 }, { "epoch": 0.722638146167558, "grad_norm": 0.2099609375, "learning_rate": 1.5299812372388808e-05, "loss": 0.9843, "num_tokens": 21128543530.0, "step": 4054 }, { "epoch": 0.7228163992869875, "grad_norm": 0.19140625, "learning_rate": 1.5297529846665735e-05, "loss": 1.0294, "num_tokens": 21134828491.0, "step": 4055 }, { "epoch": 0.7229946524064171, "grad_norm": 0.22265625, "learning_rate": 1.5295246962828587e-05, "loss": 1.0609, "num_tokens": 21141087578.0, "step": 4056 }, { "epoch": 0.7231729055258467, "grad_norm": 0.2216796875, "learning_rate": 1.52929637210676e-05, "loss": 1.0301, "num_tokens": 21147372938.0, "step": 4057 }, { "epoch": 0.7233511586452763, "grad_norm": 0.2060546875, "learning_rate": 1.5290680121573044e-05, "loss": 1.0058, "num_tokens": 21153616225.0, "step": 4058 }, { "epoch": 0.7235294117647059, "grad_norm": 0.2373046875, "learning_rate": 1.52883961645352e-05, "loss": 1.0113, "num_tokens": 21159894321.0, "step": 4059 }, { "epoch": 0.7237076648841355, "grad_norm": 0.20703125, "learning_rate": 1.5286111850144404e-05, "loss": 1.0303, "num_tokens": 21166142788.0, "step": 4060 }, { "epoch": 0.723885918003565, "grad_norm": 0.2392578125, "learning_rate": 1.5283827178590996e-05, "loss": 1.0228, "num_tokens": 21172428711.0, "step": 4061 }, { "epoch": 0.7240641711229947, "grad_norm": 0.21875, "learning_rate": 1.5281542150065362e-05, "loss": 1.0617, "num_tokens": 21178712202.0, "step": 4062 }, { "epoch": 0.7242424242424242, "grad_norm": 0.2109375, "learning_rate": 1.5279256764757917e-05, "loss": 1.0198, "num_tokens": 21184925814.0, "step": 4063 }, { "epoch": 0.7244206773618538, "grad_norm": 0.2119140625, "learning_rate": 1.5276971022859102e-05, "loss": 1.0197, "num_tokens": 21191208834.0, "step": 4064 }, { "epoch": 0.7245989304812834, "grad_norm": 0.2080078125, "learning_rate": 1.5274684924559387e-05, "loss": 1.0397, "num_tokens": 21197493562.0, "step": 4065 }, { "epoch": 0.724777183600713, "grad_norm": 0.228515625, "learning_rate": 1.527239847004927e-05, "loss": 1.0129, "num_tokens": 21203775551.0, "step": 4066 }, { "epoch": 0.7249554367201426, "grad_norm": 0.197265625, "learning_rate": 1.527011165951929e-05, "loss": 1.0028, "num_tokens": 21210048351.0, "step": 4067 }, { "epoch": 0.7251336898395722, "grad_norm": 0.205078125, "learning_rate": 1.526782449316e-05, "loss": 1.0428, "num_tokens": 21216292853.0, "step": 4068 }, { "epoch": 0.7253119429590018, "grad_norm": 0.212890625, "learning_rate": 1.526553697116199e-05, "loss": 1.039, "num_tokens": 21222574469.0, "step": 4069 }, { "epoch": 0.7254901960784313, "grad_norm": 0.1953125, "learning_rate": 1.5263249093715885e-05, "loss": 1.0038, "num_tokens": 21228769106.0, "step": 4070 }, { "epoch": 0.725668449197861, "grad_norm": 0.212890625, "learning_rate": 1.5260960861012327e-05, "loss": 1.0188, "num_tokens": 21235024765.0, "step": 4071 }, { "epoch": 0.7258467023172905, "grad_norm": 0.224609375, "learning_rate": 1.5258672273242007e-05, "loss": 1.0224, "num_tokens": 21241281729.0, "step": 4072 }, { "epoch": 0.7260249554367202, "grad_norm": 0.2138671875, "learning_rate": 1.5256383330595621e-05, "loss": 1.0176, "num_tokens": 21247522656.0, "step": 4073 }, { "epoch": 0.7262032085561497, "grad_norm": 0.2216796875, "learning_rate": 1.5254094033263911e-05, "loss": 1.0349, "num_tokens": 21253806755.0, "step": 4074 }, { "epoch": 0.7263814616755794, "grad_norm": 0.2109375, "learning_rate": 1.525180438143765e-05, "loss": 1.0328, "num_tokens": 21260061008.0, "step": 4075 }, { "epoch": 0.7265597147950089, "grad_norm": 0.205078125, "learning_rate": 1.5249514375307628e-05, "loss": 0.9753, "num_tokens": 21266332072.0, "step": 4076 }, { "epoch": 0.7267379679144385, "grad_norm": 0.2138671875, "learning_rate": 1.5247224015064672e-05, "loss": 1.0122, "num_tokens": 21272616093.0, "step": 4077 }, { "epoch": 0.7269162210338681, "grad_norm": 0.2099609375, "learning_rate": 1.5244933300899643e-05, "loss": 1.015, "num_tokens": 21278900251.0, "step": 4078 }, { "epoch": 0.7270944741532976, "grad_norm": 0.2041015625, "learning_rate": 1.5242642233003424e-05, "loss": 1.0367, "num_tokens": 21285183849.0, "step": 4079 }, { "epoch": 0.7272727272727273, "grad_norm": 0.21484375, "learning_rate": 1.5240350811566933e-05, "loss": 1.0459, "num_tokens": 21291466465.0, "step": 4080 }, { "epoch": 0.7274509803921568, "grad_norm": 0.2255859375, "learning_rate": 1.523805903678111e-05, "loss": 1.0296, "num_tokens": 21297731835.0, "step": 4081 }, { "epoch": 0.7276292335115865, "grad_norm": 0.228515625, "learning_rate": 1.5235766908836926e-05, "loss": 1.0804, "num_tokens": 21303994243.0, "step": 4082 }, { "epoch": 0.727807486631016, "grad_norm": 0.2158203125, "learning_rate": 1.5233474427925398e-05, "loss": 1.0274, "num_tokens": 21310278090.0, "step": 4083 }, { "epoch": 0.7279857397504457, "grad_norm": 0.1982421875, "learning_rate": 1.5231181594237547e-05, "loss": 0.9936, "num_tokens": 21316542417.0, "step": 4084 }, { "epoch": 0.7281639928698752, "grad_norm": 0.228515625, "learning_rate": 1.5228888407964437e-05, "loss": 1.0315, "num_tokens": 21322801685.0, "step": 4085 }, { "epoch": 0.7283422459893049, "grad_norm": 0.21875, "learning_rate": 1.5226594869297167e-05, "loss": 1.0286, "num_tokens": 21329086206.0, "step": 4086 }, { "epoch": 0.7285204991087344, "grad_norm": 0.2275390625, "learning_rate": 1.5224300978426849e-05, "loss": 1.0375, "num_tokens": 21335367417.0, "step": 4087 }, { "epoch": 0.728698752228164, "grad_norm": 0.21875, "learning_rate": 1.5222006735544637e-05, "loss": 1.015, "num_tokens": 21341652053.0, "step": 4088 }, { "epoch": 0.7288770053475936, "grad_norm": 0.2099609375, "learning_rate": 1.5219712140841712e-05, "loss": 1.0259, "num_tokens": 21347937118.0, "step": 4089 }, { "epoch": 0.7290552584670231, "grad_norm": 0.2109375, "learning_rate": 1.521741719450928e-05, "loss": 1.0071, "num_tokens": 21354204665.0, "step": 4090 }, { "epoch": 0.7292335115864528, "grad_norm": 0.216796875, "learning_rate": 1.5215121896738582e-05, "loss": 0.9987, "num_tokens": 21360481274.0, "step": 4091 }, { "epoch": 0.7294117647058823, "grad_norm": 0.20703125, "learning_rate": 1.5212826247720885e-05, "loss": 1.0187, "num_tokens": 21366764046.0, "step": 4092 }, { "epoch": 0.729590017825312, "grad_norm": 0.2158203125, "learning_rate": 1.5210530247647489e-05, "loss": 1.0088, "num_tokens": 21373046918.0, "step": 4093 }, { "epoch": 0.7297682709447415, "grad_norm": 0.208984375, "learning_rate": 1.5208233896709712e-05, "loss": 1.0088, "num_tokens": 21379302182.0, "step": 4094 }, { "epoch": 0.7299465240641712, "grad_norm": 0.212890625, "learning_rate": 1.5205937195098919e-05, "loss": 1.0152, "num_tokens": 21385587336.0, "step": 4095 }, { "epoch": 0.7301247771836007, "grad_norm": 0.234375, "learning_rate": 1.5203640143006484e-05, "loss": 0.9844, "num_tokens": 21391861301.0, "step": 4096 }, { "epoch": 0.7303030303030303, "grad_norm": 0.203125, "learning_rate": 1.5201342740623833e-05, "loss": 1.0166, "num_tokens": 21398145131.0, "step": 4097 }, { "epoch": 0.7304812834224599, "grad_norm": 0.232421875, "learning_rate": 1.51990449881424e-05, "loss": 0.9859, "num_tokens": 21404423388.0, "step": 4098 }, { "epoch": 0.7306595365418895, "grad_norm": 0.2080078125, "learning_rate": 1.519674688575366e-05, "loss": 1.0262, "num_tokens": 21410707120.0, "step": 4099 }, { "epoch": 0.7308377896613191, "grad_norm": 0.205078125, "learning_rate": 1.5194448433649113e-05, "loss": 1.0294, "num_tokens": 21416993029.0, "step": 4100 }, { "epoch": 0.7310160427807486, "grad_norm": 0.2265625, "learning_rate": 1.5192149632020295e-05, "loss": 1.0145, "num_tokens": 21423233059.0, "step": 4101 }, { "epoch": 0.7311942959001783, "grad_norm": 0.1953125, "learning_rate": 1.518985048105876e-05, "loss": 1.0422, "num_tokens": 21429494395.0, "step": 4102 }, { "epoch": 0.7313725490196078, "grad_norm": 0.208984375, "learning_rate": 1.5187550980956097e-05, "loss": 1.0498, "num_tokens": 21435749701.0, "step": 4103 }, { "epoch": 0.7315508021390374, "grad_norm": 0.2265625, "learning_rate": 1.5185251131903929e-05, "loss": 1.0173, "num_tokens": 21442034361.0, "step": 4104 }, { "epoch": 0.731729055258467, "grad_norm": 0.2080078125, "learning_rate": 1.5182950934093892e-05, "loss": 1.0213, "num_tokens": 21448317405.0, "step": 4105 }, { "epoch": 0.7319073083778966, "grad_norm": 0.251953125, "learning_rate": 1.5180650387717675e-05, "loss": 1.052, "num_tokens": 21454586503.0, "step": 4106 }, { "epoch": 0.7320855614973262, "grad_norm": 0.216796875, "learning_rate": 1.5178349492966977e-05, "loss": 1.0267, "num_tokens": 21460850350.0, "step": 4107 }, { "epoch": 0.7322638146167558, "grad_norm": 0.2138671875, "learning_rate": 1.5176048250033528e-05, "loss": 1.0269, "num_tokens": 21467134911.0, "step": 4108 }, { "epoch": 0.7324420677361854, "grad_norm": 0.220703125, "learning_rate": 1.5173746659109099e-05, "loss": 1.0189, "num_tokens": 21473386185.0, "step": 4109 }, { "epoch": 0.732620320855615, "grad_norm": 0.212890625, "learning_rate": 1.5171444720385476e-05, "loss": 1.0005, "num_tokens": 21479641055.0, "step": 4110 }, { "epoch": 0.7327985739750446, "grad_norm": 0.2412109375, "learning_rate": 1.5169142434054485e-05, "loss": 1.0047, "num_tokens": 21485895106.0, "step": 4111 }, { "epoch": 0.7329768270944742, "grad_norm": 0.208984375, "learning_rate": 1.5166839800307969e-05, "loss": 1.0599, "num_tokens": 21492161933.0, "step": 4112 }, { "epoch": 0.7331550802139037, "grad_norm": 0.2138671875, "learning_rate": 1.5164536819337815e-05, "loss": 1.0028, "num_tokens": 21498426309.0, "step": 4113 }, { "epoch": 0.7333333333333333, "grad_norm": 0.201171875, "learning_rate": 1.5162233491335926e-05, "loss": 1.0114, "num_tokens": 21504696435.0, "step": 4114 }, { "epoch": 0.7335115864527629, "grad_norm": 0.208984375, "learning_rate": 1.5159929816494236e-05, "loss": 1.0482, "num_tokens": 21510948453.0, "step": 4115 }, { "epoch": 0.7336898395721925, "grad_norm": 0.2119140625, "learning_rate": 1.515762579500472e-05, "loss": 1.0114, "num_tokens": 21517181857.0, "step": 4116 }, { "epoch": 0.7338680926916221, "grad_norm": 0.2060546875, "learning_rate": 1.5155321427059365e-05, "loss": 1.0485, "num_tokens": 21523464331.0, "step": 4117 }, { "epoch": 0.7340463458110517, "grad_norm": 0.2158203125, "learning_rate": 1.5153016712850197e-05, "loss": 1.0321, "num_tokens": 21529746766.0, "step": 4118 }, { "epoch": 0.7342245989304813, "grad_norm": 0.2265625, "learning_rate": 1.5150711652569267e-05, "loss": 1.0031, "num_tokens": 21536030083.0, "step": 4119 }, { "epoch": 0.7344028520499108, "grad_norm": 0.216796875, "learning_rate": 1.5148406246408657e-05, "loss": 1.0245, "num_tokens": 21542289394.0, "step": 4120 }, { "epoch": 0.7345811051693405, "grad_norm": 0.224609375, "learning_rate": 1.5146100494560478e-05, "loss": 1.0178, "num_tokens": 21548573832.0, "step": 4121 }, { "epoch": 0.73475935828877, "grad_norm": 0.2255859375, "learning_rate": 1.514379439721687e-05, "loss": 0.9916, "num_tokens": 21554831656.0, "step": 4122 }, { "epoch": 0.7349376114081997, "grad_norm": 0.208984375, "learning_rate": 1.5141487954569997e-05, "loss": 1.0416, "num_tokens": 21561102525.0, "step": 4123 }, { "epoch": 0.7351158645276292, "grad_norm": 0.220703125, "learning_rate": 1.5139181166812055e-05, "loss": 1.0249, "num_tokens": 21567364935.0, "step": 4124 }, { "epoch": 0.7352941176470589, "grad_norm": 0.2294921875, "learning_rate": 1.5136874034135273e-05, "loss": 0.9857, "num_tokens": 21573647802.0, "step": 4125 }, { "epoch": 0.7354723707664884, "grad_norm": 0.228515625, "learning_rate": 1.5134566556731904e-05, "loss": 1.0518, "num_tokens": 21579931609.0, "step": 4126 }, { "epoch": 0.735650623885918, "grad_norm": 0.2080078125, "learning_rate": 1.5132258734794227e-05, "loss": 1.0127, "num_tokens": 21586197635.0, "step": 4127 }, { "epoch": 0.7358288770053476, "grad_norm": 0.240234375, "learning_rate": 1.5129950568514556e-05, "loss": 0.9982, "num_tokens": 21592410704.0, "step": 4128 }, { "epoch": 0.7360071301247771, "grad_norm": 0.2373046875, "learning_rate": 1.5127642058085234e-05, "loss": 1.0272, "num_tokens": 21598683017.0, "step": 4129 }, { "epoch": 0.7361853832442068, "grad_norm": 0.2109375, "learning_rate": 1.5125333203698623e-05, "loss": 1.0221, "num_tokens": 21604965719.0, "step": 4130 }, { "epoch": 0.7363636363636363, "grad_norm": 0.2216796875, "learning_rate": 1.5123024005547125e-05, "loss": 1.0299, "num_tokens": 21611250776.0, "step": 4131 }, { "epoch": 0.736541889483066, "grad_norm": 0.224609375, "learning_rate": 1.5120714463823162e-05, "loss": 1.0065, "num_tokens": 21617532971.0, "step": 4132 }, { "epoch": 0.7367201426024955, "grad_norm": 0.205078125, "learning_rate": 1.5118404578719195e-05, "loss": 1.0253, "num_tokens": 21623794337.0, "step": 4133 }, { "epoch": 0.7368983957219252, "grad_norm": 0.212890625, "learning_rate": 1.5116094350427701e-05, "loss": 1.0103, "num_tokens": 21630075888.0, "step": 4134 }, { "epoch": 0.7370766488413547, "grad_norm": 0.216796875, "learning_rate": 1.5113783779141196e-05, "loss": 0.9977, "num_tokens": 21636360794.0, "step": 4135 }, { "epoch": 0.7372549019607844, "grad_norm": 0.2236328125, "learning_rate": 1.5111472865052217e-05, "loss": 1.0101, "num_tokens": 21642644048.0, "step": 4136 }, { "epoch": 0.7374331550802139, "grad_norm": 0.220703125, "learning_rate": 1.5109161608353338e-05, "loss": 1.0317, "num_tokens": 21648922755.0, "step": 4137 }, { "epoch": 0.7376114081996435, "grad_norm": 0.224609375, "learning_rate": 1.5106850009237148e-05, "loss": 1.0077, "num_tokens": 21655150351.0, "step": 4138 }, { "epoch": 0.7377896613190731, "grad_norm": 0.22265625, "learning_rate": 1.5104538067896282e-05, "loss": 1.024, "num_tokens": 21661433959.0, "step": 4139 }, { "epoch": 0.7379679144385026, "grad_norm": 0.2197265625, "learning_rate": 1.5102225784523387e-05, "loss": 1.0331, "num_tokens": 21667716397.0, "step": 4140 }, { "epoch": 0.7381461675579323, "grad_norm": 0.2392578125, "learning_rate": 1.5099913159311154e-05, "loss": 1.02, "num_tokens": 21673970969.0, "step": 4141 }, { "epoch": 0.7383244206773618, "grad_norm": 0.2177734375, "learning_rate": 1.5097600192452288e-05, "loss": 1.0165, "num_tokens": 21680254326.0, "step": 4142 }, { "epoch": 0.7385026737967915, "grad_norm": 0.2001953125, "learning_rate": 1.5095286884139536e-05, "loss": 1.0658, "num_tokens": 21686536703.0, "step": 4143 }, { "epoch": 0.738680926916221, "grad_norm": 0.2138671875, "learning_rate": 1.5092973234565655e-05, "loss": 1.0484, "num_tokens": 21692819893.0, "step": 4144 }, { "epoch": 0.7388591800356507, "grad_norm": 0.2080078125, "learning_rate": 1.5090659243923451e-05, "loss": 1.0267, "num_tokens": 21699080015.0, "step": 4145 }, { "epoch": 0.7390374331550802, "grad_norm": 0.216796875, "learning_rate": 1.5088344912405749e-05, "loss": 1.0309, "num_tokens": 21705344511.0, "step": 4146 }, { "epoch": 0.7392156862745098, "grad_norm": 0.22265625, "learning_rate": 1.5086030240205398e-05, "loss": 1.0082, "num_tokens": 21711629082.0, "step": 4147 }, { "epoch": 0.7393939393939394, "grad_norm": 0.2333984375, "learning_rate": 1.5083715227515286e-05, "loss": 1.0216, "num_tokens": 21717911813.0, "step": 4148 }, { "epoch": 0.739572192513369, "grad_norm": 0.2216796875, "learning_rate": 1.508139987452832e-05, "loss": 1.0216, "num_tokens": 21724165881.0, "step": 4149 }, { "epoch": 0.7397504456327986, "grad_norm": 0.205078125, "learning_rate": 1.5079084181437438e-05, "loss": 1.0007, "num_tokens": 21730411207.0, "step": 4150 }, { "epoch": 0.7399286987522281, "grad_norm": 0.23828125, "learning_rate": 1.5076768148435607e-05, "loss": 1.0304, "num_tokens": 21736667269.0, "step": 4151 }, { "epoch": 0.7401069518716578, "grad_norm": 0.22265625, "learning_rate": 1.5074451775715828e-05, "loss": 1.0092, "num_tokens": 21742952575.0, "step": 4152 }, { "epoch": 0.7402852049910873, "grad_norm": 0.2177734375, "learning_rate": 1.5072135063471118e-05, "loss": 1.0175, "num_tokens": 21749237699.0, "step": 4153 }, { "epoch": 0.740463458110517, "grad_norm": 0.2109375, "learning_rate": 1.5069818011894533e-05, "loss": 1.0035, "num_tokens": 21755452135.0, "step": 4154 }, { "epoch": 0.7406417112299465, "grad_norm": 0.2060546875, "learning_rate": 1.5067500621179151e-05, "loss": 1.015, "num_tokens": 21761722010.0, "step": 4155 }, { "epoch": 0.7408199643493761, "grad_norm": 0.22265625, "learning_rate": 1.5065182891518084e-05, "loss": 1.036, "num_tokens": 21767985579.0, "step": 4156 }, { "epoch": 0.7409982174688057, "grad_norm": 0.21875, "learning_rate": 1.5062864823104466e-05, "loss": 1.0256, "num_tokens": 21774268304.0, "step": 4157 }, { "epoch": 0.7411764705882353, "grad_norm": 0.2119140625, "learning_rate": 1.5060546416131463e-05, "loss": 1.018, "num_tokens": 21780527275.0, "step": 4158 }, { "epoch": 0.7413547237076649, "grad_norm": 0.220703125, "learning_rate": 1.5058227670792269e-05, "loss": 0.9996, "num_tokens": 21786811111.0, "step": 4159 }, { "epoch": 0.7415329768270945, "grad_norm": 0.2119140625, "learning_rate": 1.5055908587280104e-05, "loss": 0.9986, "num_tokens": 21793073008.0, "step": 4160 }, { "epoch": 0.741711229946524, "grad_norm": 0.228515625, "learning_rate": 1.505358916578822e-05, "loss": 1.0366, "num_tokens": 21799348169.0, "step": 4161 }, { "epoch": 0.7418894830659537, "grad_norm": 0.2333984375, "learning_rate": 1.5051269406509891e-05, "loss": 1.0232, "num_tokens": 21805606137.0, "step": 4162 }, { "epoch": 0.7420677361853832, "grad_norm": 0.240234375, "learning_rate": 1.5048949309638428e-05, "loss": 1.0238, "num_tokens": 21811865550.0, "step": 4163 }, { "epoch": 0.7422459893048128, "grad_norm": 0.2431640625, "learning_rate": 1.5046628875367162e-05, "loss": 1.0401, "num_tokens": 21818149602.0, "step": 4164 }, { "epoch": 0.7424242424242424, "grad_norm": 0.2080078125, "learning_rate": 1.5044308103889458e-05, "loss": 1.0219, "num_tokens": 21824419292.0, "step": 4165 }, { "epoch": 0.742602495543672, "grad_norm": 0.255859375, "learning_rate": 1.50419869953987e-05, "loss": 1.0285, "num_tokens": 21830661903.0, "step": 4166 }, { "epoch": 0.7427807486631016, "grad_norm": 0.2578125, "learning_rate": 1.503966555008831e-05, "loss": 1.0504, "num_tokens": 21836924943.0, "step": 4167 }, { "epoch": 0.7429590017825312, "grad_norm": 0.20703125, "learning_rate": 1.5037343768151742e-05, "loss": 1.0411, "num_tokens": 21843204234.0, "step": 4168 }, { "epoch": 0.7431372549019608, "grad_norm": 0.2373046875, "learning_rate": 1.5035021649782462e-05, "loss": 1.0399, "num_tokens": 21849489614.0, "step": 4169 }, { "epoch": 0.7433155080213903, "grad_norm": 0.2421875, "learning_rate": 1.5032699195173972e-05, "loss": 1.034, "num_tokens": 21855774184.0, "step": 4170 }, { "epoch": 0.74349376114082, "grad_norm": 0.21484375, "learning_rate": 1.5030376404519803e-05, "loss": 0.9925, "num_tokens": 21862023407.0, "step": 4171 }, { "epoch": 0.7436720142602495, "grad_norm": 0.263671875, "learning_rate": 1.502805327801352e-05, "loss": 1.0218, "num_tokens": 21868291029.0, "step": 4172 }, { "epoch": 0.7438502673796792, "grad_norm": 0.2255859375, "learning_rate": 1.5025729815848706e-05, "loss": 1.0365, "num_tokens": 21874496938.0, "step": 4173 }, { "epoch": 0.7440285204991087, "grad_norm": 0.21484375, "learning_rate": 1.5023406018218973e-05, "loss": 1.0211, "num_tokens": 21880782187.0, "step": 4174 }, { "epoch": 0.7442067736185384, "grad_norm": 0.2314453125, "learning_rate": 1.5021081885317968e-05, "loss": 1.0387, "num_tokens": 21887066788.0, "step": 4175 }, { "epoch": 0.7443850267379679, "grad_norm": 0.2236328125, "learning_rate": 1.5018757417339355e-05, "loss": 1.0148, "num_tokens": 21893352378.0, "step": 4176 }, { "epoch": 0.7445632798573975, "grad_norm": 0.2236328125, "learning_rate": 1.5016432614476841e-05, "loss": 1.0347, "num_tokens": 21899630120.0, "step": 4177 }, { "epoch": 0.7447415329768271, "grad_norm": 0.2060546875, "learning_rate": 1.5014107476924144e-05, "loss": 1.0529, "num_tokens": 21905915172.0, "step": 4178 }, { "epoch": 0.7449197860962566, "grad_norm": 0.2197265625, "learning_rate": 1.5011782004875027e-05, "loss": 1.0319, "num_tokens": 21912199895.0, "step": 4179 }, { "epoch": 0.7450980392156863, "grad_norm": 0.22265625, "learning_rate": 1.5009456198523265e-05, "loss": 1.0173, "num_tokens": 21918462686.0, "step": 4180 }, { "epoch": 0.7452762923351158, "grad_norm": 0.2080078125, "learning_rate": 1.5007130058062666e-05, "loss": 0.9691, "num_tokens": 21924717469.0, "step": 4181 }, { "epoch": 0.7454545454545455, "grad_norm": 0.2216796875, "learning_rate": 1.5004803583687076e-05, "loss": 0.9847, "num_tokens": 21930943772.0, "step": 4182 }, { "epoch": 0.745632798573975, "grad_norm": 0.2158203125, "learning_rate": 1.5002476775590355e-05, "loss": 1.0397, "num_tokens": 21937227900.0, "step": 4183 }, { "epoch": 0.7458110516934047, "grad_norm": 0.23046875, "learning_rate": 1.5000149633966398e-05, "loss": 1.0215, "num_tokens": 21943456008.0, "step": 4184 }, { "epoch": 0.7459893048128342, "grad_norm": 0.2041015625, "learning_rate": 1.4997822159009124e-05, "loss": 1.0296, "num_tokens": 21949740212.0, "step": 4185 }, { "epoch": 0.7461675579322639, "grad_norm": 0.21875, "learning_rate": 1.4995494350912486e-05, "loss": 1.0403, "num_tokens": 21956003313.0, "step": 4186 }, { "epoch": 0.7463458110516934, "grad_norm": 0.205078125, "learning_rate": 1.4993166209870462e-05, "loss": 1.0016, "num_tokens": 21962285650.0, "step": 4187 }, { "epoch": 0.746524064171123, "grad_norm": 0.20703125, "learning_rate": 1.4990837736077049e-05, "loss": 0.9962, "num_tokens": 21968545758.0, "step": 4188 }, { "epoch": 0.7467023172905526, "grad_norm": 0.2177734375, "learning_rate": 1.4988508929726284e-05, "loss": 1.0238, "num_tokens": 21974828758.0, "step": 4189 }, { "epoch": 0.7468805704099821, "grad_norm": 0.21875, "learning_rate": 1.4986179791012226e-05, "loss": 0.9872, "num_tokens": 21981090243.0, "step": 4190 }, { "epoch": 0.7470588235294118, "grad_norm": 0.203125, "learning_rate": 1.4983850320128966e-05, "loss": 1.0416, "num_tokens": 21987373803.0, "step": 4191 }, { "epoch": 0.7472370766488413, "grad_norm": 0.2314453125, "learning_rate": 1.4981520517270612e-05, "loss": 1.0181, "num_tokens": 21993654753.0, "step": 4192 }, { "epoch": 0.747415329768271, "grad_norm": 0.19921875, "learning_rate": 1.4979190382631315e-05, "loss": 1.0096, "num_tokens": 21999932311.0, "step": 4193 }, { "epoch": 0.7475935828877005, "grad_norm": 0.2099609375, "learning_rate": 1.4976859916405237e-05, "loss": 1.0223, "num_tokens": 22006168639.0, "step": 4194 }, { "epoch": 0.7477718360071302, "grad_norm": 0.2080078125, "learning_rate": 1.4974529118786586e-05, "loss": 1.036, "num_tokens": 22012421907.0, "step": 4195 }, { "epoch": 0.7479500891265597, "grad_norm": 0.193359375, "learning_rate": 1.4972197989969583e-05, "loss": 1.0276, "num_tokens": 22018702085.0, "step": 4196 }, { "epoch": 0.7481283422459893, "grad_norm": 0.2236328125, "learning_rate": 1.496986653014848e-05, "loss": 1.0385, "num_tokens": 22024984637.0, "step": 4197 }, { "epoch": 0.7483065953654189, "grad_norm": 0.2099609375, "learning_rate": 1.4967534739517558e-05, "loss": 1.0412, "num_tokens": 22031265730.0, "step": 4198 }, { "epoch": 0.7484848484848485, "grad_norm": 0.21484375, "learning_rate": 1.4965202618271128e-05, "loss": 1.0102, "num_tokens": 22037523851.0, "step": 4199 }, { "epoch": 0.7486631016042781, "grad_norm": 0.2109375, "learning_rate": 1.4962870166603527e-05, "loss": 1.0315, "num_tokens": 22043808325.0, "step": 4200 }, { "epoch": 0.7488413547237077, "grad_norm": 0.201171875, "learning_rate": 1.4960537384709115e-05, "loss": 1.0196, "num_tokens": 22050090184.0, "step": 4201 }, { "epoch": 0.7490196078431373, "grad_norm": 0.220703125, "learning_rate": 1.4958204272782285e-05, "loss": 1.0325, "num_tokens": 22056341989.0, "step": 4202 }, { "epoch": 0.7491978609625668, "grad_norm": 0.2099609375, "learning_rate": 1.4955870831017456e-05, "loss": 1.0073, "num_tokens": 22062613021.0, "step": 4203 }, { "epoch": 0.7493761140819964, "grad_norm": 0.2099609375, "learning_rate": 1.4953537059609076e-05, "loss": 1.0097, "num_tokens": 22068894463.0, "step": 4204 }, { "epoch": 0.749554367201426, "grad_norm": 0.208984375, "learning_rate": 1.4951202958751615e-05, "loss": 1.0339, "num_tokens": 22075141488.0, "step": 4205 }, { "epoch": 0.7497326203208556, "grad_norm": 0.208984375, "learning_rate": 1.4948868528639575e-05, "loss": 0.9988, "num_tokens": 22081382251.0, "step": 4206 }, { "epoch": 0.7499108734402852, "grad_norm": 0.2119140625, "learning_rate": 1.4946533769467488e-05, "loss": 1.0379, "num_tokens": 22087634636.0, "step": 4207 }, { "epoch": 0.7500891265597148, "grad_norm": 0.208984375, "learning_rate": 1.4944198681429905e-05, "loss": 1.0119, "num_tokens": 22093919216.0, "step": 4208 }, { "epoch": 0.7502673796791444, "grad_norm": 0.234375, "learning_rate": 1.4941863264721412e-05, "loss": 1.0224, "num_tokens": 22100202432.0, "step": 4209 }, { "epoch": 0.750445632798574, "grad_norm": 0.2197265625, "learning_rate": 1.493952751953662e-05, "loss": 1.0179, "num_tokens": 22106486661.0, "step": 4210 }, { "epoch": 0.7506238859180036, "grad_norm": 0.22265625, "learning_rate": 1.4937191446070168e-05, "loss": 1.0009, "num_tokens": 22112772092.0, "step": 4211 }, { "epoch": 0.7508021390374332, "grad_norm": 0.236328125, "learning_rate": 1.4934855044516718e-05, "loss": 1.0145, "num_tokens": 22119046930.0, "step": 4212 }, { "epoch": 0.7509803921568627, "grad_norm": 0.212890625, "learning_rate": 1.4932518315070965e-05, "loss": 1.0251, "num_tokens": 22125329373.0, "step": 4213 }, { "epoch": 0.7511586452762923, "grad_norm": 0.2109375, "learning_rate": 1.4930181257927634e-05, "loss": 1.0255, "num_tokens": 22131588639.0, "step": 4214 }, { "epoch": 0.7513368983957219, "grad_norm": 0.2294921875, "learning_rate": 1.4927843873281462e-05, "loss": 1.0515, "num_tokens": 22137860400.0, "step": 4215 }, { "epoch": 0.7515151515151515, "grad_norm": 0.212890625, "learning_rate": 1.492550616132723e-05, "loss": 1.021, "num_tokens": 22144125180.0, "step": 4216 }, { "epoch": 0.7516934046345811, "grad_norm": 0.2294921875, "learning_rate": 1.4923168122259738e-05, "loss": 1.0234, "num_tokens": 22150408027.0, "step": 4217 }, { "epoch": 0.7518716577540107, "grad_norm": 0.2314453125, "learning_rate": 1.492082975627382e-05, "loss": 1.0078, "num_tokens": 22156692482.0, "step": 4218 }, { "epoch": 0.7520499108734403, "grad_norm": 0.216796875, "learning_rate": 1.4918491063564329e-05, "loss": 1.0229, "num_tokens": 22162927804.0, "step": 4219 }, { "epoch": 0.7522281639928698, "grad_norm": 0.212890625, "learning_rate": 1.4916152044326147e-05, "loss": 1.0038, "num_tokens": 22169210936.0, "step": 4220 }, { "epoch": 0.7524064171122995, "grad_norm": 0.2177734375, "learning_rate": 1.4913812698754191e-05, "loss": 0.9958, "num_tokens": 22175496395.0, "step": 4221 }, { "epoch": 0.752584670231729, "grad_norm": 0.21875, "learning_rate": 1.4911473027043393e-05, "loss": 1.0021, "num_tokens": 22181765482.0, "step": 4222 }, { "epoch": 0.7527629233511587, "grad_norm": 0.2099609375, "learning_rate": 1.4909133029388717e-05, "loss": 0.9972, "num_tokens": 22188013564.0, "step": 4223 }, { "epoch": 0.7529411764705882, "grad_norm": 0.228515625, "learning_rate": 1.4906792705985162e-05, "loss": 1.0333, "num_tokens": 22194266936.0, "step": 4224 }, { "epoch": 0.7531194295900179, "grad_norm": 0.228515625, "learning_rate": 1.4904452057027745e-05, "loss": 1.0029, "num_tokens": 22200517935.0, "step": 4225 }, { "epoch": 0.7532976827094474, "grad_norm": 0.2060546875, "learning_rate": 1.4902111082711515e-05, "loss": 1.0462, "num_tokens": 22206784815.0, "step": 4226 }, { "epoch": 0.753475935828877, "grad_norm": 0.201171875, "learning_rate": 1.4899769783231537e-05, "loss": 1.0118, "num_tokens": 22213031385.0, "step": 4227 }, { "epoch": 0.7536541889483066, "grad_norm": 0.2099609375, "learning_rate": 1.489742815878292e-05, "loss": 1.0343, "num_tokens": 22219315592.0, "step": 4228 }, { "epoch": 0.7538324420677361, "grad_norm": 0.21484375, "learning_rate": 1.4895086209560792e-05, "loss": 1.0362, "num_tokens": 22225596850.0, "step": 4229 }, { "epoch": 0.7540106951871658, "grad_norm": 0.2109375, "learning_rate": 1.4892743935760307e-05, "loss": 1.0543, "num_tokens": 22231879607.0, "step": 4230 }, { "epoch": 0.7541889483065953, "grad_norm": 0.208984375, "learning_rate": 1.4890401337576643e-05, "loss": 1.0094, "num_tokens": 22238165332.0, "step": 4231 }, { "epoch": 0.754367201426025, "grad_norm": 0.2275390625, "learning_rate": 1.4888058415205018e-05, "loss": 1.047, "num_tokens": 22244435938.0, "step": 4232 }, { "epoch": 0.7545454545454545, "grad_norm": 0.2119140625, "learning_rate": 1.488571516884066e-05, "loss": 1.0217, "num_tokens": 22250698655.0, "step": 4233 }, { "epoch": 0.7547237076648842, "grad_norm": 0.2138671875, "learning_rate": 1.4883371598678833e-05, "loss": 1.0364, "num_tokens": 22256968674.0, "step": 4234 }, { "epoch": 0.7549019607843137, "grad_norm": 0.228515625, "learning_rate": 1.4881027704914833e-05, "loss": 0.9981, "num_tokens": 22263251269.0, "step": 4235 }, { "epoch": 0.7550802139037434, "grad_norm": 0.21484375, "learning_rate": 1.487868348774397e-05, "loss": 1.0247, "num_tokens": 22269535500.0, "step": 4236 }, { "epoch": 0.7552584670231729, "grad_norm": 0.23046875, "learning_rate": 1.4876338947361595e-05, "loss": 1.0149, "num_tokens": 22275818922.0, "step": 4237 }, { "epoch": 0.7554367201426025, "grad_norm": 0.234375, "learning_rate": 1.4873994083963077e-05, "loss": 1.0328, "num_tokens": 22282102322.0, "step": 4238 }, { "epoch": 0.7556149732620321, "grad_norm": 0.19921875, "learning_rate": 1.4871648897743808e-05, "loss": 1.0216, "num_tokens": 22288387738.0, "step": 4239 }, { "epoch": 0.7557932263814616, "grad_norm": 0.228515625, "learning_rate": 1.486930338889922e-05, "loss": 1.0345, "num_tokens": 22294671022.0, "step": 4240 }, { "epoch": 0.7559714795008913, "grad_norm": 0.21875, "learning_rate": 1.4866957557624763e-05, "loss": 1.0341, "num_tokens": 22300940560.0, "step": 4241 }, { "epoch": 0.7561497326203208, "grad_norm": 0.224609375, "learning_rate": 1.4864611404115914e-05, "loss": 0.9873, "num_tokens": 22307195379.0, "step": 4242 }, { "epoch": 0.7563279857397505, "grad_norm": 0.20703125, "learning_rate": 1.4862264928568183e-05, "loss": 1.0098, "num_tokens": 22313478781.0, "step": 4243 }, { "epoch": 0.75650623885918, "grad_norm": 0.2177734375, "learning_rate": 1.4859918131177091e-05, "loss": 1.0203, "num_tokens": 22319762219.0, "step": 4244 }, { "epoch": 0.7566844919786097, "grad_norm": 0.23046875, "learning_rate": 1.485757101213821e-05, "loss": 1.038, "num_tokens": 22326041299.0, "step": 4245 }, { "epoch": 0.7568627450980392, "grad_norm": 0.216796875, "learning_rate": 1.485522357164712e-05, "loss": 1.0535, "num_tokens": 22332292968.0, "step": 4246 }, { "epoch": 0.7570409982174688, "grad_norm": 0.216796875, "learning_rate": 1.4852875809899434e-05, "loss": 0.9906, "num_tokens": 22338579114.0, "step": 4247 }, { "epoch": 0.7572192513368984, "grad_norm": 0.2421875, "learning_rate": 1.4850527727090792e-05, "loss": 1.0079, "num_tokens": 22344836801.0, "step": 4248 }, { "epoch": 0.757397504456328, "grad_norm": 0.220703125, "learning_rate": 1.4848179323416863e-05, "loss": 1.0465, "num_tokens": 22351110255.0, "step": 4249 }, { "epoch": 0.7575757575757576, "grad_norm": 0.2080078125, "learning_rate": 1.4845830599073332e-05, "loss": 0.9948, "num_tokens": 22357393926.0, "step": 4250 }, { "epoch": 0.7577540106951872, "grad_norm": 0.2294921875, "learning_rate": 1.4843481554255927e-05, "loss": 1.0649, "num_tokens": 22363611055.0, "step": 4251 }, { "epoch": 0.7579322638146168, "grad_norm": 0.2216796875, "learning_rate": 1.484113218916039e-05, "loss": 1.0479, "num_tokens": 22369877902.0, "step": 4252 }, { "epoch": 0.7581105169340463, "grad_norm": 0.208984375, "learning_rate": 1.4838782503982496e-05, "loss": 1.0195, "num_tokens": 22376125204.0, "step": 4253 }, { "epoch": 0.758288770053476, "grad_norm": 0.21875, "learning_rate": 1.4836432498918044e-05, "loss": 1.0054, "num_tokens": 22382409808.0, "step": 4254 }, { "epoch": 0.7584670231729055, "grad_norm": 0.20703125, "learning_rate": 1.483408217416286e-05, "loss": 0.9868, "num_tokens": 22388692770.0, "step": 4255 }, { "epoch": 0.7586452762923351, "grad_norm": 0.2060546875, "learning_rate": 1.4831731529912801e-05, "loss": 0.9956, "num_tokens": 22394977810.0, "step": 4256 }, { "epoch": 0.7588235294117647, "grad_norm": 0.22265625, "learning_rate": 1.482938056636374e-05, "loss": 1.0085, "num_tokens": 22401261699.0, "step": 4257 }, { "epoch": 0.7590017825311943, "grad_norm": 0.197265625, "learning_rate": 1.4827029283711587e-05, "loss": 1.0383, "num_tokens": 22407533668.0, "step": 4258 }, { "epoch": 0.7591800356506239, "grad_norm": 0.2265625, "learning_rate": 1.4824677682152273e-05, "loss": 0.9886, "num_tokens": 22413818068.0, "step": 4259 }, { "epoch": 0.7593582887700535, "grad_norm": 0.2109375, "learning_rate": 1.4822325761881762e-05, "loss": 1.0259, "num_tokens": 22420103500.0, "step": 4260 }, { "epoch": 0.759536541889483, "grad_norm": 0.220703125, "learning_rate": 1.4819973523096033e-05, "loss": 1.032, "num_tokens": 22426358428.0, "step": 4261 }, { "epoch": 0.7597147950089127, "grad_norm": 0.208984375, "learning_rate": 1.4817620965991106e-05, "loss": 1.0306, "num_tokens": 22432617830.0, "step": 4262 }, { "epoch": 0.7598930481283422, "grad_norm": 0.212890625, "learning_rate": 1.4815268090763014e-05, "loss": 1.0315, "num_tokens": 22438875995.0, "step": 4263 }, { "epoch": 0.7600713012477719, "grad_norm": 0.21484375, "learning_rate": 1.4812914897607828e-05, "loss": 1.0434, "num_tokens": 22445156655.0, "step": 4264 }, { "epoch": 0.7602495543672014, "grad_norm": 0.2109375, "learning_rate": 1.4810561386721634e-05, "loss": 1.0308, "num_tokens": 22451439287.0, "step": 4265 }, { "epoch": 0.760427807486631, "grad_norm": 0.2197265625, "learning_rate": 1.4808207558300557e-05, "loss": 1.0293, "num_tokens": 22457722685.0, "step": 4266 }, { "epoch": 0.7606060606060606, "grad_norm": 0.2109375, "learning_rate": 1.4805853412540736e-05, "loss": 1.0395, "num_tokens": 22463975761.0, "step": 4267 }, { "epoch": 0.7607843137254902, "grad_norm": 0.205078125, "learning_rate": 1.4803498949638344e-05, "loss": 1.012, "num_tokens": 22470258638.0, "step": 4268 }, { "epoch": 0.7609625668449198, "grad_norm": 0.2197265625, "learning_rate": 1.4801144169789586e-05, "loss": 1.0122, "num_tokens": 22476528719.0, "step": 4269 }, { "epoch": 0.7611408199643493, "grad_norm": 0.20703125, "learning_rate": 1.4798789073190673e-05, "loss": 1.0169, "num_tokens": 22482805409.0, "step": 4270 }, { "epoch": 0.761319073083779, "grad_norm": 0.208984375, "learning_rate": 1.479643366003787e-05, "loss": 1.0034, "num_tokens": 22489061463.0, "step": 4271 }, { "epoch": 0.7614973262032085, "grad_norm": 0.2294921875, "learning_rate": 1.4794077930527445e-05, "loss": 0.996, "num_tokens": 22495316106.0, "step": 4272 }, { "epoch": 0.7616755793226382, "grad_norm": 0.208984375, "learning_rate": 1.4791721884855703e-05, "loss": 1.0363, "num_tokens": 22501557052.0, "step": 4273 }, { "epoch": 0.7618538324420677, "grad_norm": 0.224609375, "learning_rate": 1.4789365523218972e-05, "loss": 0.9835, "num_tokens": 22507840766.0, "step": 4274 }, { "epoch": 0.7620320855614974, "grad_norm": 0.2158203125, "learning_rate": 1.4787008845813612e-05, "loss": 0.9916, "num_tokens": 22514124728.0, "step": 4275 }, { "epoch": 0.7622103386809269, "grad_norm": 0.216796875, "learning_rate": 1.4784651852836007e-05, "loss": 1.0304, "num_tokens": 22520352283.0, "step": 4276 }, { "epoch": 0.7623885918003565, "grad_norm": 0.2392578125, "learning_rate": 1.4782294544482561e-05, "loss": 1.0257, "num_tokens": 22526572646.0, "step": 4277 }, { "epoch": 0.7625668449197861, "grad_norm": 0.220703125, "learning_rate": 1.4779936920949708e-05, "loss": 1.0375, "num_tokens": 22532852043.0, "step": 4278 }, { "epoch": 0.7627450980392156, "grad_norm": 0.234375, "learning_rate": 1.4777578982433917e-05, "loss": 1.0181, "num_tokens": 22539136651.0, "step": 4279 }, { "epoch": 0.7629233511586453, "grad_norm": 0.25, "learning_rate": 1.4775220729131667e-05, "loss": 1.0425, "num_tokens": 22545388910.0, "step": 4280 }, { "epoch": 0.7631016042780748, "grad_norm": 0.212890625, "learning_rate": 1.4772862161239474e-05, "loss": 1.0325, "num_tokens": 22551673386.0, "step": 4281 }, { "epoch": 0.7632798573975045, "grad_norm": 0.23046875, "learning_rate": 1.4770503278953882e-05, "loss": 0.9983, "num_tokens": 22557932023.0, "step": 4282 }, { "epoch": 0.763458110516934, "grad_norm": 0.216796875, "learning_rate": 1.4768144082471457e-05, "loss": 1.0478, "num_tokens": 22564197996.0, "step": 4283 }, { "epoch": 0.7636363636363637, "grad_norm": 0.2060546875, "learning_rate": 1.4765784571988784e-05, "loss": 0.9963, "num_tokens": 22570461219.0, "step": 4284 }, { "epoch": 0.7638146167557932, "grad_norm": 0.224609375, "learning_rate": 1.4763424747702487e-05, "loss": 1.0482, "num_tokens": 22576743288.0, "step": 4285 }, { "epoch": 0.7639928698752229, "grad_norm": 0.2060546875, "learning_rate": 1.4761064609809209e-05, "loss": 1.0086, "num_tokens": 22583026261.0, "step": 4286 }, { "epoch": 0.7641711229946524, "grad_norm": 0.2109375, "learning_rate": 1.4758704158505625e-05, "loss": 1.0142, "num_tokens": 22589299779.0, "step": 4287 }, { "epoch": 0.764349376114082, "grad_norm": 0.216796875, "learning_rate": 1.4756343393988426e-05, "loss": 1.025, "num_tokens": 22595551258.0, "step": 4288 }, { "epoch": 0.7645276292335116, "grad_norm": 0.2080078125, "learning_rate": 1.4753982316454339e-05, "loss": 1.0127, "num_tokens": 22601828968.0, "step": 4289 }, { "epoch": 0.7647058823529411, "grad_norm": 0.2109375, "learning_rate": 1.475162092610011e-05, "loss": 1.0389, "num_tokens": 22608087516.0, "step": 4290 }, { "epoch": 0.7648841354723708, "grad_norm": 0.203125, "learning_rate": 1.4749259223122516e-05, "loss": 1.0071, "num_tokens": 22614370616.0, "step": 4291 }, { "epoch": 0.7650623885918003, "grad_norm": 0.2255859375, "learning_rate": 1.474689720771836e-05, "loss": 1.0222, "num_tokens": 22620629457.0, "step": 4292 }, { "epoch": 0.76524064171123, "grad_norm": 0.2080078125, "learning_rate": 1.4744534880084465e-05, "loss": 1.029, "num_tokens": 22626912955.0, "step": 4293 }, { "epoch": 0.7654188948306595, "grad_norm": 0.2158203125, "learning_rate": 1.4742172240417687e-05, "loss": 1.0187, "num_tokens": 22633174452.0, "step": 4294 }, { "epoch": 0.7655971479500892, "grad_norm": 0.2197265625, "learning_rate": 1.4739809288914905e-05, "loss": 1.0217, "num_tokens": 22639430104.0, "step": 4295 }, { "epoch": 0.7657754010695187, "grad_norm": 0.2021484375, "learning_rate": 1.4737446025773025e-05, "loss": 1.0122, "num_tokens": 22645715118.0, "step": 4296 }, { "epoch": 0.7659536541889483, "grad_norm": 0.193359375, "learning_rate": 1.4735082451188975e-05, "loss": 1.0206, "num_tokens": 22651988909.0, "step": 4297 }, { "epoch": 0.7661319073083779, "grad_norm": 0.2236328125, "learning_rate": 1.4732718565359718e-05, "loss": 0.9989, "num_tokens": 22658272983.0, "step": 4298 }, { "epoch": 0.7663101604278075, "grad_norm": 0.2158203125, "learning_rate": 1.4730354368482232e-05, "loss": 1.0049, "num_tokens": 22664516480.0, "step": 4299 }, { "epoch": 0.7664884135472371, "grad_norm": 0.1884765625, "learning_rate": 1.4727989860753524e-05, "loss": 1.0368, "num_tokens": 22670787839.0, "step": 4300 }, { "epoch": 0.7666666666666667, "grad_norm": 0.23828125, "learning_rate": 1.4725625042370636e-05, "loss": 1.0239, "num_tokens": 22677070368.0, "step": 4301 }, { "epoch": 0.7668449197860963, "grad_norm": 0.2255859375, "learning_rate": 1.4723259913530627e-05, "loss": 1.0165, "num_tokens": 22683354576.0, "step": 4302 }, { "epoch": 0.7670231729055258, "grad_norm": 0.2109375, "learning_rate": 1.472089447443058e-05, "loss": 1.0418, "num_tokens": 22689614830.0, "step": 4303 }, { "epoch": 0.7672014260249554, "grad_norm": 0.236328125, "learning_rate": 1.4718528725267609e-05, "loss": 1.051, "num_tokens": 22695895017.0, "step": 4304 }, { "epoch": 0.767379679144385, "grad_norm": 0.2060546875, "learning_rate": 1.4716162666238853e-05, "loss": 1.0007, "num_tokens": 22702154898.0, "step": 4305 }, { "epoch": 0.7675579322638146, "grad_norm": 0.2275390625, "learning_rate": 1.4713796297541479e-05, "loss": 1.0244, "num_tokens": 22708436980.0, "step": 4306 }, { "epoch": 0.7677361853832442, "grad_norm": 0.228515625, "learning_rate": 1.471142961937267e-05, "loss": 1.0196, "num_tokens": 22714719470.0, "step": 4307 }, { "epoch": 0.7679144385026738, "grad_norm": 0.2265625, "learning_rate": 1.4709062631929647e-05, "loss": 1.0479, "num_tokens": 22721000162.0, "step": 4308 }, { "epoch": 0.7680926916221034, "grad_norm": 0.22265625, "learning_rate": 1.470669533540965e-05, "loss": 1.035, "num_tokens": 22727283125.0, "step": 4309 }, { "epoch": 0.768270944741533, "grad_norm": 0.2021484375, "learning_rate": 1.4704327730009951e-05, "loss": 1.0219, "num_tokens": 22733566097.0, "step": 4310 }, { "epoch": 0.7684491978609626, "grad_norm": 0.224609375, "learning_rate": 1.4701959815927833e-05, "loss": 1.0413, "num_tokens": 22739849828.0, "step": 4311 }, { "epoch": 0.7686274509803922, "grad_norm": 0.2265625, "learning_rate": 1.4699591593360624e-05, "loss": 1.0497, "num_tokens": 22746105569.0, "step": 4312 }, { "epoch": 0.7688057040998217, "grad_norm": 0.216796875, "learning_rate": 1.4697223062505663e-05, "loss": 0.9996, "num_tokens": 22752387054.0, "step": 4313 }, { "epoch": 0.7689839572192514, "grad_norm": 0.2314453125, "learning_rate": 1.4694854223560326e-05, "loss": 1.0126, "num_tokens": 22758653398.0, "step": 4314 }, { "epoch": 0.7691622103386809, "grad_norm": 0.2236328125, "learning_rate": 1.4692485076722e-05, "loss": 1.0343, "num_tokens": 22764936097.0, "step": 4315 }, { "epoch": 0.7693404634581105, "grad_norm": 0.2490234375, "learning_rate": 1.4690115622188115e-05, "loss": 1.0273, "num_tokens": 22771216195.0, "step": 4316 }, { "epoch": 0.7695187165775401, "grad_norm": 0.2353515625, "learning_rate": 1.4687745860156113e-05, "loss": 1.0053, "num_tokens": 22777461651.0, "step": 4317 }, { "epoch": 0.7696969696969697, "grad_norm": 0.19921875, "learning_rate": 1.4685375790823473e-05, "loss": 0.9953, "num_tokens": 22783731069.0, "step": 4318 }, { "epoch": 0.7698752228163993, "grad_norm": 0.2373046875, "learning_rate": 1.4683005414387685e-05, "loss": 1.0292, "num_tokens": 22789991873.0, "step": 4319 }, { "epoch": 0.7700534759358288, "grad_norm": 0.240234375, "learning_rate": 1.4680634731046276e-05, "loss": 1.0149, "num_tokens": 22796274528.0, "step": 4320 }, { "epoch": 0.7702317290552585, "grad_norm": 0.2080078125, "learning_rate": 1.46782637409968e-05, "loss": 1.0326, "num_tokens": 22802496354.0, "step": 4321 }, { "epoch": 0.770409982174688, "grad_norm": 0.236328125, "learning_rate": 1.4675892444436828e-05, "loss": 1.0385, "num_tokens": 22808778857.0, "step": 4322 }, { "epoch": 0.7705882352941177, "grad_norm": 0.2236328125, "learning_rate": 1.467352084156396e-05, "loss": 0.9765, "num_tokens": 22815036758.0, "step": 4323 }, { "epoch": 0.7707664884135472, "grad_norm": 0.2177734375, "learning_rate": 1.4671148932575823e-05, "loss": 0.9918, "num_tokens": 22821267526.0, "step": 4324 }, { "epoch": 0.7709447415329769, "grad_norm": 0.2294921875, "learning_rate": 1.4668776717670068e-05, "loss": 1.0048, "num_tokens": 22827549430.0, "step": 4325 }, { "epoch": 0.7711229946524064, "grad_norm": 0.212890625, "learning_rate": 1.4666404197044375e-05, "loss": 1.0258, "num_tokens": 22833833150.0, "step": 4326 }, { "epoch": 0.771301247771836, "grad_norm": 0.22265625, "learning_rate": 1.4664031370896443e-05, "loss": 1.0425, "num_tokens": 22840116605.0, "step": 4327 }, { "epoch": 0.7714795008912656, "grad_norm": 0.216796875, "learning_rate": 1.4661658239424004e-05, "loss": 1.0178, "num_tokens": 22846401015.0, "step": 4328 }, { "epoch": 0.7716577540106951, "grad_norm": 0.2177734375, "learning_rate": 1.4659284802824812e-05, "loss": 1.0102, "num_tokens": 22852684375.0, "step": 4329 }, { "epoch": 0.7718360071301248, "grad_norm": 0.2470703125, "learning_rate": 1.4656911061296637e-05, "loss": 1.0242, "num_tokens": 22858958403.0, "step": 4330 }, { "epoch": 0.7720142602495543, "grad_norm": 0.2080078125, "learning_rate": 1.4654537015037294e-05, "loss": 1.0082, "num_tokens": 22865220763.0, "step": 4331 }, { "epoch": 0.772192513368984, "grad_norm": 0.2109375, "learning_rate": 1.4652162664244607e-05, "loss": 0.9833, "num_tokens": 22871478328.0, "step": 4332 }, { "epoch": 0.7723707664884135, "grad_norm": 0.2236328125, "learning_rate": 1.4649788009116436e-05, "loss": 1.0424, "num_tokens": 22877762244.0, "step": 4333 }, { "epoch": 0.7725490196078432, "grad_norm": 0.2001953125, "learning_rate": 1.4647413049850656e-05, "loss": 1.0308, "num_tokens": 22884019184.0, "step": 4334 }, { "epoch": 0.7727272727272727, "grad_norm": 0.2158203125, "learning_rate": 1.4645037786645174e-05, "loss": 1.0149, "num_tokens": 22890273762.0, "step": 4335 }, { "epoch": 0.7729055258467024, "grad_norm": 0.2109375, "learning_rate": 1.4642662219697921e-05, "loss": 1.0194, "num_tokens": 22896556025.0, "step": 4336 }, { "epoch": 0.7730837789661319, "grad_norm": 0.216796875, "learning_rate": 1.464028634920686e-05, "loss": 1.0107, "num_tokens": 22902799882.0, "step": 4337 }, { "epoch": 0.7732620320855615, "grad_norm": 0.2080078125, "learning_rate": 1.4637910175369966e-05, "loss": 1.0091, "num_tokens": 22909085451.0, "step": 4338 }, { "epoch": 0.7734402852049911, "grad_norm": 0.1953125, "learning_rate": 1.4635533698385247e-05, "loss": 1.0161, "num_tokens": 22915350138.0, "step": 4339 }, { "epoch": 0.7736185383244206, "grad_norm": 0.1953125, "learning_rate": 1.4633156918450736e-05, "loss": 1.0161, "num_tokens": 22921611708.0, "step": 4340 }, { "epoch": 0.7737967914438503, "grad_norm": 0.2080078125, "learning_rate": 1.4630779835764488e-05, "loss": 0.9867, "num_tokens": 22927832064.0, "step": 4341 }, { "epoch": 0.7739750445632798, "grad_norm": 0.2109375, "learning_rate": 1.4628402450524594e-05, "loss": 1.033, "num_tokens": 22934114909.0, "step": 4342 }, { "epoch": 0.7741532976827095, "grad_norm": 0.2021484375, "learning_rate": 1.4626024762929151e-05, "loss": 1.0225, "num_tokens": 22940399053.0, "step": 4343 }, { "epoch": 0.774331550802139, "grad_norm": 0.2119140625, "learning_rate": 1.4623646773176302e-05, "loss": 1.0068, "num_tokens": 22946641681.0, "step": 4344 }, { "epoch": 0.7745098039215687, "grad_norm": 0.2109375, "learning_rate": 1.4621268481464199e-05, "loss": 1.014, "num_tokens": 22952883556.0, "step": 4345 }, { "epoch": 0.7746880570409982, "grad_norm": 0.19921875, "learning_rate": 1.4618889887991029e-05, "loss": 1.0302, "num_tokens": 22959152706.0, "step": 4346 }, { "epoch": 0.7748663101604278, "grad_norm": 0.2060546875, "learning_rate": 1.4616510992954998e-05, "loss": 1.0397, "num_tokens": 22965435668.0, "step": 4347 }, { "epoch": 0.7750445632798574, "grad_norm": 0.2138671875, "learning_rate": 1.461413179655434e-05, "loss": 1.0131, "num_tokens": 22971718930.0, "step": 4348 }, { "epoch": 0.775222816399287, "grad_norm": 0.2197265625, "learning_rate": 1.4611752298987318e-05, "loss": 1.0309, "num_tokens": 22977995951.0, "step": 4349 }, { "epoch": 0.7754010695187166, "grad_norm": 0.2275390625, "learning_rate": 1.4609372500452208e-05, "loss": 1.0022, "num_tokens": 22984229185.0, "step": 4350 }, { "epoch": 0.7755793226381462, "grad_norm": 0.20703125, "learning_rate": 1.4606992401147329e-05, "loss": 1.0242, "num_tokens": 22990514446.0, "step": 4351 }, { "epoch": 0.7757575757575758, "grad_norm": 0.2275390625, "learning_rate": 1.460461200127101e-05, "loss": 1.0006, "num_tokens": 22996798393.0, "step": 4352 }, { "epoch": 0.7759358288770053, "grad_norm": 0.2109375, "learning_rate": 1.460223130102161e-05, "loss": 1.0317, "num_tokens": 23003083536.0, "step": 4353 }, { "epoch": 0.776114081996435, "grad_norm": 0.2109375, "learning_rate": 1.4599850300597511e-05, "loss": 0.994, "num_tokens": 23009338328.0, "step": 4354 }, { "epoch": 0.7762923351158645, "grad_norm": 0.2353515625, "learning_rate": 1.4597469000197129e-05, "loss": 1.0146, "num_tokens": 23015620081.0, "step": 4355 }, { "epoch": 0.7764705882352941, "grad_norm": 0.2099609375, "learning_rate": 1.4595087400018895e-05, "loss": 1.0276, "num_tokens": 23021905212.0, "step": 4356 }, { "epoch": 0.7766488413547237, "grad_norm": 0.22265625, "learning_rate": 1.4592705500261264e-05, "loss": 1.0041, "num_tokens": 23028176593.0, "step": 4357 }, { "epoch": 0.7768270944741533, "grad_norm": 0.21875, "learning_rate": 1.4590323301122725e-05, "loss": 1.0154, "num_tokens": 23034459270.0, "step": 4358 }, { "epoch": 0.7770053475935829, "grad_norm": 0.2041015625, "learning_rate": 1.4587940802801784e-05, "loss": 1.0014, "num_tokens": 23040743193.0, "step": 4359 }, { "epoch": 0.7771836007130125, "grad_norm": 0.240234375, "learning_rate": 1.4585558005496983e-05, "loss": 1.0768, "num_tokens": 23047014536.0, "step": 4360 }, { "epoch": 0.777361853832442, "grad_norm": 0.2353515625, "learning_rate": 1.4583174909406871e-05, "loss": 1.0121, "num_tokens": 23053297448.0, "step": 4361 }, { "epoch": 0.7775401069518717, "grad_norm": 0.22265625, "learning_rate": 1.4580791514730035e-05, "loss": 0.9846, "num_tokens": 23059583626.0, "step": 4362 }, { "epoch": 0.7777183600713012, "grad_norm": 0.2294921875, "learning_rate": 1.4578407821665086e-05, "loss": 1.0432, "num_tokens": 23065867926.0, "step": 4363 }, { "epoch": 0.7778966131907309, "grad_norm": 0.224609375, "learning_rate": 1.4576023830410658e-05, "loss": 1.0362, "num_tokens": 23072135499.0, "step": 4364 }, { "epoch": 0.7780748663101604, "grad_norm": 0.2275390625, "learning_rate": 1.457363954116541e-05, "loss": 1.0197, "num_tokens": 23078395397.0, "step": 4365 }, { "epoch": 0.77825311942959, "grad_norm": 0.21875, "learning_rate": 1.4571254954128015e-05, "loss": 0.9665, "num_tokens": 23084680390.0, "step": 4366 }, { "epoch": 0.7784313725490196, "grad_norm": 0.2109375, "learning_rate": 1.4568870069497197e-05, "loss": 1.0212, "num_tokens": 23090947952.0, "step": 4367 }, { "epoch": 0.7786096256684492, "grad_norm": 0.21875, "learning_rate": 1.4566484887471679e-05, "loss": 1.0141, "num_tokens": 23097231752.0, "step": 4368 }, { "epoch": 0.7787878787878788, "grad_norm": 0.21484375, "learning_rate": 1.456409940825022e-05, "loss": 1.0128, "num_tokens": 23103512025.0, "step": 4369 }, { "epoch": 0.7789661319073083, "grad_norm": 0.2041015625, "learning_rate": 1.4561713632031605e-05, "loss": 1.0149, "num_tokens": 23109796101.0, "step": 4370 }, { "epoch": 0.779144385026738, "grad_norm": 0.216796875, "learning_rate": 1.455932755901464e-05, "loss": 1.0403, "num_tokens": 23116003141.0, "step": 4371 }, { "epoch": 0.7793226381461675, "grad_norm": 0.22265625, "learning_rate": 1.4556941189398158e-05, "loss": 1.0139, "num_tokens": 23122277370.0, "step": 4372 }, { "epoch": 0.7795008912655972, "grad_norm": 0.2060546875, "learning_rate": 1.455455452338101e-05, "loss": 0.9848, "num_tokens": 23128557125.0, "step": 4373 }, { "epoch": 0.7796791443850267, "grad_norm": 0.2158203125, "learning_rate": 1.4552167561162085e-05, "loss": 1.0212, "num_tokens": 23134834994.0, "step": 4374 }, { "epoch": 0.7798573975044564, "grad_norm": 0.197265625, "learning_rate": 1.4549780302940286e-05, "loss": 1.0334, "num_tokens": 23141113092.0, "step": 4375 }, { "epoch": 0.7800356506238859, "grad_norm": 0.20703125, "learning_rate": 1.4547392748914543e-05, "loss": 1.0372, "num_tokens": 23147374826.0, "step": 4376 }, { "epoch": 0.7802139037433156, "grad_norm": 0.224609375, "learning_rate": 1.4545004899283809e-05, "loss": 1.0221, "num_tokens": 23153577908.0, "step": 4377 }, { "epoch": 0.7803921568627451, "grad_norm": 0.2080078125, "learning_rate": 1.4542616754247073e-05, "loss": 0.9913, "num_tokens": 23159862572.0, "step": 4378 }, { "epoch": 0.7805704099821746, "grad_norm": 0.220703125, "learning_rate": 1.454022831400333e-05, "loss": 1.0222, "num_tokens": 23166146338.0, "step": 4379 }, { "epoch": 0.7807486631016043, "grad_norm": 0.2236328125, "learning_rate": 1.4537839578751614e-05, "loss": 1.0233, "num_tokens": 23172401659.0, "step": 4380 }, { "epoch": 0.7809269162210338, "grad_norm": 0.21484375, "learning_rate": 1.4535450548690977e-05, "loss": 1.0341, "num_tokens": 23178667057.0, "step": 4381 }, { "epoch": 0.7811051693404635, "grad_norm": 0.19921875, "learning_rate": 1.4533061224020497e-05, "loss": 1.0362, "num_tokens": 23184944950.0, "step": 4382 }, { "epoch": 0.781283422459893, "grad_norm": 0.232421875, "learning_rate": 1.4530671604939281e-05, "loss": 1.0123, "num_tokens": 23191229279.0, "step": 4383 }, { "epoch": 0.7814616755793227, "grad_norm": 0.2333984375, "learning_rate": 1.4528281691646451e-05, "loss": 1.0166, "num_tokens": 23197510430.0, "step": 4384 }, { "epoch": 0.7816399286987522, "grad_norm": 0.2275390625, "learning_rate": 1.4525891484341162e-05, "loss": 1.0131, "num_tokens": 23203752739.0, "step": 4385 }, { "epoch": 0.7818181818181819, "grad_norm": 0.2236328125, "learning_rate": 1.452350098322259e-05, "loss": 1.0435, "num_tokens": 23210015071.0, "step": 4386 }, { "epoch": 0.7819964349376114, "grad_norm": 0.2158203125, "learning_rate": 1.4521110188489938e-05, "loss": 1.0273, "num_tokens": 23216283292.0, "step": 4387 }, { "epoch": 0.782174688057041, "grad_norm": 0.2392578125, "learning_rate": 1.4518719100342427e-05, "loss": 1.0263, "num_tokens": 23222537329.0, "step": 4388 }, { "epoch": 0.7823529411764706, "grad_norm": 0.205078125, "learning_rate": 1.4516327718979308e-05, "loss": 1.0167, "num_tokens": 23228821526.0, "step": 4389 }, { "epoch": 0.7825311942959001, "grad_norm": 0.205078125, "learning_rate": 1.451393604459986e-05, "loss": 1.0067, "num_tokens": 23235054457.0, "step": 4390 }, { "epoch": 0.7827094474153298, "grad_norm": 0.22265625, "learning_rate": 1.4511544077403374e-05, "loss": 1.0339, "num_tokens": 23241336877.0, "step": 4391 }, { "epoch": 0.7828877005347593, "grad_norm": 0.2216796875, "learning_rate": 1.4509151817589182e-05, "loss": 1.0253, "num_tokens": 23247620026.0, "step": 4392 }, { "epoch": 0.783065953654189, "grad_norm": 0.19921875, "learning_rate": 1.4506759265356623e-05, "loss": 1.0074, "num_tokens": 23253895693.0, "step": 4393 }, { "epoch": 0.7832442067736185, "grad_norm": 0.232421875, "learning_rate": 1.4504366420905074e-05, "loss": 1.0108, "num_tokens": 23260150666.0, "step": 4394 }, { "epoch": 0.7834224598930482, "grad_norm": 0.2158203125, "learning_rate": 1.4501973284433932e-05, "loss": 1.0041, "num_tokens": 23266433077.0, "step": 4395 }, { "epoch": 0.7836007130124777, "grad_norm": 0.2041015625, "learning_rate": 1.4499579856142614e-05, "loss": 1.0224, "num_tokens": 23272706459.0, "step": 4396 }, { "epoch": 0.7837789661319073, "grad_norm": 0.2431640625, "learning_rate": 1.4497186136230566e-05, "loss": 1.0467, "num_tokens": 23278956221.0, "step": 4397 }, { "epoch": 0.7839572192513369, "grad_norm": 0.2314453125, "learning_rate": 1.4494792124897258e-05, "loss": 1.0136, "num_tokens": 23285239403.0, "step": 4398 }, { "epoch": 0.7841354723707665, "grad_norm": 0.2216796875, "learning_rate": 1.4492397822342186e-05, "loss": 1.0324, "num_tokens": 23291522370.0, "step": 4399 }, { "epoch": 0.7843137254901961, "grad_norm": 0.259765625, "learning_rate": 1.449000322876486e-05, "loss": 1.007, "num_tokens": 23297781240.0, "step": 4400 }, { "epoch": 0.7844919786096257, "grad_norm": 0.2451171875, "learning_rate": 1.448760834436483e-05, "loss": 1.0075, "num_tokens": 23304047980.0, "step": 4401 }, { "epoch": 0.7846702317290553, "grad_norm": 0.2421875, "learning_rate": 1.4485213169341658e-05, "loss": 1.0344, "num_tokens": 23310331971.0, "step": 4402 }, { "epoch": 0.7848484848484848, "grad_norm": 0.2373046875, "learning_rate": 1.4482817703894938e-05, "loss": 1.0397, "num_tokens": 23316599507.0, "step": 4403 }, { "epoch": 0.7850267379679144, "grad_norm": 0.22265625, "learning_rate": 1.4480421948224279e-05, "loss": 1.0541, "num_tokens": 23322858148.0, "step": 4404 }, { "epoch": 0.785204991087344, "grad_norm": 0.2275390625, "learning_rate": 1.4478025902529324e-05, "loss": 1.0387, "num_tokens": 23329092975.0, "step": 4405 }, { "epoch": 0.7853832442067736, "grad_norm": 0.2314453125, "learning_rate": 1.4475629567009735e-05, "loss": 1.0684, "num_tokens": 23335376138.0, "step": 4406 }, { "epoch": 0.7855614973262032, "grad_norm": 0.2236328125, "learning_rate": 1.4473232941865196e-05, "loss": 1.0379, "num_tokens": 23341659366.0, "step": 4407 }, { "epoch": 0.7857397504456328, "grad_norm": 0.19921875, "learning_rate": 1.4470836027295426e-05, "loss": 1.0679, "num_tokens": 23347933145.0, "step": 4408 }, { "epoch": 0.7859180035650624, "grad_norm": 0.2255859375, "learning_rate": 1.4468438823500155e-05, "loss": 1.0413, "num_tokens": 23354169504.0, "step": 4409 }, { "epoch": 0.786096256684492, "grad_norm": 0.21484375, "learning_rate": 1.4466041330679146e-05, "loss": 1.0437, "num_tokens": 23360451184.0, "step": 4410 }, { "epoch": 0.7862745098039216, "grad_norm": 0.212890625, "learning_rate": 1.4463643549032171e-05, "loss": 1.0304, "num_tokens": 23366731275.0, "step": 4411 }, { "epoch": 0.7864527629233512, "grad_norm": 0.1982421875, "learning_rate": 1.4461245478759054e-05, "loss": 0.9975, "num_tokens": 23373015224.0, "step": 4412 }, { "epoch": 0.7866310160427807, "grad_norm": 0.220703125, "learning_rate": 1.4458847120059615e-05, "loss": 1.0002, "num_tokens": 23379298773.0, "step": 4413 }, { "epoch": 0.7868092691622104, "grad_norm": 0.203125, "learning_rate": 1.4456448473133718e-05, "loss": 1.0276, "num_tokens": 23385583962.0, "step": 4414 }, { "epoch": 0.7869875222816399, "grad_norm": 0.2099609375, "learning_rate": 1.4454049538181238e-05, "loss": 1.0425, "num_tokens": 23391868222.0, "step": 4415 }, { "epoch": 0.7871657754010695, "grad_norm": 0.224609375, "learning_rate": 1.4451650315402077e-05, "loss": 1.0282, "num_tokens": 23398138658.0, "step": 4416 }, { "epoch": 0.7873440285204991, "grad_norm": 0.212890625, "learning_rate": 1.4449250804996166e-05, "loss": 0.9893, "num_tokens": 23404422907.0, "step": 4417 }, { "epoch": 0.7875222816399287, "grad_norm": 0.2197265625, "learning_rate": 1.4446851007163457e-05, "loss": 1.0013, "num_tokens": 23410707304.0, "step": 4418 }, { "epoch": 0.7877005347593583, "grad_norm": 0.228515625, "learning_rate": 1.4444450922103928e-05, "loss": 1.0014, "num_tokens": 23416992083.0, "step": 4419 }, { "epoch": 0.7878787878787878, "grad_norm": 0.2392578125, "learning_rate": 1.444205055001757e-05, "loss": 0.9919, "num_tokens": 23423257717.0, "step": 4420 }, { "epoch": 0.7880570409982175, "grad_norm": 0.23046875, "learning_rate": 1.4439649891104413e-05, "loss": 1.0014, "num_tokens": 23429540829.0, "step": 4421 }, { "epoch": 0.788235294117647, "grad_norm": 0.2158203125, "learning_rate": 1.4437248945564503e-05, "loss": 1.0337, "num_tokens": 23435825667.0, "step": 4422 }, { "epoch": 0.7884135472370767, "grad_norm": 0.20703125, "learning_rate": 1.4434847713597912e-05, "loss": 1.036, "num_tokens": 23442108672.0, "step": 4423 }, { "epoch": 0.7885918003565062, "grad_norm": 0.2236328125, "learning_rate": 1.4432446195404734e-05, "loss": 1.0583, "num_tokens": 23448389855.0, "step": 4424 }, { "epoch": 0.7887700534759359, "grad_norm": 0.2158203125, "learning_rate": 1.4430044391185092e-05, "loss": 1.0272, "num_tokens": 23454664847.0, "step": 4425 }, { "epoch": 0.7889483065953654, "grad_norm": 0.2158203125, "learning_rate": 1.4427642301139122e-05, "loss": 1.0128, "num_tokens": 23460948545.0, "step": 4426 }, { "epoch": 0.7891265597147951, "grad_norm": 0.212890625, "learning_rate": 1.4425239925466994e-05, "loss": 1.0149, "num_tokens": 23467231939.0, "step": 4427 }, { "epoch": 0.7893048128342246, "grad_norm": 0.2197265625, "learning_rate": 1.4422837264368898e-05, "loss": 0.9923, "num_tokens": 23473493326.0, "step": 4428 }, { "epoch": 0.7894830659536541, "grad_norm": 0.208984375, "learning_rate": 1.4420434318045054e-05, "loss": 1.0361, "num_tokens": 23479776088.0, "step": 4429 }, { "epoch": 0.7896613190730838, "grad_norm": 0.212890625, "learning_rate": 1.441803108669569e-05, "loss": 0.9972, "num_tokens": 23486056778.0, "step": 4430 }, { "epoch": 0.7898395721925133, "grad_norm": 0.21484375, "learning_rate": 1.4415627570521073e-05, "loss": 1.0487, "num_tokens": 23492342192.0, "step": 4431 }, { "epoch": 0.790017825311943, "grad_norm": 0.2021484375, "learning_rate": 1.4413223769721488e-05, "loss": 1.0305, "num_tokens": 23498625300.0, "step": 4432 }, { "epoch": 0.7901960784313725, "grad_norm": 0.2158203125, "learning_rate": 1.4410819684497248e-05, "loss": 1.005, "num_tokens": 23504899101.0, "step": 4433 }, { "epoch": 0.7903743315508022, "grad_norm": 0.23046875, "learning_rate": 1.4408415315048679e-05, "loss": 1.0035, "num_tokens": 23511185779.0, "step": 4434 }, { "epoch": 0.7905525846702317, "grad_norm": 0.2109375, "learning_rate": 1.4406010661576142e-05, "loss": 1.004, "num_tokens": 23517457414.0, "step": 4435 }, { "epoch": 0.7907308377896614, "grad_norm": 0.212890625, "learning_rate": 1.4403605724280013e-05, "loss": 1.0344, "num_tokens": 23523739867.0, "step": 4436 }, { "epoch": 0.7909090909090909, "grad_norm": 0.224609375, "learning_rate": 1.4401200503360703e-05, "loss": 1.016, "num_tokens": 23530021456.0, "step": 4437 }, { "epoch": 0.7910873440285205, "grad_norm": 0.21484375, "learning_rate": 1.4398794999018638e-05, "loss": 0.9994, "num_tokens": 23536267571.0, "step": 4438 }, { "epoch": 0.7912655971479501, "grad_norm": 0.2216796875, "learning_rate": 1.439638921145426e-05, "loss": 0.9863, "num_tokens": 23542546910.0, "step": 4439 }, { "epoch": 0.7914438502673797, "grad_norm": 0.228515625, "learning_rate": 1.4393983140868057e-05, "loss": 0.9872, "num_tokens": 23548819910.0, "step": 4440 }, { "epoch": 0.7916221033868093, "grad_norm": 0.212890625, "learning_rate": 1.439157678746052e-05, "loss": 1.0529, "num_tokens": 23555102577.0, "step": 4441 }, { "epoch": 0.7918003565062388, "grad_norm": 0.2333984375, "learning_rate": 1.438917015143217e-05, "loss": 0.997, "num_tokens": 23561374885.0, "step": 4442 }, { "epoch": 0.7919786096256685, "grad_norm": 0.2109375, "learning_rate": 1.4386763232983556e-05, "loss": 1.0075, "num_tokens": 23567628763.0, "step": 4443 }, { "epoch": 0.792156862745098, "grad_norm": 0.203125, "learning_rate": 1.4384356032315242e-05, "loss": 1.011, "num_tokens": 23573882709.0, "step": 4444 }, { "epoch": 0.7923351158645277, "grad_norm": 0.2197265625, "learning_rate": 1.438194854962783e-05, "loss": 1.0494, "num_tokens": 23580151479.0, "step": 4445 }, { "epoch": 0.7925133689839572, "grad_norm": 0.21484375, "learning_rate": 1.4379540785121926e-05, "loss": 1.0013, "num_tokens": 23586414290.0, "step": 4446 }, { "epoch": 0.7926916221033868, "grad_norm": 0.2177734375, "learning_rate": 1.4377132738998177e-05, "loss": 1.0303, "num_tokens": 23592698491.0, "step": 4447 }, { "epoch": 0.7928698752228164, "grad_norm": 0.1962890625, "learning_rate": 1.4374724411457239e-05, "loss": 1.0061, "num_tokens": 23598984199.0, "step": 4448 }, { "epoch": 0.793048128342246, "grad_norm": 0.21875, "learning_rate": 1.4372315802699802e-05, "loss": 0.982, "num_tokens": 23605239934.0, "step": 4449 }, { "epoch": 0.7932263814616756, "grad_norm": 0.23046875, "learning_rate": 1.4369906912926579e-05, "loss": 1.0498, "num_tokens": 23611508550.0, "step": 4450 }, { "epoch": 0.7934046345811052, "grad_norm": 0.2197265625, "learning_rate": 1.4367497742338299e-05, "loss": 1.0405, "num_tokens": 23617774762.0, "step": 4451 }, { "epoch": 0.7935828877005348, "grad_norm": 0.216796875, "learning_rate": 1.4365088291135718e-05, "loss": 1.0536, "num_tokens": 23624058197.0, "step": 4452 }, { "epoch": 0.7937611408199643, "grad_norm": 0.228515625, "learning_rate": 1.4362678559519618e-05, "loss": 1.0087, "num_tokens": 23630342976.0, "step": 4453 }, { "epoch": 0.793939393939394, "grad_norm": 0.2060546875, "learning_rate": 1.4360268547690807e-05, "loss": 1.0132, "num_tokens": 23636608168.0, "step": 4454 }, { "epoch": 0.7941176470588235, "grad_norm": 0.24609375, "learning_rate": 1.4357858255850102e-05, "loss": 1.0563, "num_tokens": 23642878147.0, "step": 4455 }, { "epoch": 0.7942959001782531, "grad_norm": 0.216796875, "learning_rate": 1.4355447684198365e-05, "loss": 1.0498, "num_tokens": 23649162871.0, "step": 4456 }, { "epoch": 0.7944741532976827, "grad_norm": 0.21484375, "learning_rate": 1.4353036832936455e-05, "loss": 1.0328, "num_tokens": 23655401511.0, "step": 4457 }, { "epoch": 0.7946524064171123, "grad_norm": 0.22265625, "learning_rate": 1.4350625702265282e-05, "loss": 1.0187, "num_tokens": 23661685022.0, "step": 4458 }, { "epoch": 0.7948306595365419, "grad_norm": 0.1953125, "learning_rate": 1.4348214292385757e-05, "loss": 1.0141, "num_tokens": 23667968320.0, "step": 4459 }, { "epoch": 0.7950089126559715, "grad_norm": 0.2265625, "learning_rate": 1.4345802603498831e-05, "loss": 1.0077, "num_tokens": 23674199414.0, "step": 4460 }, { "epoch": 0.795187165775401, "grad_norm": 0.2216796875, "learning_rate": 1.4343390635805463e-05, "loss": 1.044, "num_tokens": 23680452074.0, "step": 4461 }, { "epoch": 0.7953654188948307, "grad_norm": 0.1982421875, "learning_rate": 1.4340978389506648e-05, "loss": 1.0257, "num_tokens": 23686733526.0, "step": 4462 }, { "epoch": 0.7955436720142602, "grad_norm": 0.224609375, "learning_rate": 1.4338565864803398e-05, "loss": 1.027, "num_tokens": 23692984469.0, "step": 4463 }, { "epoch": 0.7957219251336899, "grad_norm": 0.2138671875, "learning_rate": 1.4336153061896748e-05, "loss": 1.0336, "num_tokens": 23699268184.0, "step": 4464 }, { "epoch": 0.7959001782531194, "grad_norm": 0.189453125, "learning_rate": 1.4333739980987757e-05, "loss": 1.0142, "num_tokens": 23705491982.0, "step": 4465 }, { "epoch": 0.796078431372549, "grad_norm": 0.2080078125, "learning_rate": 1.4331326622277505e-05, "loss": 1.0023, "num_tokens": 23711774297.0, "step": 4466 }, { "epoch": 0.7962566844919786, "grad_norm": 0.2138671875, "learning_rate": 1.4328912985967107e-05, "loss": 1.0122, "num_tokens": 23718030422.0, "step": 4467 }, { "epoch": 0.7964349376114082, "grad_norm": 0.2021484375, "learning_rate": 1.4326499072257685e-05, "loss": 0.9964, "num_tokens": 23724284724.0, "step": 4468 }, { "epoch": 0.7966131907308378, "grad_norm": 0.1962890625, "learning_rate": 1.4324084881350386e-05, "loss": 1.0159, "num_tokens": 23730569973.0, "step": 4469 }, { "epoch": 0.7967914438502673, "grad_norm": 0.2021484375, "learning_rate": 1.4321670413446395e-05, "loss": 1.0078, "num_tokens": 23736853503.0, "step": 4470 }, { "epoch": 0.796969696969697, "grad_norm": 0.2080078125, "learning_rate": 1.4319255668746905e-05, "loss": 1.0405, "num_tokens": 23743126169.0, "step": 4471 }, { "epoch": 0.7971479500891265, "grad_norm": 0.1884765625, "learning_rate": 1.4316840647453143e-05, "loss": 1.0283, "num_tokens": 23749404486.0, "step": 4472 }, { "epoch": 0.7973262032085562, "grad_norm": 0.205078125, "learning_rate": 1.4314425349766341e-05, "loss": 1.0009, "num_tokens": 23755682509.0, "step": 4473 }, { "epoch": 0.7975044563279857, "grad_norm": 0.2109375, "learning_rate": 1.4312009775887778e-05, "loss": 1.0241, "num_tokens": 23761912912.0, "step": 4474 }, { "epoch": 0.7976827094474154, "grad_norm": 0.208984375, "learning_rate": 1.4309593926018743e-05, "loss": 1.0335, "num_tokens": 23768159605.0, "step": 4475 }, { "epoch": 0.7978609625668449, "grad_norm": 0.2197265625, "learning_rate": 1.4307177800360539e-05, "loss": 1.0216, "num_tokens": 23774404961.0, "step": 4476 }, { "epoch": 0.7980392156862746, "grad_norm": 0.201171875, "learning_rate": 1.4304761399114512e-05, "loss": 1.0266, "num_tokens": 23780689636.0, "step": 4477 }, { "epoch": 0.7982174688057041, "grad_norm": 0.1962890625, "learning_rate": 1.430234472248202e-05, "loss": 1.0026, "num_tokens": 23786961835.0, "step": 4478 }, { "epoch": 0.7983957219251336, "grad_norm": 0.2099609375, "learning_rate": 1.4299927770664445e-05, "loss": 1.015, "num_tokens": 23793227858.0, "step": 4479 }, { "epoch": 0.7985739750445633, "grad_norm": 0.2099609375, "learning_rate": 1.4297510543863186e-05, "loss": 1.0587, "num_tokens": 23799508459.0, "step": 4480 }, { "epoch": 0.7987522281639928, "grad_norm": 0.2001953125, "learning_rate": 1.4295093042279682e-05, "loss": 1.0522, "num_tokens": 23805787089.0, "step": 4481 }, { "epoch": 0.7989304812834225, "grad_norm": 0.19921875, "learning_rate": 1.4292675266115373e-05, "loss": 1.0163, "num_tokens": 23812070374.0, "step": 4482 }, { "epoch": 0.799108734402852, "grad_norm": 0.19921875, "learning_rate": 1.4290257215571744e-05, "loss": 1.0378, "num_tokens": 23818321009.0, "step": 4483 }, { "epoch": 0.7992869875222817, "grad_norm": 0.2080078125, "learning_rate": 1.4287838890850278e-05, "loss": 1.004, "num_tokens": 23824580164.0, "step": 4484 }, { "epoch": 0.7994652406417112, "grad_norm": 0.22265625, "learning_rate": 1.4285420292152507e-05, "loss": 1.0051, "num_tokens": 23830862823.0, "step": 4485 }, { "epoch": 0.7996434937611409, "grad_norm": 0.201171875, "learning_rate": 1.4283001419679965e-05, "loss": 1.0113, "num_tokens": 23837130220.0, "step": 4486 }, { "epoch": 0.7998217468805704, "grad_norm": 0.205078125, "learning_rate": 1.4280582273634223e-05, "loss": 1.0291, "num_tokens": 23843407044.0, "step": 4487 }, { "epoch": 0.8, "grad_norm": 0.1953125, "learning_rate": 1.4278162854216867e-05, "loss": 1.0128, "num_tokens": 23849691816.0, "step": 4488 }, { "epoch": 0.8001782531194296, "grad_norm": 0.203125, "learning_rate": 1.4275743161629503e-05, "loss": 1.0268, "num_tokens": 23855976458.0, "step": 4489 }, { "epoch": 0.8003565062388592, "grad_norm": 0.2119140625, "learning_rate": 1.4273323196073772e-05, "loss": 1.0446, "num_tokens": 23862261596.0, "step": 4490 }, { "epoch": 0.8005347593582888, "grad_norm": 0.2060546875, "learning_rate": 1.4270902957751327e-05, "loss": 1.0304, "num_tokens": 23868545616.0, "step": 4491 }, { "epoch": 0.8007130124777183, "grad_norm": 0.2021484375, "learning_rate": 1.4268482446863846e-05, "loss": 1.0473, "num_tokens": 23874806953.0, "step": 4492 }, { "epoch": 0.800891265597148, "grad_norm": 0.2138671875, "learning_rate": 1.4266061663613028e-05, "loss": 1.0158, "num_tokens": 23881082358.0, "step": 4493 }, { "epoch": 0.8010695187165775, "grad_norm": 0.2041015625, "learning_rate": 1.4263640608200605e-05, "loss": 1.0033, "num_tokens": 23887349914.0, "step": 4494 }, { "epoch": 0.8012477718360071, "grad_norm": 0.2138671875, "learning_rate": 1.4261219280828317e-05, "loss": 1.045, "num_tokens": 23893633719.0, "step": 4495 }, { "epoch": 0.8014260249554367, "grad_norm": 0.201171875, "learning_rate": 1.4258797681697938e-05, "loss": 1.0326, "num_tokens": 23899915994.0, "step": 4496 }, { "epoch": 0.8016042780748663, "grad_norm": 0.1982421875, "learning_rate": 1.425637581101126e-05, "loss": 1.0369, "num_tokens": 23906199181.0, "step": 4497 }, { "epoch": 0.8017825311942959, "grad_norm": 0.2041015625, "learning_rate": 1.4253953668970097e-05, "loss": 0.9926, "num_tokens": 23912483383.0, "step": 4498 }, { "epoch": 0.8019607843137255, "grad_norm": 0.2001953125, "learning_rate": 1.4251531255776288e-05, "loss": 1.0245, "num_tokens": 23918740649.0, "step": 4499 }, { "epoch": 0.8021390374331551, "grad_norm": 0.208984375, "learning_rate": 1.424910857163169e-05, "loss": 1.0183, "num_tokens": 23924999295.0, "step": 4500 }, { "epoch": 0.8023172905525847, "grad_norm": 0.2177734375, "learning_rate": 1.4246685616738186e-05, "loss": 1.0654, "num_tokens": 23931281624.0, "step": 4501 }, { "epoch": 0.8024955436720143, "grad_norm": 0.2158203125, "learning_rate": 1.4244262391297683e-05, "loss": 1.0236, "num_tokens": 23937552288.0, "step": 4502 }, { "epoch": 0.8026737967914439, "grad_norm": 0.2080078125, "learning_rate": 1.424183889551211e-05, "loss": 1.0416, "num_tokens": 23943822978.0, "step": 4503 }, { "epoch": 0.8028520499108734, "grad_norm": 0.2421875, "learning_rate": 1.4239415129583417e-05, "loss": 1.0662, "num_tokens": 23950105266.0, "step": 4504 }, { "epoch": 0.803030303030303, "grad_norm": 0.1962890625, "learning_rate": 1.4236991093713575e-05, "loss": 1.0687, "num_tokens": 23956390714.0, "step": 4505 }, { "epoch": 0.8032085561497326, "grad_norm": 0.2177734375, "learning_rate": 1.4234566788104582e-05, "loss": 0.999, "num_tokens": 23962649603.0, "step": 4506 }, { "epoch": 0.8033868092691622, "grad_norm": 0.2265625, "learning_rate": 1.4232142212958452e-05, "loss": 1.0121, "num_tokens": 23968931725.0, "step": 4507 }, { "epoch": 0.8035650623885918, "grad_norm": 0.208984375, "learning_rate": 1.4229717368477232e-05, "loss": 1.0373, "num_tokens": 23975178589.0, "step": 4508 }, { "epoch": 0.8037433155080214, "grad_norm": 0.2158203125, "learning_rate": 1.4227292254862974e-05, "loss": 1.0061, "num_tokens": 23981454155.0, "step": 4509 }, { "epoch": 0.803921568627451, "grad_norm": 0.2255859375, "learning_rate": 1.4224866872317775e-05, "loss": 1.0175, "num_tokens": 23987738558.0, "step": 4510 }, { "epoch": 0.8040998217468805, "grad_norm": 0.1943359375, "learning_rate": 1.4222441221043739e-05, "loss": 1.0172, "num_tokens": 23994007295.0, "step": 4511 }, { "epoch": 0.8042780748663102, "grad_norm": 0.2255859375, "learning_rate": 1.4220015301242989e-05, "loss": 1.0453, "num_tokens": 24000267654.0, "step": 4512 }, { "epoch": 0.8044563279857397, "grad_norm": 0.208984375, "learning_rate": 1.4217589113117687e-05, "loss": 1.0256, "num_tokens": 24006550428.0, "step": 4513 }, { "epoch": 0.8046345811051694, "grad_norm": 0.2001953125, "learning_rate": 1.4215162656870004e-05, "loss": 1.0288, "num_tokens": 24012803116.0, "step": 4514 }, { "epoch": 0.8048128342245989, "grad_norm": 0.2177734375, "learning_rate": 1.4212735932702135e-05, "loss": 0.9993, "num_tokens": 24019047120.0, "step": 4515 }, { "epoch": 0.8049910873440285, "grad_norm": 0.2041015625, "learning_rate": 1.42103089408163e-05, "loss": 1.0256, "num_tokens": 24025329407.0, "step": 4516 }, { "epoch": 0.8051693404634581, "grad_norm": 0.197265625, "learning_rate": 1.4207881681414746e-05, "loss": 1.0345, "num_tokens": 24031608456.0, "step": 4517 }, { "epoch": 0.8053475935828877, "grad_norm": 0.22265625, "learning_rate": 1.4205454154699731e-05, "loss": 1.0123, "num_tokens": 24037868593.0, "step": 4518 }, { "epoch": 0.8055258467023173, "grad_norm": 0.193359375, "learning_rate": 1.4203026360873543e-05, "loss": 1.01, "num_tokens": 24044124919.0, "step": 4519 }, { "epoch": 0.8057040998217468, "grad_norm": 0.205078125, "learning_rate": 1.420059830013849e-05, "loss": 1.0248, "num_tokens": 24050397223.0, "step": 4520 }, { "epoch": 0.8058823529411765, "grad_norm": 0.1953125, "learning_rate": 1.4198169972696906e-05, "loss": 1.0346, "num_tokens": 24056667680.0, "step": 4521 }, { "epoch": 0.806060606060606, "grad_norm": 0.208984375, "learning_rate": 1.4195741378751142e-05, "loss": 1.0104, "num_tokens": 24062936621.0, "step": 4522 }, { "epoch": 0.8062388591800357, "grad_norm": 0.2060546875, "learning_rate": 1.4193312518503573e-05, "loss": 1.03, "num_tokens": 24069203833.0, "step": 4523 }, { "epoch": 0.8064171122994652, "grad_norm": 0.1962890625, "learning_rate": 1.4190883392156593e-05, "loss": 1.0159, "num_tokens": 24075474011.0, "step": 4524 }, { "epoch": 0.8065953654188949, "grad_norm": 0.2060546875, "learning_rate": 1.4188453999912627e-05, "loss": 0.9638, "num_tokens": 24081737341.0, "step": 4525 }, { "epoch": 0.8067736185383244, "grad_norm": 0.2197265625, "learning_rate": 1.4186024341974116e-05, "loss": 1.0256, "num_tokens": 24088021165.0, "step": 4526 }, { "epoch": 0.8069518716577541, "grad_norm": 0.2001953125, "learning_rate": 1.4183594418543522e-05, "loss": 1.0565, "num_tokens": 24094305089.0, "step": 4527 }, { "epoch": 0.8071301247771836, "grad_norm": 0.208984375, "learning_rate": 1.4181164229823333e-05, "loss": 1.0415, "num_tokens": 24100581866.0, "step": 4528 }, { "epoch": 0.8073083778966131, "grad_norm": 0.208984375, "learning_rate": 1.4178733776016056e-05, "loss": 1.0216, "num_tokens": 24106833869.0, "step": 4529 }, { "epoch": 0.8074866310160428, "grad_norm": 0.2109375, "learning_rate": 1.4176303057324218e-05, "loss": 0.9952, "num_tokens": 24113119710.0, "step": 4530 }, { "epoch": 0.8076648841354723, "grad_norm": 0.201171875, "learning_rate": 1.417387207395038e-05, "loss": 1.021, "num_tokens": 24119401948.0, "step": 4531 }, { "epoch": 0.807843137254902, "grad_norm": 0.2109375, "learning_rate": 1.4171440826097102e-05, "loss": 1.0209, "num_tokens": 24125685595.0, "step": 4532 }, { "epoch": 0.8080213903743315, "grad_norm": 0.201171875, "learning_rate": 1.4169009313966998e-05, "loss": 1.0175, "num_tokens": 24131945992.0, "step": 4533 }, { "epoch": 0.8081996434937612, "grad_norm": 0.212890625, "learning_rate": 1.4166577537762673e-05, "loss": 1.0058, "num_tokens": 24138230653.0, "step": 4534 }, { "epoch": 0.8083778966131907, "grad_norm": 0.1982421875, "learning_rate": 1.4164145497686772e-05, "loss": 1.0077, "num_tokens": 24144488895.0, "step": 4535 }, { "epoch": 0.8085561497326204, "grad_norm": 0.216796875, "learning_rate": 1.4161713193941962e-05, "loss": 1.0419, "num_tokens": 24150771821.0, "step": 4536 }, { "epoch": 0.8087344028520499, "grad_norm": 0.212890625, "learning_rate": 1.415928062673092e-05, "loss": 1.0298, "num_tokens": 24157054102.0, "step": 4537 }, { "epoch": 0.8089126559714795, "grad_norm": 0.2236328125, "learning_rate": 1.4156847796256354e-05, "loss": 1.0049, "num_tokens": 24163336374.0, "step": 4538 }, { "epoch": 0.8090909090909091, "grad_norm": 0.23046875, "learning_rate": 1.4154414702720996e-05, "loss": 1.0189, "num_tokens": 24169589255.0, "step": 4539 }, { "epoch": 0.8092691622103387, "grad_norm": 0.2119140625, "learning_rate": 1.4151981346327592e-05, "loss": 1.0152, "num_tokens": 24175873978.0, "step": 4540 }, { "epoch": 0.8094474153297683, "grad_norm": 0.2021484375, "learning_rate": 1.4149547727278917e-05, "loss": 1.0495, "num_tokens": 24182125911.0, "step": 4541 }, { "epoch": 0.8096256684491978, "grad_norm": 0.2216796875, "learning_rate": 1.414711384577776e-05, "loss": 1.0283, "num_tokens": 24188407904.0, "step": 4542 }, { "epoch": 0.8098039215686275, "grad_norm": 0.208984375, "learning_rate": 1.4144679702026944e-05, "loss": 1.0274, "num_tokens": 24194691976.0, "step": 4543 }, { "epoch": 0.809982174688057, "grad_norm": 0.2138671875, "learning_rate": 1.4142245296229302e-05, "loss": 1.0228, "num_tokens": 24200965144.0, "step": 4544 }, { "epoch": 0.8101604278074866, "grad_norm": 0.23046875, "learning_rate": 1.4139810628587697e-05, "loss": 1.0239, "num_tokens": 24207219652.0, "step": 4545 }, { "epoch": 0.8103386809269162, "grad_norm": 0.2041015625, "learning_rate": 1.4137375699305009e-05, "loss": 1.0141, "num_tokens": 24213502875.0, "step": 4546 }, { "epoch": 0.8105169340463458, "grad_norm": 0.2197265625, "learning_rate": 1.4134940508584138e-05, "loss": 1.0351, "num_tokens": 24219765043.0, "step": 4547 }, { "epoch": 0.8106951871657754, "grad_norm": 0.2275390625, "learning_rate": 1.4132505056628014e-05, "loss": 1.0045, "num_tokens": 24226034321.0, "step": 4548 }, { "epoch": 0.810873440285205, "grad_norm": 0.2216796875, "learning_rate": 1.4130069343639577e-05, "loss": 1.0325, "num_tokens": 24232288229.0, "step": 4549 }, { "epoch": 0.8110516934046346, "grad_norm": 0.2294921875, "learning_rate": 1.4127633369821802e-05, "loss": 1.0192, "num_tokens": 24238571019.0, "step": 4550 }, { "epoch": 0.8112299465240642, "grad_norm": 0.2265625, "learning_rate": 1.4125197135377674e-05, "loss": 1.0246, "num_tokens": 24244852283.0, "step": 4551 }, { "epoch": 0.8114081996434938, "grad_norm": 0.201171875, "learning_rate": 1.4122760640510216e-05, "loss": 1.0125, "num_tokens": 24251137836.0, "step": 4552 }, { "epoch": 0.8115864527629234, "grad_norm": 0.2265625, "learning_rate": 1.4120323885422446e-05, "loss": 1.0225, "num_tokens": 24257422155.0, "step": 4553 }, { "epoch": 0.8117647058823529, "grad_norm": 0.2490234375, "learning_rate": 1.411788687031743e-05, "loss": 1.0055, "num_tokens": 24263677727.0, "step": 4554 }, { "epoch": 0.8119429590017825, "grad_norm": 0.220703125, "learning_rate": 1.4115449595398238e-05, "loss": 1.0337, "num_tokens": 24269961182.0, "step": 4555 }, { "epoch": 0.8121212121212121, "grad_norm": 0.2158203125, "learning_rate": 1.4113012060867977e-05, "loss": 1.046, "num_tokens": 24276233115.0, "step": 4556 }, { "epoch": 0.8122994652406417, "grad_norm": 0.2109375, "learning_rate": 1.4110574266929762e-05, "loss": 1.0291, "num_tokens": 24282511345.0, "step": 4557 }, { "epoch": 0.8124777183600713, "grad_norm": 0.2080078125, "learning_rate": 1.410813621378673e-05, "loss": 1.0127, "num_tokens": 24288786302.0, "step": 4558 }, { "epoch": 0.8126559714795009, "grad_norm": 0.2216796875, "learning_rate": 1.4105697901642055e-05, "loss": 0.9845, "num_tokens": 24295061750.0, "step": 4559 }, { "epoch": 0.8128342245989305, "grad_norm": 0.2138671875, "learning_rate": 1.4103259330698917e-05, "loss": 1.0359, "num_tokens": 24301339058.0, "step": 4560 }, { "epoch": 0.81301247771836, "grad_norm": 0.2109375, "learning_rate": 1.4100820501160522e-05, "loss": 1.0143, "num_tokens": 24307574350.0, "step": 4561 }, { "epoch": 0.8131907308377897, "grad_norm": 0.2314453125, "learning_rate": 1.4098381413230095e-05, "loss": 1.0082, "num_tokens": 24313821301.0, "step": 4562 }, { "epoch": 0.8133689839572192, "grad_norm": 0.2197265625, "learning_rate": 1.4095942067110895e-05, "loss": 1.0337, "num_tokens": 24320071425.0, "step": 4563 }, { "epoch": 0.8135472370766489, "grad_norm": 0.22265625, "learning_rate": 1.4093502463006185e-05, "loss": 1.0226, "num_tokens": 24326322648.0, "step": 4564 }, { "epoch": 0.8137254901960784, "grad_norm": 0.2451171875, "learning_rate": 1.4091062601119263e-05, "loss": 1.0003, "num_tokens": 24332585832.0, "step": 4565 }, { "epoch": 0.813903743315508, "grad_norm": 0.2158203125, "learning_rate": 1.4088622481653438e-05, "loss": 1.0133, "num_tokens": 24338844262.0, "step": 4566 }, { "epoch": 0.8140819964349376, "grad_norm": 0.2119140625, "learning_rate": 1.408618210481205e-05, "loss": 1.0122, "num_tokens": 24345100532.0, "step": 4567 }, { "epoch": 0.8142602495543672, "grad_norm": 0.2109375, "learning_rate": 1.4083741470798455e-05, "loss": 1.0407, "num_tokens": 24351361858.0, "step": 4568 }, { "epoch": 0.8144385026737968, "grad_norm": 0.1904296875, "learning_rate": 1.4081300579816028e-05, "loss": 1.0002, "num_tokens": 24357646933.0, "step": 4569 }, { "epoch": 0.8146167557932263, "grad_norm": 0.2314453125, "learning_rate": 1.4078859432068175e-05, "loss": 1.0114, "num_tokens": 24363928966.0, "step": 4570 }, { "epoch": 0.814795008912656, "grad_norm": 0.2197265625, "learning_rate": 1.4076418027758314e-05, "loss": 1.008, "num_tokens": 24370211982.0, "step": 4571 }, { "epoch": 0.8149732620320855, "grad_norm": 0.2080078125, "learning_rate": 1.4073976367089887e-05, "loss": 1.0248, "num_tokens": 24376473436.0, "step": 4572 }, { "epoch": 0.8151515151515152, "grad_norm": 0.212890625, "learning_rate": 1.4071534450266359e-05, "loss": 1.0011, "num_tokens": 24382719506.0, "step": 4573 }, { "epoch": 0.8153297682709447, "grad_norm": 0.21484375, "learning_rate": 1.406909227749122e-05, "loss": 1.0217, "num_tokens": 24388992315.0, "step": 4574 }, { "epoch": 0.8155080213903744, "grad_norm": 0.22265625, "learning_rate": 1.4066649848967968e-05, "loss": 0.9893, "num_tokens": 24395267578.0, "step": 4575 }, { "epoch": 0.8156862745098039, "grad_norm": 0.2001953125, "learning_rate": 1.4064207164900136e-05, "loss": 0.9939, "num_tokens": 24401550332.0, "step": 4576 }, { "epoch": 0.8158645276292336, "grad_norm": 0.21484375, "learning_rate": 1.4061764225491273e-05, "loss": 1.0201, "num_tokens": 24407834347.0, "step": 4577 }, { "epoch": 0.8160427807486631, "grad_norm": 0.205078125, "learning_rate": 1.4059321030944948e-05, "loss": 1.0183, "num_tokens": 24414118561.0, "step": 4578 }, { "epoch": 0.8162210338680926, "grad_norm": 0.2314453125, "learning_rate": 1.4056877581464758e-05, "loss": 0.99, "num_tokens": 24420379553.0, "step": 4579 }, { "epoch": 0.8163992869875223, "grad_norm": 0.216796875, "learning_rate": 1.4054433877254313e-05, "loss": 0.9902, "num_tokens": 24426663305.0, "step": 4580 }, { "epoch": 0.8165775401069518, "grad_norm": 0.2099609375, "learning_rate": 1.4051989918517244e-05, "loss": 1.0208, "num_tokens": 24432899660.0, "step": 4581 }, { "epoch": 0.8167557932263815, "grad_norm": 0.20703125, "learning_rate": 1.404954570545721e-05, "loss": 0.9932, "num_tokens": 24439165457.0, "step": 4582 }, { "epoch": 0.816934046345811, "grad_norm": 0.2041015625, "learning_rate": 1.4047101238277889e-05, "loss": 1.0241, "num_tokens": 24445424715.0, "step": 4583 }, { "epoch": 0.8171122994652407, "grad_norm": 0.1943359375, "learning_rate": 1.4044656517182977e-05, "loss": 1.0342, "num_tokens": 24451707940.0, "step": 4584 }, { "epoch": 0.8172905525846702, "grad_norm": 0.2294921875, "learning_rate": 1.4042211542376192e-05, "loss": 1.0004, "num_tokens": 24457962682.0, "step": 4585 }, { "epoch": 0.8174688057040999, "grad_norm": 0.1982421875, "learning_rate": 1.4039766314061276e-05, "loss": 1.0195, "num_tokens": 24464245273.0, "step": 4586 }, { "epoch": 0.8176470588235294, "grad_norm": 0.193359375, "learning_rate": 1.4037320832441993e-05, "loss": 1.0218, "num_tokens": 24470522903.0, "step": 4587 }, { "epoch": 0.817825311942959, "grad_norm": 0.25, "learning_rate": 1.4034875097722123e-05, "loss": 1.0116, "num_tokens": 24476798131.0, "step": 4588 }, { "epoch": 0.8180035650623886, "grad_norm": 0.2236328125, "learning_rate": 1.4032429110105466e-05, "loss": 1.0083, "num_tokens": 24483081025.0, "step": 4589 }, { "epoch": 0.8181818181818182, "grad_norm": 0.20703125, "learning_rate": 1.4029982869795853e-05, "loss": 0.9933, "num_tokens": 24489363876.0, "step": 4590 }, { "epoch": 0.8183600713012478, "grad_norm": 0.2255859375, "learning_rate": 1.4027536376997124e-05, "loss": 1.0074, "num_tokens": 24495603562.0, "step": 4591 }, { "epoch": 0.8185383244206773, "grad_norm": 0.2158203125, "learning_rate": 1.4025089631913147e-05, "loss": 1.0058, "num_tokens": 24501823654.0, "step": 4592 }, { "epoch": 0.818716577540107, "grad_norm": 0.201171875, "learning_rate": 1.4022642634747814e-05, "loss": 1.0366, "num_tokens": 24508106965.0, "step": 4593 }, { "epoch": 0.8188948306595365, "grad_norm": 0.240234375, "learning_rate": 1.4020195385705031e-05, "loss": 1.0413, "num_tokens": 24514390612.0, "step": 4594 }, { "epoch": 0.8190730837789661, "grad_norm": 0.2265625, "learning_rate": 1.4017747884988729e-05, "loss": 1.0103, "num_tokens": 24520675280.0, "step": 4595 }, { "epoch": 0.8192513368983957, "grad_norm": 0.2265625, "learning_rate": 1.4015300132802853e-05, "loss": 1.0349, "num_tokens": 24526939348.0, "step": 4596 }, { "epoch": 0.8194295900178253, "grad_norm": 0.216796875, "learning_rate": 1.4012852129351382e-05, "loss": 1.0257, "num_tokens": 24533176184.0, "step": 4597 }, { "epoch": 0.8196078431372549, "grad_norm": 0.2119140625, "learning_rate": 1.4010403874838307e-05, "loss": 1.0373, "num_tokens": 24539459769.0, "step": 4598 }, { "epoch": 0.8197860962566845, "grad_norm": 0.2177734375, "learning_rate": 1.4007955369467639e-05, "loss": 1.0041, "num_tokens": 24545635681.0, "step": 4599 }, { "epoch": 0.8199643493761141, "grad_norm": 0.2177734375, "learning_rate": 1.4005506613443415e-05, "loss": 1.0381, "num_tokens": 24551918030.0, "step": 4600 }, { "epoch": 0.8201426024955437, "grad_norm": 0.2216796875, "learning_rate": 1.4003057606969685e-05, "loss": 1.0302, "num_tokens": 24558202477.0, "step": 4601 }, { "epoch": 0.8203208556149733, "grad_norm": 0.21875, "learning_rate": 1.4000608350250537e-05, "loss": 1.017, "num_tokens": 24564472641.0, "step": 4602 }, { "epoch": 0.8204991087344029, "grad_norm": 0.23046875, "learning_rate": 1.3998158843490055e-05, "loss": 1.0344, "num_tokens": 24570756991.0, "step": 4603 }, { "epoch": 0.8206773618538324, "grad_norm": 0.1943359375, "learning_rate": 1.3995709086892363e-05, "loss": 1.01, "num_tokens": 24577039258.0, "step": 4604 }, { "epoch": 0.820855614973262, "grad_norm": 0.2119140625, "learning_rate": 1.39932590806616e-05, "loss": 1.0203, "num_tokens": 24583283372.0, "step": 4605 }, { "epoch": 0.8210338680926916, "grad_norm": 0.2255859375, "learning_rate": 1.3990808825001927e-05, "loss": 0.9971, "num_tokens": 24589553047.0, "step": 4606 }, { "epoch": 0.8212121212121212, "grad_norm": 0.2177734375, "learning_rate": 1.3988358320117522e-05, "loss": 1.009, "num_tokens": 24595832378.0, "step": 4607 }, { "epoch": 0.8213903743315508, "grad_norm": 0.224609375, "learning_rate": 1.3985907566212582e-05, "loss": 0.9711, "num_tokens": 24602077008.0, "step": 4608 }, { "epoch": 0.8215686274509804, "grad_norm": 0.2451171875, "learning_rate": 1.3983456563491335e-05, "loss": 1.0267, "num_tokens": 24608360350.0, "step": 4609 }, { "epoch": 0.82174688057041, "grad_norm": 0.203125, "learning_rate": 1.3981005312158022e-05, "loss": 1.0371, "num_tokens": 24614644353.0, "step": 4610 }, { "epoch": 0.8219251336898395, "grad_norm": 0.21484375, "learning_rate": 1.3978553812416908e-05, "loss": 1.0024, "num_tokens": 24620928379.0, "step": 4611 }, { "epoch": 0.8221033868092692, "grad_norm": 0.212890625, "learning_rate": 1.397610206447227e-05, "loss": 1.0488, "num_tokens": 24627190461.0, "step": 4612 }, { "epoch": 0.8222816399286987, "grad_norm": 0.2138671875, "learning_rate": 1.3973650068528421e-05, "loss": 1.0127, "num_tokens": 24633415295.0, "step": 4613 }, { "epoch": 0.8224598930481284, "grad_norm": 0.23046875, "learning_rate": 1.3971197824789687e-05, "loss": 1.0275, "num_tokens": 24639689138.0, "step": 4614 }, { "epoch": 0.8226381461675579, "grad_norm": 0.2138671875, "learning_rate": 1.3968745333460402e-05, "loss": 1.0427, "num_tokens": 24645961252.0, "step": 4615 }, { "epoch": 0.8228163992869876, "grad_norm": 0.2109375, "learning_rate": 1.3966292594744943e-05, "loss": 0.9893, "num_tokens": 24652244526.0, "step": 4616 }, { "epoch": 0.8229946524064171, "grad_norm": 0.2041015625, "learning_rate": 1.3963839608847696e-05, "loss": 1.0446, "num_tokens": 24658483287.0, "step": 4617 }, { "epoch": 0.8231729055258467, "grad_norm": 0.208984375, "learning_rate": 1.396138637597307e-05, "loss": 1.0316, "num_tokens": 24664759823.0, "step": 4618 }, { "epoch": 0.8233511586452763, "grad_norm": 0.2080078125, "learning_rate": 1.3958932896325486e-05, "loss": 1.005, "num_tokens": 24671019166.0, "step": 4619 }, { "epoch": 0.8235294117647058, "grad_norm": 0.201171875, "learning_rate": 1.39564791701094e-05, "loss": 1.0347, "num_tokens": 24677269967.0, "step": 4620 }, { "epoch": 0.8237076648841355, "grad_norm": 0.1943359375, "learning_rate": 1.395402519752928e-05, "loss": 1.046, "num_tokens": 24683547393.0, "step": 4621 }, { "epoch": 0.823885918003565, "grad_norm": 0.2216796875, "learning_rate": 1.3951570978789615e-05, "loss": 1.0292, "num_tokens": 24689830441.0, "step": 4622 }, { "epoch": 0.8240641711229947, "grad_norm": 0.1982421875, "learning_rate": 1.3949116514094916e-05, "loss": 0.9859, "num_tokens": 24696112892.0, "step": 4623 }, { "epoch": 0.8242424242424242, "grad_norm": 0.228515625, "learning_rate": 1.3946661803649715e-05, "loss": 1.0141, "num_tokens": 24702390416.0, "step": 4624 }, { "epoch": 0.8244206773618539, "grad_norm": 0.2109375, "learning_rate": 1.3944206847658563e-05, "loss": 1.0361, "num_tokens": 24708667614.0, "step": 4625 }, { "epoch": 0.8245989304812834, "grad_norm": 0.1953125, "learning_rate": 1.3941751646326029e-05, "loss": 1.0203, "num_tokens": 24714949822.0, "step": 4626 }, { "epoch": 0.8247771836007131, "grad_norm": 0.2021484375, "learning_rate": 1.3939296199856714e-05, "loss": 1.0197, "num_tokens": 24721232158.0, "step": 4627 }, { "epoch": 0.8249554367201426, "grad_norm": 0.2001953125, "learning_rate": 1.3936840508455218e-05, "loss": 1.0131, "num_tokens": 24727484842.0, "step": 4628 }, { "epoch": 0.8251336898395721, "grad_norm": 0.2109375, "learning_rate": 1.3934384572326184e-05, "loss": 1.0192, "num_tokens": 24733746389.0, "step": 4629 }, { "epoch": 0.8253119429590018, "grad_norm": 0.203125, "learning_rate": 1.3931928391674265e-05, "loss": 1.0441, "num_tokens": 24740027292.0, "step": 4630 }, { "epoch": 0.8254901960784313, "grad_norm": 0.2060546875, "learning_rate": 1.3929471966704129e-05, "loss": 1.0238, "num_tokens": 24746260615.0, "step": 4631 }, { "epoch": 0.825668449197861, "grad_norm": 0.216796875, "learning_rate": 1.392701529762048e-05, "loss": 1.0183, "num_tokens": 24752487883.0, "step": 4632 }, { "epoch": 0.8258467023172905, "grad_norm": 0.2021484375, "learning_rate": 1.3924558384628024e-05, "loss": 1.0096, "num_tokens": 24758770752.0, "step": 4633 }, { "epoch": 0.8260249554367202, "grad_norm": 0.20703125, "learning_rate": 1.3922101227931499e-05, "loss": 1.0158, "num_tokens": 24765015545.0, "step": 4634 }, { "epoch": 0.8262032085561497, "grad_norm": 0.2138671875, "learning_rate": 1.391964382773566e-05, "loss": 1.0204, "num_tokens": 24771299581.0, "step": 4635 }, { "epoch": 0.8263814616755794, "grad_norm": 0.2119140625, "learning_rate": 1.3917186184245287e-05, "loss": 1.0137, "num_tokens": 24777581401.0, "step": 4636 }, { "epoch": 0.8265597147950089, "grad_norm": 0.2109375, "learning_rate": 1.391472829766517e-05, "loss": 0.9998, "num_tokens": 24783865350.0, "step": 4637 }, { "epoch": 0.8267379679144385, "grad_norm": 0.1943359375, "learning_rate": 1.3912270168200127e-05, "loss": 0.9981, "num_tokens": 24790134827.0, "step": 4638 }, { "epoch": 0.8269162210338681, "grad_norm": 0.2109375, "learning_rate": 1.3909811796054995e-05, "loss": 1.012, "num_tokens": 24796419096.0, "step": 4639 }, { "epoch": 0.8270944741532977, "grad_norm": 0.2060546875, "learning_rate": 1.3907353181434632e-05, "loss": 1.0072, "num_tokens": 24802703522.0, "step": 4640 }, { "epoch": 0.8272727272727273, "grad_norm": 0.2001953125, "learning_rate": 1.3904894324543914e-05, "loss": 1.0093, "num_tokens": 24808988126.0, "step": 4641 }, { "epoch": 0.8274509803921568, "grad_norm": 0.2119140625, "learning_rate": 1.3902435225587734e-05, "loss": 1.0448, "num_tokens": 24815273009.0, "step": 4642 }, { "epoch": 0.8276292335115865, "grad_norm": 0.21484375, "learning_rate": 1.3899975884771015e-05, "loss": 1.0108, "num_tokens": 24821553209.0, "step": 4643 }, { "epoch": 0.827807486631016, "grad_norm": 0.21484375, "learning_rate": 1.3897516302298692e-05, "loss": 0.9938, "num_tokens": 24827838174.0, "step": 4644 }, { "epoch": 0.8279857397504456, "grad_norm": 0.2158203125, "learning_rate": 1.3895056478375726e-05, "loss": 1.0164, "num_tokens": 24834110526.0, "step": 4645 }, { "epoch": 0.8281639928698752, "grad_norm": 0.208984375, "learning_rate": 1.3892596413207087e-05, "loss": 1.0115, "num_tokens": 24840394383.0, "step": 4646 }, { "epoch": 0.8283422459893048, "grad_norm": 0.2177734375, "learning_rate": 1.3890136106997777e-05, "loss": 0.9953, "num_tokens": 24846649940.0, "step": 4647 }, { "epoch": 0.8285204991087344, "grad_norm": 0.21875, "learning_rate": 1.3887675559952818e-05, "loss": 1.0189, "num_tokens": 24852904358.0, "step": 4648 }, { "epoch": 0.828698752228164, "grad_norm": 0.21875, "learning_rate": 1.388521477227724e-05, "loss": 1.0196, "num_tokens": 24859168115.0, "step": 4649 }, { "epoch": 0.8288770053475936, "grad_norm": 0.2314453125, "learning_rate": 1.3882753744176107e-05, "loss": 1.0159, "num_tokens": 24865451293.0, "step": 4650 }, { "epoch": 0.8290552584670232, "grad_norm": 0.208984375, "learning_rate": 1.3880292475854493e-05, "loss": 1.0182, "num_tokens": 24871708176.0, "step": 4651 }, { "epoch": 0.8292335115864528, "grad_norm": 0.1962890625, "learning_rate": 1.38778309675175e-05, "loss": 1.0217, "num_tokens": 24877991900.0, "step": 4652 }, { "epoch": 0.8294117647058824, "grad_norm": 0.216796875, "learning_rate": 1.3875369219370242e-05, "loss": 1.0299, "num_tokens": 24884267401.0, "step": 4653 }, { "epoch": 0.8295900178253119, "grad_norm": 0.220703125, "learning_rate": 1.3872907231617863e-05, "loss": 0.9949, "num_tokens": 24890551137.0, "step": 4654 }, { "epoch": 0.8297682709447415, "grad_norm": 0.205078125, "learning_rate": 1.3870445004465512e-05, "loss": 1.0064, "num_tokens": 24896780528.0, "step": 4655 }, { "epoch": 0.8299465240641711, "grad_norm": 0.193359375, "learning_rate": 1.3867982538118372e-05, "loss": 1.0092, "num_tokens": 24903053806.0, "step": 4656 }, { "epoch": 0.8301247771836007, "grad_norm": 0.212890625, "learning_rate": 1.3865519832781645e-05, "loss": 1.0499, "num_tokens": 24909308750.0, "step": 4657 }, { "epoch": 0.8303030303030303, "grad_norm": 0.203125, "learning_rate": 1.3863056888660538e-05, "loss": 0.9983, "num_tokens": 24915567095.0, "step": 4658 }, { "epoch": 0.8304812834224599, "grad_norm": 0.208984375, "learning_rate": 1.38605937059603e-05, "loss": 0.9794, "num_tokens": 24921770874.0, "step": 4659 }, { "epoch": 0.8306595365418895, "grad_norm": 0.2021484375, "learning_rate": 1.385813028488618e-05, "loss": 1.026, "num_tokens": 24928053070.0, "step": 4660 }, { "epoch": 0.830837789661319, "grad_norm": 0.2021484375, "learning_rate": 1.3855666625643462e-05, "loss": 0.9947, "num_tokens": 24934285602.0, "step": 4661 }, { "epoch": 0.8310160427807487, "grad_norm": 0.20703125, "learning_rate": 1.3853202728437439e-05, "loss": 0.975, "num_tokens": 24940568644.0, "step": 4662 }, { "epoch": 0.8311942959001782, "grad_norm": 0.212890625, "learning_rate": 1.385073859347343e-05, "loss": 0.9933, "num_tokens": 24946841125.0, "step": 4663 }, { "epoch": 0.8313725490196079, "grad_norm": 0.2099609375, "learning_rate": 1.3848274220956767e-05, "loss": 1.0179, "num_tokens": 24953122772.0, "step": 4664 }, { "epoch": 0.8315508021390374, "grad_norm": 0.2041015625, "learning_rate": 1.3845809611092813e-05, "loss": 1.0107, "num_tokens": 24959404782.0, "step": 4665 }, { "epoch": 0.8317290552584671, "grad_norm": 0.20703125, "learning_rate": 1.3843344764086943e-05, "loss": 1.0319, "num_tokens": 24965689163.0, "step": 4666 }, { "epoch": 0.8319073083778966, "grad_norm": 0.2109375, "learning_rate": 1.3840879680144554e-05, "loss": 1.0264, "num_tokens": 24971968720.0, "step": 4667 }, { "epoch": 0.8320855614973262, "grad_norm": 0.203125, "learning_rate": 1.3838414359471062e-05, "loss": 0.99, "num_tokens": 24978253151.0, "step": 4668 }, { "epoch": 0.8322638146167558, "grad_norm": 0.2021484375, "learning_rate": 1.3835948802271896e-05, "loss": 1.0242, "num_tokens": 24984524644.0, "step": 4669 }, { "epoch": 0.8324420677361853, "grad_norm": 0.193359375, "learning_rate": 1.383348300875252e-05, "loss": 1.0238, "num_tokens": 24990808854.0, "step": 4670 }, { "epoch": 0.832620320855615, "grad_norm": 0.21484375, "learning_rate": 1.3831016979118403e-05, "loss": 1.0648, "num_tokens": 24997093664.0, "step": 4671 }, { "epoch": 0.8327985739750445, "grad_norm": 0.20703125, "learning_rate": 1.3828550713575044e-05, "loss": 1.0157, "num_tokens": 25003374373.0, "step": 4672 }, { "epoch": 0.8329768270944742, "grad_norm": 0.19921875, "learning_rate": 1.3826084212327957e-05, "loss": 1.0153, "num_tokens": 25009635543.0, "step": 4673 }, { "epoch": 0.8331550802139037, "grad_norm": 0.2021484375, "learning_rate": 1.3823617475582671e-05, "loss": 1.0371, "num_tokens": 25015913423.0, "step": 4674 }, { "epoch": 0.8333333333333334, "grad_norm": 0.2177734375, "learning_rate": 1.3821150503544747e-05, "loss": 1.0085, "num_tokens": 25022197699.0, "step": 4675 }, { "epoch": 0.8335115864527629, "grad_norm": 0.1923828125, "learning_rate": 1.3818683296419755e-05, "loss": 1.0329, "num_tokens": 25028464069.0, "step": 4676 }, { "epoch": 0.8336898395721926, "grad_norm": 0.2314453125, "learning_rate": 1.3816215854413289e-05, "loss": 0.9904, "num_tokens": 25034748723.0, "step": 4677 }, { "epoch": 0.8338680926916221, "grad_norm": 0.2216796875, "learning_rate": 1.3813748177730954e-05, "loss": 1.0176, "num_tokens": 25041031905.0, "step": 4678 }, { "epoch": 0.8340463458110517, "grad_norm": 0.203125, "learning_rate": 1.3811280266578395e-05, "loss": 1.0231, "num_tokens": 25047307170.0, "step": 4679 }, { "epoch": 0.8342245989304813, "grad_norm": 0.2109375, "learning_rate": 1.3808812121161253e-05, "loss": 1.0121, "num_tokens": 25053567386.0, "step": 4680 }, { "epoch": 0.8344028520499108, "grad_norm": 0.2216796875, "learning_rate": 1.3806343741685204e-05, "loss": 0.9988, "num_tokens": 25059814653.0, "step": 4681 }, { "epoch": 0.8345811051693405, "grad_norm": 0.212890625, "learning_rate": 1.3803875128355939e-05, "loss": 1.0361, "num_tokens": 25066089570.0, "step": 4682 }, { "epoch": 0.83475935828877, "grad_norm": 0.22265625, "learning_rate": 1.3801406281379166e-05, "loss": 1.0359, "num_tokens": 25072372475.0, "step": 4683 }, { "epoch": 0.8349376114081997, "grad_norm": 0.2080078125, "learning_rate": 1.3798937200960614e-05, "loss": 0.984, "num_tokens": 25078654800.0, "step": 4684 }, { "epoch": 0.8351158645276292, "grad_norm": 0.228515625, "learning_rate": 1.3796467887306034e-05, "loss": 1.0221, "num_tokens": 25084940822.0, "step": 4685 }, { "epoch": 0.8352941176470589, "grad_norm": 0.22265625, "learning_rate": 1.3793998340621191e-05, "loss": 1.0267, "num_tokens": 25091201444.0, "step": 4686 }, { "epoch": 0.8354723707664884, "grad_norm": 0.20703125, "learning_rate": 1.379152856111188e-05, "loss": 1.0124, "num_tokens": 25097486500.0, "step": 4687 }, { "epoch": 0.835650623885918, "grad_norm": 0.2041015625, "learning_rate": 1.3789058548983898e-05, "loss": 1.0297, "num_tokens": 25103753692.0, "step": 4688 }, { "epoch": 0.8358288770053476, "grad_norm": 0.232421875, "learning_rate": 1.3786588304443082e-05, "loss": 1.027, "num_tokens": 25110034450.0, "step": 4689 }, { "epoch": 0.8360071301247772, "grad_norm": 0.2197265625, "learning_rate": 1.3784117827695272e-05, "loss": 1.0089, "num_tokens": 25116318757.0, "step": 4690 }, { "epoch": 0.8361853832442068, "grad_norm": 0.2138671875, "learning_rate": 1.3781647118946335e-05, "loss": 1.0135, "num_tokens": 25122569312.0, "step": 4691 }, { "epoch": 0.8363636363636363, "grad_norm": 0.2080078125, "learning_rate": 1.3779176178402152e-05, "loss": 1.0214, "num_tokens": 25128818993.0, "step": 4692 }, { "epoch": 0.836541889483066, "grad_norm": 0.203125, "learning_rate": 1.3776705006268633e-05, "loss": 1.0056, "num_tokens": 25135084736.0, "step": 4693 }, { "epoch": 0.8367201426024955, "grad_norm": 0.2119140625, "learning_rate": 1.3774233602751701e-05, "loss": 1.0274, "num_tokens": 25141351209.0, "step": 4694 }, { "epoch": 0.8368983957219251, "grad_norm": 0.2001953125, "learning_rate": 1.3771761968057292e-05, "loss": 1.0056, "num_tokens": 25147620100.0, "step": 4695 }, { "epoch": 0.8370766488413547, "grad_norm": 0.201171875, "learning_rate": 1.3769290102391372e-05, "loss": 1.0275, "num_tokens": 25153905652.0, "step": 4696 }, { "epoch": 0.8372549019607843, "grad_norm": 0.21484375, "learning_rate": 1.3766818005959925e-05, "loss": 1.006, "num_tokens": 25160171263.0, "step": 4697 }, { "epoch": 0.8374331550802139, "grad_norm": 0.197265625, "learning_rate": 1.3764345678968948e-05, "loss": 1.0143, "num_tokens": 25166454603.0, "step": 4698 }, { "epoch": 0.8376114081996435, "grad_norm": 0.212890625, "learning_rate": 1.376187312162446e-05, "loss": 1.0214, "num_tokens": 25172738421.0, "step": 4699 }, { "epoch": 0.8377896613190731, "grad_norm": 0.2275390625, "learning_rate": 1.37594003341325e-05, "loss": 1.0162, "num_tokens": 25179003994.0, "step": 4700 }, { "epoch": 0.8379679144385027, "grad_norm": 0.197265625, "learning_rate": 1.3756927316699127e-05, "loss": 1.0192, "num_tokens": 25185286625.0, "step": 4701 }, { "epoch": 0.8381461675579323, "grad_norm": 0.2041015625, "learning_rate": 1.3754454069530418e-05, "loss": 1.0138, "num_tokens": 25191569974.0, "step": 4702 }, { "epoch": 0.8383244206773619, "grad_norm": 0.2177734375, "learning_rate": 1.3751980592832473e-05, "loss": 1.024, "num_tokens": 25197852498.0, "step": 4703 }, { "epoch": 0.8385026737967914, "grad_norm": 0.23046875, "learning_rate": 1.3749506886811394e-05, "loss": 1.0482, "num_tokens": 25204108998.0, "step": 4704 }, { "epoch": 0.838680926916221, "grad_norm": 0.2080078125, "learning_rate": 1.374703295167333e-05, "loss": 1.0064, "num_tokens": 25210393198.0, "step": 4705 }, { "epoch": 0.8388591800356506, "grad_norm": 0.2275390625, "learning_rate": 1.3744558787624432e-05, "loss": 1.0495, "num_tokens": 25216676716.0, "step": 4706 }, { "epoch": 0.8390374331550802, "grad_norm": 0.2158203125, "learning_rate": 1.3742084394870868e-05, "loss": 1.0204, "num_tokens": 25222938242.0, "step": 4707 }, { "epoch": 0.8392156862745098, "grad_norm": 0.203125, "learning_rate": 1.3739609773618827e-05, "loss": 1.0266, "num_tokens": 25229168158.0, "step": 4708 }, { "epoch": 0.8393939393939394, "grad_norm": 0.2080078125, "learning_rate": 1.3737134924074526e-05, "loss": 0.9945, "num_tokens": 25235436079.0, "step": 4709 }, { "epoch": 0.839572192513369, "grad_norm": 0.2001953125, "learning_rate": 1.3734659846444195e-05, "loss": 1.019, "num_tokens": 25241719632.0, "step": 4710 }, { "epoch": 0.8397504456327985, "grad_norm": 0.1982421875, "learning_rate": 1.3732184540934081e-05, "loss": 0.9825, "num_tokens": 25248002478.0, "step": 4711 }, { "epoch": 0.8399286987522282, "grad_norm": 0.2158203125, "learning_rate": 1.3729709007750451e-05, "loss": 1.0349, "num_tokens": 25254286514.0, "step": 4712 }, { "epoch": 0.8401069518716577, "grad_norm": 0.205078125, "learning_rate": 1.3727233247099589e-05, "loss": 1.0188, "num_tokens": 25260493144.0, "step": 4713 }, { "epoch": 0.8402852049910874, "grad_norm": 0.2314453125, "learning_rate": 1.3724757259187808e-05, "loss": 1.0221, "num_tokens": 25266704743.0, "step": 4714 }, { "epoch": 0.8404634581105169, "grad_norm": 0.2197265625, "learning_rate": 1.3722281044221426e-05, "loss": 1.0208, "num_tokens": 25272932396.0, "step": 4715 }, { "epoch": 0.8406417112299466, "grad_norm": 0.21875, "learning_rate": 1.3719804602406789e-05, "loss": 0.9629, "num_tokens": 25279215666.0, "step": 4716 }, { "epoch": 0.8408199643493761, "grad_norm": 0.201171875, "learning_rate": 1.3717327933950261e-05, "loss": 1.0334, "num_tokens": 25285469872.0, "step": 4717 }, { "epoch": 0.8409982174688057, "grad_norm": 0.2138671875, "learning_rate": 1.3714851039058221e-05, "loss": 1.0352, "num_tokens": 25291753766.0, "step": 4718 }, { "epoch": 0.8411764705882353, "grad_norm": 0.2197265625, "learning_rate": 1.3712373917937068e-05, "loss": 0.9901, "num_tokens": 25298038661.0, "step": 4719 }, { "epoch": 0.8413547237076648, "grad_norm": 0.193359375, "learning_rate": 1.3709896570793222e-05, "loss": 1.0354, "num_tokens": 25304317194.0, "step": 4720 }, { "epoch": 0.8415329768270945, "grad_norm": 0.203125, "learning_rate": 1.3707418997833126e-05, "loss": 1.0256, "num_tokens": 25310602634.0, "step": 4721 }, { "epoch": 0.841711229946524, "grad_norm": 0.212890625, "learning_rate": 1.3704941199263229e-05, "loss": 1.042, "num_tokens": 25316881510.0, "step": 4722 }, { "epoch": 0.8418894830659537, "grad_norm": 0.203125, "learning_rate": 1.3702463175290011e-05, "loss": 1.0374, "num_tokens": 25323144619.0, "step": 4723 }, { "epoch": 0.8420677361853832, "grad_norm": 0.2109375, "learning_rate": 1.3699984926119962e-05, "loss": 1.0123, "num_tokens": 25329392535.0, "step": 4724 }, { "epoch": 0.8422459893048129, "grad_norm": 0.21484375, "learning_rate": 1.3697506451959607e-05, "loss": 0.9976, "num_tokens": 25335625025.0, "step": 4725 }, { "epoch": 0.8424242424242424, "grad_norm": 0.2001953125, "learning_rate": 1.369502775301546e-05, "loss": 1.0342, "num_tokens": 25341908376.0, "step": 4726 }, { "epoch": 0.8426024955436721, "grad_norm": 0.2041015625, "learning_rate": 1.3692548829494083e-05, "loss": 1.0261, "num_tokens": 25348191659.0, "step": 4727 }, { "epoch": 0.8427807486631016, "grad_norm": 0.2236328125, "learning_rate": 1.3690069681602042e-05, "loss": 1.0404, "num_tokens": 25354443762.0, "step": 4728 }, { "epoch": 0.8429590017825312, "grad_norm": 0.2080078125, "learning_rate": 1.3687590309545926e-05, "loss": 1.0162, "num_tokens": 25360695024.0, "step": 4729 }, { "epoch": 0.8431372549019608, "grad_norm": 0.208984375, "learning_rate": 1.3685110713532343e-05, "loss": 1.0501, "num_tokens": 25366956094.0, "step": 4730 }, { "epoch": 0.8433155080213903, "grad_norm": 0.2216796875, "learning_rate": 1.3682630893767914e-05, "loss": 1.059, "num_tokens": 25373240222.0, "step": 4731 }, { "epoch": 0.84349376114082, "grad_norm": 0.201171875, "learning_rate": 1.3680150850459283e-05, "loss": 1.0051, "num_tokens": 25379516025.0, "step": 4732 }, { "epoch": 0.8436720142602495, "grad_norm": 0.2041015625, "learning_rate": 1.3677670583813121e-05, "loss": 1.0106, "num_tokens": 25385784540.0, "step": 4733 }, { "epoch": 0.8438502673796792, "grad_norm": 0.2177734375, "learning_rate": 1.3675190094036099e-05, "loss": 1.0133, "num_tokens": 25392023982.0, "step": 4734 }, { "epoch": 0.8440285204991087, "grad_norm": 0.2001953125, "learning_rate": 1.367270938133492e-05, "loss": 0.9787, "num_tokens": 25398306743.0, "step": 4735 }, { "epoch": 0.8442067736185384, "grad_norm": 0.2197265625, "learning_rate": 1.3670228445916307e-05, "loss": 1.0321, "num_tokens": 25404577593.0, "step": 4736 }, { "epoch": 0.8443850267379679, "grad_norm": 0.203125, "learning_rate": 1.366774728798699e-05, "loss": 1.0264, "num_tokens": 25410862060.0, "step": 4737 }, { "epoch": 0.8445632798573975, "grad_norm": 0.1953125, "learning_rate": 1.3665265907753726e-05, "loss": 0.9965, "num_tokens": 25417096003.0, "step": 4738 }, { "epoch": 0.8447415329768271, "grad_norm": 0.208984375, "learning_rate": 1.3662784305423293e-05, "loss": 1.0069, "num_tokens": 25423377828.0, "step": 4739 }, { "epoch": 0.8449197860962567, "grad_norm": 0.22265625, "learning_rate": 1.366030248120248e-05, "loss": 1.0072, "num_tokens": 25429660895.0, "step": 4740 }, { "epoch": 0.8450980392156863, "grad_norm": 0.1904296875, "learning_rate": 1.3657820435298098e-05, "loss": 1.0326, "num_tokens": 25435945558.0, "step": 4741 }, { "epoch": 0.8452762923351159, "grad_norm": 0.203125, "learning_rate": 1.3655338167916979e-05, "loss": 1.0237, "num_tokens": 25442230444.0, "step": 4742 }, { "epoch": 0.8454545454545455, "grad_norm": 0.208984375, "learning_rate": 1.3652855679265966e-05, "loss": 0.9822, "num_tokens": 25448502467.0, "step": 4743 }, { "epoch": 0.845632798573975, "grad_norm": 0.2060546875, "learning_rate": 1.365037296955193e-05, "loss": 1.0088, "num_tokens": 25454749961.0, "step": 4744 }, { "epoch": 0.8458110516934046, "grad_norm": 0.220703125, "learning_rate": 1.3647890038981753e-05, "loss": 1.0549, "num_tokens": 25461019519.0, "step": 4745 }, { "epoch": 0.8459893048128342, "grad_norm": 0.2041015625, "learning_rate": 1.3645406887762338e-05, "loss": 0.9912, "num_tokens": 25467293105.0, "step": 4746 }, { "epoch": 0.8461675579322638, "grad_norm": 0.2080078125, "learning_rate": 1.3642923516100608e-05, "loss": 1.0004, "num_tokens": 25473562670.0, "step": 4747 }, { "epoch": 0.8463458110516934, "grad_norm": 0.2060546875, "learning_rate": 1.3640439924203506e-05, "loss": 0.9819, "num_tokens": 25479802003.0, "step": 4748 }, { "epoch": 0.846524064171123, "grad_norm": 0.201171875, "learning_rate": 1.3637956112277982e-05, "loss": 1.0517, "num_tokens": 25486055092.0, "step": 4749 }, { "epoch": 0.8467023172905526, "grad_norm": 0.19921875, "learning_rate": 1.363547208053102e-05, "loss": 1.0213, "num_tokens": 25492321866.0, "step": 4750 }, { "epoch": 0.8468805704099822, "grad_norm": 0.201171875, "learning_rate": 1.3632987829169611e-05, "loss": 1.0332, "num_tokens": 25498576008.0, "step": 4751 }, { "epoch": 0.8470588235294118, "grad_norm": 0.2001953125, "learning_rate": 1.3630503358400771e-05, "loss": 1.025, "num_tokens": 25504841689.0, "step": 4752 }, { "epoch": 0.8472370766488414, "grad_norm": 0.19921875, "learning_rate": 1.3628018668431529e-05, "loss": 1.0363, "num_tokens": 25511125702.0, "step": 4753 }, { "epoch": 0.8474153297682709, "grad_norm": 0.2041015625, "learning_rate": 1.3625533759468936e-05, "loss": 1.0069, "num_tokens": 25517400876.0, "step": 4754 }, { "epoch": 0.8475935828877005, "grad_norm": 0.1923828125, "learning_rate": 1.3623048631720057e-05, "loss": 0.9804, "num_tokens": 25523677087.0, "step": 4755 }, { "epoch": 0.8477718360071301, "grad_norm": 0.2177734375, "learning_rate": 1.3620563285391985e-05, "loss": 1.0046, "num_tokens": 25529961138.0, "step": 4756 }, { "epoch": 0.8479500891265597, "grad_norm": 0.1943359375, "learning_rate": 1.3618077720691818e-05, "loss": 1.0221, "num_tokens": 25536240021.0, "step": 4757 }, { "epoch": 0.8481283422459893, "grad_norm": 0.2119140625, "learning_rate": 1.361559193782668e-05, "loss": 0.9954, "num_tokens": 25542496784.0, "step": 4758 }, { "epoch": 0.8483065953654189, "grad_norm": 0.2109375, "learning_rate": 1.3613105937003712e-05, "loss": 1.0297, "num_tokens": 25548756843.0, "step": 4759 }, { "epoch": 0.8484848484848485, "grad_norm": 0.2060546875, "learning_rate": 1.3610619718430076e-05, "loss": 1.0289, "num_tokens": 25555015318.0, "step": 4760 }, { "epoch": 0.848663101604278, "grad_norm": 0.2060546875, "learning_rate": 1.3608133282312945e-05, "loss": 1.0094, "num_tokens": 25561299267.0, "step": 4761 }, { "epoch": 0.8488413547237077, "grad_norm": 0.20703125, "learning_rate": 1.360564662885952e-05, "loss": 1.014, "num_tokens": 25567563855.0, "step": 4762 }, { "epoch": 0.8490196078431372, "grad_norm": 0.220703125, "learning_rate": 1.3603159758277003e-05, "loss": 1.0194, "num_tokens": 25573848040.0, "step": 4763 }, { "epoch": 0.8491978609625669, "grad_norm": 0.2099609375, "learning_rate": 1.3600672670772638e-05, "loss": 1.0207, "num_tokens": 25580105892.0, "step": 4764 }, { "epoch": 0.8493761140819964, "grad_norm": 0.197265625, "learning_rate": 1.359818536655367e-05, "loss": 1.0051, "num_tokens": 25586390093.0, "step": 4765 }, { "epoch": 0.8495543672014261, "grad_norm": 0.2060546875, "learning_rate": 1.3595697845827363e-05, "loss": 1.044, "num_tokens": 25592658377.0, "step": 4766 }, { "epoch": 0.8497326203208556, "grad_norm": 0.205078125, "learning_rate": 1.3593210108801007e-05, "loss": 0.9867, "num_tokens": 25598884935.0, "step": 4767 }, { "epoch": 0.8499108734402852, "grad_norm": 0.20703125, "learning_rate": 1.3590722155681903e-05, "loss": 1.039, "num_tokens": 25605149424.0, "step": 4768 }, { "epoch": 0.8500891265597148, "grad_norm": 0.2080078125, "learning_rate": 1.3588233986677377e-05, "loss": 0.9999, "num_tokens": 25611434233.0, "step": 4769 }, { "epoch": 0.8502673796791443, "grad_norm": 0.205078125, "learning_rate": 1.3585745601994761e-05, "loss": 0.9838, "num_tokens": 25617718251.0, "step": 4770 }, { "epoch": 0.850445632798574, "grad_norm": 0.205078125, "learning_rate": 1.3583257001841424e-05, "loss": 1.0196, "num_tokens": 25624002515.0, "step": 4771 }, { "epoch": 0.8506238859180035, "grad_norm": 0.2197265625, "learning_rate": 1.358076818642473e-05, "loss": 1.0416, "num_tokens": 25630233802.0, "step": 4772 }, { "epoch": 0.8508021390374332, "grad_norm": 0.2119140625, "learning_rate": 1.3578279155952077e-05, "loss": 1.0628, "num_tokens": 25636470289.0, "step": 4773 }, { "epoch": 0.8509803921568627, "grad_norm": 0.2158203125, "learning_rate": 1.3575789910630874e-05, "loss": 0.99, "num_tokens": 25642753956.0, "step": 4774 }, { "epoch": 0.8511586452762924, "grad_norm": 0.2373046875, "learning_rate": 1.3573300450668556e-05, "loss": 1.0105, "num_tokens": 25649037923.0, "step": 4775 }, { "epoch": 0.8513368983957219, "grad_norm": 0.197265625, "learning_rate": 1.357081077627257e-05, "loss": 1.0289, "num_tokens": 25655297583.0, "step": 4776 }, { "epoch": 0.8515151515151516, "grad_norm": 0.2080078125, "learning_rate": 1.3568320887650372e-05, "loss": 1.0404, "num_tokens": 25661581481.0, "step": 4777 }, { "epoch": 0.8516934046345811, "grad_norm": 0.234375, "learning_rate": 1.3565830785009458e-05, "loss": 0.9955, "num_tokens": 25667837162.0, "step": 4778 }, { "epoch": 0.8518716577540107, "grad_norm": 0.208984375, "learning_rate": 1.3563340468557317e-05, "loss": 1.0254, "num_tokens": 25674122124.0, "step": 4779 }, { "epoch": 0.8520499108734403, "grad_norm": 0.216796875, "learning_rate": 1.3560849938501477e-05, "loss": 1.0162, "num_tokens": 25680405739.0, "step": 4780 }, { "epoch": 0.8522281639928698, "grad_norm": 0.2275390625, "learning_rate": 1.3558359195049467e-05, "loss": 1.0118, "num_tokens": 25686690498.0, "step": 4781 }, { "epoch": 0.8524064171122995, "grad_norm": 0.212890625, "learning_rate": 1.3555868238408845e-05, "loss": 1.0305, "num_tokens": 25692975310.0, "step": 4782 }, { "epoch": 0.852584670231729, "grad_norm": 0.212890625, "learning_rate": 1.3553377068787183e-05, "loss": 1.0496, "num_tokens": 25699250247.0, "step": 4783 }, { "epoch": 0.8527629233511587, "grad_norm": 0.22265625, "learning_rate": 1.3550885686392068e-05, "loss": 1.0043, "num_tokens": 25705535302.0, "step": 4784 }, { "epoch": 0.8529411764705882, "grad_norm": 0.203125, "learning_rate": 1.3548394091431112e-05, "loss": 1.0271, "num_tokens": 25711790099.0, "step": 4785 }, { "epoch": 0.8531194295900179, "grad_norm": 0.1962890625, "learning_rate": 1.3545902284111934e-05, "loss": 1.0269, "num_tokens": 25718074692.0, "step": 4786 }, { "epoch": 0.8532976827094474, "grad_norm": 0.2060546875, "learning_rate": 1.3543410264642182e-05, "loss": 0.9952, "num_tokens": 25724357477.0, "step": 4787 }, { "epoch": 0.853475935828877, "grad_norm": 0.2041015625, "learning_rate": 1.3540918033229513e-05, "loss": 1.024, "num_tokens": 25730641562.0, "step": 4788 }, { "epoch": 0.8536541889483066, "grad_norm": 0.20703125, "learning_rate": 1.353842559008161e-05, "loss": 1.007, "num_tokens": 25736924756.0, "step": 4789 }, { "epoch": 0.8538324420677362, "grad_norm": 0.2138671875, "learning_rate": 1.3535932935406166e-05, "loss": 0.9932, "num_tokens": 25743175471.0, "step": 4790 }, { "epoch": 0.8540106951871658, "grad_norm": 0.2041015625, "learning_rate": 1.353344006941089e-05, "loss": 0.9981, "num_tokens": 25749459746.0, "step": 4791 }, { "epoch": 0.8541889483065954, "grad_norm": 0.203125, "learning_rate": 1.3530946992303521e-05, "loss": 1.0431, "num_tokens": 25755740993.0, "step": 4792 }, { "epoch": 0.854367201426025, "grad_norm": 0.1982421875, "learning_rate": 1.3528453704291801e-05, "loss": 1.0093, "num_tokens": 25762023962.0, "step": 4793 }, { "epoch": 0.8545454545454545, "grad_norm": 0.205078125, "learning_rate": 1.3525960205583503e-05, "loss": 0.9812, "num_tokens": 25768281225.0, "step": 4794 }, { "epoch": 0.8547237076648841, "grad_norm": 0.2138671875, "learning_rate": 1.3523466496386404e-05, "loss": 1.0057, "num_tokens": 25774564173.0, "step": 4795 }, { "epoch": 0.8549019607843137, "grad_norm": 0.2119140625, "learning_rate": 1.3520972576908311e-05, "loss": 1.0268, "num_tokens": 25780837100.0, "step": 4796 }, { "epoch": 0.8550802139037433, "grad_norm": 0.2001953125, "learning_rate": 1.3518478447357035e-05, "loss": 1.0377, "num_tokens": 25787094687.0, "step": 4797 }, { "epoch": 0.8552584670231729, "grad_norm": 0.19921875, "learning_rate": 1.351598410794042e-05, "loss": 1.0551, "num_tokens": 25793356055.0, "step": 4798 }, { "epoch": 0.8554367201426025, "grad_norm": 0.2041015625, "learning_rate": 1.351348955886632e-05, "loss": 1.0258, "num_tokens": 25799621861.0, "step": 4799 }, { "epoch": 0.8556149732620321, "grad_norm": 0.203125, "learning_rate": 1.35109948003426e-05, "loss": 1.0557, "num_tokens": 25805903118.0, "step": 4800 }, { "epoch": 0.8557932263814617, "grad_norm": 0.203125, "learning_rate": 1.3508499832577153e-05, "loss": 0.9938, "num_tokens": 25812158870.0, "step": 4801 }, { "epoch": 0.8559714795008913, "grad_norm": 0.19921875, "learning_rate": 1.3506004655777885e-05, "loss": 1.0357, "num_tokens": 25818413439.0, "step": 4802 }, { "epoch": 0.8561497326203209, "grad_norm": 0.197265625, "learning_rate": 1.3503509270152719e-05, "loss": 1.0101, "num_tokens": 25824698615.0, "step": 4803 }, { "epoch": 0.8563279857397504, "grad_norm": 0.2001953125, "learning_rate": 1.3501013675909594e-05, "loss": 1.0139, "num_tokens": 25830920186.0, "step": 4804 }, { "epoch": 0.85650623885918, "grad_norm": 0.2041015625, "learning_rate": 1.3498517873256473e-05, "loss": 1.0046, "num_tokens": 25837202380.0, "step": 4805 }, { "epoch": 0.8566844919786096, "grad_norm": 0.1982421875, "learning_rate": 1.349602186240133e-05, "loss": 0.9887, "num_tokens": 25843475113.0, "step": 4806 }, { "epoch": 0.8568627450980392, "grad_norm": 0.1982421875, "learning_rate": 1.3493525643552154e-05, "loss": 1.0303, "num_tokens": 25849758449.0, "step": 4807 }, { "epoch": 0.8570409982174688, "grad_norm": 0.201171875, "learning_rate": 1.349102921691696e-05, "loss": 1.0233, "num_tokens": 25856040397.0, "step": 4808 }, { "epoch": 0.8572192513368984, "grad_norm": 0.2255859375, "learning_rate": 1.3488532582703772e-05, "loss": 1.0358, "num_tokens": 25862323257.0, "step": 4809 }, { "epoch": 0.857397504456328, "grad_norm": 0.197265625, "learning_rate": 1.3486035741120644e-05, "loss": 1.0104, "num_tokens": 25868608515.0, "step": 4810 }, { "epoch": 0.8575757575757575, "grad_norm": 0.19921875, "learning_rate": 1.3483538692375629e-05, "loss": 1.0376, "num_tokens": 25874890107.0, "step": 4811 }, { "epoch": 0.8577540106951872, "grad_norm": 0.20703125, "learning_rate": 1.348104143667681e-05, "loss": 1.0441, "num_tokens": 25881170353.0, "step": 4812 }, { "epoch": 0.8579322638146167, "grad_norm": 0.21484375, "learning_rate": 1.3478543974232279e-05, "loss": 1.0014, "num_tokens": 25887443919.0, "step": 4813 }, { "epoch": 0.8581105169340464, "grad_norm": 0.205078125, "learning_rate": 1.3476046305250161e-05, "loss": 1.0195, "num_tokens": 25893697954.0, "step": 4814 }, { "epoch": 0.8582887700534759, "grad_norm": 0.2119140625, "learning_rate": 1.3473548429938576e-05, "loss": 1.0189, "num_tokens": 25899981300.0, "step": 4815 }, { "epoch": 0.8584670231729056, "grad_norm": 0.22265625, "learning_rate": 1.347105034850568e-05, "loss": 1.0293, "num_tokens": 25906265640.0, "step": 4816 }, { "epoch": 0.8586452762923351, "grad_norm": 0.2041015625, "learning_rate": 1.3468552061159635e-05, "loss": 1.024, "num_tokens": 25912501697.0, "step": 4817 }, { "epoch": 0.8588235294117647, "grad_norm": 0.203125, "learning_rate": 1.3466053568108624e-05, "loss": 1.0167, "num_tokens": 25918762083.0, "step": 4818 }, { "epoch": 0.8590017825311943, "grad_norm": 0.2109375, "learning_rate": 1.3463554869560849e-05, "loss": 1.016, "num_tokens": 25925043114.0, "step": 4819 }, { "epoch": 0.8591800356506238, "grad_norm": 0.22265625, "learning_rate": 1.3461055965724524e-05, "loss": 1.0223, "num_tokens": 25931327315.0, "step": 4820 }, { "epoch": 0.8593582887700535, "grad_norm": 0.2099609375, "learning_rate": 1.3458556856807885e-05, "loss": 1.0438, "num_tokens": 25937559112.0, "step": 4821 }, { "epoch": 0.859536541889483, "grad_norm": 0.1943359375, "learning_rate": 1.3456057543019186e-05, "loss": 1.0218, "num_tokens": 25943840965.0, "step": 4822 }, { "epoch": 0.8597147950089127, "grad_norm": 0.208984375, "learning_rate": 1.345355802456669e-05, "loss": 1.0201, "num_tokens": 25950124449.0, "step": 4823 }, { "epoch": 0.8598930481283422, "grad_norm": 0.2158203125, "learning_rate": 1.3451058301658684e-05, "loss": 1.0114, "num_tokens": 25956407860.0, "step": 4824 }, { "epoch": 0.8600713012477719, "grad_norm": 0.203125, "learning_rate": 1.3448558374503473e-05, "loss": 1.0035, "num_tokens": 25962692816.0, "step": 4825 }, { "epoch": 0.8602495543672014, "grad_norm": 0.2158203125, "learning_rate": 1.3446058243309374e-05, "loss": 1.0452, "num_tokens": 25968950307.0, "step": 4826 }, { "epoch": 0.8604278074866311, "grad_norm": 0.212890625, "learning_rate": 1.3443557908284724e-05, "loss": 1.0217, "num_tokens": 25975232429.0, "step": 4827 }, { "epoch": 0.8606060606060606, "grad_norm": 0.2119140625, "learning_rate": 1.3441057369637878e-05, "loss": 1.0116, "num_tokens": 25981515613.0, "step": 4828 }, { "epoch": 0.8607843137254902, "grad_norm": 0.1953125, "learning_rate": 1.3438556627577204e-05, "loss": 1.0241, "num_tokens": 25987799166.0, "step": 4829 }, { "epoch": 0.8609625668449198, "grad_norm": 0.212890625, "learning_rate": 1.3436055682311094e-05, "loss": 0.9985, "num_tokens": 25994039121.0, "step": 4830 }, { "epoch": 0.8611408199643493, "grad_norm": 0.2109375, "learning_rate": 1.3433554534047943e-05, "loss": 0.9947, "num_tokens": 26000271668.0, "step": 4831 }, { "epoch": 0.861319073083779, "grad_norm": 0.203125, "learning_rate": 1.3431053182996183e-05, "loss": 1.0168, "num_tokens": 26006552903.0, "step": 4832 }, { "epoch": 0.8614973262032085, "grad_norm": 0.2001953125, "learning_rate": 1.3428551629364246e-05, "loss": 0.9927, "num_tokens": 26012819053.0, "step": 4833 }, { "epoch": 0.8616755793226382, "grad_norm": 0.1953125, "learning_rate": 1.3426049873360585e-05, "loss": 1.0021, "num_tokens": 26019060698.0, "step": 4834 }, { "epoch": 0.8618538324420677, "grad_norm": 0.19921875, "learning_rate": 1.3423547915193678e-05, "loss": 1.0377, "num_tokens": 26025314453.0, "step": 4835 }, { "epoch": 0.8620320855614974, "grad_norm": 0.203125, "learning_rate": 1.3421045755072007e-05, "loss": 1.0059, "num_tokens": 26031554408.0, "step": 4836 }, { "epoch": 0.8622103386809269, "grad_norm": 0.2197265625, "learning_rate": 1.3418543393204088e-05, "loss": 1.0009, "num_tokens": 26037812845.0, "step": 4837 }, { "epoch": 0.8623885918003565, "grad_norm": 0.212890625, "learning_rate": 1.3416040829798432e-05, "loss": 1.0032, "num_tokens": 26044037042.0, "step": 4838 }, { "epoch": 0.8625668449197861, "grad_norm": 0.212890625, "learning_rate": 1.3413538065063582e-05, "loss": 1.0545, "num_tokens": 26050288138.0, "step": 4839 }, { "epoch": 0.8627450980392157, "grad_norm": 0.1962890625, "learning_rate": 1.3411035099208097e-05, "loss": 1.0261, "num_tokens": 26056566939.0, "step": 4840 }, { "epoch": 0.8629233511586453, "grad_norm": 0.2197265625, "learning_rate": 1.3408531932440545e-05, "loss": 1.0223, "num_tokens": 26062849059.0, "step": 4841 }, { "epoch": 0.8631016042780749, "grad_norm": 0.2021484375, "learning_rate": 1.340602856496952e-05, "loss": 1.0249, "num_tokens": 26069076768.0, "step": 4842 }, { "epoch": 0.8632798573975045, "grad_norm": 0.205078125, "learning_rate": 1.3403524997003622e-05, "loss": 1.0058, "num_tokens": 26075346805.0, "step": 4843 }, { "epoch": 0.863458110516934, "grad_norm": 0.1982421875, "learning_rate": 1.3401021228751483e-05, "loss": 1.0105, "num_tokens": 26081631306.0, "step": 4844 }, { "epoch": 0.8636363636363636, "grad_norm": 0.2021484375, "learning_rate": 1.3398517260421735e-05, "loss": 1.0422, "num_tokens": 26087866746.0, "step": 4845 }, { "epoch": 0.8638146167557932, "grad_norm": 0.2041015625, "learning_rate": 1.3396013092223037e-05, "loss": 1.0097, "num_tokens": 26094116917.0, "step": 4846 }, { "epoch": 0.8639928698752228, "grad_norm": 0.208984375, "learning_rate": 1.3393508724364057e-05, "loss": 1.0258, "num_tokens": 26100399660.0, "step": 4847 }, { "epoch": 0.8641711229946524, "grad_norm": 0.2021484375, "learning_rate": 1.3391004157053495e-05, "loss": 1.0238, "num_tokens": 26106659165.0, "step": 4848 }, { "epoch": 0.864349376114082, "grad_norm": 0.2099609375, "learning_rate": 1.3388499390500049e-05, "loss": 0.9838, "num_tokens": 26112879523.0, "step": 4849 }, { "epoch": 0.8645276292335116, "grad_norm": 0.2197265625, "learning_rate": 1.338599442491244e-05, "loss": 1.0231, "num_tokens": 26119164014.0, "step": 4850 }, { "epoch": 0.8647058823529412, "grad_norm": 0.19921875, "learning_rate": 1.3383489260499416e-05, "loss": 1.0327, "num_tokens": 26125445335.0, "step": 4851 }, { "epoch": 0.8648841354723708, "grad_norm": 0.2041015625, "learning_rate": 1.3380983897469724e-05, "loss": 1.0326, "num_tokens": 26131727010.0, "step": 4852 }, { "epoch": 0.8650623885918004, "grad_norm": 0.216796875, "learning_rate": 1.3378478336032143e-05, "loss": 1.0432, "num_tokens": 26138009270.0, "step": 4853 }, { "epoch": 0.8652406417112299, "grad_norm": 0.2021484375, "learning_rate": 1.3375972576395457e-05, "loss": 1.0446, "num_tokens": 26144255271.0, "step": 4854 }, { "epoch": 0.8654188948306596, "grad_norm": 0.1953125, "learning_rate": 1.3373466618768476e-05, "loss": 1.0177, "num_tokens": 26150511612.0, "step": 4855 }, { "epoch": 0.8655971479500891, "grad_norm": 0.205078125, "learning_rate": 1.3370960463360021e-05, "loss": 0.9872, "num_tokens": 26156795827.0, "step": 4856 }, { "epoch": 0.8657754010695187, "grad_norm": 0.2099609375, "learning_rate": 1.3368454110378923e-05, "loss": 1.0323, "num_tokens": 26163078933.0, "step": 4857 }, { "epoch": 0.8659536541889483, "grad_norm": 0.2001953125, "learning_rate": 1.3365947560034046e-05, "loss": 1.006, "num_tokens": 26169345490.0, "step": 4858 }, { "epoch": 0.8661319073083779, "grad_norm": 0.2119140625, "learning_rate": 1.336344081253426e-05, "loss": 1.0176, "num_tokens": 26175630941.0, "step": 4859 }, { "epoch": 0.8663101604278075, "grad_norm": 0.2138671875, "learning_rate": 1.3360933868088455e-05, "loss": 1.0203, "num_tokens": 26181888012.0, "step": 4860 }, { "epoch": 0.866488413547237, "grad_norm": 0.1982421875, "learning_rate": 1.3358426726905525e-05, "loss": 1.0275, "num_tokens": 26188145910.0, "step": 4861 }, { "epoch": 0.8666666666666667, "grad_norm": 0.2255859375, "learning_rate": 1.3355919389194398e-05, "loss": 1.0342, "num_tokens": 26194428464.0, "step": 4862 }, { "epoch": 0.8668449197860962, "grad_norm": 0.21484375, "learning_rate": 1.3353411855164013e-05, "loss": 1.0298, "num_tokens": 26200708687.0, "step": 4863 }, { "epoch": 0.8670231729055259, "grad_norm": 0.1982421875, "learning_rate": 1.3350904125023319e-05, "loss": 1.0131, "num_tokens": 26206973910.0, "step": 4864 }, { "epoch": 0.8672014260249554, "grad_norm": 0.2353515625, "learning_rate": 1.3348396198981287e-05, "loss": 1.029, "num_tokens": 26213234352.0, "step": 4865 }, { "epoch": 0.8673796791443851, "grad_norm": 0.2119140625, "learning_rate": 1.3345888077246898e-05, "loss": 0.9884, "num_tokens": 26219517479.0, "step": 4866 }, { "epoch": 0.8675579322638146, "grad_norm": 0.189453125, "learning_rate": 1.3343379760029168e-05, "loss": 1.0203, "num_tokens": 26225799806.0, "step": 4867 }, { "epoch": 0.8677361853832442, "grad_norm": 0.2041015625, "learning_rate": 1.3340871247537102e-05, "loss": 1.0012, "num_tokens": 26232085062.0, "step": 4868 }, { "epoch": 0.8679144385026738, "grad_norm": 0.203125, "learning_rate": 1.3338362539979743e-05, "loss": 1.0409, "num_tokens": 26238358943.0, "step": 4869 }, { "epoch": 0.8680926916221033, "grad_norm": 0.203125, "learning_rate": 1.3335853637566135e-05, "loss": 1.0288, "num_tokens": 26244627581.0, "step": 4870 }, { "epoch": 0.868270944741533, "grad_norm": 0.1982421875, "learning_rate": 1.3333344540505351e-05, "loss": 1.0124, "num_tokens": 26250868261.0, "step": 4871 }, { "epoch": 0.8684491978609625, "grad_norm": 0.2099609375, "learning_rate": 1.3330835249006475e-05, "loss": 1.018, "num_tokens": 26257149873.0, "step": 4872 }, { "epoch": 0.8686274509803922, "grad_norm": 0.212890625, "learning_rate": 1.33283257632786e-05, "loss": 1.0102, "num_tokens": 26263433777.0, "step": 4873 }, { "epoch": 0.8688057040998217, "grad_norm": 0.21484375, "learning_rate": 1.3325816083530853e-05, "loss": 0.9948, "num_tokens": 26269696313.0, "step": 4874 }, { "epoch": 0.8689839572192514, "grad_norm": 0.203125, "learning_rate": 1.3323306209972356e-05, "loss": 1.0159, "num_tokens": 26275979458.0, "step": 4875 }, { "epoch": 0.8691622103386809, "grad_norm": 0.205078125, "learning_rate": 1.3320796142812264e-05, "loss": 1.048, "num_tokens": 26282262840.0, "step": 4876 }, { "epoch": 0.8693404634581106, "grad_norm": 0.212890625, "learning_rate": 1.3318285882259734e-05, "loss": 1.0368, "num_tokens": 26288547519.0, "step": 4877 }, { "epoch": 0.8695187165775401, "grad_norm": 0.2060546875, "learning_rate": 1.3315775428523953e-05, "loss": 1.025, "num_tokens": 26294833046.0, "step": 4878 }, { "epoch": 0.8696969696969697, "grad_norm": 0.220703125, "learning_rate": 1.3313264781814119e-05, "loss": 1.0179, "num_tokens": 26301114380.0, "step": 4879 }, { "epoch": 0.8698752228163993, "grad_norm": 0.212890625, "learning_rate": 1.3310753942339439e-05, "loss": 1.0136, "num_tokens": 26307344363.0, "step": 4880 }, { "epoch": 0.8700534759358288, "grad_norm": 0.2158203125, "learning_rate": 1.3308242910309143e-05, "loss": 0.9906, "num_tokens": 26313625670.0, "step": 4881 }, { "epoch": 0.8702317290552585, "grad_norm": 0.2021484375, "learning_rate": 1.3305731685932477e-05, "loss": 1.0243, "num_tokens": 26319908741.0, "step": 4882 }, { "epoch": 0.870409982174688, "grad_norm": 0.205078125, "learning_rate": 1.3303220269418707e-05, "loss": 1.0183, "num_tokens": 26326192620.0, "step": 4883 }, { "epoch": 0.8705882352941177, "grad_norm": 0.21484375, "learning_rate": 1.3300708660977102e-05, "loss": 1.0143, "num_tokens": 26332477048.0, "step": 4884 }, { "epoch": 0.8707664884135472, "grad_norm": 0.208984375, "learning_rate": 1.3298196860816957e-05, "loss": 1.0183, "num_tokens": 26338759626.0, "step": 4885 }, { "epoch": 0.8709447415329769, "grad_norm": 0.2109375, "learning_rate": 1.3295684869147578e-05, "loss": 1.0159, "num_tokens": 26345035066.0, "step": 4886 }, { "epoch": 0.8711229946524064, "grad_norm": 0.2197265625, "learning_rate": 1.3293172686178299e-05, "loss": 1.0423, "num_tokens": 26351290268.0, "step": 4887 }, { "epoch": 0.871301247771836, "grad_norm": 0.2265625, "learning_rate": 1.3290660312118453e-05, "loss": 1.0031, "num_tokens": 26357575868.0, "step": 4888 }, { "epoch": 0.8714795008912656, "grad_norm": 0.212890625, "learning_rate": 1.3288147747177396e-05, "loss": 1.0288, "num_tokens": 26363850912.0, "step": 4889 }, { "epoch": 0.8716577540106952, "grad_norm": 0.228515625, "learning_rate": 1.3285634991564512e-05, "loss": 1.0074, "num_tokens": 26370136419.0, "step": 4890 }, { "epoch": 0.8718360071301248, "grad_norm": 0.2138671875, "learning_rate": 1.3283122045489171e-05, "loss": 1.018, "num_tokens": 26376410900.0, "step": 4891 }, { "epoch": 0.8720142602495544, "grad_norm": 0.216796875, "learning_rate": 1.3280608909160793e-05, "loss": 0.9951, "num_tokens": 26382679630.0, "step": 4892 }, { "epoch": 0.872192513368984, "grad_norm": 0.20703125, "learning_rate": 1.3278095582788789e-05, "loss": 1.0267, "num_tokens": 26388929738.0, "step": 4893 }, { "epoch": 0.8723707664884135, "grad_norm": 0.201171875, "learning_rate": 1.3275582066582601e-05, "loss": 0.9932, "num_tokens": 26395195181.0, "step": 4894 }, { "epoch": 0.8725490196078431, "grad_norm": 0.212890625, "learning_rate": 1.3273068360751675e-05, "loss": 1.0401, "num_tokens": 26401474559.0, "step": 4895 }, { "epoch": 0.8727272727272727, "grad_norm": 0.2041015625, "learning_rate": 1.3270554465505485e-05, "loss": 1.0132, "num_tokens": 26407730849.0, "step": 4896 }, { "epoch": 0.8729055258467023, "grad_norm": 0.2041015625, "learning_rate": 1.326804038105351e-05, "loss": 1.017, "num_tokens": 26413961914.0, "step": 4897 }, { "epoch": 0.8730837789661319, "grad_norm": 0.201171875, "learning_rate": 1.326552610760525e-05, "loss": 0.9934, "num_tokens": 26420244097.0, "step": 4898 }, { "epoch": 0.8732620320855615, "grad_norm": 0.1943359375, "learning_rate": 1.326301164537022e-05, "loss": 1.0335, "num_tokens": 26426524183.0, "step": 4899 }, { "epoch": 0.8734402852049911, "grad_norm": 0.205078125, "learning_rate": 1.326049699455795e-05, "loss": 1.0169, "num_tokens": 26432777081.0, "step": 4900 }, { "epoch": 0.8736185383244207, "grad_norm": 0.1923828125, "learning_rate": 1.3257982155377989e-05, "loss": 1.025, "num_tokens": 26439041902.0, "step": 4901 }, { "epoch": 0.8737967914438503, "grad_norm": 0.197265625, "learning_rate": 1.3255467128039898e-05, "loss": 1.0466, "num_tokens": 26445322027.0, "step": 4902 }, { "epoch": 0.8739750445632799, "grad_norm": 0.2001953125, "learning_rate": 1.3252951912753254e-05, "loss": 1.0206, "num_tokens": 26451583480.0, "step": 4903 }, { "epoch": 0.8741532976827094, "grad_norm": 0.1875, "learning_rate": 1.325043650972765e-05, "loss": 1.0262, "num_tokens": 26457853718.0, "step": 4904 }, { "epoch": 0.8743315508021391, "grad_norm": 0.203125, "learning_rate": 1.3247920919172694e-05, "loss": 1.0197, "num_tokens": 26464079943.0, "step": 4905 }, { "epoch": 0.8745098039215686, "grad_norm": 0.1962890625, "learning_rate": 1.3245405141298015e-05, "loss": 1.0409, "num_tokens": 26470340883.0, "step": 4906 }, { "epoch": 0.8746880570409982, "grad_norm": 0.201171875, "learning_rate": 1.324288917631325e-05, "loss": 1.0123, "num_tokens": 26476625791.0, "step": 4907 }, { "epoch": 0.8748663101604278, "grad_norm": 0.197265625, "learning_rate": 1.3240373024428055e-05, "loss": 1.0125, "num_tokens": 26482858530.0, "step": 4908 }, { "epoch": 0.8750445632798574, "grad_norm": 0.189453125, "learning_rate": 1.3237856685852104e-05, "loss": 1.0303, "num_tokens": 26489141725.0, "step": 4909 }, { "epoch": 0.875222816399287, "grad_norm": 0.1943359375, "learning_rate": 1.3235340160795083e-05, "loss": 1.0341, "num_tokens": 26495421567.0, "step": 4910 }, { "epoch": 0.8754010695187165, "grad_norm": 0.2060546875, "learning_rate": 1.323282344946669e-05, "loss": 1.0081, "num_tokens": 26501687305.0, "step": 4911 }, { "epoch": 0.8755793226381462, "grad_norm": 0.203125, "learning_rate": 1.3230306552076647e-05, "loss": 1.0243, "num_tokens": 26507948639.0, "step": 4912 }, { "epoch": 0.8757575757575757, "grad_norm": 0.2001953125, "learning_rate": 1.3227789468834688e-05, "loss": 1.0368, "num_tokens": 26514197552.0, "step": 4913 }, { "epoch": 0.8759358288770054, "grad_norm": 0.1982421875, "learning_rate": 1.3225272199950562e-05, "loss": 1.0475, "num_tokens": 26520446255.0, "step": 4914 }, { "epoch": 0.8761140819964349, "grad_norm": 0.1953125, "learning_rate": 1.3222754745634034e-05, "loss": 1.0106, "num_tokens": 26526729235.0, "step": 4915 }, { "epoch": 0.8762923351158646, "grad_norm": 0.2001953125, "learning_rate": 1.322023710609488e-05, "loss": 1.0268, "num_tokens": 26533012999.0, "step": 4916 }, { "epoch": 0.8764705882352941, "grad_norm": 0.205078125, "learning_rate": 1.3217719281542904e-05, "loss": 1.0241, "num_tokens": 26539267588.0, "step": 4917 }, { "epoch": 0.8766488413547238, "grad_norm": 0.2119140625, "learning_rate": 1.3215201272187905e-05, "loss": 1.0028, "num_tokens": 26545543710.0, "step": 4918 }, { "epoch": 0.8768270944741533, "grad_norm": 0.2109375, "learning_rate": 1.3212683078239718e-05, "loss": 0.9953, "num_tokens": 26551812524.0, "step": 4919 }, { "epoch": 0.8770053475935828, "grad_norm": 0.2060546875, "learning_rate": 1.3210164699908184e-05, "loss": 0.9839, "num_tokens": 26558097244.0, "step": 4920 }, { "epoch": 0.8771836007130125, "grad_norm": 0.2138671875, "learning_rate": 1.3207646137403153e-05, "loss": 1.0002, "num_tokens": 26564353785.0, "step": 4921 }, { "epoch": 0.877361853832442, "grad_norm": 0.1884765625, "learning_rate": 1.3205127390934507e-05, "loss": 1.0023, "num_tokens": 26570638717.0, "step": 4922 }, { "epoch": 0.8775401069518717, "grad_norm": 0.212890625, "learning_rate": 1.3202608460712128e-05, "loss": 0.9992, "num_tokens": 26576913127.0, "step": 4923 }, { "epoch": 0.8777183600713012, "grad_norm": 0.2060546875, "learning_rate": 1.3200089346945919e-05, "loss": 1.0157, "num_tokens": 26583169484.0, "step": 4924 }, { "epoch": 0.8778966131907309, "grad_norm": 0.20703125, "learning_rate": 1.3197570049845802e-05, "loss": 1.0086, "num_tokens": 26589454492.0, "step": 4925 }, { "epoch": 0.8780748663101604, "grad_norm": 0.19921875, "learning_rate": 1.3195050569621705e-05, "loss": 1.036, "num_tokens": 26595712414.0, "step": 4926 }, { "epoch": 0.8782531194295901, "grad_norm": 0.189453125, "learning_rate": 1.3192530906483583e-05, "loss": 1.0031, "num_tokens": 26601997817.0, "step": 4927 }, { "epoch": 0.8784313725490196, "grad_norm": 0.1982421875, "learning_rate": 1.3190011060641395e-05, "loss": 1.0314, "num_tokens": 26608282125.0, "step": 4928 }, { "epoch": 0.8786096256684492, "grad_norm": 0.2021484375, "learning_rate": 1.3187491032305122e-05, "loss": 1.0134, "num_tokens": 26614537236.0, "step": 4929 }, { "epoch": 0.8787878787878788, "grad_norm": 0.2021484375, "learning_rate": 1.3184970821684759e-05, "loss": 0.9986, "num_tokens": 26620816581.0, "step": 4930 }, { "epoch": 0.8789661319073083, "grad_norm": 0.1826171875, "learning_rate": 1.3182450428990315e-05, "loss": 1.0509, "num_tokens": 26627097873.0, "step": 4931 }, { "epoch": 0.879144385026738, "grad_norm": 0.216796875, "learning_rate": 1.3179929854431814e-05, "loss": 1.0549, "num_tokens": 26633350020.0, "step": 4932 }, { "epoch": 0.8793226381461675, "grad_norm": 0.1962890625, "learning_rate": 1.3177409098219303e-05, "loss": 1.0567, "num_tokens": 26639631446.0, "step": 4933 }, { "epoch": 0.8795008912655972, "grad_norm": 0.1884765625, "learning_rate": 1.3174888160562824e-05, "loss": 1.0425, "num_tokens": 26645890243.0, "step": 4934 }, { "epoch": 0.8796791443850267, "grad_norm": 0.21875, "learning_rate": 1.3172367041672458e-05, "loss": 1.0393, "num_tokens": 26652141522.0, "step": 4935 }, { "epoch": 0.8798573975044564, "grad_norm": 0.19921875, "learning_rate": 1.3169845741758289e-05, "loss": 1.0002, "num_tokens": 26658375111.0, "step": 4936 }, { "epoch": 0.8800356506238859, "grad_norm": 0.1875, "learning_rate": 1.3167324261030412e-05, "loss": 1.0356, "num_tokens": 26664632382.0, "step": 4937 }, { "epoch": 0.8802139037433155, "grad_norm": 0.201171875, "learning_rate": 1.3164802599698947e-05, "loss": 1.0024, "num_tokens": 26670916491.0, "step": 4938 }, { "epoch": 0.8803921568627451, "grad_norm": 0.1943359375, "learning_rate": 1.3162280757974022e-05, "loss": 1.0237, "num_tokens": 26677201782.0, "step": 4939 }, { "epoch": 0.8805704099821747, "grad_norm": 0.208984375, "learning_rate": 1.3159758736065789e-05, "loss": 0.9667, "num_tokens": 26683486692.0, "step": 4940 }, { "epoch": 0.8807486631016043, "grad_norm": 0.189453125, "learning_rate": 1.31572365341844e-05, "loss": 1.0306, "num_tokens": 26689745590.0, "step": 4941 }, { "epoch": 0.8809269162210339, "grad_norm": 0.1943359375, "learning_rate": 1.3154714152540034e-05, "loss": 1.0171, "num_tokens": 26696022720.0, "step": 4942 }, { "epoch": 0.8811051693404635, "grad_norm": 0.19921875, "learning_rate": 1.3152191591342882e-05, "loss": 0.987, "num_tokens": 26702279081.0, "step": 4943 }, { "epoch": 0.881283422459893, "grad_norm": 0.197265625, "learning_rate": 1.3149668850803151e-05, "loss": 1.0419, "num_tokens": 26708560701.0, "step": 4944 }, { "epoch": 0.8814616755793226, "grad_norm": 0.19140625, "learning_rate": 1.314714593113106e-05, "loss": 1.0312, "num_tokens": 26714799394.0, "step": 4945 }, { "epoch": 0.8816399286987522, "grad_norm": 0.2001953125, "learning_rate": 1.3144622832536844e-05, "loss": 1.0271, "num_tokens": 26721081685.0, "step": 4946 }, { "epoch": 0.8818181818181818, "grad_norm": 0.2021484375, "learning_rate": 1.3142099555230754e-05, "loss": 1.0199, "num_tokens": 26727355909.0, "step": 4947 }, { "epoch": 0.8819964349376114, "grad_norm": 0.1904296875, "learning_rate": 1.3139576099423055e-05, "loss": 0.9948, "num_tokens": 26733609705.0, "step": 4948 }, { "epoch": 0.882174688057041, "grad_norm": 0.2080078125, "learning_rate": 1.3137052465324028e-05, "loss": 1.0192, "num_tokens": 26739893070.0, "step": 4949 }, { "epoch": 0.8823529411764706, "grad_norm": 0.2138671875, "learning_rate": 1.3134528653143966e-05, "loss": 1.0207, "num_tokens": 26746177628.0, "step": 4950 }, { "epoch": 0.8825311942959002, "grad_norm": 0.1962890625, "learning_rate": 1.3132004663093178e-05, "loss": 1.0331, "num_tokens": 26752407315.0, "step": 4951 }, { "epoch": 0.8827094474153298, "grad_norm": 0.212890625, "learning_rate": 1.3129480495381994e-05, "loss": 1.0298, "num_tokens": 26758667921.0, "step": 4952 }, { "epoch": 0.8828877005347594, "grad_norm": 0.1982421875, "learning_rate": 1.3126956150220747e-05, "loss": 1.0198, "num_tokens": 26764929956.0, "step": 4953 }, { "epoch": 0.8830659536541889, "grad_norm": 0.193359375, "learning_rate": 1.3124431627819796e-05, "loss": 1.0577, "num_tokens": 26771205153.0, "step": 4954 }, { "epoch": 0.8832442067736186, "grad_norm": 0.201171875, "learning_rate": 1.3121906928389507e-05, "loss": 0.9908, "num_tokens": 26777480650.0, "step": 4955 }, { "epoch": 0.8834224598930481, "grad_norm": 0.2177734375, "learning_rate": 1.3119382052140268e-05, "loss": 1.0115, "num_tokens": 26783752065.0, "step": 4956 }, { "epoch": 0.8836007130124777, "grad_norm": 0.2060546875, "learning_rate": 1.3116856999282472e-05, "loss": 1.0578, "num_tokens": 26790031636.0, "step": 4957 }, { "epoch": 0.8837789661319073, "grad_norm": 0.212890625, "learning_rate": 1.3114331770026533e-05, "loss": 1.0359, "num_tokens": 26796264931.0, "step": 4958 }, { "epoch": 0.8839572192513369, "grad_norm": 0.208984375, "learning_rate": 1.3111806364582884e-05, "loss": 1.0076, "num_tokens": 26802549480.0, "step": 4959 }, { "epoch": 0.8841354723707665, "grad_norm": 0.197265625, "learning_rate": 1.3109280783161962e-05, "loss": 1.0115, "num_tokens": 26808820508.0, "step": 4960 }, { "epoch": 0.884313725490196, "grad_norm": 0.2021484375, "learning_rate": 1.3106755025974228e-05, "loss": 1.034, "num_tokens": 26815080253.0, "step": 4961 }, { "epoch": 0.8844919786096257, "grad_norm": 0.220703125, "learning_rate": 1.3104229093230147e-05, "loss": 1.0059, "num_tokens": 26821334752.0, "step": 4962 }, { "epoch": 0.8846702317290552, "grad_norm": 0.201171875, "learning_rate": 1.3101702985140217e-05, "loss": 1.0083, "num_tokens": 26827565926.0, "step": 4963 }, { "epoch": 0.8848484848484849, "grad_norm": 0.22265625, "learning_rate": 1.309917670191493e-05, "loss": 0.9914, "num_tokens": 26833826548.0, "step": 4964 }, { "epoch": 0.8850267379679144, "grad_norm": 0.2041015625, "learning_rate": 1.3096650243764806e-05, "loss": 1.0271, "num_tokens": 26840091898.0, "step": 4965 }, { "epoch": 0.8852049910873441, "grad_norm": 0.2177734375, "learning_rate": 1.3094123610900369e-05, "loss": 1.0254, "num_tokens": 26846371566.0, "step": 4966 }, { "epoch": 0.8853832442067736, "grad_norm": 0.2216796875, "learning_rate": 1.3091596803532177e-05, "loss": 1.0065, "num_tokens": 26852652872.0, "step": 4967 }, { "epoch": 0.8855614973262033, "grad_norm": 0.2197265625, "learning_rate": 1.3089069821870773e-05, "loss": 1.0093, "num_tokens": 26858880977.0, "step": 4968 }, { "epoch": 0.8857397504456328, "grad_norm": 0.1904296875, "learning_rate": 1.3086542666126743e-05, "loss": 1.0054, "num_tokens": 26865154606.0, "step": 4969 }, { "epoch": 0.8859180035650623, "grad_norm": 0.2255859375, "learning_rate": 1.3084015336510667e-05, "loss": 1.0223, "num_tokens": 26871397747.0, "step": 4970 }, { "epoch": 0.886096256684492, "grad_norm": 0.22265625, "learning_rate": 1.3081487833233157e-05, "loss": 1.0394, "num_tokens": 26877677557.0, "step": 4971 }, { "epoch": 0.8862745098039215, "grad_norm": 0.2060546875, "learning_rate": 1.3078960156504824e-05, "loss": 1.0317, "num_tokens": 26883936441.0, "step": 4972 }, { "epoch": 0.8864527629233512, "grad_norm": 0.2333984375, "learning_rate": 1.3076432306536302e-05, "loss": 0.9904, "num_tokens": 26890206766.0, "step": 4973 }, { "epoch": 0.8866310160427807, "grad_norm": 0.224609375, "learning_rate": 1.3073904283538238e-05, "loss": 1.0249, "num_tokens": 26896470697.0, "step": 4974 }, { "epoch": 0.8868092691622104, "grad_norm": 0.197265625, "learning_rate": 1.3071376087721288e-05, "loss": 1.0251, "num_tokens": 26902719850.0, "step": 4975 }, { "epoch": 0.8869875222816399, "grad_norm": 0.205078125, "learning_rate": 1.3068847719296136e-05, "loss": 1.032, "num_tokens": 26909004013.0, "step": 4976 }, { "epoch": 0.8871657754010696, "grad_norm": 0.232421875, "learning_rate": 1.306631917847346e-05, "loss": 1.0619, "num_tokens": 26915265444.0, "step": 4977 }, { "epoch": 0.8873440285204991, "grad_norm": 0.2041015625, "learning_rate": 1.3063790465463972e-05, "loss": 1.0109, "num_tokens": 26921544973.0, "step": 4978 }, { "epoch": 0.8875222816399287, "grad_norm": 0.2021484375, "learning_rate": 1.306126158047839e-05, "loss": 0.9975, "num_tokens": 26927809731.0, "step": 4979 }, { "epoch": 0.8877005347593583, "grad_norm": 0.216796875, "learning_rate": 1.3058732523727442e-05, "loss": 1.0158, "num_tokens": 26934084737.0, "step": 4980 }, { "epoch": 0.8878787878787879, "grad_norm": 0.2197265625, "learning_rate": 1.3056203295421879e-05, "loss": 1.0209, "num_tokens": 26940368967.0, "step": 4981 }, { "epoch": 0.8880570409982175, "grad_norm": 0.201171875, "learning_rate": 1.305367389577246e-05, "loss": 0.9798, "num_tokens": 26946652949.0, "step": 4982 }, { "epoch": 0.888235294117647, "grad_norm": 0.2333984375, "learning_rate": 1.3051144324989962e-05, "loss": 1.0076, "num_tokens": 26952922276.0, "step": 4983 }, { "epoch": 0.8884135472370767, "grad_norm": 0.2080078125, "learning_rate": 1.304861458328517e-05, "loss": 1.0383, "num_tokens": 26959205320.0, "step": 4984 }, { "epoch": 0.8885918003565062, "grad_norm": 0.203125, "learning_rate": 1.3046084670868895e-05, "loss": 1.0133, "num_tokens": 26965422054.0, "step": 4985 }, { "epoch": 0.8887700534759359, "grad_norm": 0.2060546875, "learning_rate": 1.3043554587951953e-05, "loss": 1.0538, "num_tokens": 26971699255.0, "step": 4986 }, { "epoch": 0.8889483065953654, "grad_norm": 0.21875, "learning_rate": 1.304102433474517e-05, "loss": 0.989, "num_tokens": 26977982144.0, "step": 4987 }, { "epoch": 0.889126559714795, "grad_norm": 0.1953125, "learning_rate": 1.30384939114594e-05, "loss": 1.0036, "num_tokens": 26984232457.0, "step": 4988 }, { "epoch": 0.8893048128342246, "grad_norm": 0.2021484375, "learning_rate": 1.30359633183055e-05, "loss": 0.9973, "num_tokens": 26990515561.0, "step": 4989 }, { "epoch": 0.8894830659536542, "grad_norm": 0.205078125, "learning_rate": 1.3033432555494351e-05, "loss": 1.0206, "num_tokens": 26996798225.0, "step": 4990 }, { "epoch": 0.8896613190730838, "grad_norm": 0.2021484375, "learning_rate": 1.3030901623236831e-05, "loss": 1.029, "num_tokens": 27003059793.0, "step": 4991 }, { "epoch": 0.8898395721925134, "grad_norm": 0.2119140625, "learning_rate": 1.3028370521743854e-05, "loss": 1.0259, "num_tokens": 27009342602.0, "step": 4992 }, { "epoch": 0.890017825311943, "grad_norm": 0.208984375, "learning_rate": 1.3025839251226328e-05, "loss": 1.0289, "num_tokens": 27015603799.0, "step": 4993 }, { "epoch": 0.8901960784313725, "grad_norm": 0.2041015625, "learning_rate": 1.3023307811895192e-05, "loss": 1.0026, "num_tokens": 27021887399.0, "step": 4994 }, { "epoch": 0.8903743315508021, "grad_norm": 0.2060546875, "learning_rate": 1.302077620396139e-05, "loss": 1.0384, "num_tokens": 27028149576.0, "step": 4995 }, { "epoch": 0.8905525846702317, "grad_norm": 0.2109375, "learning_rate": 1.3018244427635873e-05, "loss": 1.0503, "num_tokens": 27034406982.0, "step": 4996 }, { "epoch": 0.8907308377896613, "grad_norm": 0.1962890625, "learning_rate": 1.3015712483129628e-05, "loss": 1.0079, "num_tokens": 27040626645.0, "step": 4997 }, { "epoch": 0.8909090909090909, "grad_norm": 0.2099609375, "learning_rate": 1.3013180370653632e-05, "loss": 0.995, "num_tokens": 27046909955.0, "step": 4998 }, { "epoch": 0.8910873440285205, "grad_norm": 0.20703125, "learning_rate": 1.3010648090418892e-05, "loss": 1.032, "num_tokens": 27053193679.0, "step": 4999 }, { "epoch": 0.8912655971479501, "grad_norm": 0.2119140625, "learning_rate": 1.3008115642636414e-05, "loss": 1.0403, "num_tokens": 27059473331.0, "step": 5000 }, { "epoch": 0.8914438502673797, "grad_norm": 0.203125, "learning_rate": 1.3005583027517244e-05, "loss": 1.0252, "num_tokens": 27065756958.0, "step": 5001 }, { "epoch": 0.8916221033868093, "grad_norm": 0.208984375, "learning_rate": 1.3003050245272412e-05, "loss": 1.0081, "num_tokens": 27072019771.0, "step": 5002 }, { "epoch": 0.8918003565062389, "grad_norm": 0.1982421875, "learning_rate": 1.300051729611298e-05, "loss": 1.0333, "num_tokens": 27078271631.0, "step": 5003 }, { "epoch": 0.8919786096256684, "grad_norm": 0.19140625, "learning_rate": 1.2997984180250018e-05, "loss": 0.9984, "num_tokens": 27084555688.0, "step": 5004 }, { "epoch": 0.8921568627450981, "grad_norm": 0.216796875, "learning_rate": 1.299545089789461e-05, "loss": 1.0512, "num_tokens": 27090811393.0, "step": 5005 }, { "epoch": 0.8923351158645276, "grad_norm": 0.2080078125, "learning_rate": 1.2992917449257865e-05, "loss": 1.0255, "num_tokens": 27097094778.0, "step": 5006 }, { "epoch": 0.8925133689839572, "grad_norm": 0.2001953125, "learning_rate": 1.2990383834550877e-05, "loss": 1.0209, "num_tokens": 27103334007.0, "step": 5007 }, { "epoch": 0.8926916221033868, "grad_norm": 0.2021484375, "learning_rate": 1.2987850053984789e-05, "loss": 1.0333, "num_tokens": 27109590542.0, "step": 5008 }, { "epoch": 0.8928698752228164, "grad_norm": 0.2119140625, "learning_rate": 1.2985316107770736e-05, "loss": 1.0119, "num_tokens": 27115858487.0, "step": 5009 }, { "epoch": 0.893048128342246, "grad_norm": 0.201171875, "learning_rate": 1.2982781996119869e-05, "loss": 1.0275, "num_tokens": 27122141670.0, "step": 5010 }, { "epoch": 0.8932263814616755, "grad_norm": 0.1962890625, "learning_rate": 1.2980247719243362e-05, "loss": 1.0165, "num_tokens": 27128426251.0, "step": 5011 }, { "epoch": 0.8934046345811052, "grad_norm": 0.2001953125, "learning_rate": 1.2977713277352389e-05, "loss": 1.0418, "num_tokens": 27134710634.0, "step": 5012 }, { "epoch": 0.8935828877005347, "grad_norm": 0.2119140625, "learning_rate": 1.2975178670658155e-05, "loss": 1.0185, "num_tokens": 27140943552.0, "step": 5013 }, { "epoch": 0.8937611408199644, "grad_norm": 0.203125, "learning_rate": 1.2972643899371861e-05, "loss": 1.0339, "num_tokens": 27147228325.0, "step": 5014 }, { "epoch": 0.8939393939393939, "grad_norm": 0.19921875, "learning_rate": 1.2970108963704737e-05, "loss": 1.004, "num_tokens": 27153512013.0, "step": 5015 }, { "epoch": 0.8941176470588236, "grad_norm": 0.23828125, "learning_rate": 1.2967573863868016e-05, "loss": 1.0034, "num_tokens": 27159796675.0, "step": 5016 }, { "epoch": 0.8942959001782531, "grad_norm": 0.1962890625, "learning_rate": 1.2965038600072945e-05, "loss": 1.0345, "num_tokens": 27166079132.0, "step": 5017 }, { "epoch": 0.8944741532976828, "grad_norm": 0.2236328125, "learning_rate": 1.2962503172530796e-05, "loss": 1.0136, "num_tokens": 27172358294.0, "step": 5018 }, { "epoch": 0.8946524064171123, "grad_norm": 0.2177734375, "learning_rate": 1.2959967581452842e-05, "loss": 1.0011, "num_tokens": 27178635053.0, "step": 5019 }, { "epoch": 0.8948306595365418, "grad_norm": 0.1962890625, "learning_rate": 1.2957431827050373e-05, "loss": 1.0466, "num_tokens": 27184919049.0, "step": 5020 }, { "epoch": 0.8950089126559715, "grad_norm": 0.197265625, "learning_rate": 1.2954895909534697e-05, "loss": 1.0356, "num_tokens": 27191195545.0, "step": 5021 }, { "epoch": 0.895187165775401, "grad_norm": 0.2119140625, "learning_rate": 1.2952359829117134e-05, "loss": 1.0263, "num_tokens": 27197461980.0, "step": 5022 }, { "epoch": 0.8953654188948307, "grad_norm": 0.197265625, "learning_rate": 1.294982358600901e-05, "loss": 1.0348, "num_tokens": 27203744892.0, "step": 5023 }, { "epoch": 0.8955436720142602, "grad_norm": 0.2001953125, "learning_rate": 1.2947287180421675e-05, "loss": 1.0092, "num_tokens": 27209992239.0, "step": 5024 }, { "epoch": 0.8957219251336899, "grad_norm": 0.1923828125, "learning_rate": 1.294475061256649e-05, "loss": 1.0324, "num_tokens": 27216274237.0, "step": 5025 }, { "epoch": 0.8959001782531194, "grad_norm": 0.19921875, "learning_rate": 1.2942213882654821e-05, "loss": 1.0592, "num_tokens": 27222541007.0, "step": 5026 }, { "epoch": 0.8960784313725491, "grad_norm": 0.208984375, "learning_rate": 1.2939676990898058e-05, "loss": 1.0167, "num_tokens": 27228825274.0, "step": 5027 }, { "epoch": 0.8962566844919786, "grad_norm": 0.193359375, "learning_rate": 1.2937139937507603e-05, "loss": 1.0346, "num_tokens": 27235098089.0, "step": 5028 }, { "epoch": 0.8964349376114082, "grad_norm": 0.2041015625, "learning_rate": 1.293460272269487e-05, "loss": 1.0113, "num_tokens": 27241346294.0, "step": 5029 }, { "epoch": 0.8966131907308378, "grad_norm": 0.1923828125, "learning_rate": 1.293206534667128e-05, "loss": 1.0272, "num_tokens": 27247578709.0, "step": 5030 }, { "epoch": 0.8967914438502674, "grad_norm": 0.1982421875, "learning_rate": 1.2929527809648277e-05, "loss": 1.0458, "num_tokens": 27253809912.0, "step": 5031 }, { "epoch": 0.896969696969697, "grad_norm": 0.1953125, "learning_rate": 1.2926990111837312e-05, "loss": 1.0062, "num_tokens": 27260092414.0, "step": 5032 }, { "epoch": 0.8971479500891265, "grad_norm": 0.203125, "learning_rate": 1.2924452253449858e-05, "loss": 1.0089, "num_tokens": 27266342009.0, "step": 5033 }, { "epoch": 0.8973262032085562, "grad_norm": 0.2021484375, "learning_rate": 1.2921914234697386e-05, "loss": 1.0155, "num_tokens": 27272622570.0, "step": 5034 }, { "epoch": 0.8975044563279857, "grad_norm": 0.19140625, "learning_rate": 1.2919376055791395e-05, "loss": 0.9925, "num_tokens": 27278888291.0, "step": 5035 }, { "epoch": 0.8976827094474154, "grad_norm": 0.208984375, "learning_rate": 1.2916837716943395e-05, "loss": 1.0417, "num_tokens": 27285172359.0, "step": 5036 }, { "epoch": 0.8978609625668449, "grad_norm": 0.197265625, "learning_rate": 1.2914299218364899e-05, "loss": 1.0103, "num_tokens": 27291456010.0, "step": 5037 }, { "epoch": 0.8980392156862745, "grad_norm": 0.2001953125, "learning_rate": 1.2911760560267445e-05, "loss": 1.0177, "num_tokens": 27297737998.0, "step": 5038 }, { "epoch": 0.8982174688057041, "grad_norm": 0.205078125, "learning_rate": 1.2909221742862578e-05, "loss": 1.0147, "num_tokens": 27303999094.0, "step": 5039 }, { "epoch": 0.8983957219251337, "grad_norm": 0.205078125, "learning_rate": 1.2906682766361862e-05, "loss": 1.0714, "num_tokens": 27310283683.0, "step": 5040 }, { "epoch": 0.8985739750445633, "grad_norm": 0.2021484375, "learning_rate": 1.2904143630976867e-05, "loss": 1.0137, "num_tokens": 27316568243.0, "step": 5041 }, { "epoch": 0.8987522281639929, "grad_norm": 0.1962890625, "learning_rate": 1.2901604336919177e-05, "loss": 1.0016, "num_tokens": 27322850203.0, "step": 5042 }, { "epoch": 0.8989304812834225, "grad_norm": 0.1982421875, "learning_rate": 1.28990648844004e-05, "loss": 1.0498, "num_tokens": 27329134134.0, "step": 5043 }, { "epoch": 0.899108734402852, "grad_norm": 0.205078125, "learning_rate": 1.2896525273632143e-05, "loss": 1.0503, "num_tokens": 27335390129.0, "step": 5044 }, { "epoch": 0.8992869875222816, "grad_norm": 0.1982421875, "learning_rate": 1.2893985504826039e-05, "loss": 1.0092, "num_tokens": 27341647327.0, "step": 5045 }, { "epoch": 0.8994652406417112, "grad_norm": 0.2041015625, "learning_rate": 1.2891445578193714e-05, "loss": 0.9864, "num_tokens": 27347930291.0, "step": 5046 }, { "epoch": 0.8996434937611408, "grad_norm": 0.203125, "learning_rate": 1.2888905493946833e-05, "loss": 1.0052, "num_tokens": 27354204001.0, "step": 5047 }, { "epoch": 0.8998217468805704, "grad_norm": 0.21484375, "learning_rate": 1.2886365252297058e-05, "loss": 1.0282, "num_tokens": 27360488443.0, "step": 5048 }, { "epoch": 0.9, "grad_norm": 0.2021484375, "learning_rate": 1.2883824853456067e-05, "loss": 1.0159, "num_tokens": 27366749339.0, "step": 5049 }, { "epoch": 0.9001782531194296, "grad_norm": 0.2060546875, "learning_rate": 1.288128429763555e-05, "loss": 1.0253, "num_tokens": 27372992028.0, "step": 5050 }, { "epoch": 0.9003565062388592, "grad_norm": 0.208984375, "learning_rate": 1.2878743585047215e-05, "loss": 1.0208, "num_tokens": 27379264019.0, "step": 5051 }, { "epoch": 0.9005347593582887, "grad_norm": 0.2001953125, "learning_rate": 1.2876202715902787e-05, "loss": 1.0528, "num_tokens": 27385546998.0, "step": 5052 }, { "epoch": 0.9007130124777184, "grad_norm": 0.1943359375, "learning_rate": 1.2873661690413984e-05, "loss": 0.9989, "num_tokens": 27391818907.0, "step": 5053 }, { "epoch": 0.9008912655971479, "grad_norm": 0.2236328125, "learning_rate": 1.2871120508792553e-05, "loss": 1.0378, "num_tokens": 27398090996.0, "step": 5054 }, { "epoch": 0.9010695187165776, "grad_norm": 0.1982421875, "learning_rate": 1.2868579171250257e-05, "loss": 1.0331, "num_tokens": 27404372045.0, "step": 5055 }, { "epoch": 0.9012477718360071, "grad_norm": 0.22265625, "learning_rate": 1.2866037677998869e-05, "loss": 1.0349, "num_tokens": 27410635061.0, "step": 5056 }, { "epoch": 0.9014260249554367, "grad_norm": 0.2177734375, "learning_rate": 1.286349602925016e-05, "loss": 0.9999, "num_tokens": 27416918675.0, "step": 5057 }, { "epoch": 0.9016042780748663, "grad_norm": 0.19921875, "learning_rate": 1.2860954225215935e-05, "loss": 1.018, "num_tokens": 27423202445.0, "step": 5058 }, { "epoch": 0.9017825311942959, "grad_norm": 0.2177734375, "learning_rate": 1.2858412266108e-05, "loss": 1.0004, "num_tokens": 27429486363.0, "step": 5059 }, { "epoch": 0.9019607843137255, "grad_norm": 0.2255859375, "learning_rate": 1.285587015213818e-05, "loss": 1.0194, "num_tokens": 27435769429.0, "step": 5060 }, { "epoch": 0.902139037433155, "grad_norm": 0.193359375, "learning_rate": 1.2853327883518305e-05, "loss": 1.0341, "num_tokens": 27442052571.0, "step": 5061 }, { "epoch": 0.9023172905525847, "grad_norm": 0.228515625, "learning_rate": 1.2850785460460225e-05, "loss": 1.0621, "num_tokens": 27448332960.0, "step": 5062 }, { "epoch": 0.9024955436720142, "grad_norm": 0.2265625, "learning_rate": 1.2848242883175806e-05, "loss": 1.0565, "num_tokens": 27454609582.0, "step": 5063 }, { "epoch": 0.9026737967914439, "grad_norm": 0.20703125, "learning_rate": 1.2845700151876912e-05, "loss": 1.0168, "num_tokens": 27460820064.0, "step": 5064 }, { "epoch": 0.9028520499108734, "grad_norm": 0.2119140625, "learning_rate": 1.2843157266775437e-05, "loss": 1.0106, "num_tokens": 27467095330.0, "step": 5065 }, { "epoch": 0.9030303030303031, "grad_norm": 0.1962890625, "learning_rate": 1.2840614228083273e-05, "loss": 1.013, "num_tokens": 27473381073.0, "step": 5066 }, { "epoch": 0.9032085561497326, "grad_norm": 0.19921875, "learning_rate": 1.283807103601234e-05, "loss": 1.0029, "num_tokens": 27479665507.0, "step": 5067 }, { "epoch": 0.9033868092691623, "grad_norm": 0.1962890625, "learning_rate": 1.2835527690774559e-05, "loss": 1.0222, "num_tokens": 27485920758.0, "step": 5068 }, { "epoch": 0.9035650623885918, "grad_norm": 0.203125, "learning_rate": 1.2832984192581865e-05, "loss": 1.0048, "num_tokens": 27492150508.0, "step": 5069 }, { "epoch": 0.9037433155080213, "grad_norm": 0.1962890625, "learning_rate": 1.283044054164621e-05, "loss": 1.0114, "num_tokens": 27498410236.0, "step": 5070 }, { "epoch": 0.903921568627451, "grad_norm": 0.21484375, "learning_rate": 1.282789673817956e-05, "loss": 0.9984, "num_tokens": 27504693388.0, "step": 5071 }, { "epoch": 0.9040998217468805, "grad_norm": 0.193359375, "learning_rate": 1.2825352782393887e-05, "loss": 1.0102, "num_tokens": 27510944022.0, "step": 5072 }, { "epoch": 0.9042780748663102, "grad_norm": 0.201171875, "learning_rate": 1.2822808674501181e-05, "loss": 1.0051, "num_tokens": 27517218231.0, "step": 5073 }, { "epoch": 0.9044563279857397, "grad_norm": 0.2138671875, "learning_rate": 1.2820264414713443e-05, "loss": 1.0258, "num_tokens": 27523493169.0, "step": 5074 }, { "epoch": 0.9046345811051694, "grad_norm": 0.2021484375, "learning_rate": 1.2817720003242687e-05, "loss": 1.0177, "num_tokens": 27529751212.0, "step": 5075 }, { "epoch": 0.9048128342245989, "grad_norm": 0.19140625, "learning_rate": 1.2815175440300935e-05, "loss": 1.0068, "num_tokens": 27536034934.0, "step": 5076 }, { "epoch": 0.9049910873440286, "grad_norm": 0.2021484375, "learning_rate": 1.2812630726100234e-05, "loss": 1.0038, "num_tokens": 27542274758.0, "step": 5077 }, { "epoch": 0.9051693404634581, "grad_norm": 0.2099609375, "learning_rate": 1.2810085860852629e-05, "loss": 1.023, "num_tokens": 27548559344.0, "step": 5078 }, { "epoch": 0.9053475935828877, "grad_norm": 0.201171875, "learning_rate": 1.280754084477019e-05, "loss": 1.0185, "num_tokens": 27554843807.0, "step": 5079 }, { "epoch": 0.9055258467023173, "grad_norm": 0.1982421875, "learning_rate": 1.2804995678064987e-05, "loss": 1.0465, "num_tokens": 27561125071.0, "step": 5080 }, { "epoch": 0.9057040998217469, "grad_norm": 0.203125, "learning_rate": 1.280245036094911e-05, "loss": 1.0193, "num_tokens": 27567410182.0, "step": 5081 }, { "epoch": 0.9058823529411765, "grad_norm": 0.20703125, "learning_rate": 1.2799904893634666e-05, "loss": 1.0222, "num_tokens": 27573685413.0, "step": 5082 }, { "epoch": 0.906060606060606, "grad_norm": 0.2021484375, "learning_rate": 1.2797359276333767e-05, "loss": 0.9959, "num_tokens": 27579943318.0, "step": 5083 }, { "epoch": 0.9062388591800357, "grad_norm": 0.2080078125, "learning_rate": 1.2794813509258541e-05, "loss": 1.0416, "num_tokens": 27586200536.0, "step": 5084 }, { "epoch": 0.9064171122994652, "grad_norm": 0.193359375, "learning_rate": 1.2792267592621119e-05, "loss": 1.0095, "num_tokens": 27592435514.0, "step": 5085 }, { "epoch": 0.9065953654188948, "grad_norm": 0.2109375, "learning_rate": 1.278972152663367e-05, "loss": 1.0205, "num_tokens": 27598719266.0, "step": 5086 }, { "epoch": 0.9067736185383244, "grad_norm": 0.19921875, "learning_rate": 1.2787175311508337e-05, "loss": 1.0302, "num_tokens": 27604996900.0, "step": 5087 }, { "epoch": 0.906951871657754, "grad_norm": 0.212890625, "learning_rate": 1.2784628947457314e-05, "loss": 1.0059, "num_tokens": 27611280874.0, "step": 5088 }, { "epoch": 0.9071301247771836, "grad_norm": 0.201171875, "learning_rate": 1.278208243469278e-05, "loss": 1.0482, "num_tokens": 27617506274.0, "step": 5089 }, { "epoch": 0.9073083778966132, "grad_norm": 0.205078125, "learning_rate": 1.277953577342694e-05, "loss": 1.0026, "num_tokens": 27623751924.0, "step": 5090 }, { "epoch": 0.9074866310160428, "grad_norm": 0.216796875, "learning_rate": 1.2776988963872012e-05, "loss": 1.0571, "num_tokens": 27630034801.0, "step": 5091 }, { "epoch": 0.9076648841354724, "grad_norm": 0.2041015625, "learning_rate": 1.2774442006240213e-05, "loss": 1.0113, "num_tokens": 27636308512.0, "step": 5092 }, { "epoch": 0.907843137254902, "grad_norm": 0.1953125, "learning_rate": 1.2771894900743788e-05, "loss": 1.027, "num_tokens": 27642542582.0, "step": 5093 }, { "epoch": 0.9080213903743316, "grad_norm": 0.2119140625, "learning_rate": 1.2769347647594988e-05, "loss": 1.0333, "num_tokens": 27648821475.0, "step": 5094 }, { "epoch": 0.9081996434937611, "grad_norm": 0.2001953125, "learning_rate": 1.2766800247006072e-05, "loss": 1.028, "num_tokens": 27655089527.0, "step": 5095 }, { "epoch": 0.9083778966131907, "grad_norm": 0.216796875, "learning_rate": 1.2764252699189317e-05, "loss": 1.0218, "num_tokens": 27661375143.0, "step": 5096 }, { "epoch": 0.9085561497326203, "grad_norm": 0.1923828125, "learning_rate": 1.2761705004357014e-05, "loss": 1.0328, "num_tokens": 27667604346.0, "step": 5097 }, { "epoch": 0.9087344028520499, "grad_norm": 0.216796875, "learning_rate": 1.2759157162721459e-05, "loss": 1.0127, "num_tokens": 27673838444.0, "step": 5098 }, { "epoch": 0.9089126559714795, "grad_norm": 0.197265625, "learning_rate": 1.2756609174494964e-05, "loss": 1.0488, "num_tokens": 27680077889.0, "step": 5099 }, { "epoch": 0.9090909090909091, "grad_norm": 0.2158203125, "learning_rate": 1.2754061039889858e-05, "loss": 1.0013, "num_tokens": 27686329895.0, "step": 5100 }, { "epoch": 0.9092691622103387, "grad_norm": 0.2021484375, "learning_rate": 1.2751512759118472e-05, "loss": 1.0525, "num_tokens": 27692609042.0, "step": 5101 }, { "epoch": 0.9094474153297682, "grad_norm": 0.2001953125, "learning_rate": 1.2748964332393161e-05, "loss": 1.0181, "num_tokens": 27698860734.0, "step": 5102 }, { "epoch": 0.9096256684491979, "grad_norm": 0.2041015625, "learning_rate": 1.2746415759926278e-05, "loss": 0.9859, "num_tokens": 27705146454.0, "step": 5103 }, { "epoch": 0.9098039215686274, "grad_norm": 0.2041015625, "learning_rate": 1.2743867041930204e-05, "loss": 1.0363, "num_tokens": 27711415269.0, "step": 5104 }, { "epoch": 0.9099821746880571, "grad_norm": 0.1962890625, "learning_rate": 1.2741318178617318e-05, "loss": 1.0057, "num_tokens": 27717700322.0, "step": 5105 }, { "epoch": 0.9101604278074866, "grad_norm": 0.2041015625, "learning_rate": 1.2738769170200023e-05, "loss": 1.0321, "num_tokens": 27723983249.0, "step": 5106 }, { "epoch": 0.9103386809269162, "grad_norm": 0.205078125, "learning_rate": 1.2736220016890723e-05, "loss": 0.9824, "num_tokens": 27730245820.0, "step": 5107 }, { "epoch": 0.9105169340463458, "grad_norm": 0.1953125, "learning_rate": 1.273367071890184e-05, "loss": 1.0423, "num_tokens": 27736529917.0, "step": 5108 }, { "epoch": 0.9106951871657754, "grad_norm": 0.2021484375, "learning_rate": 1.2731121276445814e-05, "loss": 1.0331, "num_tokens": 27742815317.0, "step": 5109 }, { "epoch": 0.910873440285205, "grad_norm": 0.2060546875, "learning_rate": 1.2728571689735083e-05, "loss": 1.0311, "num_tokens": 27749097788.0, "step": 5110 }, { "epoch": 0.9110516934046345, "grad_norm": 0.212890625, "learning_rate": 1.272602195898211e-05, "loss": 1.0122, "num_tokens": 27755382458.0, "step": 5111 }, { "epoch": 0.9112299465240642, "grad_norm": 0.197265625, "learning_rate": 1.2723472084399355e-05, "loss": 1.0336, "num_tokens": 27761667219.0, "step": 5112 }, { "epoch": 0.9114081996434937, "grad_norm": 0.201171875, "learning_rate": 1.2720922066199313e-05, "loss": 1.0322, "num_tokens": 27767934461.0, "step": 5113 }, { "epoch": 0.9115864527629234, "grad_norm": 0.2001953125, "learning_rate": 1.2718371904594471e-05, "loss": 1.0276, "num_tokens": 27774189016.0, "step": 5114 }, { "epoch": 0.9117647058823529, "grad_norm": 0.2001953125, "learning_rate": 1.2715821599797334e-05, "loss": 1.0168, "num_tokens": 27780442280.0, "step": 5115 }, { "epoch": 0.9119429590017826, "grad_norm": 0.19921875, "learning_rate": 1.2713271152020418e-05, "loss": 1.0504, "num_tokens": 27786713167.0, "step": 5116 }, { "epoch": 0.9121212121212121, "grad_norm": 0.2021484375, "learning_rate": 1.271072056147626e-05, "loss": 1.0288, "num_tokens": 27792981401.0, "step": 5117 }, { "epoch": 0.9122994652406418, "grad_norm": 0.2021484375, "learning_rate": 1.2708169828377392e-05, "loss": 1.0255, "num_tokens": 27799248756.0, "step": 5118 }, { "epoch": 0.9124777183600713, "grad_norm": 0.2001953125, "learning_rate": 1.2705618952936369e-05, "loss": 1.0121, "num_tokens": 27805504884.0, "step": 5119 }, { "epoch": 0.9126559714795008, "grad_norm": 0.2138671875, "learning_rate": 1.270306793536576e-05, "loss": 0.9852, "num_tokens": 27811789071.0, "step": 5120 }, { "epoch": 0.9128342245989305, "grad_norm": 0.1953125, "learning_rate": 1.2700516775878143e-05, "loss": 1.0079, "num_tokens": 27818059733.0, "step": 5121 }, { "epoch": 0.91301247771836, "grad_norm": 0.212890625, "learning_rate": 1.2697965474686099e-05, "loss": 1.0012, "num_tokens": 27824342543.0, "step": 5122 }, { "epoch": 0.9131907308377897, "grad_norm": 0.201171875, "learning_rate": 1.2695414032002235e-05, "loss": 1.0241, "num_tokens": 27830606738.0, "step": 5123 }, { "epoch": 0.9133689839572192, "grad_norm": 0.2041015625, "learning_rate": 1.2692862448039161e-05, "loss": 1.0463, "num_tokens": 27836890399.0, "step": 5124 }, { "epoch": 0.9135472370766489, "grad_norm": 0.2001953125, "learning_rate": 1.2690310723009502e-05, "loss": 1.012, "num_tokens": 27843145627.0, "step": 5125 }, { "epoch": 0.9137254901960784, "grad_norm": 0.201171875, "learning_rate": 1.268775885712589e-05, "loss": 1.0383, "num_tokens": 27849428989.0, "step": 5126 }, { "epoch": 0.913903743315508, "grad_norm": 0.2080078125, "learning_rate": 1.2685206850600977e-05, "loss": 1.0476, "num_tokens": 27855713179.0, "step": 5127 }, { "epoch": 0.9140819964349376, "grad_norm": 0.21875, "learning_rate": 1.268265470364742e-05, "loss": 1.0556, "num_tokens": 27861985374.0, "step": 5128 }, { "epoch": 0.9142602495543672, "grad_norm": 0.19921875, "learning_rate": 1.2680102416477895e-05, "loss": 1.0458, "num_tokens": 27868269064.0, "step": 5129 }, { "epoch": 0.9144385026737968, "grad_norm": 0.1923828125, "learning_rate": 1.2677549989305076e-05, "loss": 1.0151, "num_tokens": 27874553425.0, "step": 5130 }, { "epoch": 0.9146167557932264, "grad_norm": 0.212890625, "learning_rate": 1.267499742234166e-05, "loss": 1.0388, "num_tokens": 27880837644.0, "step": 5131 }, { "epoch": 0.914795008912656, "grad_norm": 0.2041015625, "learning_rate": 1.267244471580036e-05, "loss": 1.0206, "num_tokens": 27887092974.0, "step": 5132 }, { "epoch": 0.9149732620320855, "grad_norm": 0.19921875, "learning_rate": 1.2669891869893883e-05, "loss": 1.0412, "num_tokens": 27893376458.0, "step": 5133 }, { "epoch": 0.9151515151515152, "grad_norm": 0.20703125, "learning_rate": 1.2667338884834966e-05, "loss": 1.0318, "num_tokens": 27899658955.0, "step": 5134 }, { "epoch": 0.9153297682709447, "grad_norm": 0.193359375, "learning_rate": 1.2664785760836347e-05, "loss": 1.0439, "num_tokens": 27905941785.0, "step": 5135 }, { "epoch": 0.9155080213903743, "grad_norm": 0.201171875, "learning_rate": 1.266223249811078e-05, "loss": 1.0406, "num_tokens": 27912226046.0, "step": 5136 }, { "epoch": 0.9156862745098039, "grad_norm": 0.2060546875, "learning_rate": 1.2659679096871024e-05, "loss": 1.0451, "num_tokens": 27918510801.0, "step": 5137 }, { "epoch": 0.9158645276292335, "grad_norm": 0.1962890625, "learning_rate": 1.2657125557329859e-05, "loss": 1.0095, "num_tokens": 27924700973.0, "step": 5138 }, { "epoch": 0.9160427807486631, "grad_norm": 0.1943359375, "learning_rate": 1.265457187970007e-05, "loss": 1.0214, "num_tokens": 27930982480.0, "step": 5139 }, { "epoch": 0.9162210338680927, "grad_norm": 0.203125, "learning_rate": 1.265201806419446e-05, "loss": 1.0234, "num_tokens": 27937260608.0, "step": 5140 }, { "epoch": 0.9163992869875223, "grad_norm": 0.2021484375, "learning_rate": 1.2649464111025833e-05, "loss": 1.0264, "num_tokens": 27943520671.0, "step": 5141 }, { "epoch": 0.9165775401069519, "grad_norm": 0.1943359375, "learning_rate": 1.264691002040701e-05, "loss": 1.0381, "num_tokens": 27949775804.0, "step": 5142 }, { "epoch": 0.9167557932263815, "grad_norm": 0.20703125, "learning_rate": 1.2644355792550832e-05, "loss": 1.0056, "num_tokens": 27956059979.0, "step": 5143 }, { "epoch": 0.9169340463458111, "grad_norm": 0.208984375, "learning_rate": 1.2641801427670134e-05, "loss": 1.0171, "num_tokens": 27962340479.0, "step": 5144 }, { "epoch": 0.9171122994652406, "grad_norm": 0.203125, "learning_rate": 1.263924692597778e-05, "loss": 1.0037, "num_tokens": 27968625874.0, "step": 5145 }, { "epoch": 0.9172905525846702, "grad_norm": 0.19921875, "learning_rate": 1.2636692287686628e-05, "loss": 1.0208, "num_tokens": 27974910647.0, "step": 5146 }, { "epoch": 0.9174688057040998, "grad_norm": 0.203125, "learning_rate": 1.2634137513009566e-05, "loss": 1.0284, "num_tokens": 27981146663.0, "step": 5147 }, { "epoch": 0.9176470588235294, "grad_norm": 0.2041015625, "learning_rate": 1.2631582602159478e-05, "loss": 1.0382, "num_tokens": 27987402195.0, "step": 5148 }, { "epoch": 0.917825311942959, "grad_norm": 0.19921875, "learning_rate": 1.2629027555349265e-05, "loss": 1.0227, "num_tokens": 27993684270.0, "step": 5149 }, { "epoch": 0.9180035650623886, "grad_norm": 0.201171875, "learning_rate": 1.2626472372791843e-05, "loss": 1.0, "num_tokens": 27999958517.0, "step": 5150 }, { "epoch": 0.9181818181818182, "grad_norm": 0.2177734375, "learning_rate": 1.262391705470013e-05, "loss": 1.0202, "num_tokens": 28006233934.0, "step": 5151 }, { "epoch": 0.9183600713012477, "grad_norm": 0.2177734375, "learning_rate": 1.2621361601287072e-05, "loss": 1.0249, "num_tokens": 28012494192.0, "step": 5152 }, { "epoch": 0.9185383244206774, "grad_norm": 0.19921875, "learning_rate": 1.2618806012765603e-05, "loss": 1.0293, "num_tokens": 28018776804.0, "step": 5153 }, { "epoch": 0.9187165775401069, "grad_norm": 0.2001953125, "learning_rate": 1.261625028934869e-05, "loss": 1.0038, "num_tokens": 28025060391.0, "step": 5154 }, { "epoch": 0.9188948306595366, "grad_norm": 0.1953125, "learning_rate": 1.26136944312493e-05, "loss": 1.0229, "num_tokens": 28031343569.0, "step": 5155 }, { "epoch": 0.9190730837789661, "grad_norm": 0.19921875, "learning_rate": 1.2611138438680407e-05, "loss": 0.9906, "num_tokens": 28037628135.0, "step": 5156 }, { "epoch": 0.9192513368983958, "grad_norm": 0.201171875, "learning_rate": 1.2608582311855013e-05, "loss": 1.0466, "num_tokens": 28043892233.0, "step": 5157 }, { "epoch": 0.9194295900178253, "grad_norm": 0.208984375, "learning_rate": 1.2606026050986108e-05, "loss": 1.0035, "num_tokens": 28050177194.0, "step": 5158 }, { "epoch": 0.9196078431372549, "grad_norm": 0.2177734375, "learning_rate": 1.260346965628672e-05, "loss": 1.0043, "num_tokens": 28056413011.0, "step": 5159 }, { "epoch": 0.9197860962566845, "grad_norm": 0.2099609375, "learning_rate": 1.2600913127969862e-05, "loss": 1.0053, "num_tokens": 28062696241.0, "step": 5160 }, { "epoch": 0.919964349376114, "grad_norm": 0.2060546875, "learning_rate": 1.2598356466248575e-05, "loss": 1.0379, "num_tokens": 28068934687.0, "step": 5161 }, { "epoch": 0.9201426024955437, "grad_norm": 0.203125, "learning_rate": 1.2595799671335904e-05, "loss": 1.0378, "num_tokens": 28075192894.0, "step": 5162 }, { "epoch": 0.9203208556149732, "grad_norm": 0.1982421875, "learning_rate": 1.259324274344491e-05, "loss": 1.0306, "num_tokens": 28081434554.0, "step": 5163 }, { "epoch": 0.9204991087344029, "grad_norm": 0.2060546875, "learning_rate": 1.2590685682788663e-05, "loss": 1.0318, "num_tokens": 28087691399.0, "step": 5164 }, { "epoch": 0.9206773618538324, "grad_norm": 0.203125, "learning_rate": 1.258812848958024e-05, "loss": 1.0363, "num_tokens": 28093952492.0, "step": 5165 }, { "epoch": 0.9208556149732621, "grad_norm": 0.203125, "learning_rate": 1.2585571164032736e-05, "loss": 1.0451, "num_tokens": 28100237833.0, "step": 5166 }, { "epoch": 0.9210338680926916, "grad_norm": 0.2021484375, "learning_rate": 1.258301370635925e-05, "loss": 1.0119, "num_tokens": 28106521098.0, "step": 5167 }, { "epoch": 0.9212121212121213, "grad_norm": 0.1982421875, "learning_rate": 1.25804561167729e-05, "loss": 1.0427, "num_tokens": 28112804424.0, "step": 5168 }, { "epoch": 0.9213903743315508, "grad_norm": 0.19921875, "learning_rate": 1.2577898395486803e-05, "loss": 1.0341, "num_tokens": 28119088848.0, "step": 5169 }, { "epoch": 0.9215686274509803, "grad_norm": 0.201171875, "learning_rate": 1.25753405427141e-05, "loss": 1.0326, "num_tokens": 28125356142.0, "step": 5170 }, { "epoch": 0.92174688057041, "grad_norm": 0.19140625, "learning_rate": 1.2572782558667938e-05, "loss": 1.0016, "num_tokens": 28131640217.0, "step": 5171 }, { "epoch": 0.9219251336898395, "grad_norm": 0.1943359375, "learning_rate": 1.2570224443561472e-05, "loss": 1.0307, "num_tokens": 28137893968.0, "step": 5172 }, { "epoch": 0.9221033868092692, "grad_norm": 0.2109375, "learning_rate": 1.256766619760787e-05, "loss": 1.0173, "num_tokens": 28144177149.0, "step": 5173 }, { "epoch": 0.9222816399286987, "grad_norm": 0.19921875, "learning_rate": 1.2565107821020313e-05, "loss": 1.0542, "num_tokens": 28150440049.0, "step": 5174 }, { "epoch": 0.9224598930481284, "grad_norm": 0.1953125, "learning_rate": 1.2562549314011994e-05, "loss": 1.0074, "num_tokens": 28156724964.0, "step": 5175 }, { "epoch": 0.9226381461675579, "grad_norm": 0.19140625, "learning_rate": 1.2559990676796104e-05, "loss": 1.0433, "num_tokens": 28162991758.0, "step": 5176 }, { "epoch": 0.9228163992869876, "grad_norm": 0.2119140625, "learning_rate": 1.2557431909585867e-05, "loss": 1.0099, "num_tokens": 28169269837.0, "step": 5177 }, { "epoch": 0.9229946524064171, "grad_norm": 0.201171875, "learning_rate": 1.2554873012594492e-05, "loss": 0.9972, "num_tokens": 28175538097.0, "step": 5178 }, { "epoch": 0.9231729055258467, "grad_norm": 0.203125, "learning_rate": 1.2552313986035225e-05, "loss": 1.0486, "num_tokens": 28181780650.0, "step": 5179 }, { "epoch": 0.9233511586452763, "grad_norm": 0.2099609375, "learning_rate": 1.2549754830121306e-05, "loss": 1.0275, "num_tokens": 28188043042.0, "step": 5180 }, { "epoch": 0.9235294117647059, "grad_norm": 0.220703125, "learning_rate": 1.2547195545065985e-05, "loss": 1.0249, "num_tokens": 28194326431.0, "step": 5181 }, { "epoch": 0.9237076648841355, "grad_norm": 0.2060546875, "learning_rate": 1.2544636131082537e-05, "loss": 1.0284, "num_tokens": 28200576588.0, "step": 5182 }, { "epoch": 0.923885918003565, "grad_norm": 0.21484375, "learning_rate": 1.2542076588384231e-05, "loss": 1.0273, "num_tokens": 28206843534.0, "step": 5183 }, { "epoch": 0.9240641711229947, "grad_norm": 0.201171875, "learning_rate": 1.2539516917184358e-05, "loss": 1.0189, "num_tokens": 28213065603.0, "step": 5184 }, { "epoch": 0.9242424242424242, "grad_norm": 0.1962890625, "learning_rate": 1.2536957117696212e-05, "loss": 1.0005, "num_tokens": 28219349014.0, "step": 5185 }, { "epoch": 0.9244206773618538, "grad_norm": 0.1953125, "learning_rate": 1.2534397190133108e-05, "loss": 1.0366, "num_tokens": 28225632669.0, "step": 5186 }, { "epoch": 0.9245989304812834, "grad_norm": 0.2001953125, "learning_rate": 1.253183713470836e-05, "loss": 1.0397, "num_tokens": 28231915120.0, "step": 5187 }, { "epoch": 0.924777183600713, "grad_norm": 0.20703125, "learning_rate": 1.2529276951635299e-05, "loss": 1.0394, "num_tokens": 28238132043.0, "step": 5188 }, { "epoch": 0.9249554367201426, "grad_norm": 0.203125, "learning_rate": 1.2526716641127268e-05, "loss": 1.0324, "num_tokens": 28244398118.0, "step": 5189 }, { "epoch": 0.9251336898395722, "grad_norm": 0.2021484375, "learning_rate": 1.2524156203397612e-05, "loss": 1.022, "num_tokens": 28250674244.0, "step": 5190 }, { "epoch": 0.9253119429590018, "grad_norm": 0.1962890625, "learning_rate": 1.2521595638659705e-05, "loss": 1.0197, "num_tokens": 28256926795.0, "step": 5191 }, { "epoch": 0.9254901960784314, "grad_norm": 0.197265625, "learning_rate": 1.2519034947126905e-05, "loss": 1.0333, "num_tokens": 28263171833.0, "step": 5192 }, { "epoch": 0.925668449197861, "grad_norm": 0.2001953125, "learning_rate": 1.2516474129012603e-05, "loss": 1.0432, "num_tokens": 28269454443.0, "step": 5193 }, { "epoch": 0.9258467023172906, "grad_norm": 0.212890625, "learning_rate": 1.2513913184530193e-05, "loss": 1.0634, "num_tokens": 28275739316.0, "step": 5194 }, { "epoch": 0.9260249554367201, "grad_norm": 0.193359375, "learning_rate": 1.2511352113893074e-05, "loss": 0.9866, "num_tokens": 28281998257.0, "step": 5195 }, { "epoch": 0.9262032085561497, "grad_norm": 0.1982421875, "learning_rate": 1.2508790917314666e-05, "loss": 1.0123, "num_tokens": 28288281437.0, "step": 5196 }, { "epoch": 0.9263814616755793, "grad_norm": 0.2119140625, "learning_rate": 1.250622959500839e-05, "loss": 1.0132, "num_tokens": 28294538242.0, "step": 5197 }, { "epoch": 0.9265597147950089, "grad_norm": 0.1923828125, "learning_rate": 1.2503668147187688e-05, "loss": 1.0073, "num_tokens": 28300808062.0, "step": 5198 }, { "epoch": 0.9267379679144385, "grad_norm": 0.1943359375, "learning_rate": 1.2501106574065995e-05, "loss": 1.0027, "num_tokens": 28307092699.0, "step": 5199 }, { "epoch": 0.9269162210338681, "grad_norm": 0.201171875, "learning_rate": 1.2498544875856778e-05, "loss": 1.0131, "num_tokens": 28313364531.0, "step": 5200 }, { "epoch": 0.9270944741532977, "grad_norm": 0.212890625, "learning_rate": 1.2495983052773497e-05, "loss": 1.0119, "num_tokens": 28319649304.0, "step": 5201 }, { "epoch": 0.9272727272727272, "grad_norm": 0.19921875, "learning_rate": 1.2493421105029635e-05, "loss": 1.0207, "num_tokens": 28325900571.0, "step": 5202 }, { "epoch": 0.9274509803921569, "grad_norm": 0.1943359375, "learning_rate": 1.2490859032838673e-05, "loss": 1.0093, "num_tokens": 28332184174.0, "step": 5203 }, { "epoch": 0.9276292335115864, "grad_norm": 0.2109375, "learning_rate": 1.2488296836414113e-05, "loss": 1.0228, "num_tokens": 28338465820.0, "step": 5204 }, { "epoch": 0.9278074866310161, "grad_norm": 0.1953125, "learning_rate": 1.2485734515969467e-05, "loss": 1.0289, "num_tokens": 28344728277.0, "step": 5205 }, { "epoch": 0.9279857397504456, "grad_norm": 0.189453125, "learning_rate": 1.2483172071718245e-05, "loss": 0.9933, "num_tokens": 28350968178.0, "step": 5206 }, { "epoch": 0.9281639928698753, "grad_norm": 0.19921875, "learning_rate": 1.2480609503873985e-05, "loss": 1.026, "num_tokens": 28357237190.0, "step": 5207 }, { "epoch": 0.9283422459893048, "grad_norm": 0.1884765625, "learning_rate": 1.2478046812650215e-05, "loss": 1.0273, "num_tokens": 28363519623.0, "step": 5208 }, { "epoch": 0.9285204991087344, "grad_norm": 0.19921875, "learning_rate": 1.24754839982605e-05, "loss": 1.0418, "num_tokens": 28369804076.0, "step": 5209 }, { "epoch": 0.928698752228164, "grad_norm": 0.1982421875, "learning_rate": 1.2472921060918386e-05, "loss": 1.0186, "num_tokens": 28376065377.0, "step": 5210 }, { "epoch": 0.9288770053475935, "grad_norm": 0.1923828125, "learning_rate": 1.2470358000837446e-05, "loss": 1.0412, "num_tokens": 28382346193.0, "step": 5211 }, { "epoch": 0.9290552584670232, "grad_norm": 0.1923828125, "learning_rate": 1.2467794818231269e-05, "loss": 1.0233, "num_tokens": 28388599413.0, "step": 5212 }, { "epoch": 0.9292335115864527, "grad_norm": 0.1982421875, "learning_rate": 1.2465231513313437e-05, "loss": 1.0282, "num_tokens": 28394883465.0, "step": 5213 }, { "epoch": 0.9294117647058824, "grad_norm": 0.201171875, "learning_rate": 1.246266808629755e-05, "loss": 1.0084, "num_tokens": 28401155158.0, "step": 5214 }, { "epoch": 0.9295900178253119, "grad_norm": 0.19140625, "learning_rate": 1.2460104537397226e-05, "loss": 1.0442, "num_tokens": 28407439660.0, "step": 5215 }, { "epoch": 0.9297682709447416, "grad_norm": 0.1943359375, "learning_rate": 1.2457540866826081e-05, "loss": 1.001, "num_tokens": 28413703355.0, "step": 5216 }, { "epoch": 0.9299465240641711, "grad_norm": 0.1923828125, "learning_rate": 1.2454977074797746e-05, "loss": 1.0485, "num_tokens": 28419986095.0, "step": 5217 }, { "epoch": 0.9301247771836008, "grad_norm": 0.2060546875, "learning_rate": 1.245241316152587e-05, "loss": 1.0548, "num_tokens": 28426270831.0, "step": 5218 }, { "epoch": 0.9303030303030303, "grad_norm": 0.2001953125, "learning_rate": 1.2449849127224089e-05, "loss": 1.0122, "num_tokens": 28432554194.0, "step": 5219 }, { "epoch": 0.93048128342246, "grad_norm": 0.19140625, "learning_rate": 1.244728497210608e-05, "loss": 1.0504, "num_tokens": 28438836797.0, "step": 5220 }, { "epoch": 0.9306595365418895, "grad_norm": 0.1943359375, "learning_rate": 1.2444720696385504e-05, "loss": 1.0262, "num_tokens": 28445119740.0, "step": 5221 }, { "epoch": 0.930837789661319, "grad_norm": 0.20703125, "learning_rate": 1.2442156300276047e-05, "loss": 1.0045, "num_tokens": 28451403118.0, "step": 5222 }, { "epoch": 0.9310160427807487, "grad_norm": 0.19921875, "learning_rate": 1.2439591783991404e-05, "loss": 1.0417, "num_tokens": 28457687061.0, "step": 5223 }, { "epoch": 0.9311942959001782, "grad_norm": 0.2099609375, "learning_rate": 1.243702714774527e-05, "loss": 1.0338, "num_tokens": 28463926921.0, "step": 5224 }, { "epoch": 0.9313725490196079, "grad_norm": 0.203125, "learning_rate": 1.2434462391751363e-05, "loss": 1.0586, "num_tokens": 28470209586.0, "step": 5225 }, { "epoch": 0.9315508021390374, "grad_norm": 0.1953125, "learning_rate": 1.2431897516223398e-05, "loss": 1.0072, "num_tokens": 28476453486.0, "step": 5226 }, { "epoch": 0.931729055258467, "grad_norm": 0.197265625, "learning_rate": 1.2429332521375112e-05, "loss": 1.0124, "num_tokens": 28482736164.0, "step": 5227 }, { "epoch": 0.9319073083778966, "grad_norm": 0.1943359375, "learning_rate": 1.2426767407420245e-05, "loss": 1.0146, "num_tokens": 28489003995.0, "step": 5228 }, { "epoch": 0.9320855614973262, "grad_norm": 0.19921875, "learning_rate": 1.2424202174572548e-05, "loss": 1.041, "num_tokens": 28495289662.0, "step": 5229 }, { "epoch": 0.9322638146167558, "grad_norm": 0.1953125, "learning_rate": 1.2421636823045782e-05, "loss": 1.0309, "num_tokens": 28501541129.0, "step": 5230 }, { "epoch": 0.9324420677361854, "grad_norm": 0.1923828125, "learning_rate": 1.2419071353053717e-05, "loss": 0.9872, "num_tokens": 28507807672.0, "step": 5231 }, { "epoch": 0.932620320855615, "grad_norm": 0.208984375, "learning_rate": 1.241650576481014e-05, "loss": 1.0366, "num_tokens": 28514090428.0, "step": 5232 }, { "epoch": 0.9327985739750445, "grad_norm": 0.220703125, "learning_rate": 1.2413940058528838e-05, "loss": 1.0303, "num_tokens": 28520374294.0, "step": 5233 }, { "epoch": 0.9329768270944742, "grad_norm": 0.21875, "learning_rate": 1.241137423442361e-05, "loss": 1.0012, "num_tokens": 28526655817.0, "step": 5234 }, { "epoch": 0.9331550802139037, "grad_norm": 0.19921875, "learning_rate": 1.2408808292708268e-05, "loss": 1.0349, "num_tokens": 28532937088.0, "step": 5235 }, { "epoch": 0.9333333333333333, "grad_norm": 0.2060546875, "learning_rate": 1.2406242233596635e-05, "loss": 1.0387, "num_tokens": 28539220667.0, "step": 5236 }, { "epoch": 0.9335115864527629, "grad_norm": 0.2236328125, "learning_rate": 1.2403676057302538e-05, "loss": 1.0041, "num_tokens": 28545487058.0, "step": 5237 }, { "epoch": 0.9336898395721925, "grad_norm": 0.205078125, "learning_rate": 1.2401109764039816e-05, "loss": 0.9907, "num_tokens": 28551772255.0, "step": 5238 }, { "epoch": 0.9338680926916221, "grad_norm": 0.203125, "learning_rate": 1.2398543354022324e-05, "loss": 1.0611, "num_tokens": 28558008910.0, "step": 5239 }, { "epoch": 0.9340463458110517, "grad_norm": 0.1953125, "learning_rate": 1.239597682746392e-05, "loss": 1.0185, "num_tokens": 28564275116.0, "step": 5240 }, { "epoch": 0.9342245989304813, "grad_norm": 0.19140625, "learning_rate": 1.2393410184578469e-05, "loss": 0.9995, "num_tokens": 28570558018.0, "step": 5241 }, { "epoch": 0.9344028520499109, "grad_norm": 0.2099609375, "learning_rate": 1.2390843425579852e-05, "loss": 1.0241, "num_tokens": 28576830809.0, "step": 5242 }, { "epoch": 0.9345811051693405, "grad_norm": 0.205078125, "learning_rate": 1.238827655068196e-05, "loss": 1.0279, "num_tokens": 28583096848.0, "step": 5243 }, { "epoch": 0.9347593582887701, "grad_norm": 0.2060546875, "learning_rate": 1.238570956009869e-05, "loss": 1.0009, "num_tokens": 28589362193.0, "step": 5244 }, { "epoch": 0.9349376114081996, "grad_norm": 0.1865234375, "learning_rate": 1.2383142454043948e-05, "loss": 1.0334, "num_tokens": 28595645794.0, "step": 5245 }, { "epoch": 0.9351158645276292, "grad_norm": 0.2177734375, "learning_rate": 1.2380575232731653e-05, "loss": 1.0129, "num_tokens": 28601908140.0, "step": 5246 }, { "epoch": 0.9352941176470588, "grad_norm": 0.232421875, "learning_rate": 1.237800789637573e-05, "loss": 1.0446, "num_tokens": 28608185740.0, "step": 5247 }, { "epoch": 0.9354723707664884, "grad_norm": 0.1943359375, "learning_rate": 1.2375440445190122e-05, "loss": 1.0087, "num_tokens": 28614450931.0, "step": 5248 }, { "epoch": 0.935650623885918, "grad_norm": 0.1962890625, "learning_rate": 1.2372872879388767e-05, "loss": 1.0029, "num_tokens": 28620715859.0, "step": 5249 }, { "epoch": 0.9358288770053476, "grad_norm": 0.228515625, "learning_rate": 1.2370305199185628e-05, "loss": 1.0309, "num_tokens": 28626958052.0, "step": 5250 }, { "epoch": 0.9360071301247772, "grad_norm": 0.2265625, "learning_rate": 1.2367737404794662e-05, "loss": 1.0153, "num_tokens": 28633240517.0, "step": 5251 }, { "epoch": 0.9361853832442067, "grad_norm": 0.1953125, "learning_rate": 1.2365169496429852e-05, "loss": 1.0078, "num_tokens": 28639525984.0, "step": 5252 }, { "epoch": 0.9363636363636364, "grad_norm": 0.2255859375, "learning_rate": 1.2362601474305181e-05, "loss": 1.0235, "num_tokens": 28645808831.0, "step": 5253 }, { "epoch": 0.9365418894830659, "grad_norm": 0.2119140625, "learning_rate": 1.2360033338634637e-05, "loss": 1.0356, "num_tokens": 28652092257.0, "step": 5254 }, { "epoch": 0.9367201426024956, "grad_norm": 0.1923828125, "learning_rate": 1.235746508963223e-05, "loss": 1.0274, "num_tokens": 28658378190.0, "step": 5255 }, { "epoch": 0.9368983957219251, "grad_norm": 0.197265625, "learning_rate": 1.2354896727511968e-05, "loss": 1.0336, "num_tokens": 28664637075.0, "step": 5256 }, { "epoch": 0.9370766488413548, "grad_norm": 0.21875, "learning_rate": 1.2352328252487877e-05, "loss": 1.0117, "num_tokens": 28670921163.0, "step": 5257 }, { "epoch": 0.9372549019607843, "grad_norm": 0.205078125, "learning_rate": 1.2349759664773986e-05, "loss": 1.0246, "num_tokens": 28677163107.0, "step": 5258 }, { "epoch": 0.9374331550802139, "grad_norm": 0.203125, "learning_rate": 1.2347190964584338e-05, "loss": 1.0484, "num_tokens": 28683395919.0, "step": 5259 }, { "epoch": 0.9376114081996435, "grad_norm": 0.2021484375, "learning_rate": 1.2344622152132981e-05, "loss": 1.0186, "num_tokens": 28689667223.0, "step": 5260 }, { "epoch": 0.937789661319073, "grad_norm": 0.2060546875, "learning_rate": 1.2342053227633972e-05, "loss": 1.0044, "num_tokens": 28695952209.0, "step": 5261 }, { "epoch": 0.9379679144385027, "grad_norm": 0.193359375, "learning_rate": 1.2339484191301389e-05, "loss": 1.0013, "num_tokens": 28702227335.0, "step": 5262 }, { "epoch": 0.9381461675579322, "grad_norm": 0.216796875, "learning_rate": 1.2336915043349305e-05, "loss": 1.0267, "num_tokens": 28708511491.0, "step": 5263 }, { "epoch": 0.9383244206773619, "grad_norm": 0.203125, "learning_rate": 1.2334345783991807e-05, "loss": 1.0304, "num_tokens": 28714780564.0, "step": 5264 }, { "epoch": 0.9385026737967914, "grad_norm": 0.197265625, "learning_rate": 1.2331776413442986e-05, "loss": 1.0062, "num_tokens": 28721064649.0, "step": 5265 }, { "epoch": 0.9386809269162211, "grad_norm": 0.2041015625, "learning_rate": 1.2329206931916962e-05, "loss": 1.0331, "num_tokens": 28727348652.0, "step": 5266 }, { "epoch": 0.9388591800356506, "grad_norm": 0.2080078125, "learning_rate": 1.2326637339627843e-05, "loss": 1.0057, "num_tokens": 28733631539.0, "step": 5267 }, { "epoch": 0.9390374331550803, "grad_norm": 0.1923828125, "learning_rate": 1.2324067636789752e-05, "loss": 1.0168, "num_tokens": 28739917401.0, "step": 5268 }, { "epoch": 0.9392156862745098, "grad_norm": 0.1962890625, "learning_rate": 1.2321497823616825e-05, "loss": 0.9952, "num_tokens": 28746164193.0, "step": 5269 }, { "epoch": 0.9393939393939394, "grad_norm": 0.2109375, "learning_rate": 1.2318927900323204e-05, "loss": 1.0281, "num_tokens": 28752438047.0, "step": 5270 }, { "epoch": 0.939572192513369, "grad_norm": 0.1865234375, "learning_rate": 1.2316357867123046e-05, "loss": 1.0024, "num_tokens": 28758707204.0, "step": 5271 }, { "epoch": 0.9397504456327985, "grad_norm": 0.21484375, "learning_rate": 1.2313787724230504e-05, "loss": 1.0039, "num_tokens": 28764990363.0, "step": 5272 }, { "epoch": 0.9399286987522282, "grad_norm": 0.1923828125, "learning_rate": 1.2311217471859757e-05, "loss": 1.0338, "num_tokens": 28771247803.0, "step": 5273 }, { "epoch": 0.9401069518716577, "grad_norm": 0.203125, "learning_rate": 1.2308647110224976e-05, "loss": 1.0558, "num_tokens": 28777530140.0, "step": 5274 }, { "epoch": 0.9402852049910874, "grad_norm": 0.1923828125, "learning_rate": 1.2306076639540358e-05, "loss": 1.0104, "num_tokens": 28783815534.0, "step": 5275 }, { "epoch": 0.9404634581105169, "grad_norm": 0.205078125, "learning_rate": 1.2303506060020095e-05, "loss": 0.9913, "num_tokens": 28790099604.0, "step": 5276 }, { "epoch": 0.9406417112299466, "grad_norm": 0.2021484375, "learning_rate": 1.2300935371878398e-05, "loss": 1.0409, "num_tokens": 28796380253.0, "step": 5277 }, { "epoch": 0.9408199643493761, "grad_norm": 0.1953125, "learning_rate": 1.2298364575329483e-05, "loss": 1.0012, "num_tokens": 28802656068.0, "step": 5278 }, { "epoch": 0.9409982174688057, "grad_norm": 0.1923828125, "learning_rate": 1.2295793670587573e-05, "loss": 1.0265, "num_tokens": 28808940859.0, "step": 5279 }, { "epoch": 0.9411764705882353, "grad_norm": 0.21484375, "learning_rate": 1.2293222657866902e-05, "loss": 1.016, "num_tokens": 28815207733.0, "step": 5280 }, { "epoch": 0.9413547237076649, "grad_norm": 0.2001953125, "learning_rate": 1.2290651537381708e-05, "loss": 1.0092, "num_tokens": 28821484720.0, "step": 5281 }, { "epoch": 0.9415329768270945, "grad_norm": 0.2021484375, "learning_rate": 1.2288080309346255e-05, "loss": 1.0262, "num_tokens": 28827748254.0, "step": 5282 }, { "epoch": 0.941711229946524, "grad_norm": 0.2001953125, "learning_rate": 1.2285508973974795e-05, "loss": 1.0109, "num_tokens": 28834033372.0, "step": 5283 }, { "epoch": 0.9418894830659537, "grad_norm": 0.2119140625, "learning_rate": 1.22829375314816e-05, "loss": 0.9911, "num_tokens": 28840300157.0, "step": 5284 }, { "epoch": 0.9420677361853832, "grad_norm": 0.1904296875, "learning_rate": 1.228036598208095e-05, "loss": 0.9665, "num_tokens": 28846566293.0, "step": 5285 }, { "epoch": 0.9422459893048128, "grad_norm": 0.189453125, "learning_rate": 1.2277794325987133e-05, "loss": 0.9822, "num_tokens": 28852850821.0, "step": 5286 }, { "epoch": 0.9424242424242424, "grad_norm": 0.1982421875, "learning_rate": 1.2275222563414443e-05, "loss": 1.0082, "num_tokens": 28859135079.0, "step": 5287 }, { "epoch": 0.942602495543672, "grad_norm": 0.197265625, "learning_rate": 1.2272650694577186e-05, "loss": 1.005, "num_tokens": 28865420491.0, "step": 5288 }, { "epoch": 0.9427807486631016, "grad_norm": 0.208984375, "learning_rate": 1.227007871968968e-05, "loss": 1.0382, "num_tokens": 28871664029.0, "step": 5289 }, { "epoch": 0.9429590017825312, "grad_norm": 0.2001953125, "learning_rate": 1.2267506638966246e-05, "loss": 1.0552, "num_tokens": 28877910443.0, "step": 5290 }, { "epoch": 0.9431372549019608, "grad_norm": 0.1953125, "learning_rate": 1.2264934452621213e-05, "loss": 0.9722, "num_tokens": 28884195073.0, "step": 5291 }, { "epoch": 0.9433155080213904, "grad_norm": 0.2060546875, "learning_rate": 1.2262362160868928e-05, "loss": 1.0269, "num_tokens": 28890479612.0, "step": 5292 }, { "epoch": 0.94349376114082, "grad_norm": 0.19140625, "learning_rate": 1.2259789763923734e-05, "loss": 1.0349, "num_tokens": 28896764167.0, "step": 5293 }, { "epoch": 0.9436720142602496, "grad_norm": 0.1923828125, "learning_rate": 1.2257217261999994e-05, "loss": 1.0147, "num_tokens": 28903050107.0, "step": 5294 }, { "epoch": 0.9438502673796791, "grad_norm": 0.1953125, "learning_rate": 1.2254644655312074e-05, "loss": 1.0473, "num_tokens": 28909334178.0, "step": 5295 }, { "epoch": 0.9440285204991087, "grad_norm": 0.203125, "learning_rate": 1.2252071944074349e-05, "loss": 1.0306, "num_tokens": 28915596298.0, "step": 5296 }, { "epoch": 0.9442067736185383, "grad_norm": 0.1943359375, "learning_rate": 1.22494991285012e-05, "loss": 1.0305, "num_tokens": 28921837535.0, "step": 5297 }, { "epoch": 0.9443850267379679, "grad_norm": 0.1943359375, "learning_rate": 1.2246926208807032e-05, "loss": 1.0177, "num_tokens": 28928123064.0, "step": 5298 }, { "epoch": 0.9445632798573975, "grad_norm": 0.193359375, "learning_rate": 1.2244353185206235e-05, "loss": 1.0358, "num_tokens": 28934405664.0, "step": 5299 }, { "epoch": 0.9447415329768271, "grad_norm": 0.1923828125, "learning_rate": 1.2241780057913224e-05, "loss": 1.0182, "num_tokens": 28940690692.0, "step": 5300 }, { "epoch": 0.9449197860962567, "grad_norm": 0.1943359375, "learning_rate": 1.2239206827142418e-05, "loss": 1.0298, "num_tokens": 28946976242.0, "step": 5301 }, { "epoch": 0.9450980392156862, "grad_norm": 0.205078125, "learning_rate": 1.2236633493108246e-05, "loss": 0.998, "num_tokens": 28953234263.0, "step": 5302 }, { "epoch": 0.9452762923351159, "grad_norm": 0.2021484375, "learning_rate": 1.2234060056025145e-05, "loss": 1.0371, "num_tokens": 28959504279.0, "step": 5303 }, { "epoch": 0.9454545454545454, "grad_norm": 0.1923828125, "learning_rate": 1.2231486516107554e-05, "loss": 1.037, "num_tokens": 28965774389.0, "step": 5304 }, { "epoch": 0.9456327985739751, "grad_norm": 0.1962890625, "learning_rate": 1.2228912873569936e-05, "loss": 1.0423, "num_tokens": 28972058065.0, "step": 5305 }, { "epoch": 0.9458110516934046, "grad_norm": 0.2001953125, "learning_rate": 1.2226339128626746e-05, "loss": 1.0087, "num_tokens": 28978321513.0, "step": 5306 }, { "epoch": 0.9459893048128343, "grad_norm": 0.1865234375, "learning_rate": 1.2223765281492456e-05, "loss": 0.9882, "num_tokens": 28984579820.0, "step": 5307 }, { "epoch": 0.9461675579322638, "grad_norm": 0.20703125, "learning_rate": 1.2221191332381547e-05, "loss": 1.0408, "num_tokens": 28990846349.0, "step": 5308 }, { "epoch": 0.9463458110516934, "grad_norm": 0.2001953125, "learning_rate": 1.2218617281508508e-05, "loss": 0.9882, "num_tokens": 28997122019.0, "step": 5309 }, { "epoch": 0.946524064171123, "grad_norm": 0.2099609375, "learning_rate": 1.2216043129087834e-05, "loss": 1.0257, "num_tokens": 29003404844.0, "step": 5310 }, { "epoch": 0.9467023172905525, "grad_norm": 0.1953125, "learning_rate": 1.2213468875334024e-05, "loss": 1.0191, "num_tokens": 29009634942.0, "step": 5311 }, { "epoch": 0.9468805704099822, "grad_norm": 0.1982421875, "learning_rate": 1.2210894520461597e-05, "loss": 1.0193, "num_tokens": 29015906796.0, "step": 5312 }, { "epoch": 0.9470588235294117, "grad_norm": 0.2001953125, "learning_rate": 1.2208320064685078e-05, "loss": 1.0026, "num_tokens": 29022191688.0, "step": 5313 }, { "epoch": 0.9472370766488414, "grad_norm": 0.19921875, "learning_rate": 1.2205745508218991e-05, "loss": 1.039, "num_tokens": 29028469421.0, "step": 5314 }, { "epoch": 0.9474153297682709, "grad_norm": 0.2060546875, "learning_rate": 1.2203170851277875e-05, "loss": 1.0038, "num_tokens": 29034728080.0, "step": 5315 }, { "epoch": 0.9475935828877006, "grad_norm": 0.193359375, "learning_rate": 1.2200596094076275e-05, "loss": 0.9995, "num_tokens": 29041011317.0, "step": 5316 }, { "epoch": 0.9477718360071301, "grad_norm": 0.1884765625, "learning_rate": 1.2198021236828752e-05, "loss": 1.0228, "num_tokens": 29047294736.0, "step": 5317 }, { "epoch": 0.9479500891265598, "grad_norm": 0.193359375, "learning_rate": 1.2195446279749862e-05, "loss": 1.0188, "num_tokens": 29053579296.0, "step": 5318 }, { "epoch": 0.9481283422459893, "grad_norm": 0.2021484375, "learning_rate": 1.2192871223054184e-05, "loss": 1.0087, "num_tokens": 29059864634.0, "step": 5319 }, { "epoch": 0.948306595365419, "grad_norm": 0.1953125, "learning_rate": 1.2190296066956292e-05, "loss": 0.9922, "num_tokens": 29066124218.0, "step": 5320 }, { "epoch": 0.9484848484848485, "grad_norm": 0.2119140625, "learning_rate": 1.2187720811670782e-05, "loss": 1.0027, "num_tokens": 29072382284.0, "step": 5321 }, { "epoch": 0.948663101604278, "grad_norm": 0.1923828125, "learning_rate": 1.2185145457412241e-05, "loss": 1.0276, "num_tokens": 29078665417.0, "step": 5322 }, { "epoch": 0.9488413547237077, "grad_norm": 0.20703125, "learning_rate": 1.218257000439528e-05, "loss": 0.9909, "num_tokens": 29084916353.0, "step": 5323 }, { "epoch": 0.9490196078431372, "grad_norm": 0.205078125, "learning_rate": 1.2179994452834512e-05, "loss": 0.9834, "num_tokens": 29091200096.0, "step": 5324 }, { "epoch": 0.9491978609625669, "grad_norm": 0.2197265625, "learning_rate": 1.2177418802944554e-05, "loss": 1.0631, "num_tokens": 29097484422.0, "step": 5325 }, { "epoch": 0.9493761140819964, "grad_norm": 0.1884765625, "learning_rate": 1.217484305494004e-05, "loss": 1.0123, "num_tokens": 29103770179.0, "step": 5326 }, { "epoch": 0.949554367201426, "grad_norm": 0.2001953125, "learning_rate": 1.2172267209035604e-05, "loss": 1.0131, "num_tokens": 29110054770.0, "step": 5327 }, { "epoch": 0.9497326203208556, "grad_norm": 0.2060546875, "learning_rate": 1.2169691265445896e-05, "loss": 1.0058, "num_tokens": 29116338067.0, "step": 5328 }, { "epoch": 0.9499108734402852, "grad_norm": 0.2099609375, "learning_rate": 1.2167115224385567e-05, "loss": 1.0197, "num_tokens": 29122574370.0, "step": 5329 }, { "epoch": 0.9500891265597148, "grad_norm": 0.203125, "learning_rate": 1.2164539086069278e-05, "loss": 1.0192, "num_tokens": 29128856175.0, "step": 5330 }, { "epoch": 0.9502673796791444, "grad_norm": 0.2138671875, "learning_rate": 1.2161962850711697e-05, "loss": 1.0354, "num_tokens": 29135141266.0, "step": 5331 }, { "epoch": 0.950445632798574, "grad_norm": 0.2060546875, "learning_rate": 1.2159386518527511e-05, "loss": 1.0432, "num_tokens": 29141423900.0, "step": 5332 }, { "epoch": 0.9506238859180036, "grad_norm": 0.1904296875, "learning_rate": 1.2156810089731404e-05, "loss": 1.018, "num_tokens": 29147683873.0, "step": 5333 }, { "epoch": 0.9508021390374332, "grad_norm": 0.2021484375, "learning_rate": 1.215423356453806e-05, "loss": 1.0322, "num_tokens": 29153963757.0, "step": 5334 }, { "epoch": 0.9509803921568627, "grad_norm": 0.20703125, "learning_rate": 1.2151656943162195e-05, "loss": 1.016, "num_tokens": 29160227176.0, "step": 5335 }, { "epoch": 0.9511586452762923, "grad_norm": 0.1953125, "learning_rate": 1.214908022581851e-05, "loss": 1.0075, "num_tokens": 29166505809.0, "step": 5336 }, { "epoch": 0.9513368983957219, "grad_norm": 0.203125, "learning_rate": 1.2146503412721731e-05, "loss": 1.0236, "num_tokens": 29172751907.0, "step": 5337 }, { "epoch": 0.9515151515151515, "grad_norm": 0.2021484375, "learning_rate": 1.2143926504086575e-05, "loss": 1.0104, "num_tokens": 29179033356.0, "step": 5338 }, { "epoch": 0.9516934046345811, "grad_norm": 0.22265625, "learning_rate": 1.2141349500127784e-05, "loss": 1.0212, "num_tokens": 29185318474.0, "step": 5339 }, { "epoch": 0.9518716577540107, "grad_norm": 0.197265625, "learning_rate": 1.2138772401060097e-05, "loss": 0.9856, "num_tokens": 29191603600.0, "step": 5340 }, { "epoch": 0.9520499108734403, "grad_norm": 0.2275390625, "learning_rate": 1.2136195207098265e-05, "loss": 1.0172, "num_tokens": 29197865524.0, "step": 5341 }, { "epoch": 0.9522281639928699, "grad_norm": 0.2080078125, "learning_rate": 1.2133617918457047e-05, "loss": 1.0143, "num_tokens": 29204143743.0, "step": 5342 }, { "epoch": 0.9524064171122995, "grad_norm": 0.197265625, "learning_rate": 1.2131040535351208e-05, "loss": 1.0587, "num_tokens": 29210410337.0, "step": 5343 }, { "epoch": 0.9525846702317291, "grad_norm": 0.2021484375, "learning_rate": 1.2128463057995524e-05, "loss": 1.0097, "num_tokens": 29216689721.0, "step": 5344 }, { "epoch": 0.9527629233511586, "grad_norm": 0.236328125, "learning_rate": 1.2125885486604769e-05, "loss": 1.0206, "num_tokens": 29222936576.0, "step": 5345 }, { "epoch": 0.9529411764705882, "grad_norm": 0.208984375, "learning_rate": 1.2123307821393741e-05, "loss": 1.0449, "num_tokens": 29229199086.0, "step": 5346 }, { "epoch": 0.9531194295900178, "grad_norm": 0.189453125, "learning_rate": 1.2120730062577237e-05, "loss": 1.018, "num_tokens": 29235483614.0, "step": 5347 }, { "epoch": 0.9532976827094474, "grad_norm": 0.2060546875, "learning_rate": 1.211815221037006e-05, "loss": 0.9803, "num_tokens": 29241744403.0, "step": 5348 }, { "epoch": 0.953475935828877, "grad_norm": 0.240234375, "learning_rate": 1.211557426498702e-05, "loss": 1.0254, "num_tokens": 29248012151.0, "step": 5349 }, { "epoch": 0.9536541889483066, "grad_norm": 0.205078125, "learning_rate": 1.2112996226642938e-05, "loss": 1.0022, "num_tokens": 29254296188.0, "step": 5350 }, { "epoch": 0.9538324420677362, "grad_norm": 0.193359375, "learning_rate": 1.2110418095552651e-05, "loss": 1.0217, "num_tokens": 29260574927.0, "step": 5351 }, { "epoch": 0.9540106951871657, "grad_norm": 0.1904296875, "learning_rate": 1.2107839871930986e-05, "loss": 1.0081, "num_tokens": 29266840141.0, "step": 5352 }, { "epoch": 0.9541889483065954, "grad_norm": 0.2041015625, "learning_rate": 1.2105261555992793e-05, "loss": 0.9795, "num_tokens": 29273066228.0, "step": 5353 }, { "epoch": 0.9543672014260249, "grad_norm": 0.1884765625, "learning_rate": 1.2102683147952919e-05, "loss": 1.0271, "num_tokens": 29279333478.0, "step": 5354 }, { "epoch": 0.9545454545454546, "grad_norm": 0.189453125, "learning_rate": 1.2100104648026224e-05, "loss": 0.9849, "num_tokens": 29285603321.0, "step": 5355 }, { "epoch": 0.9547237076648841, "grad_norm": 0.1953125, "learning_rate": 1.2097526056427578e-05, "loss": 1.0313, "num_tokens": 29291887035.0, "step": 5356 }, { "epoch": 0.9549019607843138, "grad_norm": 0.1923828125, "learning_rate": 1.209494737337185e-05, "loss": 1.0259, "num_tokens": 29298172677.0, "step": 5357 }, { "epoch": 0.9550802139037433, "grad_norm": 0.1904296875, "learning_rate": 1.2092368599073929e-05, "loss": 1.0239, "num_tokens": 29304413674.0, "step": 5358 }, { "epoch": 0.9552584670231729, "grad_norm": 0.2158203125, "learning_rate": 1.2089789733748703e-05, "loss": 1.013, "num_tokens": 29310650940.0, "step": 5359 }, { "epoch": 0.9554367201426025, "grad_norm": 0.2041015625, "learning_rate": 1.2087210777611066e-05, "loss": 1.0386, "num_tokens": 29316928390.0, "step": 5360 }, { "epoch": 0.955614973262032, "grad_norm": 0.2060546875, "learning_rate": 1.2084631730875924e-05, "loss": 1.0246, "num_tokens": 29323209572.0, "step": 5361 }, { "epoch": 0.9557932263814617, "grad_norm": 0.185546875, "learning_rate": 1.2082052593758191e-05, "loss": 1.0329, "num_tokens": 29329493701.0, "step": 5362 }, { "epoch": 0.9559714795008912, "grad_norm": 0.205078125, "learning_rate": 1.2079473366472789e-05, "loss": 1.0357, "num_tokens": 29335766070.0, "step": 5363 }, { "epoch": 0.9561497326203209, "grad_norm": 0.212890625, "learning_rate": 1.207689404923464e-05, "loss": 1.0267, "num_tokens": 29342041150.0, "step": 5364 }, { "epoch": 0.9563279857397504, "grad_norm": 0.1962890625, "learning_rate": 1.2074314642258684e-05, "loss": 0.9913, "num_tokens": 29348295681.0, "step": 5365 }, { "epoch": 0.9565062388591801, "grad_norm": 0.2080078125, "learning_rate": 1.207173514575986e-05, "loss": 0.9958, "num_tokens": 29354554508.0, "step": 5366 }, { "epoch": 0.9566844919786096, "grad_norm": 0.201171875, "learning_rate": 1.2069155559953124e-05, "loss": 1.0532, "num_tokens": 29360838621.0, "step": 5367 }, { "epoch": 0.9568627450980393, "grad_norm": 0.1875, "learning_rate": 1.2066575885053423e-05, "loss": 1.0373, "num_tokens": 29367123015.0, "step": 5368 }, { "epoch": 0.9570409982174688, "grad_norm": 0.1845703125, "learning_rate": 1.2063996121275732e-05, "loss": 1.0285, "num_tokens": 29373379674.0, "step": 5369 }, { "epoch": 0.9572192513368984, "grad_norm": 0.2197265625, "learning_rate": 1.2061416268835015e-05, "loss": 0.9804, "num_tokens": 29379662021.0, "step": 5370 }, { "epoch": 0.957397504456328, "grad_norm": 0.1953125, "learning_rate": 1.2058836327946262e-05, "loss": 1.0043, "num_tokens": 29385945979.0, "step": 5371 }, { "epoch": 0.9575757575757575, "grad_norm": 0.1865234375, "learning_rate": 1.205625629882445e-05, "loss": 1.0093, "num_tokens": 29392222246.0, "step": 5372 }, { "epoch": 0.9577540106951872, "grad_norm": 0.21875, "learning_rate": 1.2053676181684575e-05, "loss": 1.0467, "num_tokens": 29398507515.0, "step": 5373 }, { "epoch": 0.9579322638146167, "grad_norm": 0.1982421875, "learning_rate": 1.2051095976741645e-05, "loss": 1.0477, "num_tokens": 29404764721.0, "step": 5374 }, { "epoch": 0.9581105169340464, "grad_norm": 0.1953125, "learning_rate": 1.2048515684210664e-05, "loss": 1.054, "num_tokens": 29411028640.0, "step": 5375 }, { "epoch": 0.9582887700534759, "grad_norm": 0.1982421875, "learning_rate": 1.2045935304306649e-05, "loss": 1.0128, "num_tokens": 29417299127.0, "step": 5376 }, { "epoch": 0.9584670231729056, "grad_norm": 0.203125, "learning_rate": 1.2043354837244622e-05, "loss": 1.0478, "num_tokens": 29423581321.0, "step": 5377 }, { "epoch": 0.9586452762923351, "grad_norm": 0.2041015625, "learning_rate": 1.204077428323962e-05, "loss": 1.0047, "num_tokens": 29429860949.0, "step": 5378 }, { "epoch": 0.9588235294117647, "grad_norm": 0.1904296875, "learning_rate": 1.2038193642506675e-05, "loss": 1.0262, "num_tokens": 29436116469.0, "step": 5379 }, { "epoch": 0.9590017825311943, "grad_norm": 0.193359375, "learning_rate": 1.2035612915260833e-05, "loss": 1.0185, "num_tokens": 29442355001.0, "step": 5380 }, { "epoch": 0.9591800356506239, "grad_norm": 0.2099609375, "learning_rate": 1.2033032101717146e-05, "loss": 1.0327, "num_tokens": 29448633581.0, "step": 5381 }, { "epoch": 0.9593582887700535, "grad_norm": 0.19140625, "learning_rate": 1.2030451202090681e-05, "loss": 0.9951, "num_tokens": 29454914786.0, "step": 5382 }, { "epoch": 0.9595365418894831, "grad_norm": 0.1962890625, "learning_rate": 1.2027870216596496e-05, "loss": 1.0289, "num_tokens": 29461152519.0, "step": 5383 }, { "epoch": 0.9597147950089127, "grad_norm": 0.1943359375, "learning_rate": 1.2025289145449665e-05, "loss": 1.0452, "num_tokens": 29467419861.0, "step": 5384 }, { "epoch": 0.9598930481283422, "grad_norm": 0.2060546875, "learning_rate": 1.2022707988865279e-05, "loss": 1.0185, "num_tokens": 29473697668.0, "step": 5385 }, { "epoch": 0.9600713012477718, "grad_norm": 0.203125, "learning_rate": 1.2020126747058417e-05, "loss": 0.9844, "num_tokens": 29479979913.0, "step": 5386 }, { "epoch": 0.9602495543672014, "grad_norm": 0.2001953125, "learning_rate": 1.2017545420244178e-05, "loss": 1.0244, "num_tokens": 29486264583.0, "step": 5387 }, { "epoch": 0.960427807486631, "grad_norm": 0.1943359375, "learning_rate": 1.2014964008637663e-05, "loss": 1.0408, "num_tokens": 29492515677.0, "step": 5388 }, { "epoch": 0.9606060606060606, "grad_norm": 0.1943359375, "learning_rate": 1.2012382512453986e-05, "loss": 1.0156, "num_tokens": 29498800198.0, "step": 5389 }, { "epoch": 0.9607843137254902, "grad_norm": 0.205078125, "learning_rate": 1.2009800931908256e-05, "loss": 1.0291, "num_tokens": 29505060316.0, "step": 5390 }, { "epoch": 0.9609625668449198, "grad_norm": 0.197265625, "learning_rate": 1.2007219267215603e-05, "loss": 1.0114, "num_tokens": 29511342434.0, "step": 5391 }, { "epoch": 0.9611408199643494, "grad_norm": 0.2080078125, "learning_rate": 1.2004637518591153e-05, "loss": 1.0197, "num_tokens": 29517597923.0, "step": 5392 }, { "epoch": 0.961319073083779, "grad_norm": 0.203125, "learning_rate": 1.2002055686250048e-05, "loss": 0.9904, "num_tokens": 29523844118.0, "step": 5393 }, { "epoch": 0.9614973262032086, "grad_norm": 0.1982421875, "learning_rate": 1.1999473770407433e-05, "loss": 1.0174, "num_tokens": 29530128210.0, "step": 5394 }, { "epoch": 0.9616755793226381, "grad_norm": 0.212890625, "learning_rate": 1.1996891771278455e-05, "loss": 0.9861, "num_tokens": 29536383388.0, "step": 5395 }, { "epoch": 0.9618538324420678, "grad_norm": 0.201171875, "learning_rate": 1.1994309689078275e-05, "loss": 1.0148, "num_tokens": 29542640379.0, "step": 5396 }, { "epoch": 0.9620320855614973, "grad_norm": 0.1953125, "learning_rate": 1.1991727524022054e-05, "loss": 1.0272, "num_tokens": 29548916601.0, "step": 5397 }, { "epoch": 0.9622103386809269, "grad_norm": 0.2119140625, "learning_rate": 1.1989145276324977e-05, "loss": 1.0406, "num_tokens": 29555182703.0, "step": 5398 }, { "epoch": 0.9623885918003565, "grad_norm": 0.2001953125, "learning_rate": 1.1986562946202209e-05, "loss": 1.0285, "num_tokens": 29561465219.0, "step": 5399 }, { "epoch": 0.9625668449197861, "grad_norm": 0.19921875, "learning_rate": 1.1983980533868942e-05, "loss": 1.0188, "num_tokens": 29567714538.0, "step": 5400 }, { "epoch": 0.9627450980392157, "grad_norm": 0.19140625, "learning_rate": 1.1981398039540374e-05, "loss": 0.9938, "num_tokens": 29573992262.0, "step": 5401 }, { "epoch": 0.9629233511586452, "grad_norm": 0.193359375, "learning_rate": 1.1978815463431697e-05, "loss": 1.0214, "num_tokens": 29580247616.0, "step": 5402 }, { "epoch": 0.9631016042780749, "grad_norm": 0.1953125, "learning_rate": 1.1976232805758117e-05, "loss": 1.0122, "num_tokens": 29586515769.0, "step": 5403 }, { "epoch": 0.9632798573975044, "grad_norm": 0.205078125, "learning_rate": 1.1973650066734855e-05, "loss": 1.0328, "num_tokens": 29592740423.0, "step": 5404 }, { "epoch": 0.9634581105169341, "grad_norm": 0.2001953125, "learning_rate": 1.1971067246577126e-05, "loss": 1.0138, "num_tokens": 29598929537.0, "step": 5405 }, { "epoch": 0.9636363636363636, "grad_norm": 0.1953125, "learning_rate": 1.196848434550016e-05, "loss": 1.0328, "num_tokens": 29605212413.0, "step": 5406 }, { "epoch": 0.9638146167557933, "grad_norm": 0.201171875, "learning_rate": 1.1965901363719183e-05, "loss": 0.9935, "num_tokens": 29611486335.0, "step": 5407 }, { "epoch": 0.9639928698752228, "grad_norm": 0.2080078125, "learning_rate": 1.1963318301449445e-05, "loss": 1.016, "num_tokens": 29617772178.0, "step": 5408 }, { "epoch": 0.9641711229946524, "grad_norm": 0.185546875, "learning_rate": 1.1960735158906191e-05, "loss": 0.9963, "num_tokens": 29624034041.0, "step": 5409 }, { "epoch": 0.964349376114082, "grad_norm": 0.1943359375, "learning_rate": 1.195815193630467e-05, "loss": 1.0444, "num_tokens": 29630291773.0, "step": 5410 }, { "epoch": 0.9645276292335115, "grad_norm": 0.193359375, "learning_rate": 1.1955568633860145e-05, "loss": 1.0435, "num_tokens": 29636550078.0, "step": 5411 }, { "epoch": 0.9647058823529412, "grad_norm": 0.1875, "learning_rate": 1.1952985251787885e-05, "loss": 1.0143, "num_tokens": 29642823960.0, "step": 5412 }, { "epoch": 0.9648841354723707, "grad_norm": 0.19140625, "learning_rate": 1.1950401790303164e-05, "loss": 1.0007, "num_tokens": 29649106467.0, "step": 5413 }, { "epoch": 0.9650623885918004, "grad_norm": 0.1923828125, "learning_rate": 1.1947818249621257e-05, "loss": 0.989, "num_tokens": 29655390990.0, "step": 5414 }, { "epoch": 0.9652406417112299, "grad_norm": 0.2021484375, "learning_rate": 1.1945234629957458e-05, "loss": 1.0471, "num_tokens": 29661658475.0, "step": 5415 }, { "epoch": 0.9654188948306596, "grad_norm": 0.193359375, "learning_rate": 1.1942650931527055e-05, "loss": 1.0309, "num_tokens": 29667909100.0, "step": 5416 }, { "epoch": 0.9655971479500891, "grad_norm": 0.1962890625, "learning_rate": 1.1940067154545353e-05, "loss": 1.0763, "num_tokens": 29674192368.0, "step": 5417 }, { "epoch": 0.9657754010695188, "grad_norm": 0.2041015625, "learning_rate": 1.1937483299227654e-05, "loss": 1.0335, "num_tokens": 29680442575.0, "step": 5418 }, { "epoch": 0.9659536541889483, "grad_norm": 0.1982421875, "learning_rate": 1.1934899365789276e-05, "loss": 0.9799, "num_tokens": 29686726323.0, "step": 5419 }, { "epoch": 0.966131907308378, "grad_norm": 0.1943359375, "learning_rate": 1.1932315354445533e-05, "loss": 1.0047, "num_tokens": 29693010732.0, "step": 5420 }, { "epoch": 0.9663101604278075, "grad_norm": 0.205078125, "learning_rate": 1.1929731265411759e-05, "loss": 1.0179, "num_tokens": 29699242333.0, "step": 5421 }, { "epoch": 0.966488413547237, "grad_norm": 0.2021484375, "learning_rate": 1.192714709890328e-05, "loss": 1.0271, "num_tokens": 29705524468.0, "step": 5422 }, { "epoch": 0.9666666666666667, "grad_norm": 0.201171875, "learning_rate": 1.1924562855135433e-05, "loss": 0.9972, "num_tokens": 29711792711.0, "step": 5423 }, { "epoch": 0.9668449197860962, "grad_norm": 0.201171875, "learning_rate": 1.1921978534323572e-05, "loss": 1.0124, "num_tokens": 29718075668.0, "step": 5424 }, { "epoch": 0.9670231729055259, "grad_norm": 0.189453125, "learning_rate": 1.1919394136683045e-05, "loss": 0.9851, "num_tokens": 29724359163.0, "step": 5425 }, { "epoch": 0.9672014260249554, "grad_norm": 0.197265625, "learning_rate": 1.191680966242921e-05, "loss": 0.9982, "num_tokens": 29730643124.0, "step": 5426 }, { "epoch": 0.967379679144385, "grad_norm": 0.208984375, "learning_rate": 1.191422511177743e-05, "loss": 1.0279, "num_tokens": 29736888711.0, "step": 5427 }, { "epoch": 0.9675579322638146, "grad_norm": 0.2001953125, "learning_rate": 1.1911640484943078e-05, "loss": 1.0198, "num_tokens": 29743170833.0, "step": 5428 }, { "epoch": 0.9677361853832442, "grad_norm": 0.197265625, "learning_rate": 1.1909055782141532e-05, "loss": 1.0083, "num_tokens": 29749456025.0, "step": 5429 }, { "epoch": 0.9679144385026738, "grad_norm": 0.20703125, "learning_rate": 1.1906471003588178e-05, "loss": 1.0378, "num_tokens": 29755696040.0, "step": 5430 }, { "epoch": 0.9680926916221034, "grad_norm": 0.205078125, "learning_rate": 1.1903886149498401e-05, "loss": 1.0252, "num_tokens": 29761978659.0, "step": 5431 }, { "epoch": 0.968270944741533, "grad_norm": 0.19921875, "learning_rate": 1.19013012200876e-05, "loss": 1.028, "num_tokens": 29768255230.0, "step": 5432 }, { "epoch": 0.9684491978609626, "grad_norm": 0.2041015625, "learning_rate": 1.189871621557118e-05, "loss": 1.0053, "num_tokens": 29774513022.0, "step": 5433 }, { "epoch": 0.9686274509803922, "grad_norm": 0.1884765625, "learning_rate": 1.1896131136164544e-05, "loss": 1.0187, "num_tokens": 29780772657.0, "step": 5434 }, { "epoch": 0.9688057040998217, "grad_norm": 0.185546875, "learning_rate": 1.1893545982083112e-05, "loss": 1.0106, "num_tokens": 29787056746.0, "step": 5435 }, { "epoch": 0.9689839572192513, "grad_norm": 0.1943359375, "learning_rate": 1.1890960753542306e-05, "loss": 1.0282, "num_tokens": 29793329685.0, "step": 5436 }, { "epoch": 0.9691622103386809, "grad_norm": 0.1962890625, "learning_rate": 1.1888375450757552e-05, "loss": 1.0135, "num_tokens": 29799565313.0, "step": 5437 }, { "epoch": 0.9693404634581105, "grad_norm": 0.197265625, "learning_rate": 1.188579007394428e-05, "loss": 1.0182, "num_tokens": 29805850993.0, "step": 5438 }, { "epoch": 0.9695187165775401, "grad_norm": 0.185546875, "learning_rate": 1.1883204623317938e-05, "loss": 1.0223, "num_tokens": 29812134601.0, "step": 5439 }, { "epoch": 0.9696969696969697, "grad_norm": 0.2001953125, "learning_rate": 1.1880619099093969e-05, "loss": 1.0225, "num_tokens": 29818388975.0, "step": 5440 }, { "epoch": 0.9698752228163993, "grad_norm": 0.205078125, "learning_rate": 1.1878033501487822e-05, "loss": 1.0144, "num_tokens": 29824665802.0, "step": 5441 }, { "epoch": 0.9700534759358289, "grad_norm": 0.201171875, "learning_rate": 1.1875447830714958e-05, "loss": 1.0352, "num_tokens": 29830949157.0, "step": 5442 }, { "epoch": 0.9702317290552585, "grad_norm": 0.19921875, "learning_rate": 1.1872862086990838e-05, "loss": 1.0247, "num_tokens": 29837222369.0, "step": 5443 }, { "epoch": 0.9704099821746881, "grad_norm": 0.208984375, "learning_rate": 1.187027627053094e-05, "loss": 1.0334, "num_tokens": 29843506664.0, "step": 5444 }, { "epoch": 0.9705882352941176, "grad_norm": 0.1982421875, "learning_rate": 1.1867690381550735e-05, "loss": 1.0275, "num_tokens": 29849789156.0, "step": 5445 }, { "epoch": 0.9707664884135473, "grad_norm": 0.1875, "learning_rate": 1.1865104420265705e-05, "loss": 1.0169, "num_tokens": 29856071055.0, "step": 5446 }, { "epoch": 0.9709447415329768, "grad_norm": 0.2080078125, "learning_rate": 1.1862518386891345e-05, "loss": 1.0256, "num_tokens": 29862328841.0, "step": 5447 }, { "epoch": 0.9711229946524064, "grad_norm": 0.216796875, "learning_rate": 1.1859932281643143e-05, "loss": 1.0668, "num_tokens": 29868584819.0, "step": 5448 }, { "epoch": 0.971301247771836, "grad_norm": 0.193359375, "learning_rate": 1.18573461047366e-05, "loss": 1.019, "num_tokens": 29874868277.0, "step": 5449 }, { "epoch": 0.9714795008912656, "grad_norm": 0.1923828125, "learning_rate": 1.1854759856387225e-05, "loss": 1.048, "num_tokens": 29881146235.0, "step": 5450 }, { "epoch": 0.9716577540106952, "grad_norm": 0.2236328125, "learning_rate": 1.1852173536810536e-05, "loss": 1.0155, "num_tokens": 29887397751.0, "step": 5451 }, { "epoch": 0.9718360071301247, "grad_norm": 0.2197265625, "learning_rate": 1.1849587146222041e-05, "loss": 1.0063, "num_tokens": 29893664232.0, "step": 5452 }, { "epoch": 0.9720142602495544, "grad_norm": 0.201171875, "learning_rate": 1.1847000684837268e-05, "loss": 0.9856, "num_tokens": 29899930257.0, "step": 5453 }, { "epoch": 0.9721925133689839, "grad_norm": 0.2197265625, "learning_rate": 1.1844414152871755e-05, "loss": 1.0085, "num_tokens": 29906192554.0, "step": 5454 }, { "epoch": 0.9723707664884136, "grad_norm": 0.2099609375, "learning_rate": 1.1841827550541027e-05, "loss": 1.0311, "num_tokens": 29912449990.0, "step": 5455 }, { "epoch": 0.9725490196078431, "grad_norm": 0.1953125, "learning_rate": 1.1839240878060634e-05, "loss": 1.0354, "num_tokens": 29918726382.0, "step": 5456 }, { "epoch": 0.9727272727272728, "grad_norm": 0.2099609375, "learning_rate": 1.1836654135646117e-05, "loss": 1.0079, "num_tokens": 29925008980.0, "step": 5457 }, { "epoch": 0.9729055258467023, "grad_norm": 0.2177734375, "learning_rate": 1.1834067323513039e-05, "loss": 1.0347, "num_tokens": 29931255155.0, "step": 5458 }, { "epoch": 0.973083778966132, "grad_norm": 0.1943359375, "learning_rate": 1.1831480441876952e-05, "loss": 1.0133, "num_tokens": 29937524560.0, "step": 5459 }, { "epoch": 0.9732620320855615, "grad_norm": 0.2021484375, "learning_rate": 1.1828893490953424e-05, "loss": 1.0191, "num_tokens": 29943806214.0, "step": 5460 }, { "epoch": 0.973440285204991, "grad_norm": 0.2197265625, "learning_rate": 1.1826306470958027e-05, "loss": 1.0535, "num_tokens": 29950069840.0, "step": 5461 }, { "epoch": 0.9736185383244207, "grad_norm": 0.2080078125, "learning_rate": 1.1823719382106336e-05, "loss": 1.0164, "num_tokens": 29956354457.0, "step": 5462 }, { "epoch": 0.9737967914438502, "grad_norm": 0.193359375, "learning_rate": 1.1821132224613934e-05, "loss": 0.9837, "num_tokens": 29962630346.0, "step": 5463 }, { "epoch": 0.9739750445632799, "grad_norm": 0.22265625, "learning_rate": 1.1818544998696411e-05, "loss": 1.0479, "num_tokens": 29968883011.0, "step": 5464 }, { "epoch": 0.9741532976827094, "grad_norm": 0.2294921875, "learning_rate": 1.1815957704569359e-05, "loss": 0.9848, "num_tokens": 29975155112.0, "step": 5465 }, { "epoch": 0.9743315508021391, "grad_norm": 0.1865234375, "learning_rate": 1.181337034244838e-05, "loss": 1.0251, "num_tokens": 29981440490.0, "step": 5466 }, { "epoch": 0.9745098039215686, "grad_norm": 0.2080078125, "learning_rate": 1.1810782912549082e-05, "loss": 1.0149, "num_tokens": 29987700864.0, "step": 5467 }, { "epoch": 0.9746880570409983, "grad_norm": 0.2197265625, "learning_rate": 1.1808195415087067e-05, "loss": 1.0566, "num_tokens": 29993970233.0, "step": 5468 }, { "epoch": 0.9748663101604278, "grad_norm": 0.1923828125, "learning_rate": 1.1805607850277958e-05, "loss": 1.0316, "num_tokens": 30000254447.0, "step": 5469 }, { "epoch": 0.9750445632798574, "grad_norm": 0.193359375, "learning_rate": 1.1803020218337376e-05, "loss": 1.0258, "num_tokens": 30006536155.0, "step": 5470 }, { "epoch": 0.975222816399287, "grad_norm": 0.2001953125, "learning_rate": 1.1800432519480953e-05, "loss": 1.022, "num_tokens": 30012818613.0, "step": 5471 }, { "epoch": 0.9754010695187165, "grad_norm": 0.21484375, "learning_rate": 1.1797844753924317e-05, "loss": 1.0373, "num_tokens": 30019101142.0, "step": 5472 }, { "epoch": 0.9755793226381462, "grad_norm": 0.1904296875, "learning_rate": 1.1795256921883108e-05, "loss": 1.0053, "num_tokens": 30025356869.0, "step": 5473 }, { "epoch": 0.9757575757575757, "grad_norm": 0.2060546875, "learning_rate": 1.1792669023572974e-05, "loss": 1.0259, "num_tokens": 30031639877.0, "step": 5474 }, { "epoch": 0.9759358288770054, "grad_norm": 0.2177734375, "learning_rate": 1.179008105920956e-05, "loss": 1.0435, "num_tokens": 30037864764.0, "step": 5475 }, { "epoch": 0.9761140819964349, "grad_norm": 0.201171875, "learning_rate": 1.1787493029008525e-05, "loss": 1.0132, "num_tokens": 30044148477.0, "step": 5476 }, { "epoch": 0.9762923351158646, "grad_norm": 0.189453125, "learning_rate": 1.1784904933185528e-05, "loss": 1.0565, "num_tokens": 30050420270.0, "step": 5477 }, { "epoch": 0.9764705882352941, "grad_norm": 0.1962890625, "learning_rate": 1.1782316771956239e-05, "loss": 1.0203, "num_tokens": 30056703949.0, "step": 5478 }, { "epoch": 0.9766488413547237, "grad_norm": 0.197265625, "learning_rate": 1.1779728545536328e-05, "loss": 1.0029, "num_tokens": 30062946608.0, "step": 5479 }, { "epoch": 0.9768270944741533, "grad_norm": 0.1923828125, "learning_rate": 1.177714025414147e-05, "loss": 1.03, "num_tokens": 30069210071.0, "step": 5480 }, { "epoch": 0.9770053475935829, "grad_norm": 0.1923828125, "learning_rate": 1.1774551897987353e-05, "loss": 0.9987, "num_tokens": 30075482633.0, "step": 5481 }, { "epoch": 0.9771836007130125, "grad_norm": 0.205078125, "learning_rate": 1.1771963477289661e-05, "loss": 1.0196, "num_tokens": 30081765117.0, "step": 5482 }, { "epoch": 0.9773618538324421, "grad_norm": 0.1865234375, "learning_rate": 1.1769374992264092e-05, "loss": 1.0185, "num_tokens": 30088003696.0, "step": 5483 }, { "epoch": 0.9775401069518717, "grad_norm": 0.203125, "learning_rate": 1.1766786443126337e-05, "loss": 1.0099, "num_tokens": 30094288059.0, "step": 5484 }, { "epoch": 0.9777183600713012, "grad_norm": 0.201171875, "learning_rate": 1.1764197830092107e-05, "loss": 0.978, "num_tokens": 30100572043.0, "step": 5485 }, { "epoch": 0.9778966131907308, "grad_norm": 0.1962890625, "learning_rate": 1.1761609153377117e-05, "loss": 1.01, "num_tokens": 30106855170.0, "step": 5486 }, { "epoch": 0.9780748663101604, "grad_norm": 0.1962890625, "learning_rate": 1.1759020413197066e-05, "loss": 1.007, "num_tokens": 30113139399.0, "step": 5487 }, { "epoch": 0.97825311942959, "grad_norm": 0.197265625, "learning_rate": 1.1756431609767688e-05, "loss": 0.9833, "num_tokens": 30119423987.0, "step": 5488 }, { "epoch": 0.9784313725490196, "grad_norm": 0.2080078125, "learning_rate": 1.1753842743304702e-05, "loss": 1.0121, "num_tokens": 30125705757.0, "step": 5489 }, { "epoch": 0.9786096256684492, "grad_norm": 0.203125, "learning_rate": 1.1751253814023845e-05, "loss": 1.0327, "num_tokens": 30131970047.0, "step": 5490 }, { "epoch": 0.9787878787878788, "grad_norm": 0.1884765625, "learning_rate": 1.1748664822140842e-05, "loss": 1.0269, "num_tokens": 30138218852.0, "step": 5491 }, { "epoch": 0.9789661319073084, "grad_norm": 0.2021484375, "learning_rate": 1.1746075767871448e-05, "loss": 0.9974, "num_tokens": 30144502901.0, "step": 5492 }, { "epoch": 0.979144385026738, "grad_norm": 0.189453125, "learning_rate": 1.1743486651431395e-05, "loss": 1.0255, "num_tokens": 30150787361.0, "step": 5493 }, { "epoch": 0.9793226381461676, "grad_norm": 0.201171875, "learning_rate": 1.1740897473036447e-05, "loss": 1.0315, "num_tokens": 30157052285.0, "step": 5494 }, { "epoch": 0.9795008912655971, "grad_norm": 0.1923828125, "learning_rate": 1.1738308232902359e-05, "loss": 0.9836, "num_tokens": 30163304972.0, "step": 5495 }, { "epoch": 0.9796791443850268, "grad_norm": 0.1884765625, "learning_rate": 1.1735718931244881e-05, "loss": 1.0137, "num_tokens": 30169550032.0, "step": 5496 }, { "epoch": 0.9798573975044563, "grad_norm": 0.189453125, "learning_rate": 1.1733129568279797e-05, "loss": 1.044, "num_tokens": 30175834285.0, "step": 5497 }, { "epoch": 0.9800356506238859, "grad_norm": 0.1943359375, "learning_rate": 1.1730540144222868e-05, "loss": 1.0377, "num_tokens": 30182100450.0, "step": 5498 }, { "epoch": 0.9802139037433155, "grad_norm": 0.1923828125, "learning_rate": 1.1727950659289875e-05, "loss": 1.009, "num_tokens": 30188379621.0, "step": 5499 }, { "epoch": 0.9803921568627451, "grad_norm": 0.19921875, "learning_rate": 1.1725361113696598e-05, "loss": 1.0312, "num_tokens": 30194662714.0, "step": 5500 }, { "epoch": 0.9805704099821747, "grad_norm": 0.193359375, "learning_rate": 1.1722771507658826e-05, "loss": 1.002, "num_tokens": 30200947608.0, "step": 5501 }, { "epoch": 0.9807486631016042, "grad_norm": 0.193359375, "learning_rate": 1.1720181841392353e-05, "loss": 1.0387, "num_tokens": 30207194323.0, "step": 5502 }, { "epoch": 0.9809269162210339, "grad_norm": 0.1953125, "learning_rate": 1.1717592115112973e-05, "loss": 1.0161, "num_tokens": 30213479285.0, "step": 5503 }, { "epoch": 0.9811051693404634, "grad_norm": 0.2021484375, "learning_rate": 1.1715002329036494e-05, "loss": 1.0322, "num_tokens": 30219764838.0, "step": 5504 }, { "epoch": 0.9812834224598931, "grad_norm": 0.1953125, "learning_rate": 1.1712412483378714e-05, "loss": 1.054, "num_tokens": 30225993687.0, "step": 5505 }, { "epoch": 0.9814616755793226, "grad_norm": 0.1962890625, "learning_rate": 1.1709822578355456e-05, "loss": 1.0019, "num_tokens": 30232276584.0, "step": 5506 }, { "epoch": 0.9816399286987523, "grad_norm": 0.2099609375, "learning_rate": 1.170723261418253e-05, "loss": 1.0147, "num_tokens": 30238541199.0, "step": 5507 }, { "epoch": 0.9818181818181818, "grad_norm": 0.2041015625, "learning_rate": 1.1704642591075762e-05, "loss": 1.0338, "num_tokens": 30244784330.0, "step": 5508 }, { "epoch": 0.9819964349376115, "grad_norm": 0.2021484375, "learning_rate": 1.1702052509250976e-05, "loss": 1.0238, "num_tokens": 30251066688.0, "step": 5509 }, { "epoch": 0.982174688057041, "grad_norm": 0.201171875, "learning_rate": 1.1699462368924009e-05, "loss": 1.0359, "num_tokens": 30257349793.0, "step": 5510 }, { "epoch": 0.9823529411764705, "grad_norm": 0.2080078125, "learning_rate": 1.1696872170310692e-05, "loss": 0.994, "num_tokens": 30263606333.0, "step": 5511 }, { "epoch": 0.9825311942959002, "grad_norm": 0.1943359375, "learning_rate": 1.1694281913626868e-05, "loss": 1.0128, "num_tokens": 30269871699.0, "step": 5512 }, { "epoch": 0.9827094474153297, "grad_norm": 0.193359375, "learning_rate": 1.1691691599088393e-05, "loss": 1.027, "num_tokens": 30276155931.0, "step": 5513 }, { "epoch": 0.9828877005347594, "grad_norm": 0.2001953125, "learning_rate": 1.1689101226911101e-05, "loss": 1.0321, "num_tokens": 30282441489.0, "step": 5514 }, { "epoch": 0.9830659536541889, "grad_norm": 0.2021484375, "learning_rate": 1.1686510797310866e-05, "loss": 1.0576, "num_tokens": 30288724826.0, "step": 5515 }, { "epoch": 0.9832442067736186, "grad_norm": 0.1875, "learning_rate": 1.1683920310503535e-05, "loss": 1.0303, "num_tokens": 30295008812.0, "step": 5516 }, { "epoch": 0.9834224598930481, "grad_norm": 0.1982421875, "learning_rate": 1.1681329766704985e-05, "loss": 1.0, "num_tokens": 30301293552.0, "step": 5517 }, { "epoch": 0.9836007130124778, "grad_norm": 0.212890625, "learning_rate": 1.167873916613108e-05, "loss": 0.9967, "num_tokens": 30307576912.0, "step": 5518 }, { "epoch": 0.9837789661319073, "grad_norm": 0.2041015625, "learning_rate": 1.1676148508997693e-05, "loss": 1.0373, "num_tokens": 30313839932.0, "step": 5519 }, { "epoch": 0.983957219251337, "grad_norm": 0.185546875, "learning_rate": 1.1673557795520712e-05, "loss": 1.0209, "num_tokens": 30320123200.0, "step": 5520 }, { "epoch": 0.9841354723707665, "grad_norm": 0.2021484375, "learning_rate": 1.1670967025916015e-05, "loss": 0.9886, "num_tokens": 30326405565.0, "step": 5521 }, { "epoch": 0.984313725490196, "grad_norm": 0.216796875, "learning_rate": 1.1668376200399499e-05, "loss": 1.0211, "num_tokens": 30332662853.0, "step": 5522 }, { "epoch": 0.9844919786096257, "grad_norm": 0.1982421875, "learning_rate": 1.1665785319187046e-05, "loss": 1.0499, "num_tokens": 30338946694.0, "step": 5523 }, { "epoch": 0.9846702317290552, "grad_norm": 0.19921875, "learning_rate": 1.1663194382494565e-05, "loss": 1.0244, "num_tokens": 30345229532.0, "step": 5524 }, { "epoch": 0.9848484848484849, "grad_norm": 0.203125, "learning_rate": 1.1660603390537955e-05, "loss": 1.005, "num_tokens": 30351512842.0, "step": 5525 }, { "epoch": 0.9850267379679144, "grad_norm": 0.1875, "learning_rate": 1.1658012343533123e-05, "loss": 1.0411, "num_tokens": 30357762703.0, "step": 5526 }, { "epoch": 0.985204991087344, "grad_norm": 0.201171875, "learning_rate": 1.1655421241695985e-05, "loss": 1.0225, "num_tokens": 30364034781.0, "step": 5527 }, { "epoch": 0.9853832442067736, "grad_norm": 0.2021484375, "learning_rate": 1.1652830085242458e-05, "loss": 0.9935, "num_tokens": 30370320208.0, "step": 5528 }, { "epoch": 0.9855614973262032, "grad_norm": 0.201171875, "learning_rate": 1.165023887438846e-05, "loss": 1.0346, "num_tokens": 30376584531.0, "step": 5529 }, { "epoch": 0.9857397504456328, "grad_norm": 0.189453125, "learning_rate": 1.1647647609349919e-05, "loss": 1.0035, "num_tokens": 30382865199.0, "step": 5530 }, { "epoch": 0.9859180035650624, "grad_norm": 0.193359375, "learning_rate": 1.1645056290342766e-05, "loss": 0.9827, "num_tokens": 30389148945.0, "step": 5531 }, { "epoch": 0.986096256684492, "grad_norm": 0.2080078125, "learning_rate": 1.1642464917582939e-05, "loss": 1.0184, "num_tokens": 30395398164.0, "step": 5532 }, { "epoch": 0.9862745098039216, "grad_norm": 0.1953125, "learning_rate": 1.1639873491286371e-05, "loss": 1.0121, "num_tokens": 30401677766.0, "step": 5533 }, { "epoch": 0.9864527629233512, "grad_norm": 0.1875, "learning_rate": 1.1637282011669009e-05, "loss": 1.0221, "num_tokens": 30407947006.0, "step": 5534 }, { "epoch": 0.9866310160427807, "grad_norm": 0.1943359375, "learning_rate": 1.1634690478946803e-05, "loss": 1.0186, "num_tokens": 30414230303.0, "step": 5535 }, { "epoch": 0.9868092691622103, "grad_norm": 0.1953125, "learning_rate": 1.1632098893335708e-05, "loss": 1.0077, "num_tokens": 30420514616.0, "step": 5536 }, { "epoch": 0.9869875222816399, "grad_norm": 0.2080078125, "learning_rate": 1.1629507255051675e-05, "loss": 1.0214, "num_tokens": 30426773018.0, "step": 5537 }, { "epoch": 0.9871657754010695, "grad_norm": 0.197265625, "learning_rate": 1.1626915564310673e-05, "loss": 1.0387, "num_tokens": 30433030756.0, "step": 5538 }, { "epoch": 0.9873440285204991, "grad_norm": 0.201171875, "learning_rate": 1.1624323821328658e-05, "loss": 1.0239, "num_tokens": 30439315124.0, "step": 5539 }, { "epoch": 0.9875222816399287, "grad_norm": 0.19921875, "learning_rate": 1.1621732026321615e-05, "loss": 1.021, "num_tokens": 30445579771.0, "step": 5540 }, { "epoch": 0.9877005347593583, "grad_norm": 0.19921875, "learning_rate": 1.1619140179505505e-05, "loss": 1.027, "num_tokens": 30451857496.0, "step": 5541 }, { "epoch": 0.9878787878787879, "grad_norm": 0.19140625, "learning_rate": 1.1616548281096314e-05, "loss": 1.0113, "num_tokens": 30458125654.0, "step": 5542 }, { "epoch": 0.9880570409982175, "grad_norm": 0.1962890625, "learning_rate": 1.1613956331310026e-05, "loss": 1.0128, "num_tokens": 30464406182.0, "step": 5543 }, { "epoch": 0.9882352941176471, "grad_norm": 0.197265625, "learning_rate": 1.1611364330362626e-05, "loss": 1.0264, "num_tokens": 30470686350.0, "step": 5544 }, { "epoch": 0.9884135472370766, "grad_norm": 0.1943359375, "learning_rate": 1.1608772278470106e-05, "loss": 1.0351, "num_tokens": 30476965665.0, "step": 5545 }, { "epoch": 0.9885918003565063, "grad_norm": 0.1884765625, "learning_rate": 1.1606180175848462e-05, "loss": 1.0477, "num_tokens": 30483249346.0, "step": 5546 }, { "epoch": 0.9887700534759358, "grad_norm": 0.193359375, "learning_rate": 1.16035880227137e-05, "loss": 1.0324, "num_tokens": 30489533180.0, "step": 5547 }, { "epoch": 0.9889483065953654, "grad_norm": 0.19921875, "learning_rate": 1.160099581928182e-05, "loss": 1.0413, "num_tokens": 30495816279.0, "step": 5548 }, { "epoch": 0.989126559714795, "grad_norm": 0.197265625, "learning_rate": 1.159840356576883e-05, "loss": 1.014, "num_tokens": 30502098234.0, "step": 5549 }, { "epoch": 0.9893048128342246, "grad_norm": 0.1884765625, "learning_rate": 1.1595811262390744e-05, "loss": 1.024, "num_tokens": 30508354109.0, "step": 5550 }, { "epoch": 0.9894830659536542, "grad_norm": 0.185546875, "learning_rate": 1.159321890936358e-05, "loss": 1.0253, "num_tokens": 30514637488.0, "step": 5551 }, { "epoch": 0.9896613190730837, "grad_norm": 0.2001953125, "learning_rate": 1.1590626506903357e-05, "loss": 1.0262, "num_tokens": 30520859014.0, "step": 5552 }, { "epoch": 0.9898395721925134, "grad_norm": 0.1875, "learning_rate": 1.1588034055226102e-05, "loss": 1.0049, "num_tokens": 30527142763.0, "step": 5553 }, { "epoch": 0.9900178253119429, "grad_norm": 0.1943359375, "learning_rate": 1.1585441554547847e-05, "loss": 1.0169, "num_tokens": 30533423591.0, "step": 5554 }, { "epoch": 0.9901960784313726, "grad_norm": 0.1962890625, "learning_rate": 1.1582849005084625e-05, "loss": 1.0304, "num_tokens": 30539707308.0, "step": 5555 }, { "epoch": 0.9903743315508021, "grad_norm": 0.1962890625, "learning_rate": 1.1580256407052472e-05, "loss": 1.0035, "num_tokens": 30545964277.0, "step": 5556 }, { "epoch": 0.9905525846702318, "grad_norm": 0.2021484375, "learning_rate": 1.1577663760667426e-05, "loss": 1.0296, "num_tokens": 30552226708.0, "step": 5557 }, { "epoch": 0.9907308377896613, "grad_norm": 0.1904296875, "learning_rate": 1.157507106614554e-05, "loss": 1.0233, "num_tokens": 30558508709.0, "step": 5558 }, { "epoch": 0.990909090909091, "grad_norm": 0.193359375, "learning_rate": 1.1572478323702861e-05, "loss": 1.0346, "num_tokens": 30564788318.0, "step": 5559 }, { "epoch": 0.9910873440285205, "grad_norm": 0.205078125, "learning_rate": 1.1569885533555438e-05, "loss": 1.0456, "num_tokens": 30571070380.0, "step": 5560 }, { "epoch": 0.99126559714795, "grad_norm": 0.1943359375, "learning_rate": 1.1567292695919334e-05, "loss": 1.0523, "num_tokens": 30577351690.0, "step": 5561 }, { "epoch": 0.9914438502673797, "grad_norm": 0.20703125, "learning_rate": 1.156469981101061e-05, "loss": 1.0412, "num_tokens": 30583611838.0, "step": 5562 }, { "epoch": 0.9916221033868092, "grad_norm": 0.2119140625, "learning_rate": 1.1562106879045336e-05, "loss": 1.0326, "num_tokens": 30589884647.0, "step": 5563 }, { "epoch": 0.9918003565062389, "grad_norm": 0.197265625, "learning_rate": 1.1559513900239571e-05, "loss": 1.0018, "num_tokens": 30596160108.0, "step": 5564 }, { "epoch": 0.9919786096256684, "grad_norm": 0.2001953125, "learning_rate": 1.1556920874809398e-05, "loss": 1.0187, "num_tokens": 30602444065.0, "step": 5565 }, { "epoch": 0.9921568627450981, "grad_norm": 0.2109375, "learning_rate": 1.1554327802970886e-05, "loss": 1.0054, "num_tokens": 30608727805.0, "step": 5566 }, { "epoch": 0.9923351158645276, "grad_norm": 0.1962890625, "learning_rate": 1.1551734684940126e-05, "loss": 0.9945, "num_tokens": 30615011426.0, "step": 5567 }, { "epoch": 0.9925133689839573, "grad_norm": 0.197265625, "learning_rate": 1.1549141520933197e-05, "loss": 1.041, "num_tokens": 30621295906.0, "step": 5568 }, { "epoch": 0.9926916221033868, "grad_norm": 0.1923828125, "learning_rate": 1.1546548311166186e-05, "loss": 1.0218, "num_tokens": 30627559881.0, "step": 5569 }, { "epoch": 0.9928698752228164, "grad_norm": 0.1943359375, "learning_rate": 1.154395505585519e-05, "loss": 1.0045, "num_tokens": 30633817727.0, "step": 5570 }, { "epoch": 0.993048128342246, "grad_norm": 0.1962890625, "learning_rate": 1.1541361755216306e-05, "loss": 1.0254, "num_tokens": 30640101345.0, "step": 5571 }, { "epoch": 0.9932263814616756, "grad_norm": 0.1884765625, "learning_rate": 1.1538768409465633e-05, "loss": 1.0436, "num_tokens": 30646382177.0, "step": 5572 }, { "epoch": 0.9934046345811052, "grad_norm": 0.193359375, "learning_rate": 1.1536175018819273e-05, "loss": 1.0228, "num_tokens": 30652664793.0, "step": 5573 }, { "epoch": 0.9935828877005347, "grad_norm": 0.1884765625, "learning_rate": 1.1533581583493335e-05, "loss": 1.0311, "num_tokens": 30658948777.0, "step": 5574 }, { "epoch": 0.9937611408199644, "grad_norm": 0.1962890625, "learning_rate": 1.1530988103703933e-05, "loss": 1.0107, "num_tokens": 30665174223.0, "step": 5575 }, { "epoch": 0.9939393939393939, "grad_norm": 0.189453125, "learning_rate": 1.1528394579667175e-05, "loss": 1.0312, "num_tokens": 30671444623.0, "step": 5576 }, { "epoch": 0.9941176470588236, "grad_norm": 0.189453125, "learning_rate": 1.152580101159919e-05, "loss": 1.0167, "num_tokens": 30677727166.0, "step": 5577 }, { "epoch": 0.9942959001782531, "grad_norm": 0.1943359375, "learning_rate": 1.1523207399716094e-05, "loss": 1.0291, "num_tokens": 30684010035.0, "step": 5578 }, { "epoch": 0.9944741532976827, "grad_norm": 0.1982421875, "learning_rate": 1.1520613744234013e-05, "loss": 1.005, "num_tokens": 30690293911.0, "step": 5579 }, { "epoch": 0.9946524064171123, "grad_norm": 0.1865234375, "learning_rate": 1.151802004536908e-05, "loss": 0.9946, "num_tokens": 30696538621.0, "step": 5580 }, { "epoch": 0.9948306595365419, "grad_norm": 0.1826171875, "learning_rate": 1.1515426303337428e-05, "loss": 1.0216, "num_tokens": 30702823011.0, "step": 5581 }, { "epoch": 0.9950089126559715, "grad_norm": 0.18359375, "learning_rate": 1.1512832518355192e-05, "loss": 1.0262, "num_tokens": 30709085307.0, "step": 5582 }, { "epoch": 0.9951871657754011, "grad_norm": 0.1845703125, "learning_rate": 1.1510238690638511e-05, "loss": 1.0073, "num_tokens": 30715360229.0, "step": 5583 }, { "epoch": 0.9953654188948307, "grad_norm": 0.19140625, "learning_rate": 1.1507644820403536e-05, "loss": 1.0393, "num_tokens": 30721634004.0, "step": 5584 }, { "epoch": 0.9955436720142602, "grad_norm": 0.1923828125, "learning_rate": 1.1505050907866409e-05, "loss": 1.0548, "num_tokens": 30727898340.0, "step": 5585 }, { "epoch": 0.9957219251336898, "grad_norm": 0.1875, "learning_rate": 1.1502456953243284e-05, "loss": 1.0158, "num_tokens": 30734177107.0, "step": 5586 }, { "epoch": 0.9959001782531194, "grad_norm": 0.19140625, "learning_rate": 1.1499862956750317e-05, "loss": 1.0124, "num_tokens": 30740433852.0, "step": 5587 }, { "epoch": 0.996078431372549, "grad_norm": 0.1806640625, "learning_rate": 1.1497268918603658e-05, "loss": 1.0224, "num_tokens": 30746681596.0, "step": 5588 }, { "epoch": 0.9962566844919786, "grad_norm": 0.2021484375, "learning_rate": 1.1494674839019478e-05, "loss": 1.035, "num_tokens": 30752934723.0, "step": 5589 }, { "epoch": 0.9964349376114082, "grad_norm": 0.19921875, "learning_rate": 1.149208071821394e-05, "loss": 1.0115, "num_tokens": 30759211862.0, "step": 5590 }, { "epoch": 0.9966131907308378, "grad_norm": 0.189453125, "learning_rate": 1.1489486556403213e-05, "loss": 1.0124, "num_tokens": 30765461906.0, "step": 5591 }, { "epoch": 0.9967914438502674, "grad_norm": 0.1962890625, "learning_rate": 1.1486892353803463e-05, "loss": 1.0167, "num_tokens": 30771745742.0, "step": 5592 }, { "epoch": 0.996969696969697, "grad_norm": 0.1923828125, "learning_rate": 1.1484298110630876e-05, "loss": 1.0265, "num_tokens": 30778020583.0, "step": 5593 }, { "epoch": 0.9971479500891266, "grad_norm": 0.1943359375, "learning_rate": 1.148170382710162e-05, "loss": 1.0061, "num_tokens": 30784304124.0, "step": 5594 }, { "epoch": 0.9973262032085561, "grad_norm": 0.2021484375, "learning_rate": 1.1479109503431887e-05, "loss": 0.9895, "num_tokens": 30790588490.0, "step": 5595 }, { "epoch": 0.9975044563279858, "grad_norm": 0.1904296875, "learning_rate": 1.1476515139837854e-05, "loss": 1.0312, "num_tokens": 30796872269.0, "step": 5596 }, { "epoch": 0.9976827094474153, "grad_norm": 0.193359375, "learning_rate": 1.1473920736535714e-05, "loss": 1.0414, "num_tokens": 30803155270.0, "step": 5597 }, { "epoch": 0.9978609625668449, "grad_norm": 0.201171875, "learning_rate": 1.147132629374166e-05, "loss": 0.9858, "num_tokens": 30809413379.0, "step": 5598 }, { "epoch": 0.9980392156862745, "grad_norm": 0.1982421875, "learning_rate": 1.1468731811671885e-05, "loss": 1.0139, "num_tokens": 30815696214.0, "step": 5599 }, { "epoch": 0.9982174688057041, "grad_norm": 0.201171875, "learning_rate": 1.1466137290542589e-05, "loss": 1.0104, "num_tokens": 30821956009.0, "step": 5600 }, { "epoch": 0.9983957219251337, "grad_norm": 0.203125, "learning_rate": 1.1463542730569977e-05, "loss": 1.0168, "num_tokens": 30828239023.0, "step": 5601 }, { "epoch": 0.9985739750445632, "grad_norm": 0.2138671875, "learning_rate": 1.146094813197025e-05, "loss": 1.0393, "num_tokens": 30834523424.0, "step": 5602 }, { "epoch": 0.9987522281639929, "grad_norm": 0.1962890625, "learning_rate": 1.1458353494959613e-05, "loss": 1.0173, "num_tokens": 30840803833.0, "step": 5603 }, { "epoch": 0.9989304812834224, "grad_norm": 0.1923828125, "learning_rate": 1.1455758819754287e-05, "loss": 1.0195, "num_tokens": 30847058142.0, "step": 5604 }, { "epoch": 0.9991087344028521, "grad_norm": 0.1943359375, "learning_rate": 1.1453164106570483e-05, "loss": 1.0551, "num_tokens": 30853342763.0, "step": 5605 }, { "epoch": 0.9992869875222816, "grad_norm": 0.1923828125, "learning_rate": 1.1450569355624415e-05, "loss": 1.0176, "num_tokens": 30859601347.0, "step": 5606 }, { "epoch": 0.9994652406417113, "grad_norm": 0.2060546875, "learning_rate": 1.1447974567132307e-05, "loss": 1.0081, "num_tokens": 30865803815.0, "step": 5607 }, { "epoch": 0.9996434937611408, "grad_norm": 0.197265625, "learning_rate": 1.1445379741310382e-05, "loss": 1.0457, "num_tokens": 30872070266.0, "step": 5608 }, { "epoch": 0.9998217468805705, "grad_norm": 0.1826171875, "learning_rate": 1.1442784878374877e-05, "loss": 1.0594, "num_tokens": 30878341741.0, "step": 5609 }, { "epoch": 1.0, "grad_norm": 0.1982421875, "learning_rate": 1.144018997854201e-05, "loss": 1.009, "num_tokens": 30884624159.0, "step": 5610 }, { "epoch": 1.0001782531194296, "grad_norm": 0.2158203125, "learning_rate": 1.1437595042028018e-05, "loss": 1.0419, "num_tokens": 30890905362.0, "step": 5611 }, { "epoch": 1.000356506238859, "grad_norm": 0.205078125, "learning_rate": 1.1435000069049138e-05, "loss": 1.0013, "num_tokens": 30897189164.0, "step": 5612 }, { "epoch": 1.0005347593582887, "grad_norm": 0.19921875, "learning_rate": 1.1432405059821612e-05, "loss": 1.0364, "num_tokens": 30903451945.0, "step": 5613 }, { "epoch": 1.0007130124777184, "grad_norm": 0.193359375, "learning_rate": 1.142981001456168e-05, "loss": 1.0116, "num_tokens": 30909732141.0, "step": 5614 }, { "epoch": 1.000891265597148, "grad_norm": 0.2001953125, "learning_rate": 1.1427214933485588e-05, "loss": 1.0158, "num_tokens": 30915953100.0, "step": 5615 }, { "epoch": 1.0010695187165775, "grad_norm": 0.1943359375, "learning_rate": 1.1424619816809586e-05, "loss": 1.0316, "num_tokens": 30922222804.0, "step": 5616 }, { "epoch": 1.001247771836007, "grad_norm": 0.1953125, "learning_rate": 1.1422024664749926e-05, "loss": 1.0103, "num_tokens": 30928506489.0, "step": 5617 }, { "epoch": 1.0014260249554368, "grad_norm": 0.1904296875, "learning_rate": 1.1419429477522861e-05, "loss": 0.991, "num_tokens": 30934768133.0, "step": 5618 }, { "epoch": 1.0016042780748664, "grad_norm": 0.185546875, "learning_rate": 1.1416834255344647e-05, "loss": 1.002, "num_tokens": 30941034271.0, "step": 5619 }, { "epoch": 1.0017825311942958, "grad_norm": 0.2001953125, "learning_rate": 1.1414238998431549e-05, "loss": 1.0148, "num_tokens": 30947279944.0, "step": 5620 }, { "epoch": 1.0019607843137255, "grad_norm": 0.19921875, "learning_rate": 1.1411643706999826e-05, "loss": 1.0316, "num_tokens": 30953564712.0, "step": 5621 }, { "epoch": 1.0021390374331551, "grad_norm": 0.189453125, "learning_rate": 1.1409048381265745e-05, "loss": 1.0067, "num_tokens": 30959817560.0, "step": 5622 }, { "epoch": 1.0023172905525846, "grad_norm": 0.19921875, "learning_rate": 1.1406453021445578e-05, "loss": 1.0002, "num_tokens": 30966084393.0, "step": 5623 }, { "epoch": 1.0024955436720142, "grad_norm": 0.2060546875, "learning_rate": 1.1403857627755594e-05, "loss": 1.024, "num_tokens": 30972362018.0, "step": 5624 }, { "epoch": 1.0026737967914439, "grad_norm": 0.1884765625, "learning_rate": 1.140126220041207e-05, "loss": 1.0124, "num_tokens": 30978626420.0, "step": 5625 }, { "epoch": 1.0028520499108735, "grad_norm": 0.185546875, "learning_rate": 1.1398666739631279e-05, "loss": 1.0454, "num_tokens": 30984909457.0, "step": 5626 }, { "epoch": 1.003030303030303, "grad_norm": 0.2001953125, "learning_rate": 1.1396071245629505e-05, "loss": 0.9884, "num_tokens": 30991186868.0, "step": 5627 }, { "epoch": 1.0032085561497326, "grad_norm": 0.203125, "learning_rate": 1.1393475718623032e-05, "loss": 1.0116, "num_tokens": 30997435031.0, "step": 5628 }, { "epoch": 1.0033868092691622, "grad_norm": 0.1904296875, "learning_rate": 1.1390880158828144e-05, "loss": 1.0416, "num_tokens": 31003717027.0, "step": 5629 }, { "epoch": 1.0035650623885919, "grad_norm": 0.1845703125, "learning_rate": 1.1388284566461126e-05, "loss": 0.9938, "num_tokens": 31010000869.0, "step": 5630 }, { "epoch": 1.0037433155080213, "grad_norm": 0.197265625, "learning_rate": 1.1385688941738277e-05, "loss": 1.0436, "num_tokens": 31016260077.0, "step": 5631 }, { "epoch": 1.003921568627451, "grad_norm": 0.1962890625, "learning_rate": 1.1383093284875886e-05, "loss": 0.9981, "num_tokens": 31022533726.0, "step": 5632 }, { "epoch": 1.0040998217468806, "grad_norm": 0.2060546875, "learning_rate": 1.138049759609025e-05, "loss": 1.0391, "num_tokens": 31028816754.0, "step": 5633 }, { "epoch": 1.0042780748663103, "grad_norm": 0.2001953125, "learning_rate": 1.137790187559767e-05, "loss": 1.0002, "num_tokens": 31035094311.0, "step": 5634 }, { "epoch": 1.0044563279857397, "grad_norm": 0.1982421875, "learning_rate": 1.1375306123614442e-05, "loss": 1.0101, "num_tokens": 31041377111.0, "step": 5635 }, { "epoch": 1.0046345811051693, "grad_norm": 0.1865234375, "learning_rate": 1.1372710340356882e-05, "loss": 0.9994, "num_tokens": 31047662722.0, "step": 5636 }, { "epoch": 1.004812834224599, "grad_norm": 0.1962890625, "learning_rate": 1.1370114526041287e-05, "loss": 1.0415, "num_tokens": 31053944886.0, "step": 5637 }, { "epoch": 1.0049910873440284, "grad_norm": 0.201171875, "learning_rate": 1.1367518680883974e-05, "loss": 1.0022, "num_tokens": 31060197995.0, "step": 5638 }, { "epoch": 1.005169340463458, "grad_norm": 0.1904296875, "learning_rate": 1.1364922805101246e-05, "loss": 1.0211, "num_tokens": 31066449371.0, "step": 5639 }, { "epoch": 1.0053475935828877, "grad_norm": 0.201171875, "learning_rate": 1.1362326898909427e-05, "loss": 1.0046, "num_tokens": 31072734287.0, "step": 5640 }, { "epoch": 1.0055258467023174, "grad_norm": 0.1953125, "learning_rate": 1.1359730962524829e-05, "loss": 1.0097, "num_tokens": 31079015645.0, "step": 5641 }, { "epoch": 1.0057040998217468, "grad_norm": 0.1884765625, "learning_rate": 1.1357134996163775e-05, "loss": 1.0144, "num_tokens": 31085294544.0, "step": 5642 }, { "epoch": 1.0058823529411764, "grad_norm": 0.2060546875, "learning_rate": 1.1354539000042587e-05, "loss": 1.0229, "num_tokens": 31091576734.0, "step": 5643 }, { "epoch": 1.006060606060606, "grad_norm": 0.1923828125, "learning_rate": 1.1351942974377589e-05, "loss": 1.0321, "num_tokens": 31097838554.0, "step": 5644 }, { "epoch": 1.0062388591800357, "grad_norm": 0.19140625, "learning_rate": 1.1349346919385106e-05, "loss": 0.9722, "num_tokens": 31104098427.0, "step": 5645 }, { "epoch": 1.0064171122994652, "grad_norm": 0.19140625, "learning_rate": 1.134675083528147e-05, "loss": 1.0091, "num_tokens": 31110384649.0, "step": 5646 }, { "epoch": 1.0065953654188948, "grad_norm": 0.19140625, "learning_rate": 1.1344154722283019e-05, "loss": 1.0143, "num_tokens": 31116642357.0, "step": 5647 }, { "epoch": 1.0067736185383245, "grad_norm": 0.19921875, "learning_rate": 1.1341558580606078e-05, "loss": 1.0613, "num_tokens": 31122918996.0, "step": 5648 }, { "epoch": 1.006951871657754, "grad_norm": 0.197265625, "learning_rate": 1.1338962410466988e-05, "loss": 1.0099, "num_tokens": 31129194552.0, "step": 5649 }, { "epoch": 1.0071301247771836, "grad_norm": 0.1865234375, "learning_rate": 1.1336366212082089e-05, "loss": 1.0165, "num_tokens": 31135476184.0, "step": 5650 }, { "epoch": 1.0073083778966132, "grad_norm": 0.201171875, "learning_rate": 1.1333769985667724e-05, "loss": 1.0461, "num_tokens": 31141759268.0, "step": 5651 }, { "epoch": 1.0074866310160429, "grad_norm": 0.203125, "learning_rate": 1.1331173731440235e-05, "loss": 1.0545, "num_tokens": 31148041998.0, "step": 5652 }, { "epoch": 1.0076648841354723, "grad_norm": 0.181640625, "learning_rate": 1.1328577449615967e-05, "loss": 1.0201, "num_tokens": 31154324135.0, "step": 5653 }, { "epoch": 1.007843137254902, "grad_norm": 0.1904296875, "learning_rate": 1.1325981140411274e-05, "loss": 1.0098, "num_tokens": 31160596416.0, "step": 5654 }, { "epoch": 1.0080213903743316, "grad_norm": 0.1982421875, "learning_rate": 1.1323384804042502e-05, "loss": 0.9901, "num_tokens": 31166850941.0, "step": 5655 }, { "epoch": 1.0081996434937612, "grad_norm": 0.189453125, "learning_rate": 1.1320788440726005e-05, "loss": 1.0168, "num_tokens": 31173135706.0, "step": 5656 }, { "epoch": 1.0083778966131907, "grad_norm": 0.19140625, "learning_rate": 1.1318192050678141e-05, "loss": 1.0332, "num_tokens": 31179420871.0, "step": 5657 }, { "epoch": 1.0085561497326203, "grad_norm": 0.1982421875, "learning_rate": 1.1315595634115261e-05, "loss": 1.0336, "num_tokens": 31185690925.0, "step": 5658 }, { "epoch": 1.00873440285205, "grad_norm": 0.1806640625, "learning_rate": 1.131299919125374e-05, "loss": 0.9948, "num_tokens": 31191921070.0, "step": 5659 }, { "epoch": 1.0089126559714796, "grad_norm": 0.189453125, "learning_rate": 1.131040272230992e-05, "loss": 1.0192, "num_tokens": 31198206022.0, "step": 5660 }, { "epoch": 1.009090909090909, "grad_norm": 0.197265625, "learning_rate": 1.1307806227500182e-05, "loss": 1.0214, "num_tokens": 31204445197.0, "step": 5661 }, { "epoch": 1.0092691622103387, "grad_norm": 0.193359375, "learning_rate": 1.1305209707040882e-05, "loss": 1.0266, "num_tokens": 31210718932.0, "step": 5662 }, { "epoch": 1.0094474153297683, "grad_norm": 0.19140625, "learning_rate": 1.1302613161148397e-05, "loss": 1.0391, "num_tokens": 31216970303.0, "step": 5663 }, { "epoch": 1.0096256684491978, "grad_norm": 0.19921875, "learning_rate": 1.1300016590039091e-05, "loss": 1.042, "num_tokens": 31223254935.0, "step": 5664 }, { "epoch": 1.0098039215686274, "grad_norm": 0.1953125, "learning_rate": 1.1297419993929338e-05, "loss": 1.0323, "num_tokens": 31229534993.0, "step": 5665 }, { "epoch": 1.009982174688057, "grad_norm": 0.19140625, "learning_rate": 1.1294823373035513e-05, "loss": 1.056, "num_tokens": 31235810671.0, "step": 5666 }, { "epoch": 1.0101604278074867, "grad_norm": 0.1943359375, "learning_rate": 1.1292226727573996e-05, "loss": 1.0316, "num_tokens": 31242094387.0, "step": 5667 }, { "epoch": 1.0103386809269161, "grad_norm": 0.1845703125, "learning_rate": 1.1289630057761165e-05, "loss": 1.0175, "num_tokens": 31248326472.0, "step": 5668 }, { "epoch": 1.0105169340463458, "grad_norm": 0.1923828125, "learning_rate": 1.12870333638134e-05, "loss": 1.011, "num_tokens": 31254573280.0, "step": 5669 }, { "epoch": 1.0106951871657754, "grad_norm": 0.201171875, "learning_rate": 1.1284436645947083e-05, "loss": 1.0135, "num_tokens": 31260858618.0, "step": 5670 }, { "epoch": 1.010873440285205, "grad_norm": 0.2158203125, "learning_rate": 1.12818399043786e-05, "loss": 0.9992, "num_tokens": 31267131830.0, "step": 5671 }, { "epoch": 1.0110516934046345, "grad_norm": 0.1943359375, "learning_rate": 1.1279243139324336e-05, "loss": 1.0336, "num_tokens": 31273417513.0, "step": 5672 }, { "epoch": 1.0112299465240642, "grad_norm": 0.193359375, "learning_rate": 1.1276646351000684e-05, "loss": 1.0375, "num_tokens": 31279700260.0, "step": 5673 }, { "epoch": 1.0114081996434938, "grad_norm": 0.212890625, "learning_rate": 1.1274049539624035e-05, "loss": 1.0196, "num_tokens": 31285982212.0, "step": 5674 }, { "epoch": 1.0115864527629232, "grad_norm": 0.193359375, "learning_rate": 1.1271452705410779e-05, "loss": 1.0287, "num_tokens": 31292213496.0, "step": 5675 }, { "epoch": 1.011764705882353, "grad_norm": 0.1923828125, "learning_rate": 1.1268855848577312e-05, "loss": 1.0124, "num_tokens": 31298456354.0, "step": 5676 }, { "epoch": 1.0119429590017825, "grad_norm": 0.1982421875, "learning_rate": 1.1266258969340027e-05, "loss": 1.028, "num_tokens": 31304740509.0, "step": 5677 }, { "epoch": 1.0121212121212122, "grad_norm": 0.1943359375, "learning_rate": 1.1263662067915328e-05, "loss": 1.006, "num_tokens": 31311024904.0, "step": 5678 }, { "epoch": 1.0122994652406416, "grad_norm": 0.1943359375, "learning_rate": 1.1261065144519613e-05, "loss": 1.049, "num_tokens": 31317275775.0, "step": 5679 }, { "epoch": 1.0124777183600713, "grad_norm": 0.1953125, "learning_rate": 1.1258468199369285e-05, "loss": 1.0413, "num_tokens": 31323544044.0, "step": 5680 }, { "epoch": 1.012655971479501, "grad_norm": 0.189453125, "learning_rate": 1.1255871232680746e-05, "loss": 1.0617, "num_tokens": 31329821457.0, "step": 5681 }, { "epoch": 1.0128342245989306, "grad_norm": 0.20703125, "learning_rate": 1.1253274244670407e-05, "loss": 1.061, "num_tokens": 31336104040.0, "step": 5682 }, { "epoch": 1.01301247771836, "grad_norm": 0.193359375, "learning_rate": 1.1250677235554666e-05, "loss": 1.0254, "num_tokens": 31342388362.0, "step": 5683 }, { "epoch": 1.0131907308377897, "grad_norm": 0.19921875, "learning_rate": 1.1248080205549941e-05, "loss": 1.0276, "num_tokens": 31348654075.0, "step": 5684 }, { "epoch": 1.0133689839572193, "grad_norm": 0.1845703125, "learning_rate": 1.1245483154872639e-05, "loss": 1.0343, "num_tokens": 31354936948.0, "step": 5685 }, { "epoch": 1.0135472370766487, "grad_norm": 0.197265625, "learning_rate": 1.1242886083739175e-05, "loss": 1.0259, "num_tokens": 31361191807.0, "step": 5686 }, { "epoch": 1.0137254901960784, "grad_norm": 0.18359375, "learning_rate": 1.124028899236596e-05, "loss": 1.0113, "num_tokens": 31367470575.0, "step": 5687 }, { "epoch": 1.013903743315508, "grad_norm": 0.208984375, "learning_rate": 1.1237691880969415e-05, "loss": 1.0137, "num_tokens": 31373738349.0, "step": 5688 }, { "epoch": 1.0140819964349377, "grad_norm": 0.197265625, "learning_rate": 1.1235094749765954e-05, "loss": 1.0068, "num_tokens": 31380021598.0, "step": 5689 }, { "epoch": 1.014260249554367, "grad_norm": 0.1875, "learning_rate": 1.1232497598971998e-05, "loss": 1.0021, "num_tokens": 31386301470.0, "step": 5690 }, { "epoch": 1.0144385026737968, "grad_norm": 0.1982421875, "learning_rate": 1.1229900428803967e-05, "loss": 1.0298, "num_tokens": 31392552111.0, "step": 5691 }, { "epoch": 1.0146167557932264, "grad_norm": 0.201171875, "learning_rate": 1.1227303239478284e-05, "loss": 1.0139, "num_tokens": 31398817818.0, "step": 5692 }, { "epoch": 1.014795008912656, "grad_norm": 0.1845703125, "learning_rate": 1.122470603121138e-05, "loss": 1.0519, "num_tokens": 31405100352.0, "step": 5693 }, { "epoch": 1.0149732620320855, "grad_norm": 0.18359375, "learning_rate": 1.122210880421967e-05, "loss": 1.0218, "num_tokens": 31411356949.0, "step": 5694 }, { "epoch": 1.0151515151515151, "grad_norm": 0.19140625, "learning_rate": 1.1219511558719585e-05, "loss": 1.0245, "num_tokens": 31417629787.0, "step": 5695 }, { "epoch": 1.0153297682709448, "grad_norm": 0.19140625, "learning_rate": 1.1216914294927558e-05, "loss": 1.0186, "num_tokens": 31423883014.0, "step": 5696 }, { "epoch": 1.0155080213903744, "grad_norm": 0.1953125, "learning_rate": 1.1214317013060015e-05, "loss": 1.0117, "num_tokens": 31430121588.0, "step": 5697 }, { "epoch": 1.0156862745098039, "grad_norm": 0.205078125, "learning_rate": 1.1211719713333393e-05, "loss": 1.0257, "num_tokens": 31436399695.0, "step": 5698 }, { "epoch": 1.0158645276292335, "grad_norm": 0.203125, "learning_rate": 1.120912239596412e-05, "loss": 1.038, "num_tokens": 31442663977.0, "step": 5699 }, { "epoch": 1.0160427807486632, "grad_norm": 0.1904296875, "learning_rate": 1.1206525061168634e-05, "loss": 1.047, "num_tokens": 31448947389.0, "step": 5700 }, { "epoch": 1.0162210338680926, "grad_norm": 0.1962890625, "learning_rate": 1.1203927709163372e-05, "loss": 1.0183, "num_tokens": 31455230545.0, "step": 5701 }, { "epoch": 1.0163992869875222, "grad_norm": 0.205078125, "learning_rate": 1.1201330340164768e-05, "loss": 1.0381, "num_tokens": 31461501201.0, "step": 5702 }, { "epoch": 1.016577540106952, "grad_norm": 0.1923828125, "learning_rate": 1.1198732954389267e-05, "loss": 1.0038, "num_tokens": 31467785577.0, "step": 5703 }, { "epoch": 1.0167557932263815, "grad_norm": 0.1962890625, "learning_rate": 1.1196135552053307e-05, "loss": 0.9893, "num_tokens": 31474070086.0, "step": 5704 }, { "epoch": 1.016934046345811, "grad_norm": 0.1982421875, "learning_rate": 1.119353813337333e-05, "loss": 1.0035, "num_tokens": 31480355928.0, "step": 5705 }, { "epoch": 1.0171122994652406, "grad_norm": 0.189453125, "learning_rate": 1.1190940698565776e-05, "loss": 1.0228, "num_tokens": 31486638090.0, "step": 5706 }, { "epoch": 1.0172905525846703, "grad_norm": 0.1962890625, "learning_rate": 1.1188343247847096e-05, "loss": 1.0512, "num_tokens": 31492922515.0, "step": 5707 }, { "epoch": 1.0174688057041, "grad_norm": 0.205078125, "learning_rate": 1.1185745781433732e-05, "loss": 0.9985, "num_tokens": 31499154804.0, "step": 5708 }, { "epoch": 1.0176470588235293, "grad_norm": 0.1884765625, "learning_rate": 1.1183148299542135e-05, "loss": 1.009, "num_tokens": 31505437949.0, "step": 5709 }, { "epoch": 1.017825311942959, "grad_norm": 0.201171875, "learning_rate": 1.118055080238875e-05, "loss": 1.0004, "num_tokens": 31511723598.0, "step": 5710 }, { "epoch": 1.0180035650623886, "grad_norm": 0.1943359375, "learning_rate": 1.1177953290190029e-05, "loss": 1.0259, "num_tokens": 31517987002.0, "step": 5711 }, { "epoch": 1.018181818181818, "grad_norm": 0.1845703125, "learning_rate": 1.1175355763162421e-05, "loss": 1.021, "num_tokens": 31524222402.0, "step": 5712 }, { "epoch": 1.0183600713012477, "grad_norm": 0.205078125, "learning_rate": 1.1172758221522381e-05, "loss": 1.0118, "num_tokens": 31530505806.0, "step": 5713 }, { "epoch": 1.0185383244206774, "grad_norm": 0.1982421875, "learning_rate": 1.1170160665486366e-05, "loss": 1.024, "num_tokens": 31536743924.0, "step": 5714 }, { "epoch": 1.018716577540107, "grad_norm": 0.2001953125, "learning_rate": 1.1167563095270819e-05, "loss": 1.0278, "num_tokens": 31542973471.0, "step": 5715 }, { "epoch": 1.0188948306595365, "grad_norm": 0.1904296875, "learning_rate": 1.116496551109221e-05, "loss": 1.0542, "num_tokens": 31549253197.0, "step": 5716 }, { "epoch": 1.019073083778966, "grad_norm": 0.1875, "learning_rate": 1.1162367913166987e-05, "loss": 1.0024, "num_tokens": 31555516362.0, "step": 5717 }, { "epoch": 1.0192513368983958, "grad_norm": 0.1865234375, "learning_rate": 1.1159770301711614e-05, "loss": 1.035, "num_tokens": 31561795296.0, "step": 5718 }, { "epoch": 1.0194295900178254, "grad_norm": 0.189453125, "learning_rate": 1.1157172676942545e-05, "loss": 0.9968, "num_tokens": 31568010090.0, "step": 5719 }, { "epoch": 1.0196078431372548, "grad_norm": 0.1982421875, "learning_rate": 1.1154575039076245e-05, "loss": 1.0072, "num_tokens": 31574279971.0, "step": 5720 }, { "epoch": 1.0197860962566845, "grad_norm": 0.2001953125, "learning_rate": 1.1151977388329176e-05, "loss": 0.9951, "num_tokens": 31580562635.0, "step": 5721 }, { "epoch": 1.0199643493761141, "grad_norm": 0.193359375, "learning_rate": 1.1149379724917794e-05, "loss": 1.0024, "num_tokens": 31586842204.0, "step": 5722 }, { "epoch": 1.0201426024955436, "grad_norm": 0.201171875, "learning_rate": 1.1146782049058575e-05, "loss": 1.0332, "num_tokens": 31593105769.0, "step": 5723 }, { "epoch": 1.0203208556149732, "grad_norm": 0.1943359375, "learning_rate": 1.1144184360967973e-05, "loss": 1.0279, "num_tokens": 31599389843.0, "step": 5724 }, { "epoch": 1.0204991087344029, "grad_norm": 0.189453125, "learning_rate": 1.114158666086246e-05, "loss": 1.0005, "num_tokens": 31605632343.0, "step": 5725 }, { "epoch": 1.0206773618538325, "grad_norm": 0.1865234375, "learning_rate": 1.1138988948958495e-05, "loss": 1.007, "num_tokens": 31611914577.0, "step": 5726 }, { "epoch": 1.020855614973262, "grad_norm": 0.1953125, "learning_rate": 1.1136391225472555e-05, "loss": 1.0095, "num_tokens": 31618178764.0, "step": 5727 }, { "epoch": 1.0210338680926916, "grad_norm": 0.1943359375, "learning_rate": 1.1133793490621108e-05, "loss": 1.0294, "num_tokens": 31624462813.0, "step": 5728 }, { "epoch": 1.0212121212121212, "grad_norm": 0.1884765625, "learning_rate": 1.1131195744620617e-05, "loss": 0.9734, "num_tokens": 31630748244.0, "step": 5729 }, { "epoch": 1.0213903743315509, "grad_norm": 0.185546875, "learning_rate": 1.1128597987687558e-05, "loss": 1.0241, "num_tokens": 31637032482.0, "step": 5730 }, { "epoch": 1.0215686274509803, "grad_norm": 0.18359375, "learning_rate": 1.11260002200384e-05, "loss": 1.0266, "num_tokens": 31643277448.0, "step": 5731 }, { "epoch": 1.02174688057041, "grad_norm": 0.2060546875, "learning_rate": 1.1123402441889621e-05, "loss": 1.0298, "num_tokens": 31649501370.0, "step": 5732 }, { "epoch": 1.0219251336898396, "grad_norm": 0.1943359375, "learning_rate": 1.1120804653457687e-05, "loss": 1.0388, "num_tokens": 31655783848.0, "step": 5733 }, { "epoch": 1.0221033868092693, "grad_norm": 0.189453125, "learning_rate": 1.111820685495908e-05, "loss": 1.0138, "num_tokens": 31662067947.0, "step": 5734 }, { "epoch": 1.0222816399286987, "grad_norm": 0.19140625, "learning_rate": 1.1115609046610265e-05, "loss": 1.0108, "num_tokens": 31668339200.0, "step": 5735 }, { "epoch": 1.0224598930481283, "grad_norm": 0.19140625, "learning_rate": 1.1113011228627727e-05, "loss": 1.0408, "num_tokens": 31674623422.0, "step": 5736 }, { "epoch": 1.022638146167558, "grad_norm": 0.19921875, "learning_rate": 1.1110413401227937e-05, "loss": 0.9886, "num_tokens": 31680907496.0, "step": 5737 }, { "epoch": 1.0228163992869874, "grad_norm": 0.2021484375, "learning_rate": 1.1107815564627379e-05, "loss": 1.0107, "num_tokens": 31687182734.0, "step": 5738 }, { "epoch": 1.022994652406417, "grad_norm": 0.1943359375, "learning_rate": 1.1105217719042525e-05, "loss": 1.0252, "num_tokens": 31693465499.0, "step": 5739 }, { "epoch": 1.0231729055258467, "grad_norm": 0.1904296875, "learning_rate": 1.1102619864689854e-05, "loss": 1.0091, "num_tokens": 31699695120.0, "step": 5740 }, { "epoch": 1.0233511586452764, "grad_norm": 0.1875, "learning_rate": 1.1100022001785853e-05, "loss": 1.0256, "num_tokens": 31705947399.0, "step": 5741 }, { "epoch": 1.0235294117647058, "grad_norm": 0.19140625, "learning_rate": 1.1097424130546992e-05, "loss": 0.9973, "num_tokens": 31712230730.0, "step": 5742 }, { "epoch": 1.0237076648841354, "grad_norm": 0.181640625, "learning_rate": 1.109482625118976e-05, "loss": 0.9926, "num_tokens": 31718513357.0, "step": 5743 }, { "epoch": 1.023885918003565, "grad_norm": 0.2021484375, "learning_rate": 1.1092228363930637e-05, "loss": 1.0393, "num_tokens": 31724797502.0, "step": 5744 }, { "epoch": 1.0240641711229947, "grad_norm": 0.19140625, "learning_rate": 1.1089630468986105e-05, "loss": 1.0206, "num_tokens": 31731080129.0, "step": 5745 }, { "epoch": 1.0242424242424242, "grad_norm": 0.1962890625, "learning_rate": 1.1087032566572646e-05, "loss": 1.0249, "num_tokens": 31737347937.0, "step": 5746 }, { "epoch": 1.0244206773618538, "grad_norm": 0.1875, "learning_rate": 1.1084434656906746e-05, "loss": 1.047, "num_tokens": 31743612187.0, "step": 5747 }, { "epoch": 1.0245989304812835, "grad_norm": 0.2001953125, "learning_rate": 1.1081836740204887e-05, "loss": 1.0216, "num_tokens": 31749849659.0, "step": 5748 }, { "epoch": 1.024777183600713, "grad_norm": 0.189453125, "learning_rate": 1.1079238816683555e-05, "loss": 1.0272, "num_tokens": 31756106719.0, "step": 5749 }, { "epoch": 1.0249554367201426, "grad_norm": 0.2041015625, "learning_rate": 1.1076640886559238e-05, "loss": 0.9917, "num_tokens": 31762389510.0, "step": 5750 }, { "epoch": 1.0251336898395722, "grad_norm": 0.1982421875, "learning_rate": 1.1074042950048418e-05, "loss": 1.0341, "num_tokens": 31768671842.0, "step": 5751 }, { "epoch": 1.0253119429590019, "grad_norm": 0.1923828125, "learning_rate": 1.1071445007367583e-05, "loss": 1.0075, "num_tokens": 31774954633.0, "step": 5752 }, { "epoch": 1.0254901960784313, "grad_norm": 0.1884765625, "learning_rate": 1.106884705873322e-05, "loss": 1.0054, "num_tokens": 31781239756.0, "step": 5753 }, { "epoch": 1.025668449197861, "grad_norm": 0.19140625, "learning_rate": 1.1066249104361816e-05, "loss": 0.9989, "num_tokens": 31787494035.0, "step": 5754 }, { "epoch": 1.0258467023172906, "grad_norm": 0.201171875, "learning_rate": 1.1063651144469865e-05, "loss": 1.019, "num_tokens": 31793767945.0, "step": 5755 }, { "epoch": 1.0260249554367202, "grad_norm": 0.185546875, "learning_rate": 1.1061053179273849e-05, "loss": 1.0225, "num_tokens": 31800050335.0, "step": 5756 }, { "epoch": 1.0262032085561497, "grad_norm": 0.1845703125, "learning_rate": 1.1058455208990255e-05, "loss": 0.9896, "num_tokens": 31806256708.0, "step": 5757 }, { "epoch": 1.0263814616755793, "grad_norm": 0.1962890625, "learning_rate": 1.1055857233835579e-05, "loss": 1.0264, "num_tokens": 31812539912.0, "step": 5758 }, { "epoch": 1.026559714795009, "grad_norm": 0.205078125, "learning_rate": 1.1053259254026307e-05, "loss": 1.0344, "num_tokens": 31818779188.0, "step": 5759 }, { "epoch": 1.0267379679144386, "grad_norm": 0.19140625, "learning_rate": 1.105066126977893e-05, "loss": 1.0469, "num_tokens": 31825060758.0, "step": 5760 }, { "epoch": 1.026916221033868, "grad_norm": 0.2001953125, "learning_rate": 1.1048063281309937e-05, "loss": 1.0263, "num_tokens": 31831342749.0, "step": 5761 }, { "epoch": 1.0270944741532977, "grad_norm": 0.1943359375, "learning_rate": 1.1045465288835823e-05, "loss": 0.9963, "num_tokens": 31837558022.0, "step": 5762 }, { "epoch": 1.0272727272727273, "grad_norm": 0.1865234375, "learning_rate": 1.1042867292573081e-05, "loss": 1.0279, "num_tokens": 31843808318.0, "step": 5763 }, { "epoch": 1.0274509803921568, "grad_norm": 0.212890625, "learning_rate": 1.1040269292738193e-05, "loss": 1.0073, "num_tokens": 31850092081.0, "step": 5764 }, { "epoch": 1.0276292335115864, "grad_norm": 0.2138671875, "learning_rate": 1.1037671289547656e-05, "loss": 1.0046, "num_tokens": 31856370141.0, "step": 5765 }, { "epoch": 1.027807486631016, "grad_norm": 0.19921875, "learning_rate": 1.1035073283217963e-05, "loss": 1.0363, "num_tokens": 31862631653.0, "step": 5766 }, { "epoch": 1.0279857397504457, "grad_norm": 0.1904296875, "learning_rate": 1.1032475273965608e-05, "loss": 1.0278, "num_tokens": 31868914509.0, "step": 5767 }, { "epoch": 1.0281639928698751, "grad_norm": 0.2099609375, "learning_rate": 1.1029877262007079e-05, "loss": 1.0246, "num_tokens": 31875198188.0, "step": 5768 }, { "epoch": 1.0283422459893048, "grad_norm": 0.203125, "learning_rate": 1.1027279247558873e-05, "loss": 1.0012, "num_tokens": 31881482937.0, "step": 5769 }, { "epoch": 1.0285204991087344, "grad_norm": 0.1904296875, "learning_rate": 1.1024681230837482e-05, "loss": 1.0299, "num_tokens": 31887760308.0, "step": 5770 }, { "epoch": 1.028698752228164, "grad_norm": 0.2060546875, "learning_rate": 1.1022083212059399e-05, "loss": 1.0149, "num_tokens": 31894027830.0, "step": 5771 }, { "epoch": 1.0288770053475935, "grad_norm": 0.208984375, "learning_rate": 1.1019485191441116e-05, "loss": 1.0223, "num_tokens": 31900222958.0, "step": 5772 }, { "epoch": 1.0290552584670232, "grad_norm": 0.1943359375, "learning_rate": 1.1016887169199126e-05, "loss": 1.0197, "num_tokens": 31906485085.0, "step": 5773 }, { "epoch": 1.0292335115864528, "grad_norm": 0.193359375, "learning_rate": 1.1014289145549927e-05, "loss": 1.0657, "num_tokens": 31912753739.0, "step": 5774 }, { "epoch": 1.0294117647058822, "grad_norm": 0.2265625, "learning_rate": 1.101169112071001e-05, "loss": 1.06, "num_tokens": 31919036791.0, "step": 5775 }, { "epoch": 1.029590017825312, "grad_norm": 0.201171875, "learning_rate": 1.1009093094895871e-05, "loss": 1.0035, "num_tokens": 31925306823.0, "step": 5776 }, { "epoch": 1.0297682709447415, "grad_norm": 0.2060546875, "learning_rate": 1.1006495068323998e-05, "loss": 1.0227, "num_tokens": 31931587093.0, "step": 5777 }, { "epoch": 1.0299465240641712, "grad_norm": 0.1962890625, "learning_rate": 1.1003897041210899e-05, "loss": 1.0206, "num_tokens": 31937871247.0, "step": 5778 }, { "epoch": 1.0301247771836006, "grad_norm": 0.212890625, "learning_rate": 1.1001299013773049e-05, "loss": 0.9765, "num_tokens": 31944154861.0, "step": 5779 }, { "epoch": 1.0303030303030303, "grad_norm": 0.220703125, "learning_rate": 1.0998700986226956e-05, "loss": 1.0462, "num_tokens": 31950417229.0, "step": 5780 }, { "epoch": 1.03048128342246, "grad_norm": 0.19921875, "learning_rate": 1.0996102958789107e-05, "loss": 1.0133, "num_tokens": 31956689401.0, "step": 5781 }, { "epoch": 1.0306595365418896, "grad_norm": 0.201171875, "learning_rate": 1.0993504931676003e-05, "loss": 1.0246, "num_tokens": 31962939963.0, "step": 5782 }, { "epoch": 1.030837789661319, "grad_norm": 0.205078125, "learning_rate": 1.099090690510413e-05, "loss": 0.9999, "num_tokens": 31969217923.0, "step": 5783 }, { "epoch": 1.0310160427807487, "grad_norm": 0.2021484375, "learning_rate": 1.0988308879289993e-05, "loss": 0.9812, "num_tokens": 31975500184.0, "step": 5784 }, { "epoch": 1.0311942959001783, "grad_norm": 0.1953125, "learning_rate": 1.0985710854450077e-05, "loss": 1.0225, "num_tokens": 31981782192.0, "step": 5785 }, { "epoch": 1.0313725490196077, "grad_norm": 0.2021484375, "learning_rate": 1.0983112830800877e-05, "loss": 1.0213, "num_tokens": 31988051758.0, "step": 5786 }, { "epoch": 1.0315508021390374, "grad_norm": 0.18359375, "learning_rate": 1.0980514808558889e-05, "loss": 1.0004, "num_tokens": 31994322496.0, "step": 5787 }, { "epoch": 1.031729055258467, "grad_norm": 0.1943359375, "learning_rate": 1.0977916787940606e-05, "loss": 1.0534, "num_tokens": 32000606905.0, "step": 5788 }, { "epoch": 1.0319073083778967, "grad_norm": 0.1982421875, "learning_rate": 1.0975318769162522e-05, "loss": 1.0337, "num_tokens": 32006891001.0, "step": 5789 }, { "epoch": 1.032085561497326, "grad_norm": 0.201171875, "learning_rate": 1.0972720752441128e-05, "loss": 1.0304, "num_tokens": 32013176315.0, "step": 5790 }, { "epoch": 1.0322638146167558, "grad_norm": 0.205078125, "learning_rate": 1.0970122737992923e-05, "loss": 1.0211, "num_tokens": 32019460658.0, "step": 5791 }, { "epoch": 1.0324420677361854, "grad_norm": 0.1904296875, "learning_rate": 1.0967524726034398e-05, "loss": 1.0232, "num_tokens": 32025735545.0, "step": 5792 }, { "epoch": 1.032620320855615, "grad_norm": 0.1904296875, "learning_rate": 1.096492671678204e-05, "loss": 1.0287, "num_tokens": 32032017764.0, "step": 5793 }, { "epoch": 1.0327985739750445, "grad_norm": 0.193359375, "learning_rate": 1.0962328710452347e-05, "loss": 1.0242, "num_tokens": 32038280446.0, "step": 5794 }, { "epoch": 1.0329768270944741, "grad_norm": 0.203125, "learning_rate": 1.0959730707261811e-05, "loss": 1.0061, "num_tokens": 32044537667.0, "step": 5795 }, { "epoch": 1.0331550802139038, "grad_norm": 0.193359375, "learning_rate": 1.0957132707426925e-05, "loss": 1.0052, "num_tokens": 32050820684.0, "step": 5796 }, { "epoch": 1.0333333333333334, "grad_norm": 0.1884765625, "learning_rate": 1.0954534711164178e-05, "loss": 1.0276, "num_tokens": 32057103811.0, "step": 5797 }, { "epoch": 1.0335115864527629, "grad_norm": 0.203125, "learning_rate": 1.0951936718690064e-05, "loss": 1.001, "num_tokens": 32063388870.0, "step": 5798 }, { "epoch": 1.0336898395721925, "grad_norm": 0.19921875, "learning_rate": 1.0949338730221076e-05, "loss": 1.0224, "num_tokens": 32069640963.0, "step": 5799 }, { "epoch": 1.0338680926916222, "grad_norm": 0.19921875, "learning_rate": 1.0946740745973696e-05, "loss": 1.0313, "num_tokens": 32075907532.0, "step": 5800 }, { "epoch": 1.0340463458110516, "grad_norm": 0.1953125, "learning_rate": 1.0944142766164425e-05, "loss": 1.048, "num_tokens": 32082191167.0, "step": 5801 }, { "epoch": 1.0342245989304812, "grad_norm": 0.189453125, "learning_rate": 1.0941544791009748e-05, "loss": 0.9769, "num_tokens": 32088469472.0, "step": 5802 }, { "epoch": 1.034402852049911, "grad_norm": 0.2021484375, "learning_rate": 1.0938946820726158e-05, "loss": 1.0092, "num_tokens": 32094725644.0, "step": 5803 }, { "epoch": 1.0345811051693405, "grad_norm": 0.1923828125, "learning_rate": 1.0936348855530138e-05, "loss": 1.0138, "num_tokens": 32100993806.0, "step": 5804 }, { "epoch": 1.03475935828877, "grad_norm": 0.1865234375, "learning_rate": 1.0933750895638185e-05, "loss": 1.0312, "num_tokens": 32107278231.0, "step": 5805 }, { "epoch": 1.0349376114081996, "grad_norm": 0.1943359375, "learning_rate": 1.093115294126678e-05, "loss": 1.0158, "num_tokens": 32113559369.0, "step": 5806 }, { "epoch": 1.0351158645276293, "grad_norm": 0.2021484375, "learning_rate": 1.092855499263242e-05, "loss": 1.0138, "num_tokens": 32119844326.0, "step": 5807 }, { "epoch": 1.035294117647059, "grad_norm": 0.18359375, "learning_rate": 1.0925957049951587e-05, "loss": 0.992, "num_tokens": 32126109460.0, "step": 5808 }, { "epoch": 1.0354723707664883, "grad_norm": 0.1982421875, "learning_rate": 1.0923359113440765e-05, "loss": 0.9966, "num_tokens": 32132393849.0, "step": 5809 }, { "epoch": 1.035650623885918, "grad_norm": 0.203125, "learning_rate": 1.0920761183316448e-05, "loss": 1.0295, "num_tokens": 32138678774.0, "step": 5810 }, { "epoch": 1.0358288770053476, "grad_norm": 0.1923828125, "learning_rate": 1.0918163259795115e-05, "loss": 1.0486, "num_tokens": 32144935466.0, "step": 5811 }, { "epoch": 1.0360071301247773, "grad_norm": 0.1875, "learning_rate": 1.0915565343093257e-05, "loss": 0.9969, "num_tokens": 32151167809.0, "step": 5812 }, { "epoch": 1.0361853832442067, "grad_norm": 0.1953125, "learning_rate": 1.0912967433427356e-05, "loss": 1.016, "num_tokens": 32157453554.0, "step": 5813 }, { "epoch": 1.0363636363636364, "grad_norm": 0.197265625, "learning_rate": 1.0910369531013898e-05, "loss": 1.0366, "num_tokens": 32163739004.0, "step": 5814 }, { "epoch": 1.036541889483066, "grad_norm": 0.1982421875, "learning_rate": 1.0907771636069366e-05, "loss": 1.0155, "num_tokens": 32169988744.0, "step": 5815 }, { "epoch": 1.0367201426024955, "grad_norm": 0.193359375, "learning_rate": 1.0905173748810242e-05, "loss": 1.0569, "num_tokens": 32176272999.0, "step": 5816 }, { "epoch": 1.036898395721925, "grad_norm": 0.2001953125, "learning_rate": 1.0902575869453011e-05, "loss": 0.996, "num_tokens": 32182510902.0, "step": 5817 }, { "epoch": 1.0370766488413548, "grad_norm": 0.1904296875, "learning_rate": 1.089997799821415e-05, "loss": 1.0579, "num_tokens": 32188794620.0, "step": 5818 }, { "epoch": 1.0372549019607844, "grad_norm": 0.1875, "learning_rate": 1.089738013531015e-05, "loss": 1.0425, "num_tokens": 32195077682.0, "step": 5819 }, { "epoch": 1.0374331550802138, "grad_norm": 0.208984375, "learning_rate": 1.089478228095748e-05, "loss": 1.0363, "num_tokens": 32201359139.0, "step": 5820 }, { "epoch": 1.0376114081996435, "grad_norm": 0.19140625, "learning_rate": 1.0892184435372624e-05, "loss": 1.0426, "num_tokens": 32207616529.0, "step": 5821 }, { "epoch": 1.0377896613190731, "grad_norm": 0.18359375, "learning_rate": 1.0889586598772066e-05, "loss": 0.9944, "num_tokens": 32213899989.0, "step": 5822 }, { "epoch": 1.0379679144385028, "grad_norm": 0.1845703125, "learning_rate": 1.0886988771372278e-05, "loss": 0.9956, "num_tokens": 32220184528.0, "step": 5823 }, { "epoch": 1.0381461675579322, "grad_norm": 0.193359375, "learning_rate": 1.0884390953389738e-05, "loss": 1.0221, "num_tokens": 32226456185.0, "step": 5824 }, { "epoch": 1.0383244206773619, "grad_norm": 0.19140625, "learning_rate": 1.0881793145040927e-05, "loss": 1.0025, "num_tokens": 32232739369.0, "step": 5825 }, { "epoch": 1.0385026737967915, "grad_norm": 0.1865234375, "learning_rate": 1.0879195346542314e-05, "loss": 1.0262, "num_tokens": 32239024301.0, "step": 5826 }, { "epoch": 1.038680926916221, "grad_norm": 0.2041015625, "learning_rate": 1.0876597558110382e-05, "loss": 1.0289, "num_tokens": 32245231819.0, "step": 5827 }, { "epoch": 1.0388591800356506, "grad_norm": 0.193359375, "learning_rate": 1.0873999779961602e-05, "loss": 1.0083, "num_tokens": 32251514407.0, "step": 5828 }, { "epoch": 1.0390374331550802, "grad_norm": 0.185546875, "learning_rate": 1.0871402012312445e-05, "loss": 1.0529, "num_tokens": 32257798723.0, "step": 5829 }, { "epoch": 1.0392156862745099, "grad_norm": 0.19140625, "learning_rate": 1.0868804255379388e-05, "loss": 1.0233, "num_tokens": 32264060607.0, "step": 5830 }, { "epoch": 1.0393939393939393, "grad_norm": 0.1875, "learning_rate": 1.0866206509378898e-05, "loss": 1.0532, "num_tokens": 32270342084.0, "step": 5831 }, { "epoch": 1.039572192513369, "grad_norm": 0.18359375, "learning_rate": 1.0863608774527448e-05, "loss": 1.0081, "num_tokens": 32276627356.0, "step": 5832 }, { "epoch": 1.0397504456327986, "grad_norm": 0.1796875, "learning_rate": 1.0861011051041507e-05, "loss": 0.992, "num_tokens": 32282901532.0, "step": 5833 }, { "epoch": 1.0399286987522283, "grad_norm": 0.1962890625, "learning_rate": 1.0858413339137545e-05, "loss": 0.9857, "num_tokens": 32289171197.0, "step": 5834 }, { "epoch": 1.0401069518716577, "grad_norm": 0.185546875, "learning_rate": 1.085581563903203e-05, "loss": 1.0135, "num_tokens": 32295457556.0, "step": 5835 }, { "epoch": 1.0402852049910873, "grad_norm": 0.1875, "learning_rate": 1.0853217950941428e-05, "loss": 1.0286, "num_tokens": 32301741708.0, "step": 5836 }, { "epoch": 1.040463458110517, "grad_norm": 0.208984375, "learning_rate": 1.0850620275082207e-05, "loss": 1.0126, "num_tokens": 32308024047.0, "step": 5837 }, { "epoch": 1.0406417112299464, "grad_norm": 0.19921875, "learning_rate": 1.084802261167083e-05, "loss": 1.0342, "num_tokens": 32314282277.0, "step": 5838 }, { "epoch": 1.040819964349376, "grad_norm": 0.2021484375, "learning_rate": 1.0845424960923757e-05, "loss": 1.0103, "num_tokens": 32320517249.0, "step": 5839 }, { "epoch": 1.0409982174688057, "grad_norm": 0.19921875, "learning_rate": 1.084282732305746e-05, "loss": 1.0085, "num_tokens": 32326801999.0, "step": 5840 }, { "epoch": 1.0411764705882354, "grad_norm": 0.1865234375, "learning_rate": 1.084022969828839e-05, "loss": 1.0154, "num_tokens": 32333070169.0, "step": 5841 }, { "epoch": 1.0413547237076648, "grad_norm": 0.193359375, "learning_rate": 1.0837632086833014e-05, "loss": 1.0185, "num_tokens": 32339322778.0, "step": 5842 }, { "epoch": 1.0415329768270944, "grad_norm": 0.2177734375, "learning_rate": 1.0835034488907792e-05, "loss": 1.0336, "num_tokens": 32345576995.0, "step": 5843 }, { "epoch": 1.041711229946524, "grad_norm": 0.1953125, "learning_rate": 1.0832436904729183e-05, "loss": 0.9956, "num_tokens": 32351861681.0, "step": 5844 }, { "epoch": 1.0418894830659537, "grad_norm": 0.1875, "learning_rate": 1.082983933451364e-05, "loss": 1.0017, "num_tokens": 32358091495.0, "step": 5845 }, { "epoch": 1.0420677361853832, "grad_norm": 0.1943359375, "learning_rate": 1.0827241778477622e-05, "loss": 1.0226, "num_tokens": 32364343883.0, "step": 5846 }, { "epoch": 1.0422459893048128, "grad_norm": 0.2138671875, "learning_rate": 1.0824644236837583e-05, "loss": 1.0317, "num_tokens": 32370590805.0, "step": 5847 }, { "epoch": 1.0424242424242425, "grad_norm": 0.2021484375, "learning_rate": 1.0822046709809974e-05, "loss": 1.0257, "num_tokens": 32376836943.0, "step": 5848 }, { "epoch": 1.042602495543672, "grad_norm": 0.1982421875, "learning_rate": 1.0819449197611254e-05, "loss": 1.0319, "num_tokens": 32383077543.0, "step": 5849 }, { "epoch": 1.0427807486631016, "grad_norm": 0.1962890625, "learning_rate": 1.0816851700457867e-05, "loss": 1.0029, "num_tokens": 32389361954.0, "step": 5850 }, { "epoch": 1.0429590017825312, "grad_norm": 0.1962890625, "learning_rate": 1.081425421856627e-05, "loss": 1.0293, "num_tokens": 32395619449.0, "step": 5851 }, { "epoch": 1.0431372549019609, "grad_norm": 0.1923828125, "learning_rate": 1.0811656752152906e-05, "loss": 1.0365, "num_tokens": 32401904599.0, "step": 5852 }, { "epoch": 1.0433155080213903, "grad_norm": 0.2021484375, "learning_rate": 1.0809059301434229e-05, "loss": 1.0289, "num_tokens": 32408140671.0, "step": 5853 }, { "epoch": 1.04349376114082, "grad_norm": 0.1982421875, "learning_rate": 1.0806461866626677e-05, "loss": 1.0308, "num_tokens": 32414394387.0, "step": 5854 }, { "epoch": 1.0436720142602496, "grad_norm": 0.1904296875, "learning_rate": 1.0803864447946698e-05, "loss": 0.9832, "num_tokens": 32420675870.0, "step": 5855 }, { "epoch": 1.0438502673796792, "grad_norm": 0.1943359375, "learning_rate": 1.0801267045610738e-05, "loss": 1.0213, "num_tokens": 32426934611.0, "step": 5856 }, { "epoch": 1.0440285204991087, "grad_norm": 0.19140625, "learning_rate": 1.0798669659835234e-05, "loss": 1.0519, "num_tokens": 32433218245.0, "step": 5857 }, { "epoch": 1.0442067736185383, "grad_norm": 0.1923828125, "learning_rate": 1.0796072290836633e-05, "loss": 1.03, "num_tokens": 32439501628.0, "step": 5858 }, { "epoch": 1.044385026737968, "grad_norm": 0.1962890625, "learning_rate": 1.0793474938831368e-05, "loss": 1.0296, "num_tokens": 32445756195.0, "step": 5859 }, { "epoch": 1.0445632798573976, "grad_norm": 0.2060546875, "learning_rate": 1.0790877604035884e-05, "loss": 0.993, "num_tokens": 32452023046.0, "step": 5860 }, { "epoch": 1.044741532976827, "grad_norm": 0.2021484375, "learning_rate": 1.0788280286666612e-05, "loss": 1.0257, "num_tokens": 32458300865.0, "step": 5861 }, { "epoch": 1.0449197860962567, "grad_norm": 0.1982421875, "learning_rate": 1.078568298693999e-05, "loss": 1.0314, "num_tokens": 32464541340.0, "step": 5862 }, { "epoch": 1.0450980392156863, "grad_norm": 0.1982421875, "learning_rate": 1.0783085705072447e-05, "loss": 1.0288, "num_tokens": 32470825312.0, "step": 5863 }, { "epoch": 1.0452762923351158, "grad_norm": 0.197265625, "learning_rate": 1.0780488441280418e-05, "loss": 1.0069, "num_tokens": 32477108661.0, "step": 5864 }, { "epoch": 1.0454545454545454, "grad_norm": 0.197265625, "learning_rate": 1.0777891195780335e-05, "loss": 1.0067, "num_tokens": 32483355862.0, "step": 5865 }, { "epoch": 1.045632798573975, "grad_norm": 0.181640625, "learning_rate": 1.0775293968788623e-05, "loss": 1.0098, "num_tokens": 32489640629.0, "step": 5866 }, { "epoch": 1.0458110516934047, "grad_norm": 0.19921875, "learning_rate": 1.0772696760521715e-05, "loss": 1.0152, "num_tokens": 32495924869.0, "step": 5867 }, { "epoch": 1.0459893048128341, "grad_norm": 0.1884765625, "learning_rate": 1.0770099571196035e-05, "loss": 1.0215, "num_tokens": 32502194364.0, "step": 5868 }, { "epoch": 1.0461675579322638, "grad_norm": 0.1953125, "learning_rate": 1.0767502401028007e-05, "loss": 1.0027, "num_tokens": 32508477142.0, "step": 5869 }, { "epoch": 1.0463458110516934, "grad_norm": 0.1923828125, "learning_rate": 1.076490525023405e-05, "loss": 1.0362, "num_tokens": 32514760335.0, "step": 5870 }, { "epoch": 1.046524064171123, "grad_norm": 0.1943359375, "learning_rate": 1.076230811903059e-05, "loss": 1.026, "num_tokens": 32521043562.0, "step": 5871 }, { "epoch": 1.0467023172905525, "grad_norm": 0.1943359375, "learning_rate": 1.0759711007634042e-05, "loss": 0.9879, "num_tokens": 32527301632.0, "step": 5872 }, { "epoch": 1.0468805704099822, "grad_norm": 0.19921875, "learning_rate": 1.0757113916260828e-05, "loss": 1.0151, "num_tokens": 32533556973.0, "step": 5873 }, { "epoch": 1.0470588235294118, "grad_norm": 0.2041015625, "learning_rate": 1.0754516845127363e-05, "loss": 0.9949, "num_tokens": 32539829279.0, "step": 5874 }, { "epoch": 1.0472370766488412, "grad_norm": 0.2001953125, "learning_rate": 1.0751919794450063e-05, "loss": 1.0212, "num_tokens": 32546110878.0, "step": 5875 }, { "epoch": 1.047415329768271, "grad_norm": 0.19921875, "learning_rate": 1.074932276444534e-05, "loss": 1.027, "num_tokens": 32552377731.0, "step": 5876 }, { "epoch": 1.0475935828877005, "grad_norm": 0.193359375, "learning_rate": 1.07467257553296e-05, "loss": 1.0228, "num_tokens": 32558663326.0, "step": 5877 }, { "epoch": 1.0477718360071302, "grad_norm": 0.193359375, "learning_rate": 1.0744128767319257e-05, "loss": 1.0134, "num_tokens": 32564945495.0, "step": 5878 }, { "epoch": 1.0479500891265596, "grad_norm": 0.21484375, "learning_rate": 1.0741531800630717e-05, "loss": 0.9938, "num_tokens": 32571193925.0, "step": 5879 }, { "epoch": 1.0481283422459893, "grad_norm": 0.1884765625, "learning_rate": 1.073893485548039e-05, "loss": 1.0078, "num_tokens": 32577449858.0, "step": 5880 }, { "epoch": 1.048306595365419, "grad_norm": 0.197265625, "learning_rate": 1.0736337932084673e-05, "loss": 1.0088, "num_tokens": 32583732999.0, "step": 5881 }, { "epoch": 1.0484848484848486, "grad_norm": 0.2080078125, "learning_rate": 1.0733741030659976e-05, "loss": 1.0251, "num_tokens": 32590009336.0, "step": 5882 }, { "epoch": 1.048663101604278, "grad_norm": 0.1982421875, "learning_rate": 1.0731144151422693e-05, "loss": 0.9738, "num_tokens": 32596294165.0, "step": 5883 }, { "epoch": 1.0488413547237077, "grad_norm": 0.1884765625, "learning_rate": 1.0728547294589227e-05, "loss": 1.0388, "num_tokens": 32602553503.0, "step": 5884 }, { "epoch": 1.0490196078431373, "grad_norm": 0.1884765625, "learning_rate": 1.072595046037597e-05, "loss": 1.0372, "num_tokens": 32608836186.0, "step": 5885 }, { "epoch": 1.049197860962567, "grad_norm": 0.1943359375, "learning_rate": 1.0723353648999317e-05, "loss": 1.0263, "num_tokens": 32615119830.0, "step": 5886 }, { "epoch": 1.0493761140819964, "grad_norm": 0.2041015625, "learning_rate": 1.0720756860675667e-05, "loss": 1.0228, "num_tokens": 32621403031.0, "step": 5887 }, { "epoch": 1.049554367201426, "grad_norm": 0.2021484375, "learning_rate": 1.0718160095621404e-05, "loss": 1.0104, "num_tokens": 32627680359.0, "step": 5888 }, { "epoch": 1.0497326203208557, "grad_norm": 0.1875, "learning_rate": 1.0715563354052923e-05, "loss": 1.0452, "num_tokens": 32633944907.0, "step": 5889 }, { "epoch": 1.049910873440285, "grad_norm": 0.1923828125, "learning_rate": 1.0712966636186603e-05, "loss": 1.0213, "num_tokens": 32640147888.0, "step": 5890 }, { "epoch": 1.0500891265597148, "grad_norm": 0.2021484375, "learning_rate": 1.0710369942238839e-05, "loss": 1.006, "num_tokens": 32646409540.0, "step": 5891 }, { "epoch": 1.0502673796791444, "grad_norm": 0.1923828125, "learning_rate": 1.0707773272426008e-05, "loss": 1.0121, "num_tokens": 32652692695.0, "step": 5892 }, { "epoch": 1.050445632798574, "grad_norm": 0.193359375, "learning_rate": 1.0705176626964489e-05, "loss": 0.9986, "num_tokens": 32658949074.0, "step": 5893 }, { "epoch": 1.0506238859180035, "grad_norm": 0.1875, "learning_rate": 1.0702580006070667e-05, "loss": 1.0335, "num_tokens": 32665230825.0, "step": 5894 }, { "epoch": 1.0508021390374331, "grad_norm": 0.1953125, "learning_rate": 1.0699983409960915e-05, "loss": 1.0301, "num_tokens": 32671513456.0, "step": 5895 }, { "epoch": 1.0509803921568628, "grad_norm": 0.193359375, "learning_rate": 1.0697386838851609e-05, "loss": 1.028, "num_tokens": 32677797907.0, "step": 5896 }, { "epoch": 1.0511586452762924, "grad_norm": 0.1845703125, "learning_rate": 1.0694790292959119e-05, "loss": 1.0063, "num_tokens": 32684083039.0, "step": 5897 }, { "epoch": 1.0513368983957219, "grad_norm": 0.1962890625, "learning_rate": 1.0692193772499821e-05, "loss": 1.0363, "num_tokens": 32690355151.0, "step": 5898 }, { "epoch": 1.0515151515151515, "grad_norm": 0.1884765625, "learning_rate": 1.0689597277690082e-05, "loss": 1.011, "num_tokens": 32696639408.0, "step": 5899 }, { "epoch": 1.0516934046345812, "grad_norm": 0.1845703125, "learning_rate": 1.0687000808746266e-05, "loss": 1.0331, "num_tokens": 32702923241.0, "step": 5900 }, { "epoch": 1.0518716577540106, "grad_norm": 0.1953125, "learning_rate": 1.068440436588474e-05, "loss": 1.0239, "num_tokens": 32709178668.0, "step": 5901 }, { "epoch": 1.0520499108734402, "grad_norm": 0.193359375, "learning_rate": 1.0681807949321863e-05, "loss": 0.9885, "num_tokens": 32715447187.0, "step": 5902 }, { "epoch": 1.05222816399287, "grad_norm": 0.18359375, "learning_rate": 1.0679211559274e-05, "loss": 1.0251, "num_tokens": 32721721693.0, "step": 5903 }, { "epoch": 1.0524064171122995, "grad_norm": 0.2060546875, "learning_rate": 1.0676615195957501e-05, "loss": 1.0512, "num_tokens": 32728003855.0, "step": 5904 }, { "epoch": 1.052584670231729, "grad_norm": 0.189453125, "learning_rate": 1.0674018859588729e-05, "loss": 1.0342, "num_tokens": 32734260594.0, "step": 5905 }, { "epoch": 1.0527629233511586, "grad_norm": 0.1904296875, "learning_rate": 1.0671422550384034e-05, "loss": 1.0168, "num_tokens": 32740515734.0, "step": 5906 }, { "epoch": 1.0529411764705883, "grad_norm": 0.197265625, "learning_rate": 1.0668826268559771e-05, "loss": 1.0186, "num_tokens": 32746780767.0, "step": 5907 }, { "epoch": 1.053119429590018, "grad_norm": 0.189453125, "learning_rate": 1.0666230014332282e-05, "loss": 1.0265, "num_tokens": 32753036608.0, "step": 5908 }, { "epoch": 1.0532976827094473, "grad_norm": 0.208984375, "learning_rate": 1.0663633787917912e-05, "loss": 1.0405, "num_tokens": 32759319598.0, "step": 5909 }, { "epoch": 1.053475935828877, "grad_norm": 0.1943359375, "learning_rate": 1.0661037589533017e-05, "loss": 0.9901, "num_tokens": 32765604715.0, "step": 5910 }, { "epoch": 1.0536541889483066, "grad_norm": 0.189453125, "learning_rate": 1.0658441419393925e-05, "loss": 1.0292, "num_tokens": 32771889799.0, "step": 5911 }, { "epoch": 1.053832442067736, "grad_norm": 0.1953125, "learning_rate": 1.0655845277716986e-05, "loss": 1.0317, "num_tokens": 32778173304.0, "step": 5912 }, { "epoch": 1.0540106951871657, "grad_norm": 0.193359375, "learning_rate": 1.0653249164718529e-05, "loss": 1.0106, "num_tokens": 32784457299.0, "step": 5913 }, { "epoch": 1.0541889483065954, "grad_norm": 0.1884765625, "learning_rate": 1.0650653080614896e-05, "loss": 1.0198, "num_tokens": 32790738906.0, "step": 5914 }, { "epoch": 1.054367201426025, "grad_norm": 0.1884765625, "learning_rate": 1.0648057025622418e-05, "loss": 1.0091, "num_tokens": 32796966003.0, "step": 5915 }, { "epoch": 1.0545454545454545, "grad_norm": 0.1962890625, "learning_rate": 1.0645460999957418e-05, "loss": 0.9956, "num_tokens": 32803231691.0, "step": 5916 }, { "epoch": 1.054723707664884, "grad_norm": 0.1923828125, "learning_rate": 1.064286500383623e-05, "loss": 1.0113, "num_tokens": 32809491669.0, "step": 5917 }, { "epoch": 1.0549019607843138, "grad_norm": 0.189453125, "learning_rate": 1.0640269037475173e-05, "loss": 1.0224, "num_tokens": 32815753027.0, "step": 5918 }, { "epoch": 1.0550802139037434, "grad_norm": 0.1943359375, "learning_rate": 1.0637673101090578e-05, "loss": 0.9882, "num_tokens": 32822036902.0, "step": 5919 }, { "epoch": 1.0552584670231728, "grad_norm": 0.1953125, "learning_rate": 1.0635077194898755e-05, "loss": 1.0108, "num_tokens": 32828308988.0, "step": 5920 }, { "epoch": 1.0554367201426025, "grad_norm": 0.18359375, "learning_rate": 1.063248131911603e-05, "loss": 1.0259, "num_tokens": 32834574011.0, "step": 5921 }, { "epoch": 1.0556149732620321, "grad_norm": 0.1884765625, "learning_rate": 1.0629885473958719e-05, "loss": 1.0349, "num_tokens": 32840856506.0, "step": 5922 }, { "epoch": 1.0557932263814618, "grad_norm": 0.1845703125, "learning_rate": 1.062728965964312e-05, "loss": 1.0285, "num_tokens": 32847139352.0, "step": 5923 }, { "epoch": 1.0559714795008912, "grad_norm": 0.1884765625, "learning_rate": 1.0624693876385559e-05, "loss": 1.0132, "num_tokens": 32853409851.0, "step": 5924 }, { "epoch": 1.0561497326203209, "grad_norm": 0.1943359375, "learning_rate": 1.0622098124402334e-05, "loss": 1.0173, "num_tokens": 32859694764.0, "step": 5925 }, { "epoch": 1.0563279857397505, "grad_norm": 0.193359375, "learning_rate": 1.0619502403909754e-05, "loss": 1.0447, "num_tokens": 32865954685.0, "step": 5926 }, { "epoch": 1.05650623885918, "grad_norm": 0.1884765625, "learning_rate": 1.0616906715124118e-05, "loss": 1.0373, "num_tokens": 32872189411.0, "step": 5927 }, { "epoch": 1.0566844919786096, "grad_norm": 0.1943359375, "learning_rate": 1.0614311058261726e-05, "loss": 1.033, "num_tokens": 32878472911.0, "step": 5928 }, { "epoch": 1.0568627450980392, "grad_norm": 0.181640625, "learning_rate": 1.0611715433538874e-05, "loss": 1.0198, "num_tokens": 32884758446.0, "step": 5929 }, { "epoch": 1.0570409982174689, "grad_norm": 0.189453125, "learning_rate": 1.0609119841171862e-05, "loss": 1.0197, "num_tokens": 32891042118.0, "step": 5930 }, { "epoch": 1.0572192513368983, "grad_norm": 0.1904296875, "learning_rate": 1.0606524281376972e-05, "loss": 1.0008, "num_tokens": 32897324453.0, "step": 5931 }, { "epoch": 1.057397504456328, "grad_norm": 0.189453125, "learning_rate": 1.0603928754370499e-05, "loss": 1.0334, "num_tokens": 32903556490.0, "step": 5932 }, { "epoch": 1.0575757575757576, "grad_norm": 0.189453125, "learning_rate": 1.0601333260368724e-05, "loss": 1.0018, "num_tokens": 32909823743.0, "step": 5933 }, { "epoch": 1.0577540106951873, "grad_norm": 0.1884765625, "learning_rate": 1.0598737799587935e-05, "loss": 1.0171, "num_tokens": 32916092690.0, "step": 5934 }, { "epoch": 1.0579322638146167, "grad_norm": 0.189453125, "learning_rate": 1.0596142372244409e-05, "loss": 1.0357, "num_tokens": 32922378358.0, "step": 5935 }, { "epoch": 1.0581105169340463, "grad_norm": 0.1982421875, "learning_rate": 1.0593546978554423e-05, "loss": 1.004, "num_tokens": 32928653087.0, "step": 5936 }, { "epoch": 1.058288770053476, "grad_norm": 0.19140625, "learning_rate": 1.0590951618734256e-05, "loss": 1.0197, "num_tokens": 32934926831.0, "step": 5937 }, { "epoch": 1.0584670231729054, "grad_norm": 0.18359375, "learning_rate": 1.0588356293000179e-05, "loss": 1.0506, "num_tokens": 32941202705.0, "step": 5938 }, { "epoch": 1.058645276292335, "grad_norm": 0.1826171875, "learning_rate": 1.0585761001568455e-05, "loss": 1.0232, "num_tokens": 32947488358.0, "step": 5939 }, { "epoch": 1.0588235294117647, "grad_norm": 0.205078125, "learning_rate": 1.0583165744655358e-05, "loss": 0.9948, "num_tokens": 32953745291.0, "step": 5940 }, { "epoch": 1.0590017825311944, "grad_norm": 0.1845703125, "learning_rate": 1.0580570522477143e-05, "loss": 1.0479, "num_tokens": 32960012123.0, "step": 5941 }, { "epoch": 1.0591800356506238, "grad_norm": 0.1865234375, "learning_rate": 1.0577975335250077e-05, "loss": 1.0112, "num_tokens": 32966242648.0, "step": 5942 }, { "epoch": 1.0593582887700534, "grad_norm": 0.1865234375, "learning_rate": 1.0575380183190415e-05, "loss": 1.0128, "num_tokens": 32972526174.0, "step": 5943 }, { "epoch": 1.059536541889483, "grad_norm": 0.1904296875, "learning_rate": 1.0572785066514415e-05, "loss": 1.0102, "num_tokens": 32978808843.0, "step": 5944 }, { "epoch": 1.0597147950089127, "grad_norm": 0.1845703125, "learning_rate": 1.0570189985438326e-05, "loss": 1.0427, "num_tokens": 32985067952.0, "step": 5945 }, { "epoch": 1.0598930481283422, "grad_norm": 0.1953125, "learning_rate": 1.0567594940178391e-05, "loss": 1.0177, "num_tokens": 32991328950.0, "step": 5946 }, { "epoch": 1.0600713012477718, "grad_norm": 0.1884765625, "learning_rate": 1.0564999930950868e-05, "loss": 1.0089, "num_tokens": 32997566416.0, "step": 5947 }, { "epoch": 1.0602495543672015, "grad_norm": 0.1748046875, "learning_rate": 1.0562404957971986e-05, "loss": 1.0174, "num_tokens": 33003844309.0, "step": 5948 }, { "epoch": 1.0604278074866311, "grad_norm": 0.18359375, "learning_rate": 1.0559810021457995e-05, "loss": 0.9925, "num_tokens": 33010071369.0, "step": 5949 }, { "epoch": 1.0606060606060606, "grad_norm": 0.19140625, "learning_rate": 1.0557215121625126e-05, "loss": 1.0198, "num_tokens": 33016346492.0, "step": 5950 }, { "epoch": 1.0607843137254902, "grad_norm": 0.1962890625, "learning_rate": 1.0554620258689617e-05, "loss": 1.0144, "num_tokens": 33022611762.0, "step": 5951 }, { "epoch": 1.0609625668449199, "grad_norm": 0.1875, "learning_rate": 1.0552025432867694e-05, "loss": 1.0316, "num_tokens": 33028897329.0, "step": 5952 }, { "epoch": 1.0611408199643493, "grad_norm": 0.1865234375, "learning_rate": 1.054943064437559e-05, "loss": 1.0036, "num_tokens": 33035176375.0, "step": 5953 }, { "epoch": 1.061319073083779, "grad_norm": 0.1875, "learning_rate": 1.0546835893429524e-05, "loss": 1.0152, "num_tokens": 33041459324.0, "step": 5954 }, { "epoch": 1.0614973262032086, "grad_norm": 0.1943359375, "learning_rate": 1.0544241180245716e-05, "loss": 1.0143, "num_tokens": 33047717688.0, "step": 5955 }, { "epoch": 1.0616755793226382, "grad_norm": 0.1904296875, "learning_rate": 1.0541646505040391e-05, "loss": 1.0331, "num_tokens": 33053952255.0, "step": 5956 }, { "epoch": 1.0618538324420677, "grad_norm": 0.193359375, "learning_rate": 1.0539051868029755e-05, "loss": 1.0272, "num_tokens": 33060219068.0, "step": 5957 }, { "epoch": 1.0620320855614973, "grad_norm": 0.1884765625, "learning_rate": 1.0536457269430027e-05, "loss": 1.0143, "num_tokens": 33066500861.0, "step": 5958 }, { "epoch": 1.062210338680927, "grad_norm": 0.1826171875, "learning_rate": 1.0533862709457412e-05, "loss": 1.0109, "num_tokens": 33072779983.0, "step": 5959 }, { "epoch": 1.0623885918003566, "grad_norm": 0.189453125, "learning_rate": 1.0531268188328116e-05, "loss": 1.0161, "num_tokens": 33079062362.0, "step": 5960 }, { "epoch": 1.062566844919786, "grad_norm": 0.1865234375, "learning_rate": 1.0528673706258346e-05, "loss": 1.0219, "num_tokens": 33085323966.0, "step": 5961 }, { "epoch": 1.0627450980392157, "grad_norm": 0.185546875, "learning_rate": 1.052607926346429e-05, "loss": 1.0371, "num_tokens": 33091598189.0, "step": 5962 }, { "epoch": 1.0629233511586453, "grad_norm": 0.1943359375, "learning_rate": 1.0523484860162149e-05, "loss": 1.0114, "num_tokens": 33097882453.0, "step": 5963 }, { "epoch": 1.0631016042780748, "grad_norm": 0.193359375, "learning_rate": 1.0520890496568117e-05, "loss": 1.0096, "num_tokens": 33104166086.0, "step": 5964 }, { "epoch": 1.0632798573975044, "grad_norm": 0.1845703125, "learning_rate": 1.051829617289838e-05, "loss": 1.0167, "num_tokens": 33110450480.0, "step": 5965 }, { "epoch": 1.063458110516934, "grad_norm": 0.1904296875, "learning_rate": 1.0515701889369127e-05, "loss": 1.023, "num_tokens": 33116705541.0, "step": 5966 }, { "epoch": 1.0636363636363637, "grad_norm": 0.1806640625, "learning_rate": 1.0513107646196538e-05, "loss": 1.026, "num_tokens": 33122989460.0, "step": 5967 }, { "epoch": 1.0638146167557931, "grad_norm": 0.1884765625, "learning_rate": 1.0510513443596793e-05, "loss": 1.0037, "num_tokens": 33129247380.0, "step": 5968 }, { "epoch": 1.0639928698752228, "grad_norm": 0.185546875, "learning_rate": 1.0507919281786062e-05, "loss": 1.0746, "num_tokens": 33135529930.0, "step": 5969 }, { "epoch": 1.0641711229946524, "grad_norm": 0.1923828125, "learning_rate": 1.0505325160980523e-05, "loss": 1.039, "num_tokens": 33141811661.0, "step": 5970 }, { "epoch": 1.064349376114082, "grad_norm": 0.1865234375, "learning_rate": 1.0502731081396343e-05, "loss": 1.0237, "num_tokens": 33148094917.0, "step": 5971 }, { "epoch": 1.0645276292335115, "grad_norm": 0.1875, "learning_rate": 1.050013704324969e-05, "loss": 0.9752, "num_tokens": 33154376841.0, "step": 5972 }, { "epoch": 1.0647058823529412, "grad_norm": 0.1962890625, "learning_rate": 1.0497543046756717e-05, "loss": 1.0218, "num_tokens": 33160628412.0, "step": 5973 }, { "epoch": 1.0648841354723708, "grad_norm": 0.1884765625, "learning_rate": 1.0494949092133592e-05, "loss": 1.0247, "num_tokens": 33166902965.0, "step": 5974 }, { "epoch": 1.0650623885918002, "grad_norm": 0.1982421875, "learning_rate": 1.0492355179596465e-05, "loss": 1.029, "num_tokens": 33173186321.0, "step": 5975 }, { "epoch": 1.06524064171123, "grad_norm": 0.19140625, "learning_rate": 1.0489761309361492e-05, "loss": 0.9934, "num_tokens": 33179470979.0, "step": 5976 }, { "epoch": 1.0654188948306595, "grad_norm": 0.185546875, "learning_rate": 1.0487167481644816e-05, "loss": 0.9997, "num_tokens": 33185722804.0, "step": 5977 }, { "epoch": 1.0655971479500892, "grad_norm": 0.1962890625, "learning_rate": 1.0484573696662577e-05, "loss": 1.0058, "num_tokens": 33192008655.0, "step": 5978 }, { "epoch": 1.0657754010695186, "grad_norm": 0.205078125, "learning_rate": 1.0481979954630926e-05, "loss": 1.0139, "num_tokens": 33198284864.0, "step": 5979 }, { "epoch": 1.0659536541889483, "grad_norm": 0.1865234375, "learning_rate": 1.0479386255765991e-05, "loss": 1.0445, "num_tokens": 33204553378.0, "step": 5980 }, { "epoch": 1.066131907308378, "grad_norm": 0.1943359375, "learning_rate": 1.0476792600283909e-05, "loss": 1.0192, "num_tokens": 33210815646.0, "step": 5981 }, { "epoch": 1.0663101604278076, "grad_norm": 0.1962890625, "learning_rate": 1.0474198988400811e-05, "loss": 1.0414, "num_tokens": 33217099571.0, "step": 5982 }, { "epoch": 1.066488413547237, "grad_norm": 0.19921875, "learning_rate": 1.0471605420332826e-05, "loss": 1.0075, "num_tokens": 33223383643.0, "step": 5983 }, { "epoch": 1.0666666666666667, "grad_norm": 0.1943359375, "learning_rate": 1.0469011896296073e-05, "loss": 1.0058, "num_tokens": 33229621597.0, "step": 5984 }, { "epoch": 1.0668449197860963, "grad_norm": 0.18359375, "learning_rate": 1.0466418416506667e-05, "loss": 1.0205, "num_tokens": 33235904778.0, "step": 5985 }, { "epoch": 1.0670231729055257, "grad_norm": 0.18359375, "learning_rate": 1.0463824981180732e-05, "loss": 1.0321, "num_tokens": 33242188651.0, "step": 5986 }, { "epoch": 1.0672014260249554, "grad_norm": 0.1875, "learning_rate": 1.0461231590534371e-05, "loss": 1.0156, "num_tokens": 33248464795.0, "step": 5987 }, { "epoch": 1.067379679144385, "grad_norm": 0.185546875, "learning_rate": 1.0458638244783696e-05, "loss": 0.9983, "num_tokens": 33254749110.0, "step": 5988 }, { "epoch": 1.0675579322638147, "grad_norm": 0.181640625, "learning_rate": 1.045604494414481e-05, "loss": 1.0251, "num_tokens": 33261009664.0, "step": 5989 }, { "epoch": 1.067736185383244, "grad_norm": 0.181640625, "learning_rate": 1.0453451688833815e-05, "loss": 1.0229, "num_tokens": 33267293143.0, "step": 5990 }, { "epoch": 1.0679144385026738, "grad_norm": 0.1865234375, "learning_rate": 1.0450858479066807e-05, "loss": 1.0321, "num_tokens": 33273571607.0, "step": 5991 }, { "epoch": 1.0680926916221034, "grad_norm": 0.1865234375, "learning_rate": 1.0448265315059879e-05, "loss": 1.0155, "num_tokens": 33279854971.0, "step": 5992 }, { "epoch": 1.068270944741533, "grad_norm": 0.1796875, "learning_rate": 1.0445672197029116e-05, "loss": 1.0128, "num_tokens": 33286136246.0, "step": 5993 }, { "epoch": 1.0684491978609625, "grad_norm": 0.185546875, "learning_rate": 1.0443079125190605e-05, "loss": 1.0278, "num_tokens": 33292407054.0, "step": 5994 }, { "epoch": 1.0686274509803921, "grad_norm": 0.1826171875, "learning_rate": 1.0440486099760431e-05, "loss": 1.0254, "num_tokens": 33298680265.0, "step": 5995 }, { "epoch": 1.0688057040998218, "grad_norm": 0.193359375, "learning_rate": 1.0437893120954668e-05, "loss": 1.0497, "num_tokens": 33304962766.0, "step": 5996 }, { "epoch": 1.0689839572192514, "grad_norm": 0.1787109375, "learning_rate": 1.043530018898939e-05, "loss": 1.0079, "num_tokens": 33311246032.0, "step": 5997 }, { "epoch": 1.0691622103386809, "grad_norm": 0.1826171875, "learning_rate": 1.0432707304080668e-05, "loss": 1.0072, "num_tokens": 33317492030.0, "step": 5998 }, { "epoch": 1.0693404634581105, "grad_norm": 0.1826171875, "learning_rate": 1.0430114466444568e-05, "loss": 1.0305, "num_tokens": 33323748910.0, "step": 5999 }, { "epoch": 1.0695187165775402, "grad_norm": 0.1884765625, "learning_rate": 1.0427521676297148e-05, "loss": 1.0139, "num_tokens": 33330033587.0, "step": 6000 }, { "epoch": 1.0696969696969698, "grad_norm": 0.181640625, "learning_rate": 1.0424928933854464e-05, "loss": 1.0112, "num_tokens": 33336299716.0, "step": 6001 }, { "epoch": 1.0698752228163992, "grad_norm": 0.1904296875, "learning_rate": 1.0422336239332578e-05, "loss": 1.0083, "num_tokens": 33342565602.0, "step": 6002 }, { "epoch": 1.070053475935829, "grad_norm": 0.1904296875, "learning_rate": 1.0419743592947533e-05, "loss": 1.027, "num_tokens": 33348835695.0, "step": 6003 }, { "epoch": 1.0702317290552585, "grad_norm": 0.1904296875, "learning_rate": 1.0417150994915377e-05, "loss": 1.0235, "num_tokens": 33355069059.0, "step": 6004 }, { "epoch": 1.070409982174688, "grad_norm": 0.1845703125, "learning_rate": 1.0414558445452152e-05, "loss": 1.0091, "num_tokens": 33361346020.0, "step": 6005 }, { "epoch": 1.0705882352941176, "grad_norm": 0.193359375, "learning_rate": 1.0411965944773899e-05, "loss": 1.0113, "num_tokens": 33367611731.0, "step": 6006 }, { "epoch": 1.0707664884135473, "grad_norm": 0.1943359375, "learning_rate": 1.0409373493096646e-05, "loss": 1.0375, "num_tokens": 33373838294.0, "step": 6007 }, { "epoch": 1.070944741532977, "grad_norm": 0.203125, "learning_rate": 1.0406781090636425e-05, "loss": 1.0248, "num_tokens": 33380121820.0, "step": 6008 }, { "epoch": 1.0711229946524063, "grad_norm": 0.2021484375, "learning_rate": 1.040418873760926e-05, "loss": 1.0142, "num_tokens": 33386393894.0, "step": 6009 }, { "epoch": 1.071301247771836, "grad_norm": 0.1923828125, "learning_rate": 1.0401596434231174e-05, "loss": 1.0142, "num_tokens": 33392673548.0, "step": 6010 }, { "epoch": 1.0714795008912656, "grad_norm": 0.1826171875, "learning_rate": 1.0399004180718183e-05, "loss": 1.0025, "num_tokens": 33398955421.0, "step": 6011 }, { "epoch": 1.0716577540106953, "grad_norm": 0.189453125, "learning_rate": 1.03964119772863e-05, "loss": 1.0135, "num_tokens": 33405238096.0, "step": 6012 }, { "epoch": 1.0718360071301247, "grad_norm": 0.197265625, "learning_rate": 1.0393819824151537e-05, "loss": 0.9943, "num_tokens": 33411522757.0, "step": 6013 }, { "epoch": 1.0720142602495544, "grad_norm": 0.1884765625, "learning_rate": 1.0391227721529897e-05, "loss": 1.0079, "num_tokens": 33417788957.0, "step": 6014 }, { "epoch": 1.072192513368984, "grad_norm": 0.1806640625, "learning_rate": 1.0388635669637382e-05, "loss": 1.0044, "num_tokens": 33424071754.0, "step": 6015 }, { "epoch": 1.0723707664884135, "grad_norm": 0.1875, "learning_rate": 1.0386043668689978e-05, "loss": 1.0118, "num_tokens": 33430354063.0, "step": 6016 }, { "epoch": 1.072549019607843, "grad_norm": 0.1875, "learning_rate": 1.0383451718903687e-05, "loss": 1.03, "num_tokens": 33436638251.0, "step": 6017 }, { "epoch": 1.0727272727272728, "grad_norm": 0.1953125, "learning_rate": 1.0380859820494497e-05, "loss": 1.0081, "num_tokens": 33442921585.0, "step": 6018 }, { "epoch": 1.0729055258467024, "grad_norm": 0.19140625, "learning_rate": 1.0378267973678388e-05, "loss": 1.0297, "num_tokens": 33449204188.0, "step": 6019 }, { "epoch": 1.0730837789661318, "grad_norm": 0.18359375, "learning_rate": 1.0375676178671343e-05, "loss": 1.0293, "num_tokens": 33455409288.0, "step": 6020 }, { "epoch": 1.0732620320855615, "grad_norm": 0.1826171875, "learning_rate": 1.0373084435689332e-05, "loss": 1.0312, "num_tokens": 33461691621.0, "step": 6021 }, { "epoch": 1.0734402852049911, "grad_norm": 0.1865234375, "learning_rate": 1.0370492744948331e-05, "loss": 1.0341, "num_tokens": 33467949727.0, "step": 6022 }, { "epoch": 1.0736185383244208, "grad_norm": 0.1904296875, "learning_rate": 1.0367901106664298e-05, "loss": 1.0066, "num_tokens": 33474233646.0, "step": 6023 }, { "epoch": 1.0737967914438502, "grad_norm": 0.1884765625, "learning_rate": 1.03653095210532e-05, "loss": 1.0053, "num_tokens": 33480516897.0, "step": 6024 }, { "epoch": 1.0739750445632799, "grad_norm": 0.1962890625, "learning_rate": 1.0362717988330992e-05, "loss": 1.0069, "num_tokens": 33486801322.0, "step": 6025 }, { "epoch": 1.0741532976827095, "grad_norm": 0.185546875, "learning_rate": 1.0360126508713635e-05, "loss": 1.0139, "num_tokens": 33493027458.0, "step": 6026 }, { "epoch": 1.074331550802139, "grad_norm": 0.1826171875, "learning_rate": 1.0357535082417066e-05, "loss": 0.9974, "num_tokens": 33499302179.0, "step": 6027 }, { "epoch": 1.0745098039215686, "grad_norm": 0.1904296875, "learning_rate": 1.0354943709657235e-05, "loss": 1.0319, "num_tokens": 33505540804.0, "step": 6028 }, { "epoch": 1.0746880570409982, "grad_norm": 0.1884765625, "learning_rate": 1.0352352390650082e-05, "loss": 1.0143, "num_tokens": 33511807897.0, "step": 6029 }, { "epoch": 1.0748663101604279, "grad_norm": 0.185546875, "learning_rate": 1.0349761125611544e-05, "loss": 1.012, "num_tokens": 33518059764.0, "step": 6030 }, { "epoch": 1.0750445632798573, "grad_norm": 0.181640625, "learning_rate": 1.0347169914757546e-05, "loss": 1.0346, "num_tokens": 33524331965.0, "step": 6031 }, { "epoch": 1.075222816399287, "grad_norm": 0.1884765625, "learning_rate": 1.0344578758304016e-05, "loss": 1.0399, "num_tokens": 33530614830.0, "step": 6032 }, { "epoch": 1.0754010695187166, "grad_norm": 0.1806640625, "learning_rate": 1.0341987656466882e-05, "loss": 1.033, "num_tokens": 33536882826.0, "step": 6033 }, { "epoch": 1.0755793226381463, "grad_norm": 0.18359375, "learning_rate": 1.033939660946205e-05, "loss": 1.0309, "num_tokens": 33543152373.0, "step": 6034 }, { "epoch": 1.0757575757575757, "grad_norm": 0.189453125, "learning_rate": 1.033680561750544e-05, "loss": 0.9923, "num_tokens": 33549434670.0, "step": 6035 }, { "epoch": 1.0759358288770053, "grad_norm": 0.1845703125, "learning_rate": 1.0334214680812955e-05, "loss": 0.9856, "num_tokens": 33555721414.0, "step": 6036 }, { "epoch": 1.076114081996435, "grad_norm": 0.1826171875, "learning_rate": 1.0331623799600509e-05, "loss": 1.0315, "num_tokens": 33561993593.0, "step": 6037 }, { "epoch": 1.0762923351158644, "grad_norm": 0.18359375, "learning_rate": 1.032903297408399e-05, "loss": 1.0037, "num_tokens": 33568278165.0, "step": 6038 }, { "epoch": 1.076470588235294, "grad_norm": 0.1845703125, "learning_rate": 1.0326442204479293e-05, "loss": 1.0379, "num_tokens": 33574545602.0, "step": 6039 }, { "epoch": 1.0766488413547237, "grad_norm": 0.18359375, "learning_rate": 1.032385149100231e-05, "loss": 1.0028, "num_tokens": 33580829733.0, "step": 6040 }, { "epoch": 1.0768270944741534, "grad_norm": 0.18359375, "learning_rate": 1.0321260833868925e-05, "loss": 1.0278, "num_tokens": 33587110941.0, "step": 6041 }, { "epoch": 1.0770053475935828, "grad_norm": 0.1767578125, "learning_rate": 1.0318670233295018e-05, "loss": 0.9968, "num_tokens": 33593394892.0, "step": 6042 }, { "epoch": 1.0771836007130124, "grad_norm": 0.1904296875, "learning_rate": 1.0316079689496466e-05, "loss": 0.9978, "num_tokens": 33599678424.0, "step": 6043 }, { "epoch": 1.077361853832442, "grad_norm": 0.19140625, "learning_rate": 1.031348920268914e-05, "loss": 1.0231, "num_tokens": 33605944610.0, "step": 6044 }, { "epoch": 1.0775401069518717, "grad_norm": 0.193359375, "learning_rate": 1.0310898773088902e-05, "loss": 1.0037, "num_tokens": 33612172441.0, "step": 6045 }, { "epoch": 1.0777183600713012, "grad_norm": 0.1953125, "learning_rate": 1.0308308400911615e-05, "loss": 1.0306, "num_tokens": 33618388523.0, "step": 6046 }, { "epoch": 1.0778966131907308, "grad_norm": 0.1953125, "learning_rate": 1.0305718086373133e-05, "loss": 1.0054, "num_tokens": 33624672956.0, "step": 6047 }, { "epoch": 1.0780748663101605, "grad_norm": 0.1875, "learning_rate": 1.0303127829689313e-05, "loss": 1.0173, "num_tokens": 33630957208.0, "step": 6048 }, { "epoch": 1.07825311942959, "grad_norm": 0.18359375, "learning_rate": 1.0300537631075996e-05, "loss": 1.0118, "num_tokens": 33637196572.0, "step": 6049 }, { "epoch": 1.0784313725490196, "grad_norm": 0.193359375, "learning_rate": 1.0297947490749025e-05, "loss": 1.0011, "num_tokens": 33643377597.0, "step": 6050 }, { "epoch": 1.0786096256684492, "grad_norm": 0.185546875, "learning_rate": 1.0295357408924241e-05, "loss": 1.0332, "num_tokens": 33649662118.0, "step": 6051 }, { "epoch": 1.0787878787878789, "grad_norm": 0.189453125, "learning_rate": 1.0292767385817472e-05, "loss": 1.0355, "num_tokens": 33655913384.0, "step": 6052 }, { "epoch": 1.0789661319073083, "grad_norm": 0.18359375, "learning_rate": 1.029017742164455e-05, "loss": 1.0059, "num_tokens": 33662190239.0, "step": 6053 }, { "epoch": 1.079144385026738, "grad_norm": 0.185546875, "learning_rate": 1.0287587516621289e-05, "loss": 0.9982, "num_tokens": 33668474565.0, "step": 6054 }, { "epoch": 1.0793226381461676, "grad_norm": 0.1845703125, "learning_rate": 1.0284997670963513e-05, "loss": 0.9805, "num_tokens": 33674755482.0, "step": 6055 }, { "epoch": 1.0795008912655972, "grad_norm": 0.1904296875, "learning_rate": 1.0282407884887032e-05, "loss": 1.0359, "num_tokens": 33681008309.0, "step": 6056 }, { "epoch": 1.0796791443850267, "grad_norm": 0.19921875, "learning_rate": 1.027981815860765e-05, "loss": 1.0435, "num_tokens": 33687290297.0, "step": 6057 }, { "epoch": 1.0798573975044563, "grad_norm": 0.1845703125, "learning_rate": 1.0277228492341176e-05, "loss": 0.9998, "num_tokens": 33693556278.0, "step": 6058 }, { "epoch": 1.080035650623886, "grad_norm": 0.1962890625, "learning_rate": 1.0274638886303406e-05, "loss": 1.0065, "num_tokens": 33699821835.0, "step": 6059 }, { "epoch": 1.0802139037433156, "grad_norm": 0.1943359375, "learning_rate": 1.0272049340710128e-05, "loss": 1.0007, "num_tokens": 33706085133.0, "step": 6060 }, { "epoch": 1.080392156862745, "grad_norm": 0.19140625, "learning_rate": 1.0269459855777138e-05, "loss": 1.0425, "num_tokens": 33712337525.0, "step": 6061 }, { "epoch": 1.0805704099821747, "grad_norm": 0.189453125, "learning_rate": 1.0266870431720206e-05, "loss": 1.0104, "num_tokens": 33718599022.0, "step": 6062 }, { "epoch": 1.0807486631016043, "grad_norm": 0.1875, "learning_rate": 1.026428106875512e-05, "loss": 0.9965, "num_tokens": 33724881026.0, "step": 6063 }, { "epoch": 1.080926916221034, "grad_norm": 0.1875, "learning_rate": 1.0261691767097646e-05, "loss": 1.02, "num_tokens": 33731162052.0, "step": 6064 }, { "epoch": 1.0811051693404634, "grad_norm": 0.1865234375, "learning_rate": 1.0259102526963556e-05, "loss": 1.0205, "num_tokens": 33737427116.0, "step": 6065 }, { "epoch": 1.081283422459893, "grad_norm": 0.19140625, "learning_rate": 1.0256513348568604e-05, "loss": 1.0177, "num_tokens": 33743650620.0, "step": 6066 }, { "epoch": 1.0814616755793227, "grad_norm": 0.1904296875, "learning_rate": 1.0253924232128558e-05, "loss": 1.012, "num_tokens": 33749921047.0, "step": 6067 }, { "epoch": 1.0816399286987521, "grad_norm": 0.18359375, "learning_rate": 1.0251335177859164e-05, "loss": 1.0344, "num_tokens": 33756175397.0, "step": 6068 }, { "epoch": 1.0818181818181818, "grad_norm": 0.19140625, "learning_rate": 1.0248746185976161e-05, "loss": 1.0203, "num_tokens": 33762460519.0, "step": 6069 }, { "epoch": 1.0819964349376114, "grad_norm": 0.19140625, "learning_rate": 1.02461572566953e-05, "loss": 0.9979, "num_tokens": 33768723712.0, "step": 6070 }, { "epoch": 1.082174688057041, "grad_norm": 0.201171875, "learning_rate": 1.0243568390232316e-05, "loss": 1.035, "num_tokens": 33775007027.0, "step": 6071 }, { "epoch": 1.0823529411764705, "grad_norm": 0.1884765625, "learning_rate": 1.0240979586802937e-05, "loss": 1.0033, "num_tokens": 33781293474.0, "step": 6072 }, { "epoch": 1.0825311942959002, "grad_norm": 0.18359375, "learning_rate": 1.023839084662289e-05, "loss": 0.9964, "num_tokens": 33787559710.0, "step": 6073 }, { "epoch": 1.0827094474153298, "grad_norm": 0.1884765625, "learning_rate": 1.0235802169907894e-05, "loss": 1.026, "num_tokens": 33793830806.0, "step": 6074 }, { "epoch": 1.0828877005347595, "grad_norm": 0.1923828125, "learning_rate": 1.0233213556873664e-05, "loss": 1.0453, "num_tokens": 33800115032.0, "step": 6075 }, { "epoch": 1.083065953654189, "grad_norm": 0.2001953125, "learning_rate": 1.0230625007735915e-05, "loss": 1.013, "num_tokens": 33806396595.0, "step": 6076 }, { "epoch": 1.0832442067736185, "grad_norm": 0.1904296875, "learning_rate": 1.0228036522710343e-05, "loss": 1.018, "num_tokens": 33812663167.0, "step": 6077 }, { "epoch": 1.0834224598930482, "grad_norm": 0.1875, "learning_rate": 1.022544810201265e-05, "loss": 1.0416, "num_tokens": 33818945585.0, "step": 6078 }, { "epoch": 1.0836007130124776, "grad_norm": 0.1865234375, "learning_rate": 1.0222859745858534e-05, "loss": 0.9944, "num_tokens": 33825230713.0, "step": 6079 }, { "epoch": 1.0837789661319073, "grad_norm": 0.197265625, "learning_rate": 1.0220271454463675e-05, "loss": 1.0352, "num_tokens": 33831461634.0, "step": 6080 }, { "epoch": 1.083957219251337, "grad_norm": 0.197265625, "learning_rate": 1.0217683228043764e-05, "loss": 1.0352, "num_tokens": 33837746091.0, "step": 6081 }, { "epoch": 1.0841354723707666, "grad_norm": 0.1865234375, "learning_rate": 1.0215095066814473e-05, "loss": 1.015, "num_tokens": 33844027586.0, "step": 6082 }, { "epoch": 1.084313725490196, "grad_norm": 0.1806640625, "learning_rate": 1.0212506970991478e-05, "loss": 1.0078, "num_tokens": 33850299544.0, "step": 6083 }, { "epoch": 1.0844919786096257, "grad_norm": 0.2001953125, "learning_rate": 1.0209918940790446e-05, "loss": 1.0348, "num_tokens": 33856576444.0, "step": 6084 }, { "epoch": 1.0846702317290553, "grad_norm": 0.1923828125, "learning_rate": 1.0207330976427032e-05, "loss": 1.0396, "num_tokens": 33862846588.0, "step": 6085 }, { "epoch": 1.084848484848485, "grad_norm": 0.1904296875, "learning_rate": 1.0204743078116897e-05, "loss": 1.0564, "num_tokens": 33869129794.0, "step": 6086 }, { "epoch": 1.0850267379679144, "grad_norm": 0.1845703125, "learning_rate": 1.0202155246075687e-05, "loss": 1.0106, "num_tokens": 33875414310.0, "step": 6087 }, { "epoch": 1.085204991087344, "grad_norm": 0.181640625, "learning_rate": 1.0199567480519051e-05, "loss": 0.9914, "num_tokens": 33881697690.0, "step": 6088 }, { "epoch": 1.0853832442067737, "grad_norm": 0.1884765625, "learning_rate": 1.0196979781662624e-05, "loss": 1.0223, "num_tokens": 33887918602.0, "step": 6089 }, { "epoch": 1.085561497326203, "grad_norm": 0.18359375, "learning_rate": 1.0194392149722045e-05, "loss": 1.0352, "num_tokens": 33894201096.0, "step": 6090 }, { "epoch": 1.0857397504456328, "grad_norm": 0.1884765625, "learning_rate": 1.0191804584912939e-05, "loss": 1.0227, "num_tokens": 33900478896.0, "step": 6091 }, { "epoch": 1.0859180035650624, "grad_norm": 0.1806640625, "learning_rate": 1.0189217087450924e-05, "loss": 0.9907, "num_tokens": 33906762927.0, "step": 6092 }, { "epoch": 1.086096256684492, "grad_norm": 0.1875, "learning_rate": 1.0186629657551624e-05, "loss": 1.0207, "num_tokens": 33912994171.0, "step": 6093 }, { "epoch": 1.0862745098039215, "grad_norm": 0.1865234375, "learning_rate": 1.0184042295430643e-05, "loss": 1.0308, "num_tokens": 33919247602.0, "step": 6094 }, { "epoch": 1.0864527629233511, "grad_norm": 0.189453125, "learning_rate": 1.0181455001303593e-05, "loss": 1.0208, "num_tokens": 33925523142.0, "step": 6095 }, { "epoch": 1.0866310160427808, "grad_norm": 0.18359375, "learning_rate": 1.0178867775386067e-05, "loss": 0.9929, "num_tokens": 33931807671.0, "step": 6096 }, { "epoch": 1.0868092691622104, "grad_norm": 0.1796875, "learning_rate": 1.0176280617893669e-05, "loss": 0.9879, "num_tokens": 33938061314.0, "step": 6097 }, { "epoch": 1.0869875222816399, "grad_norm": 0.18359375, "learning_rate": 1.0173693529041977e-05, "loss": 1.0144, "num_tokens": 33944339295.0, "step": 6098 }, { "epoch": 1.0871657754010695, "grad_norm": 0.1865234375, "learning_rate": 1.0171106509046582e-05, "loss": 1.0165, "num_tokens": 33950624492.0, "step": 6099 }, { "epoch": 1.0873440285204992, "grad_norm": 0.1865234375, "learning_rate": 1.0168519558123053e-05, "loss": 1.0248, "num_tokens": 6258185.0, "step": 6100 }, { "epoch": 1.0875222816399286, "grad_norm": 0.197265625, "learning_rate": 1.0165932676486964e-05, "loss": 1.0212, "num_tokens": 12539987.0, "step": 6101 }, { "epoch": 1.0877005347593582, "grad_norm": 0.1923828125, "learning_rate": 1.0163345864353886e-05, "loss": 1.0266, "num_tokens": 18771422.0, "step": 6102 }, { "epoch": 1.087878787878788, "grad_norm": 0.1923828125, "learning_rate": 1.016075912193937e-05, "loss": 0.9978, "num_tokens": 25028848.0, "step": 6103 }, { "epoch": 1.0880570409982175, "grad_norm": 0.1806640625, "learning_rate": 1.0158172449458976e-05, "loss": 1.0083, "num_tokens": 31313981.0, "step": 6104 }, { "epoch": 1.088235294117647, "grad_norm": 0.1787109375, "learning_rate": 1.0155585847128249e-05, "loss": 1.0141, "num_tokens": 37596795.0, "step": 6105 }, { "epoch": 1.0884135472370766, "grad_norm": 0.1875, "learning_rate": 1.0152999315162733e-05, "loss": 1.0174, "num_tokens": 43879499.0, "step": 6106 }, { "epoch": 1.0885918003565063, "grad_norm": 0.1875, "learning_rate": 1.0150412853777965e-05, "loss": 1.0065, "num_tokens": 50131960.0, "step": 6107 }, { "epoch": 1.088770053475936, "grad_norm": 0.189453125, "learning_rate": 1.014782646318947e-05, "loss": 1.0452, "num_tokens": 56391865.0, "step": 6108 }, { "epoch": 1.0889483065953653, "grad_norm": 0.19140625, "learning_rate": 1.0145240143612776e-05, "loss": 1.0352, "num_tokens": 62674999.0, "step": 6109 }, { "epoch": 1.089126559714795, "grad_norm": 0.189453125, "learning_rate": 1.0142653895263401e-05, "loss": 1.0458, "num_tokens": 68933470.0, "step": 6110 }, { "epoch": 1.0893048128342246, "grad_norm": 0.2099609375, "learning_rate": 1.0140067718356861e-05, "loss": 1.0283, "num_tokens": 75183036.0, "step": 6111 }, { "epoch": 1.089483065953654, "grad_norm": 0.1982421875, "learning_rate": 1.013748161310866e-05, "loss": 1.0331, "num_tokens": 81388459.0, "step": 6112 }, { "epoch": 1.0896613190730837, "grad_norm": 0.2099609375, "learning_rate": 1.0134895579734296e-05, "loss": 1.003, "num_tokens": 87670162.0, "step": 6113 }, { "epoch": 1.0898395721925134, "grad_norm": 0.2099609375, "learning_rate": 1.0132309618449273e-05, "loss": 1.0352, "num_tokens": 93912587.0, "step": 6114 }, { "epoch": 1.090017825311943, "grad_norm": 0.1875, "learning_rate": 1.0129723729469064e-05, "loss": 1.0544, "num_tokens": 100196317.0, "step": 6115 }, { "epoch": 1.0901960784313725, "grad_norm": 0.19921875, "learning_rate": 1.0127137913009165e-05, "loss": 1.0308, "num_tokens": 106464362.0, "step": 6116 }, { "epoch": 1.090374331550802, "grad_norm": 0.205078125, "learning_rate": 1.0124552169285047e-05, "loss": 1.0211, "num_tokens": 112713082.0, "step": 6117 }, { "epoch": 1.0905525846702318, "grad_norm": 0.2001953125, "learning_rate": 1.0121966498512184e-05, "loss": 0.9916, "num_tokens": 118988202.0, "step": 6118 }, { "epoch": 1.0907308377896614, "grad_norm": 0.19921875, "learning_rate": 1.0119380900906035e-05, "loss": 1.0149, "num_tokens": 125228491.0, "step": 6119 }, { "epoch": 1.0909090909090908, "grad_norm": 0.1943359375, "learning_rate": 1.0116795376682064e-05, "loss": 1.0187, "num_tokens": 131510565.0, "step": 6120 }, { "epoch": 1.0910873440285205, "grad_norm": 0.1962890625, "learning_rate": 1.0114209926055717e-05, "loss": 1.012, "num_tokens": 137793586.0, "step": 6121 }, { "epoch": 1.0912655971479501, "grad_norm": 0.1962890625, "learning_rate": 1.0111624549242452e-05, "loss": 1.0178, "num_tokens": 144039324.0, "step": 6122 }, { "epoch": 1.0914438502673798, "grad_norm": 0.1982421875, "learning_rate": 1.01090392464577e-05, "loss": 1.0075, "num_tokens": 150323091.0, "step": 6123 }, { "epoch": 1.0916221033868092, "grad_norm": 0.1875, "learning_rate": 1.010645401791689e-05, "loss": 1.0496, "num_tokens": 156606971.0, "step": 6124 }, { "epoch": 1.0918003565062389, "grad_norm": 0.1865234375, "learning_rate": 1.010386886383546e-05, "loss": 1.0247, "num_tokens": 162890808.0, "step": 6125 }, { "epoch": 1.0919786096256685, "grad_norm": 0.1904296875, "learning_rate": 1.0101283784428823e-05, "loss": 0.9987, "num_tokens": 169143117.0, "step": 6126 }, { "epoch": 1.0921568627450982, "grad_norm": 0.1884765625, "learning_rate": 1.0098698779912403e-05, "loss": 1.0399, "num_tokens": 175375316.0, "step": 6127 }, { "epoch": 1.0923351158645276, "grad_norm": 0.1953125, "learning_rate": 1.0096113850501602e-05, "loss": 1.016, "num_tokens": 181615207.0, "step": 6128 }, { "epoch": 1.0925133689839572, "grad_norm": 0.189453125, "learning_rate": 1.0093528996411826e-05, "loss": 1.0333, "num_tokens": 187898740.0, "step": 6129 }, { "epoch": 1.0926916221033869, "grad_norm": 0.1904296875, "learning_rate": 1.0090944217858472e-05, "loss": 1.0166, "num_tokens": 194182541.0, "step": 6130 }, { "epoch": 1.0928698752228163, "grad_norm": 0.1923828125, "learning_rate": 1.0088359515056924e-05, "loss": 1.0131, "num_tokens": 200468893.0, "step": 6131 }, { "epoch": 1.093048128342246, "grad_norm": 0.201171875, "learning_rate": 1.0085774888222575e-05, "loss": 1.0409, "num_tokens": 206752258.0, "step": 6132 }, { "epoch": 1.0932263814616756, "grad_norm": 0.185546875, "learning_rate": 1.0083190337570795e-05, "loss": 0.9989, "num_tokens": 213036080.0, "step": 6133 }, { "epoch": 1.0934046345811053, "grad_norm": 0.19140625, "learning_rate": 1.008060586331696e-05, "loss": 1.0248, "num_tokens": 219319685.0, "step": 6134 }, { "epoch": 1.0935828877005347, "grad_norm": 0.1953125, "learning_rate": 1.0078021465676429e-05, "loss": 1.009, "num_tokens": 225533234.0, "step": 6135 }, { "epoch": 1.0937611408199643, "grad_norm": 0.1806640625, "learning_rate": 1.0075437144864568e-05, "loss": 0.997, "num_tokens": 231784439.0, "step": 6136 }, { "epoch": 1.093939393939394, "grad_norm": 0.181640625, "learning_rate": 1.0072852901096728e-05, "loss": 1.0549, "num_tokens": 238066678.0, "step": 6137 }, { "epoch": 1.0941176470588236, "grad_norm": 0.197265625, "learning_rate": 1.0070268734588244e-05, "loss": 1.0317, "num_tokens": 244327473.0, "step": 6138 }, { "epoch": 1.094295900178253, "grad_norm": 0.201171875, "learning_rate": 1.006768464555447e-05, "loss": 0.9983, "num_tokens": 250609396.0, "step": 6139 }, { "epoch": 1.0944741532976827, "grad_norm": 0.185546875, "learning_rate": 1.0065100634210727e-05, "loss": 1.0207, "num_tokens": 256877892.0, "step": 6140 }, { "epoch": 1.0946524064171124, "grad_norm": 0.177734375, "learning_rate": 1.0062516700772347e-05, "loss": 0.9764, "num_tokens": 263162574.0, "step": 6141 }, { "epoch": 1.0948306595365418, "grad_norm": 0.185546875, "learning_rate": 1.0059932845454648e-05, "loss": 1.0064, "num_tokens": 269402829.0, "step": 6142 }, { "epoch": 1.0950089126559714, "grad_norm": 0.193359375, "learning_rate": 1.0057349068472946e-05, "loss": 1.0571, "num_tokens": 275650660.0, "step": 6143 }, { "epoch": 1.095187165775401, "grad_norm": 0.1884765625, "learning_rate": 1.0054765370042543e-05, "loss": 1.0304, "num_tokens": 281915313.0, "step": 6144 }, { "epoch": 1.0953654188948307, "grad_norm": 0.1884765625, "learning_rate": 1.0052181750378746e-05, "loss": 1.0323, "num_tokens": 288190051.0, "step": 6145 }, { "epoch": 1.0955436720142602, "grad_norm": 0.1865234375, "learning_rate": 1.004959820969684e-05, "loss": 1.0104, "num_tokens": 294474015.0, "step": 6146 }, { "epoch": 1.0957219251336898, "grad_norm": 0.189453125, "learning_rate": 1.0047014748212118e-05, "loss": 1.0415, "num_tokens": 300726082.0, "step": 6147 }, { "epoch": 1.0959001782531195, "grad_norm": 0.197265625, "learning_rate": 1.0044431366139857e-05, "loss": 1.023, "num_tokens": 307002324.0, "step": 6148 }, { "epoch": 1.0960784313725491, "grad_norm": 0.1904296875, "learning_rate": 1.0041848063695335e-05, "loss": 1.0409, "num_tokens": 313253710.0, "step": 6149 }, { "epoch": 1.0962566844919786, "grad_norm": 0.19140625, "learning_rate": 1.0039264841093814e-05, "loss": 1.0412, "num_tokens": 319537358.0, "step": 6150 }, { "epoch": 1.0964349376114082, "grad_norm": 0.1787109375, "learning_rate": 1.0036681698550556e-05, "loss": 1.0202, "num_tokens": 325810918.0, "step": 6151 }, { "epoch": 1.0966131907308379, "grad_norm": 0.181640625, "learning_rate": 1.0034098636280818e-05, "loss": 1.0124, "num_tokens": 332073887.0, "step": 6152 }, { "epoch": 1.0967914438502673, "grad_norm": 0.185546875, "learning_rate": 1.0031515654499848e-05, "loss": 1.0105, "num_tokens": 338333530.0, "step": 6153 }, { "epoch": 1.096969696969697, "grad_norm": 0.1865234375, "learning_rate": 1.0028932753422878e-05, "loss": 1.0063, "num_tokens": 344601775.0, "step": 6154 }, { "epoch": 1.0971479500891266, "grad_norm": 0.197265625, "learning_rate": 1.002634993326515e-05, "loss": 0.9999, "num_tokens": 350856526.0, "step": 6155 }, { "epoch": 1.0973262032085562, "grad_norm": 0.1904296875, "learning_rate": 1.0023767194241885e-05, "loss": 1.01, "num_tokens": 357121302.0, "step": 6156 }, { "epoch": 1.0975044563279857, "grad_norm": 0.1865234375, "learning_rate": 1.0021184536568309e-05, "loss": 1.0206, "num_tokens": 363376005.0, "step": 6157 }, { "epoch": 1.0976827094474153, "grad_norm": 0.1845703125, "learning_rate": 1.001860196045963e-05, "loss": 1.0219, "num_tokens": 369631536.0, "step": 6158 }, { "epoch": 1.097860962566845, "grad_norm": 0.19140625, "learning_rate": 1.001601946613106e-05, "loss": 1.0252, "num_tokens": 375914288.0, "step": 6159 }, { "epoch": 1.0980392156862746, "grad_norm": 0.1884765625, "learning_rate": 1.0013437053797796e-05, "loss": 0.9951, "num_tokens": 382197996.0, "step": 6160 }, { "epoch": 1.098217468805704, "grad_norm": 0.1875, "learning_rate": 1.001085472367503e-05, "loss": 1.044, "num_tokens": 388421705.0, "step": 6161 }, { "epoch": 1.0983957219251337, "grad_norm": 0.1845703125, "learning_rate": 1.000827247597795e-05, "loss": 1.0416, "num_tokens": 394699899.0, "step": 6162 }, { "epoch": 1.0985739750445633, "grad_norm": 0.1884765625, "learning_rate": 1.000569031092173e-05, "loss": 1.0525, "num_tokens": 400983433.0, "step": 6163 }, { "epoch": 1.0987522281639928, "grad_norm": 0.1884765625, "learning_rate": 1.000310822872155e-05, "loss": 1.0227, "num_tokens": 407269181.0, "step": 6164 }, { "epoch": 1.0989304812834224, "grad_norm": 0.189453125, "learning_rate": 1.0000526229592571e-05, "loss": 1.0099, "num_tokens": 413528356.0, "step": 6165 }, { "epoch": 1.099108734402852, "grad_norm": 0.1845703125, "learning_rate": 9.997944313749955e-06, "loss": 1.0384, "num_tokens": 419797719.0, "step": 6166 }, { "epoch": 1.0992869875222817, "grad_norm": 0.1904296875, "learning_rate": 9.995362481408848e-06, "loss": 1.0127, "num_tokens": 426052987.0, "step": 6167 }, { "epoch": 1.0994652406417111, "grad_norm": 0.1904296875, "learning_rate": 9.992780732784402e-06, "loss": 0.9924, "num_tokens": 432336684.0, "step": 6168 }, { "epoch": 1.0996434937611408, "grad_norm": 0.1884765625, "learning_rate": 9.990199068091749e-06, "loss": 1.0025, "num_tokens": 438619419.0, "step": 6169 }, { "epoch": 1.0998217468805704, "grad_norm": 0.1884765625, "learning_rate": 9.987617487546019e-06, "loss": 1.028, "num_tokens": 444903887.0, "step": 6170 }, { "epoch": 1.1, "grad_norm": 0.1962890625, "learning_rate": 9.98503599136234e-06, "loss": 1.0477, "num_tokens": 451169411.0, "step": 6171 }, { "epoch": 1.1001782531194295, "grad_norm": 0.1943359375, "learning_rate": 9.982454579755825e-06, "loss": 1.0001, "num_tokens": 457443269.0, "step": 6172 }, { "epoch": 1.1003565062388592, "grad_norm": 0.19921875, "learning_rate": 9.979873252941585e-06, "loss": 1.0375, "num_tokens": 463712792.0, "step": 6173 }, { "epoch": 1.1005347593582888, "grad_norm": 0.1865234375, "learning_rate": 9.977292011134722e-06, "loss": 1.0011, "num_tokens": 469996518.0, "step": 6174 }, { "epoch": 1.1007130124777182, "grad_norm": 0.19140625, "learning_rate": 9.974710854550333e-06, "loss": 0.992, "num_tokens": 476282232.0, "step": 6175 }, { "epoch": 1.100891265597148, "grad_norm": 0.19140625, "learning_rate": 9.97212978340351e-06, "loss": 1.0204, "num_tokens": 482549953.0, "step": 6176 }, { "epoch": 1.1010695187165775, "grad_norm": 0.173828125, "learning_rate": 9.969548797909324e-06, "loss": 1.0322, "num_tokens": 488831877.0, "step": 6177 }, { "epoch": 1.1012477718360072, "grad_norm": 0.1865234375, "learning_rate": 9.966967898282856e-06, "loss": 1.047, "num_tokens": 495107139.0, "step": 6178 }, { "epoch": 1.1014260249554366, "grad_norm": 0.1767578125, "learning_rate": 9.964387084739172e-06, "loss": 0.9887, "num_tokens": 501368705.0, "step": 6179 }, { "epoch": 1.1016042780748663, "grad_norm": 0.1875, "learning_rate": 9.961806357493332e-06, "loss": 1.0312, "num_tokens": 507624035.0, "step": 6180 }, { "epoch": 1.101782531194296, "grad_norm": 0.1787109375, "learning_rate": 9.959225716760384e-06, "loss": 1.0067, "num_tokens": 513884587.0, "step": 6181 }, { "epoch": 1.1019607843137256, "grad_norm": 0.18359375, "learning_rate": 9.956645162755379e-06, "loss": 1.031, "num_tokens": 520170434.0, "step": 6182 }, { "epoch": 1.102139037433155, "grad_norm": 0.1884765625, "learning_rate": 9.954064695693354e-06, "loss": 1.025, "num_tokens": 526380979.0, "step": 6183 }, { "epoch": 1.1023172905525846, "grad_norm": 0.189453125, "learning_rate": 9.951484315789342e-06, "loss": 1.0041, "num_tokens": 532608188.0, "step": 6184 }, { "epoch": 1.1024955436720143, "grad_norm": 0.189453125, "learning_rate": 9.948904023258359e-06, "loss": 1.0202, "num_tokens": 538863433.0, "step": 6185 }, { "epoch": 1.102673796791444, "grad_norm": 0.1826171875, "learning_rate": 9.946323818315428e-06, "loss": 1.0027, "num_tokens": 545109615.0, "step": 6186 }, { "epoch": 1.1028520499108734, "grad_norm": 0.1806640625, "learning_rate": 9.943743701175555e-06, "loss": 1.0019, "num_tokens": 551365983.0, "step": 6187 }, { "epoch": 1.103030303030303, "grad_norm": 0.19140625, "learning_rate": 9.941163672053743e-06, "loss": 0.9972, "num_tokens": 557643900.0, "step": 6188 }, { "epoch": 1.1032085561497327, "grad_norm": 0.1875, "learning_rate": 9.938583731164986e-06, "loss": 1.0413, "num_tokens": 563928374.0, "step": 6189 }, { "epoch": 1.1033868092691623, "grad_norm": 0.1865234375, "learning_rate": 9.93600387872427e-06, "loss": 1.0145, "num_tokens": 570211867.0, "step": 6190 }, { "epoch": 1.1035650623885918, "grad_norm": 0.1943359375, "learning_rate": 9.933424114946582e-06, "loss": 1.0452, "num_tokens": 576462447.0, "step": 6191 }, { "epoch": 1.1037433155080214, "grad_norm": 0.1865234375, "learning_rate": 9.930844440046883e-06, "loss": 1.053, "num_tokens": 582747050.0, "step": 6192 }, { "epoch": 1.103921568627451, "grad_norm": 0.203125, "learning_rate": 9.928264854240143e-06, "loss": 1.019, "num_tokens": 589031595.0, "step": 6193 }, { "epoch": 1.1040998217468805, "grad_norm": 0.1953125, "learning_rate": 9.925685357741319e-06, "loss": 1.0261, "num_tokens": 595248312.0, "step": 6194 }, { "epoch": 1.1042780748663101, "grad_norm": 0.1787109375, "learning_rate": 9.923105950765364e-06, "loss": 1.0037, "num_tokens": 601533219.0, "step": 6195 }, { "epoch": 1.1044563279857398, "grad_norm": 0.1826171875, "learning_rate": 9.920526633527214e-06, "loss": 1.0134, "num_tokens": 607809934.0, "step": 6196 }, { "epoch": 1.1046345811051694, "grad_norm": 0.1845703125, "learning_rate": 9.91794740624181e-06, "loss": 1.0053, "num_tokens": 614093745.0, "step": 6197 }, { "epoch": 1.1048128342245989, "grad_norm": 0.1865234375, "learning_rate": 9.915368269124078e-06, "loss": 1.0494, "num_tokens": 620377365.0, "step": 6198 }, { "epoch": 1.1049910873440285, "grad_norm": 0.1884765625, "learning_rate": 9.912789222388938e-06, "loss": 1.018, "num_tokens": 626632016.0, "step": 6199 }, { "epoch": 1.1051693404634582, "grad_norm": 0.189453125, "learning_rate": 9.910210266251301e-06, "loss": 0.989, "num_tokens": 632889235.0, "step": 6200 }, { "epoch": 1.1053475935828878, "grad_norm": 0.1796875, "learning_rate": 9.907631400926072e-06, "loss": 1.024, "num_tokens": 639154669.0, "step": 6201 }, { "epoch": 1.1055258467023172, "grad_norm": 0.1875, "learning_rate": 9.90505262662815e-06, "loss": 1.024, "num_tokens": 645438838.0, "step": 6202 }, { "epoch": 1.1057040998217469, "grad_norm": 0.2001953125, "learning_rate": 9.902473943572425e-06, "loss": 1.0009, "num_tokens": 651722722.0, "step": 6203 }, { "epoch": 1.1058823529411765, "grad_norm": 0.197265625, "learning_rate": 9.899895351973778e-06, "loss": 0.9984, "num_tokens": 657983967.0, "step": 6204 }, { "epoch": 1.106060606060606, "grad_norm": 0.193359375, "learning_rate": 9.897316852047084e-06, "loss": 1.0375, "num_tokens": 664265233.0, "step": 6205 }, { "epoch": 1.1062388591800356, "grad_norm": 0.181640625, "learning_rate": 9.89473844400721e-06, "loss": 0.999, "num_tokens": 670521203.0, "step": 6206 }, { "epoch": 1.1064171122994653, "grad_norm": 0.1865234375, "learning_rate": 9.892160128069017e-06, "loss": 1.0147, "num_tokens": 676791018.0, "step": 6207 }, { "epoch": 1.106595365418895, "grad_norm": 0.2001953125, "learning_rate": 9.889581904447352e-06, "loss": 0.9886, "num_tokens": 683048511.0, "step": 6208 }, { "epoch": 1.1067736185383243, "grad_norm": 0.19140625, "learning_rate": 9.887003773357063e-06, "loss": 1.0376, "num_tokens": 689300774.0, "step": 6209 }, { "epoch": 1.106951871657754, "grad_norm": 0.1806640625, "learning_rate": 9.884425735012984e-06, "loss": 1.0263, "num_tokens": 695543289.0, "step": 6210 }, { "epoch": 1.1071301247771836, "grad_norm": 0.1884765625, "learning_rate": 9.881847789629946e-06, "loss": 1.0585, "num_tokens": 701825785.0, "step": 6211 }, { "epoch": 1.1073083778966133, "grad_norm": 0.19140625, "learning_rate": 9.879269937422764e-06, "loss": 1.0075, "num_tokens": 708112194.0, "step": 6212 }, { "epoch": 1.1074866310160427, "grad_norm": 0.177734375, "learning_rate": 9.87669217860626e-06, "loss": 1.0095, "num_tokens": 714395864.0, "step": 6213 }, { "epoch": 1.1076648841354724, "grad_norm": 0.1875, "learning_rate": 9.874114513395234e-06, "loss": 1.0135, "num_tokens": 720669118.0, "step": 6214 }, { "epoch": 1.107843137254902, "grad_norm": 0.1845703125, "learning_rate": 9.871536942004482e-06, "loss": 1.0226, "num_tokens": 726939652.0, "step": 6215 }, { "epoch": 1.1080213903743314, "grad_norm": 0.1845703125, "learning_rate": 9.868959464648794e-06, "loss": 1.0116, "num_tokens": 733223073.0, "step": 6216 }, { "epoch": 1.108199643493761, "grad_norm": 0.185546875, "learning_rate": 9.866382081542956e-06, "loss": 1.0275, "num_tokens": 739464132.0, "step": 6217 }, { "epoch": 1.1083778966131907, "grad_norm": 0.185546875, "learning_rate": 9.863804792901736e-06, "loss": 1.0079, "num_tokens": 745748102.0, "step": 6218 }, { "epoch": 1.1085561497326204, "grad_norm": 0.1953125, "learning_rate": 9.861227598939904e-06, "loss": 0.9924, "num_tokens": 752025563.0, "step": 6219 }, { "epoch": 1.1087344028520498, "grad_norm": 0.1923828125, "learning_rate": 9.858650499872217e-06, "loss": 1.0215, "num_tokens": 758281223.0, "step": 6220 }, { "epoch": 1.1089126559714795, "grad_norm": 0.1884765625, "learning_rate": 9.856073495913425e-06, "loss": 1.0256, "num_tokens": 764533955.0, "step": 6221 }, { "epoch": 1.1090909090909091, "grad_norm": 0.177734375, "learning_rate": 9.853496587278275e-06, "loss": 1.0183, "num_tokens": 770809401.0, "step": 6222 }, { "epoch": 1.1092691622103388, "grad_norm": 0.197265625, "learning_rate": 9.850919774181493e-06, "loss": 1.042, "num_tokens": 777058397.0, "step": 6223 }, { "epoch": 1.1094474153297682, "grad_norm": 0.1875, "learning_rate": 9.848343056837808e-06, "loss": 1.0026, "num_tokens": 783341508.0, "step": 6224 }, { "epoch": 1.1096256684491979, "grad_norm": 0.189453125, "learning_rate": 9.84576643546194e-06, "loss": 0.9814, "num_tokens": 789617694.0, "step": 6225 }, { "epoch": 1.1098039215686275, "grad_norm": 0.19140625, "learning_rate": 9.8431899102686e-06, "loss": 0.993, "num_tokens": 795875807.0, "step": 6226 }, { "epoch": 1.109982174688057, "grad_norm": 0.1953125, "learning_rate": 9.84061348147249e-06, "loss": 1.0245, "num_tokens": 802160179.0, "step": 6227 }, { "epoch": 1.1101604278074866, "grad_norm": 0.1923828125, "learning_rate": 9.838037149288303e-06, "loss": 1.0102, "num_tokens": 808430735.0, "step": 6228 }, { "epoch": 1.1103386809269162, "grad_norm": 0.177734375, "learning_rate": 9.835460913930725e-06, "loss": 1.0215, "num_tokens": 814714184.0, "step": 6229 }, { "epoch": 1.1105169340463459, "grad_norm": 0.1943359375, "learning_rate": 9.83288477561444e-06, "loss": 1.0066, "num_tokens": 820986686.0, "step": 6230 }, { "epoch": 1.1106951871657753, "grad_norm": 0.185546875, "learning_rate": 9.830308734554109e-06, "loss": 1.0103, "num_tokens": 827209642.0, "step": 6231 }, { "epoch": 1.110873440285205, "grad_norm": 0.19140625, "learning_rate": 9.8277327909644e-06, "loss": 1.0465, "num_tokens": 833467695.0, "step": 6232 }, { "epoch": 1.1110516934046346, "grad_norm": 0.189453125, "learning_rate": 9.825156945059964e-06, "loss": 0.9876, "num_tokens": 839732988.0, "step": 6233 }, { "epoch": 1.1112299465240643, "grad_norm": 0.19140625, "learning_rate": 9.82258119705545e-06, "loss": 1.0225, "num_tokens": 846013375.0, "step": 6234 }, { "epoch": 1.1114081996434937, "grad_norm": 0.1904296875, "learning_rate": 9.820005547165492e-06, "loss": 1.0183, "num_tokens": 852296559.0, "step": 6235 }, { "epoch": 1.1115864527629233, "grad_norm": 0.1904296875, "learning_rate": 9.817429995604721e-06, "loss": 1.0058, "num_tokens": 858573420.0, "step": 6236 }, { "epoch": 1.111764705882353, "grad_norm": 0.2021484375, "learning_rate": 9.814854542587763e-06, "loss": 1.0241, "num_tokens": 864842817.0, "step": 6237 }, { "epoch": 1.1119429590017824, "grad_norm": 0.185546875, "learning_rate": 9.812279188329221e-06, "loss": 1.0262, "num_tokens": 871118755.0, "step": 6238 }, { "epoch": 1.112121212121212, "grad_norm": 0.1884765625, "learning_rate": 9.80970393304371e-06, "loss": 1.0294, "num_tokens": 877363076.0, "step": 6239 }, { "epoch": 1.1122994652406417, "grad_norm": 0.1953125, "learning_rate": 9.807128776945817e-06, "loss": 1.0135, "num_tokens": 883648796.0, "step": 6240 }, { "epoch": 1.1124777183600714, "grad_norm": 0.1865234375, "learning_rate": 9.80455372025014e-06, "loss": 1.0389, "num_tokens": 889930558.0, "step": 6241 }, { "epoch": 1.1126559714795008, "grad_norm": 0.1982421875, "learning_rate": 9.801978763171251e-06, "loss": 1.0117, "num_tokens": 896200171.0, "step": 6242 }, { "epoch": 1.1128342245989304, "grad_norm": 0.185546875, "learning_rate": 9.799403905923727e-06, "loss": 0.993, "num_tokens": 902446595.0, "step": 6243 }, { "epoch": 1.11301247771836, "grad_norm": 0.1826171875, "learning_rate": 9.79682914872213e-06, "loss": 0.9936, "num_tokens": 908731812.0, "step": 6244 }, { "epoch": 1.1131907308377897, "grad_norm": 0.1953125, "learning_rate": 9.794254491781015e-06, "loss": 1.0399, "num_tokens": 915016023.0, "step": 6245 }, { "epoch": 1.1133689839572192, "grad_norm": 0.1962890625, "learning_rate": 9.791679935314928e-06, "loss": 1.0162, "num_tokens": 921299822.0, "step": 6246 }, { "epoch": 1.1135472370766488, "grad_norm": 0.1845703125, "learning_rate": 9.789105479538404e-06, "loss": 1.0147, "num_tokens": 927583405.0, "step": 6247 }, { "epoch": 1.1137254901960785, "grad_norm": 0.1865234375, "learning_rate": 9.786531124665979e-06, "loss": 1.0179, "num_tokens": 933866599.0, "step": 6248 }, { "epoch": 1.1139037433155081, "grad_norm": 0.1923828125, "learning_rate": 9.783956870912171e-06, "loss": 1.0269, "num_tokens": 940131393.0, "step": 6249 }, { "epoch": 1.1140819964349375, "grad_norm": 0.1865234375, "learning_rate": 9.781382718491495e-06, "loss": 0.983, "num_tokens": 946415023.0, "step": 6250 }, { "epoch": 1.1142602495543672, "grad_norm": 0.1796875, "learning_rate": 9.778808667618454e-06, "loss": 1.0304, "num_tokens": 952691944.0, "step": 6251 }, { "epoch": 1.1144385026737968, "grad_norm": 0.18359375, "learning_rate": 9.776234718507545e-06, "loss": 1.0273, "num_tokens": 958974582.0, "step": 6252 }, { "epoch": 1.1146167557932265, "grad_norm": 0.185546875, "learning_rate": 9.773660871373259e-06, "loss": 1.0077, "num_tokens": 965218098.0, "step": 6253 }, { "epoch": 1.114795008912656, "grad_norm": 0.185546875, "learning_rate": 9.771087126430067e-06, "loss": 1.0228, "num_tokens": 971500302.0, "step": 6254 }, { "epoch": 1.1149732620320856, "grad_norm": 0.1787109375, "learning_rate": 9.768513483892447e-06, "loss": 0.9873, "num_tokens": 977756815.0, "step": 6255 }, { "epoch": 1.1151515151515152, "grad_norm": 0.1806640625, "learning_rate": 9.765939943974861e-06, "loss": 0.9999, "num_tokens": 984012360.0, "step": 6256 }, { "epoch": 1.1153297682709447, "grad_norm": 0.1904296875, "learning_rate": 9.763366506891755e-06, "loss": 1.0243, "num_tokens": 990296917.0, "step": 6257 }, { "epoch": 1.1155080213903743, "grad_norm": 0.1845703125, "learning_rate": 9.760793172857584e-06, "loss": 1.062, "num_tokens": 996559172.0, "step": 6258 }, { "epoch": 1.115686274509804, "grad_norm": 0.189453125, "learning_rate": 9.758219942086777e-06, "loss": 1.0079, "num_tokens": 1002843052.0, "step": 6259 }, { "epoch": 1.1158645276292336, "grad_norm": 0.1943359375, "learning_rate": 9.755646814793772e-06, "loss": 1.0539, "num_tokens": 1009126066.0, "step": 6260 }, { "epoch": 1.116042780748663, "grad_norm": 0.1845703125, "learning_rate": 9.753073791192974e-06, "loss": 1.0199, "num_tokens": 1015386249.0, "step": 6261 }, { "epoch": 1.1162210338680927, "grad_norm": 0.181640625, "learning_rate": 9.750500871498802e-06, "loss": 0.9811, "num_tokens": 1021622691.0, "step": 6262 }, { "epoch": 1.1163992869875223, "grad_norm": 0.1904296875, "learning_rate": 9.747928055925656e-06, "loss": 0.9961, "num_tokens": 1027878729.0, "step": 6263 }, { "epoch": 1.116577540106952, "grad_norm": 0.1796875, "learning_rate": 9.74535534468793e-06, "loss": 1.0066, "num_tokens": 1034163184.0, "step": 6264 }, { "epoch": 1.1167557932263814, "grad_norm": 0.1923828125, "learning_rate": 9.74278273800001e-06, "loss": 0.9896, "num_tokens": 1040417818.0, "step": 6265 }, { "epoch": 1.116934046345811, "grad_norm": 0.1943359375, "learning_rate": 9.74021023607627e-06, "loss": 1.0312, "num_tokens": 1046701316.0, "step": 6266 }, { "epoch": 1.1171122994652407, "grad_norm": 0.1845703125, "learning_rate": 9.737637839131076e-06, "loss": 1.0044, "num_tokens": 1052987575.0, "step": 6267 }, { "epoch": 1.1172905525846701, "grad_norm": 0.185546875, "learning_rate": 9.73506554737879e-06, "loss": 1.0159, "num_tokens": 1059246565.0, "step": 6268 }, { "epoch": 1.1174688057040998, "grad_norm": 0.1904296875, "learning_rate": 9.73249336103376e-06, "loss": 1.02, "num_tokens": 1065511441.0, "step": 6269 }, { "epoch": 1.1176470588235294, "grad_norm": 0.2001953125, "learning_rate": 9.729921280310321e-06, "loss": 1.0114, "num_tokens": 1071773964.0, "step": 6270 }, { "epoch": 1.117825311942959, "grad_norm": 0.185546875, "learning_rate": 9.727349305422815e-06, "loss": 1.0187, "num_tokens": 1078033332.0, "step": 6271 }, { "epoch": 1.1180035650623885, "grad_norm": 0.1845703125, "learning_rate": 9.724777436585558e-06, "loss": 1.0459, "num_tokens": 1084308499.0, "step": 6272 }, { "epoch": 1.1181818181818182, "grad_norm": 0.1982421875, "learning_rate": 9.72220567401287e-06, "loss": 0.9901, "num_tokens": 1090591738.0, "step": 6273 }, { "epoch": 1.1183600713012478, "grad_norm": 0.1904296875, "learning_rate": 9.71963401791905e-06, "loss": 1.0279, "num_tokens": 1096874376.0, "step": 6274 }, { "epoch": 1.1185383244206775, "grad_norm": 0.1904296875, "learning_rate": 9.7170624685184e-06, "loss": 0.9933, "num_tokens": 1103136246.0, "step": 6275 }, { "epoch": 1.118716577540107, "grad_norm": 0.1923828125, "learning_rate": 9.71449102602521e-06, "loss": 1.0105, "num_tokens": 1109419318.0, "step": 6276 }, { "epoch": 1.1188948306595365, "grad_norm": 0.1953125, "learning_rate": 9.711919690653748e-06, "loss": 1.0206, "num_tokens": 1115684753.0, "step": 6277 }, { "epoch": 1.1190730837789662, "grad_norm": 0.185546875, "learning_rate": 9.709348462618293e-06, "loss": 1.0389, "num_tokens": 1121968474.0, "step": 6278 }, { "epoch": 1.1192513368983956, "grad_norm": 0.1787109375, "learning_rate": 9.706777342133103e-06, "loss": 1.026, "num_tokens": 1128254197.0, "step": 6279 }, { "epoch": 1.1194295900178253, "grad_norm": 0.1796875, "learning_rate": 9.704206329412432e-06, "loss": 1.0501, "num_tokens": 1134521761.0, "step": 6280 }, { "epoch": 1.119607843137255, "grad_norm": 0.1796875, "learning_rate": 9.70163542467052e-06, "loss": 1.0185, "num_tokens": 1140786362.0, "step": 6281 }, { "epoch": 1.1197860962566846, "grad_norm": 0.1904296875, "learning_rate": 9.699064628121602e-06, "loss": 1.0138, "num_tokens": 1147071059.0, "step": 6282 }, { "epoch": 1.119964349376114, "grad_norm": 0.18359375, "learning_rate": 9.696493939979907e-06, "loss": 0.9974, "num_tokens": 1153354853.0, "step": 6283 }, { "epoch": 1.1201426024955436, "grad_norm": 0.1806640625, "learning_rate": 9.693923360459645e-06, "loss": 1.0047, "num_tokens": 1159638910.0, "step": 6284 }, { "epoch": 1.1203208556149733, "grad_norm": 0.185546875, "learning_rate": 9.691352889775028e-06, "loss": 0.9828, "num_tokens": 1165921155.0, "step": 6285 }, { "epoch": 1.120499108734403, "grad_norm": 0.1845703125, "learning_rate": 9.688782528140249e-06, "loss": 1.0047, "num_tokens": 1172206052.0, "step": 6286 }, { "epoch": 1.1206773618538324, "grad_norm": 0.1845703125, "learning_rate": 9.6862122757695e-06, "loss": 1.0254, "num_tokens": 1178473721.0, "step": 6287 }, { "epoch": 1.120855614973262, "grad_norm": 0.1806640625, "learning_rate": 9.683642132876958e-06, "loss": 1.0051, "num_tokens": 1184727484.0, "step": 6288 }, { "epoch": 1.1210338680926917, "grad_norm": 0.1875, "learning_rate": 9.681072099676795e-06, "loss": 1.0246, "num_tokens": 1190991540.0, "step": 6289 }, { "epoch": 1.121212121212121, "grad_norm": 0.1865234375, "learning_rate": 9.678502176383176e-06, "loss": 1.0085, "num_tokens": 1197276290.0, "step": 6290 }, { "epoch": 1.1213903743315508, "grad_norm": 0.1845703125, "learning_rate": 9.675932363210253e-06, "loss": 1.0317, "num_tokens": 1203549077.0, "step": 6291 }, { "epoch": 1.1215686274509804, "grad_norm": 0.1796875, "learning_rate": 9.67336266037216e-06, "loss": 1.044, "num_tokens": 1209828876.0, "step": 6292 }, { "epoch": 1.12174688057041, "grad_norm": 0.1845703125, "learning_rate": 9.67079306808304e-06, "loss": 1.0099, "num_tokens": 1216111971.0, "step": 6293 }, { "epoch": 1.1219251336898395, "grad_norm": 0.181640625, "learning_rate": 9.668223586557015e-06, "loss": 1.0146, "num_tokens": 1222275618.0, "step": 6294 }, { "epoch": 1.1221033868092691, "grad_norm": 0.1826171875, "learning_rate": 9.665654216008198e-06, "loss": 1.0266, "num_tokens": 1228558577.0, "step": 6295 }, { "epoch": 1.1222816399286988, "grad_norm": 0.1787109375, "learning_rate": 9.663084956650698e-06, "loss": 1.0028, "num_tokens": 1234845240.0, "step": 6296 }, { "epoch": 1.1224598930481284, "grad_norm": 0.1796875, "learning_rate": 9.660515808698614e-06, "loss": 1.0222, "num_tokens": 1241109410.0, "step": 6297 }, { "epoch": 1.1226381461675579, "grad_norm": 0.1865234375, "learning_rate": 9.657946772366028e-06, "loss": 1.0344, "num_tokens": 1247361468.0, "step": 6298 }, { "epoch": 1.1228163992869875, "grad_norm": 0.1767578125, "learning_rate": 9.655377847867025e-06, "loss": 1.0286, "num_tokens": 1253646239.0, "step": 6299 }, { "epoch": 1.1229946524064172, "grad_norm": 0.185546875, "learning_rate": 9.652809035415667e-06, "loss": 1.0134, "num_tokens": 1259919763.0, "step": 6300 }, { "epoch": 1.1231729055258466, "grad_norm": 0.18359375, "learning_rate": 9.650240335226017e-06, "loss": 1.0195, "num_tokens": 1266135659.0, "step": 6301 }, { "epoch": 1.1233511586452762, "grad_norm": 0.181640625, "learning_rate": 9.647671747512126e-06, "loss": 1.0045, "num_tokens": 1272391261.0, "step": 6302 }, { "epoch": 1.1235294117647059, "grad_norm": 0.181640625, "learning_rate": 9.645103272488034e-06, "loss": 1.0266, "num_tokens": 1278646680.0, "step": 6303 }, { "epoch": 1.1237076648841355, "grad_norm": 0.1826171875, "learning_rate": 9.642534910367772e-06, "loss": 1.0449, "num_tokens": 1284925989.0, "step": 6304 }, { "epoch": 1.123885918003565, "grad_norm": 0.1806640625, "learning_rate": 9.639966661365366e-06, "loss": 1.0302, "num_tokens": 1291186189.0, "step": 6305 }, { "epoch": 1.1240641711229946, "grad_norm": 0.1953125, "learning_rate": 9.637398525694825e-06, "loss": 1.0386, "num_tokens": 1297462040.0, "step": 6306 }, { "epoch": 1.1242424242424243, "grad_norm": 0.1806640625, "learning_rate": 9.634830503570149e-06, "loss": 1.0288, "num_tokens": 1303734578.0, "step": 6307 }, { "epoch": 1.124420677361854, "grad_norm": 0.1796875, "learning_rate": 9.63226259520534e-06, "loss": 1.0057, "num_tokens": 1309991595.0, "step": 6308 }, { "epoch": 1.1245989304812833, "grad_norm": 0.1787109375, "learning_rate": 9.629694800814378e-06, "loss": 1.0189, "num_tokens": 1316277564.0, "step": 6309 }, { "epoch": 1.124777183600713, "grad_norm": 0.18359375, "learning_rate": 9.627127120611236e-06, "loss": 1.0178, "num_tokens": 1322560028.0, "step": 6310 }, { "epoch": 1.1249554367201426, "grad_norm": 0.185546875, "learning_rate": 9.624559554809881e-06, "loss": 1.0292, "num_tokens": 1328843068.0, "step": 6311 }, { "epoch": 1.125133689839572, "grad_norm": 0.1845703125, "learning_rate": 9.621992103624271e-06, "loss": 1.0468, "num_tokens": 1335067673.0, "step": 6312 }, { "epoch": 1.1253119429590017, "grad_norm": 0.1826171875, "learning_rate": 9.619424767268348e-06, "loss": 1.0579, "num_tokens": 1341328886.0, "step": 6313 }, { "epoch": 1.1254901960784314, "grad_norm": 0.19140625, "learning_rate": 9.616857545956057e-06, "loss": 1.0033, "num_tokens": 1347612376.0, "step": 6314 }, { "epoch": 1.125668449197861, "grad_norm": 0.197265625, "learning_rate": 9.614290439901314e-06, "loss": 1.0201, "num_tokens": 1353888494.0, "step": 6315 }, { "epoch": 1.1258467023172907, "grad_norm": 0.1806640625, "learning_rate": 9.611723449318042e-06, "loss": 1.0072, "num_tokens": 1360161171.0, "step": 6316 }, { "epoch": 1.12602495543672, "grad_norm": 0.18359375, "learning_rate": 9.609156574420151e-06, "loss": 1.0137, "num_tokens": 1366414641.0, "step": 6317 }, { "epoch": 1.1262032085561497, "grad_norm": 0.2060546875, "learning_rate": 9.606589815421534e-06, "loss": 1.0324, "num_tokens": 1372699081.0, "step": 6318 }, { "epoch": 1.1263814616755794, "grad_norm": 0.1826171875, "learning_rate": 9.604023172536083e-06, "loss": 1.0176, "num_tokens": 1378983071.0, "step": 6319 }, { "epoch": 1.1265597147950088, "grad_norm": 0.1875, "learning_rate": 9.601456645977676e-06, "loss": 1.0132, "num_tokens": 1385234082.0, "step": 6320 }, { "epoch": 1.1267379679144385, "grad_norm": 0.1826171875, "learning_rate": 9.598890235960184e-06, "loss": 1.0079, "num_tokens": 1391489532.0, "step": 6321 }, { "epoch": 1.1269162210338681, "grad_norm": 0.1923828125, "learning_rate": 9.596323942697468e-06, "loss": 0.9997, "num_tokens": 1397765570.0, "step": 6322 }, { "epoch": 1.1270944741532978, "grad_norm": 0.1953125, "learning_rate": 9.593757766403368e-06, "loss": 1.0309, "num_tokens": 1404049546.0, "step": 6323 }, { "epoch": 1.1272727272727272, "grad_norm": 0.1962890625, "learning_rate": 9.591191707291736e-06, "loss": 1.0129, "num_tokens": 1410333389.0, "step": 6324 }, { "epoch": 1.1274509803921569, "grad_norm": 0.181640625, "learning_rate": 9.588625765576394e-06, "loss": 1.0489, "num_tokens": 1416617655.0, "step": 6325 }, { "epoch": 1.1276292335115865, "grad_norm": 0.1806640625, "learning_rate": 9.586059941471167e-06, "loss": 1.0319, "num_tokens": 1422900420.0, "step": 6326 }, { "epoch": 1.1278074866310162, "grad_norm": 0.1953125, "learning_rate": 9.583494235189861e-06, "loss": 1.001, "num_tokens": 1429145186.0, "step": 6327 }, { "epoch": 1.1279857397504456, "grad_norm": 0.177734375, "learning_rate": 9.580928646946284e-06, "loss": 1.0005, "num_tokens": 1435401154.0, "step": 6328 }, { "epoch": 1.1281639928698752, "grad_norm": 0.1875, "learning_rate": 9.578363176954219e-06, "loss": 0.9995, "num_tokens": 1441677884.0, "step": 6329 }, { "epoch": 1.1283422459893049, "grad_norm": 0.1962890625, "learning_rate": 9.575797825427455e-06, "loss": 1.0239, "num_tokens": 1447904414.0, "step": 6330 }, { "epoch": 1.1285204991087343, "grad_norm": 0.1923828125, "learning_rate": 9.573232592579758e-06, "loss": 1.0171, "num_tokens": 1454180307.0, "step": 6331 }, { "epoch": 1.128698752228164, "grad_norm": 0.1904296875, "learning_rate": 9.57066747862489e-06, "loss": 1.0162, "num_tokens": 1460449185.0, "step": 6332 }, { "epoch": 1.1288770053475936, "grad_norm": 0.1904296875, "learning_rate": 9.568102483776604e-06, "loss": 1.0435, "num_tokens": 1466717463.0, "step": 6333 }, { "epoch": 1.1290552584670233, "grad_norm": 0.193359375, "learning_rate": 9.565537608248638e-06, "loss": 1.0349, "num_tokens": 1472979649.0, "step": 6334 }, { "epoch": 1.1292335115864527, "grad_norm": 0.1796875, "learning_rate": 9.562972852254732e-06, "loss": 1.0018, "num_tokens": 1479239710.0, "step": 6335 }, { "epoch": 1.1294117647058823, "grad_norm": 0.1865234375, "learning_rate": 9.560408216008598e-06, "loss": 1.016, "num_tokens": 1485522122.0, "step": 6336 }, { "epoch": 1.129590017825312, "grad_norm": 0.20703125, "learning_rate": 9.557843699723954e-06, "loss": 1.035, "num_tokens": 1491739734.0, "step": 6337 }, { "epoch": 1.1297682709447416, "grad_norm": 0.1826171875, "learning_rate": 9.5552793036145e-06, "loss": 1.0187, "num_tokens": 1498023839.0, "step": 6338 }, { "epoch": 1.129946524064171, "grad_norm": 0.1806640625, "learning_rate": 9.552715027893927e-06, "loss": 1.0181, "num_tokens": 1504281856.0, "step": 6339 }, { "epoch": 1.1301247771836007, "grad_norm": 0.1787109375, "learning_rate": 9.550150872775914e-06, "loss": 1.0034, "num_tokens": 1510565634.0, "step": 6340 }, { "epoch": 1.1303030303030304, "grad_norm": 0.208984375, "learning_rate": 9.547586838474135e-06, "loss": 1.0429, "num_tokens": 1516847444.0, "step": 6341 }, { "epoch": 1.1304812834224598, "grad_norm": 0.1962890625, "learning_rate": 9.545022925202255e-06, "loss": 1.0363, "num_tokens": 1523106479.0, "step": 6342 }, { "epoch": 1.1306595365418894, "grad_norm": 0.1845703125, "learning_rate": 9.54245913317392e-06, "loss": 1.0232, "num_tokens": 1529367547.0, "step": 6343 }, { "epoch": 1.130837789661319, "grad_norm": 0.1875, "learning_rate": 9.539895462602775e-06, "loss": 1.0412, "num_tokens": 1535650436.0, "step": 6344 }, { "epoch": 1.1310160427807487, "grad_norm": 0.20703125, "learning_rate": 9.537331913702452e-06, "loss": 0.9988, "num_tokens": 1541933098.0, "step": 6345 }, { "epoch": 1.1311942959001782, "grad_norm": 0.189453125, "learning_rate": 9.53476848668657e-06, "loss": 1.0739, "num_tokens": 1548218112.0, "step": 6346 }, { "epoch": 1.1313725490196078, "grad_norm": 0.1875, "learning_rate": 9.532205181768737e-06, "loss": 1.0249, "num_tokens": 1554488423.0, "step": 6347 }, { "epoch": 1.1315508021390375, "grad_norm": 0.1796875, "learning_rate": 9.529641999162555e-06, "loss": 1.025, "num_tokens": 1560725765.0, "step": 6348 }, { "epoch": 1.1317290552584671, "grad_norm": 0.189453125, "learning_rate": 9.52707893908162e-06, "loss": 1.0493, "num_tokens": 1566959545.0, "step": 6349 }, { "epoch": 1.1319073083778965, "grad_norm": 0.1982421875, "learning_rate": 9.524516001739506e-06, "loss": 0.9865, "num_tokens": 1573238864.0, "step": 6350 }, { "epoch": 1.1320855614973262, "grad_norm": 0.1845703125, "learning_rate": 9.521953187349784e-06, "loss": 1.0131, "num_tokens": 1579490329.0, "step": 6351 }, { "epoch": 1.1322638146167558, "grad_norm": 0.1865234375, "learning_rate": 9.519390496126018e-06, "loss": 1.0245, "num_tokens": 1585706996.0, "step": 6352 }, { "epoch": 1.1324420677361853, "grad_norm": 0.1787109375, "learning_rate": 9.51682792828176e-06, "loss": 1.0321, "num_tokens": 1591989884.0, "step": 6353 }, { "epoch": 1.132620320855615, "grad_norm": 0.1904296875, "learning_rate": 9.514265484030538e-06, "loss": 1.0233, "num_tokens": 1598274317.0, "step": 6354 }, { "epoch": 1.1327985739750446, "grad_norm": 0.1826171875, "learning_rate": 9.51170316358589e-06, "loss": 1.0364, "num_tokens": 1604525346.0, "step": 6355 }, { "epoch": 1.1329768270944742, "grad_norm": 0.18359375, "learning_rate": 9.50914096716133e-06, "loss": 1.0251, "num_tokens": 1610802371.0, "step": 6356 }, { "epoch": 1.1331550802139037, "grad_norm": 0.19140625, "learning_rate": 9.506578894970368e-06, "loss": 1.0142, "num_tokens": 1617086099.0, "step": 6357 }, { "epoch": 1.1333333333333333, "grad_norm": 0.1796875, "learning_rate": 9.504016947226504e-06, "loss": 1.0008, "num_tokens": 1623368071.0, "step": 6358 }, { "epoch": 1.133511586452763, "grad_norm": 0.18359375, "learning_rate": 9.501455124143225e-06, "loss": 1.0269, "num_tokens": 1629637341.0, "step": 6359 }, { "epoch": 1.1336898395721926, "grad_norm": 0.1904296875, "learning_rate": 9.498893425934008e-06, "loss": 1.0324, "num_tokens": 1635892056.0, "step": 6360 }, { "epoch": 1.133868092691622, "grad_norm": 0.189453125, "learning_rate": 9.496331852812316e-06, "loss": 1.0102, "num_tokens": 1642150940.0, "step": 6361 }, { "epoch": 1.1340463458110517, "grad_norm": 0.1865234375, "learning_rate": 9.493770404991614e-06, "loss": 1.016, "num_tokens": 1648410295.0, "step": 6362 }, { "epoch": 1.1342245989304813, "grad_norm": 0.189453125, "learning_rate": 9.491209082685338e-06, "loss": 1.0765, "num_tokens": 1654694991.0, "step": 6363 }, { "epoch": 1.1344028520499108, "grad_norm": 0.193359375, "learning_rate": 9.488647886106929e-06, "loss": 1.0339, "num_tokens": 1660977077.0, "step": 6364 }, { "epoch": 1.1345811051693404, "grad_norm": 0.18359375, "learning_rate": 9.48608681546981e-06, "loss": 1.0083, "num_tokens": 1667262655.0, "step": 6365 }, { "epoch": 1.13475935828877, "grad_norm": 0.1826171875, "learning_rate": 9.4835258709874e-06, "loss": 1.0008, "num_tokens": 1673547512.0, "step": 6366 }, { "epoch": 1.1349376114081997, "grad_norm": 0.1845703125, "learning_rate": 9.480965052873096e-06, "loss": 1.0224, "num_tokens": 1679831792.0, "step": 6367 }, { "epoch": 1.1351158645276291, "grad_norm": 0.1865234375, "learning_rate": 9.478404361340301e-06, "loss": 1.0076, "num_tokens": 1686113723.0, "step": 6368 }, { "epoch": 1.1352941176470588, "grad_norm": 0.1953125, "learning_rate": 9.47584379660239e-06, "loss": 0.9973, "num_tokens": 1692378112.0, "step": 6369 }, { "epoch": 1.1354723707664884, "grad_norm": 0.1767578125, "learning_rate": 9.473283358872737e-06, "loss": 1.0231, "num_tokens": 1698661645.0, "step": 6370 }, { "epoch": 1.135650623885918, "grad_norm": 0.1845703125, "learning_rate": 9.470723048364706e-06, "loss": 1.0338, "num_tokens": 1704903653.0, "step": 6371 }, { "epoch": 1.1358288770053475, "grad_norm": 0.1865234375, "learning_rate": 9.468162865291643e-06, "loss": 1.0267, "num_tokens": 1711168085.0, "step": 6372 }, { "epoch": 1.1360071301247772, "grad_norm": 0.1884765625, "learning_rate": 9.465602809866893e-06, "loss": 1.0329, "num_tokens": 1717448318.0, "step": 6373 }, { "epoch": 1.1361853832442068, "grad_norm": 0.185546875, "learning_rate": 9.463042882303788e-06, "loss": 1.0219, "num_tokens": 1723731328.0, "step": 6374 }, { "epoch": 1.1363636363636362, "grad_norm": 0.1806640625, "learning_rate": 9.460483082815643e-06, "loss": 1.0324, "num_tokens": 1729999645.0, "step": 6375 }, { "epoch": 1.136541889483066, "grad_norm": 0.1787109375, "learning_rate": 9.457923411615772e-06, "loss": 1.0355, "num_tokens": 1736259493.0, "step": 6376 }, { "epoch": 1.1367201426024955, "grad_norm": 0.189453125, "learning_rate": 9.455363868917466e-06, "loss": 1.0499, "num_tokens": 1742541975.0, "step": 6377 }, { "epoch": 1.1368983957219252, "grad_norm": 0.17578125, "learning_rate": 9.452804454934016e-06, "loss": 1.0263, "num_tokens": 1748790048.0, "step": 6378 }, { "epoch": 1.1370766488413548, "grad_norm": 0.1826171875, "learning_rate": 9.450245169878698e-06, "loss": 1.012, "num_tokens": 1755067569.0, "step": 6379 }, { "epoch": 1.1372549019607843, "grad_norm": 0.17578125, "learning_rate": 9.44768601396478e-06, "loss": 1.0043, "num_tokens": 1761291069.0, "step": 6380 }, { "epoch": 1.137433155080214, "grad_norm": 0.1865234375, "learning_rate": 9.445126987405509e-06, "loss": 1.0243, "num_tokens": 1767554607.0, "step": 6381 }, { "epoch": 1.1376114081996436, "grad_norm": 0.1884765625, "learning_rate": 9.442568090414139e-06, "loss": 1.0338, "num_tokens": 1773827603.0, "step": 6382 }, { "epoch": 1.137789661319073, "grad_norm": 0.1865234375, "learning_rate": 9.4400093232039e-06, "loss": 1.0168, "num_tokens": 1780077494.0, "step": 6383 }, { "epoch": 1.1379679144385026, "grad_norm": 0.185546875, "learning_rate": 9.437450685988012e-06, "loss": 1.017, "num_tokens": 1786334743.0, "step": 6384 }, { "epoch": 1.1381461675579323, "grad_norm": 0.173828125, "learning_rate": 9.434892178979691e-06, "loss": 1.0091, "num_tokens": 1792617298.0, "step": 6385 }, { "epoch": 1.138324420677362, "grad_norm": 0.181640625, "learning_rate": 9.432333802392132e-06, "loss": 1.0093, "num_tokens": 1798860716.0, "step": 6386 }, { "epoch": 1.1385026737967914, "grad_norm": 0.1796875, "learning_rate": 9.429775556438532e-06, "loss": 1.0089, "num_tokens": 1805082937.0, "step": 6387 }, { "epoch": 1.138680926916221, "grad_norm": 0.185546875, "learning_rate": 9.427217441332063e-06, "loss": 1.0286, "num_tokens": 1811358115.0, "step": 6388 }, { "epoch": 1.1388591800356507, "grad_norm": 0.1865234375, "learning_rate": 9.424659457285903e-06, "loss": 1.0316, "num_tokens": 1817584212.0, "step": 6389 }, { "epoch": 1.1390374331550803, "grad_norm": 0.1953125, "learning_rate": 9.422101604513198e-06, "loss": 0.9952, "num_tokens": 1823868274.0, "step": 6390 }, { "epoch": 1.1392156862745098, "grad_norm": 0.1796875, "learning_rate": 9.419543883227107e-06, "loss": 1.0198, "num_tokens": 1830137039.0, "step": 6391 }, { "epoch": 1.1393939393939394, "grad_norm": 0.1767578125, "learning_rate": 9.416986293640756e-06, "loss": 0.9911, "num_tokens": 1836395657.0, "step": 6392 }, { "epoch": 1.139572192513369, "grad_norm": 0.1982421875, "learning_rate": 9.414428835967266e-06, "loss": 1.0272, "num_tokens": 1842671351.0, "step": 6393 }, { "epoch": 1.1397504456327985, "grad_norm": 0.1962890625, "learning_rate": 9.411871510419761e-06, "loss": 1.0262, "num_tokens": 1848927332.0, "step": 6394 }, { "epoch": 1.1399286987522281, "grad_norm": 0.1787109375, "learning_rate": 9.409314317211338e-06, "loss": 0.9969, "num_tokens": 1855210373.0, "step": 6395 }, { "epoch": 1.1401069518716578, "grad_norm": 0.1787109375, "learning_rate": 9.40675725655509e-06, "loss": 1.0227, "num_tokens": 1861492299.0, "step": 6396 }, { "epoch": 1.1402852049910874, "grad_norm": 0.1826171875, "learning_rate": 9.404200328664097e-06, "loss": 1.0322, "num_tokens": 1867775452.0, "step": 6397 }, { "epoch": 1.1404634581105169, "grad_norm": 0.1845703125, "learning_rate": 9.401643533751428e-06, "loss": 1.0219, "num_tokens": 1874030891.0, "step": 6398 }, { "epoch": 1.1406417112299465, "grad_norm": 0.1875, "learning_rate": 9.399086872030143e-06, "loss": 1.0185, "num_tokens": 1880314463.0, "step": 6399 }, { "epoch": 1.1408199643493762, "grad_norm": 0.177734375, "learning_rate": 9.396530343713284e-06, "loss": 1.0244, "num_tokens": 1886561908.0, "step": 6400 }, { "epoch": 1.1409982174688058, "grad_norm": 0.1884765625, "learning_rate": 9.393973949013893e-06, "loss": 1.0256, "num_tokens": 1892846836.0, "step": 6401 }, { "epoch": 1.1411764705882352, "grad_norm": 0.18359375, "learning_rate": 9.39141768814499e-06, "loss": 1.0342, "num_tokens": 1899129215.0, "step": 6402 }, { "epoch": 1.1413547237076649, "grad_norm": 0.189453125, "learning_rate": 9.388861561319594e-06, "loss": 1.0436, "num_tokens": 1905410993.0, "step": 6403 }, { "epoch": 1.1415329768270945, "grad_norm": 0.1845703125, "learning_rate": 9.386305568750702e-06, "loss": 1.0151, "num_tokens": 1911694846.0, "step": 6404 }, { "epoch": 1.141711229946524, "grad_norm": 0.1923828125, "learning_rate": 9.38374971065131e-06, "loss": 1.041, "num_tokens": 1917976154.0, "step": 6405 }, { "epoch": 1.1418894830659536, "grad_norm": 0.18359375, "learning_rate": 9.3811939872344e-06, "loss": 0.9971, "num_tokens": 1924234306.0, "step": 6406 }, { "epoch": 1.1420677361853833, "grad_norm": 0.1865234375, "learning_rate": 9.378638398712934e-06, "loss": 1.0126, "num_tokens": 1930519007.0, "step": 6407 }, { "epoch": 1.142245989304813, "grad_norm": 0.1904296875, "learning_rate": 9.376082945299871e-06, "loss": 1.0187, "num_tokens": 1936802226.0, "step": 6408 }, { "epoch": 1.1424242424242423, "grad_norm": 0.1826171875, "learning_rate": 9.373527627208161e-06, "loss": 0.9944, "num_tokens": 1943085738.0, "step": 6409 }, { "epoch": 1.142602495543672, "grad_norm": 0.1962890625, "learning_rate": 9.370972444650739e-06, "loss": 1.0336, "num_tokens": 1949368387.0, "step": 6410 }, { "epoch": 1.1427807486631016, "grad_norm": 0.1728515625, "learning_rate": 9.368417397840526e-06, "loss": 1.0241, "num_tokens": 1955651521.0, "step": 6411 }, { "epoch": 1.1429590017825313, "grad_norm": 0.185546875, "learning_rate": 9.365862486990438e-06, "loss": 0.9951, "num_tokens": 1961934943.0, "step": 6412 }, { "epoch": 1.1431372549019607, "grad_norm": 0.181640625, "learning_rate": 9.363307712313372e-06, "loss": 1.0102, "num_tokens": 1968218847.0, "step": 6413 }, { "epoch": 1.1433155080213904, "grad_norm": 0.181640625, "learning_rate": 9.360753074022224e-06, "loss": 1.025, "num_tokens": 1974496201.0, "step": 6414 }, { "epoch": 1.14349376114082, "grad_norm": 0.1943359375, "learning_rate": 9.358198572329868e-06, "loss": 0.9974, "num_tokens": 1980757913.0, "step": 6415 }, { "epoch": 1.1436720142602494, "grad_norm": 0.1845703125, "learning_rate": 9.355644207449171e-06, "loss": 1.0136, "num_tokens": 1987041386.0, "step": 6416 }, { "epoch": 1.143850267379679, "grad_norm": 0.2001953125, "learning_rate": 9.35308997959299e-06, "loss": 1.0202, "num_tokens": 1993287651.0, "step": 6417 }, { "epoch": 1.1440285204991087, "grad_norm": 0.1845703125, "learning_rate": 9.35053588897417e-06, "loss": 1.0126, "num_tokens": 1999570783.0, "step": 6418 }, { "epoch": 1.1442067736185384, "grad_norm": 0.1962890625, "learning_rate": 9.347981935805543e-06, "loss": 0.9823, "num_tokens": 2005828626.0, "step": 6419 }, { "epoch": 1.1443850267379678, "grad_norm": 0.1884765625, "learning_rate": 9.345428120299929e-06, "loss": 0.9954, "num_tokens": 2012091398.0, "step": 6420 }, { "epoch": 1.1445632798573975, "grad_norm": 0.18359375, "learning_rate": 9.342874442670142e-06, "loss": 0.9883, "num_tokens": 2018348156.0, "step": 6421 }, { "epoch": 1.1447415329768271, "grad_norm": 0.1826171875, "learning_rate": 9.34032090312898e-06, "loss": 1.0346, "num_tokens": 2024616104.0, "step": 6422 }, { "epoch": 1.1449197860962568, "grad_norm": 0.1865234375, "learning_rate": 9.337767501889226e-06, "loss": 0.9931, "num_tokens": 2030878771.0, "step": 6423 }, { "epoch": 1.1450980392156862, "grad_norm": 0.189453125, "learning_rate": 9.335214239163657e-06, "loss": 1.0252, "num_tokens": 2037147111.0, "step": 6424 }, { "epoch": 1.1452762923351159, "grad_norm": 0.18359375, "learning_rate": 9.332661115165035e-06, "loss": 1.0261, "num_tokens": 2043431744.0, "step": 6425 }, { "epoch": 1.1454545454545455, "grad_norm": 0.1884765625, "learning_rate": 9.33010813010612e-06, "loss": 1.0373, "num_tokens": 2049715970.0, "step": 6426 }, { "epoch": 1.145632798573975, "grad_norm": 0.1796875, "learning_rate": 9.327555284199643e-06, "loss": 1.0109, "num_tokens": 2055965077.0, "step": 6427 }, { "epoch": 1.1458110516934046, "grad_norm": 0.181640625, "learning_rate": 9.325002577658342e-06, "loss": 1.0223, "num_tokens": 2062196176.0, "step": 6428 }, { "epoch": 1.1459893048128342, "grad_norm": 0.185546875, "learning_rate": 9.32245001069493e-06, "loss": 1.014, "num_tokens": 2068454298.0, "step": 6429 }, { "epoch": 1.1461675579322639, "grad_norm": 0.1953125, "learning_rate": 9.31989758352211e-06, "loss": 1.0293, "num_tokens": 2074710036.0, "step": 6430 }, { "epoch": 1.1463458110516933, "grad_norm": 0.203125, "learning_rate": 9.317345296352582e-06, "loss": 1.0404, "num_tokens": 2080993290.0, "step": 6431 }, { "epoch": 1.146524064171123, "grad_norm": 0.1904296875, "learning_rate": 9.314793149399026e-06, "loss": 0.9911, "num_tokens": 2087277444.0, "step": 6432 }, { "epoch": 1.1467023172905526, "grad_norm": 0.197265625, "learning_rate": 9.312241142874115e-06, "loss": 1.0216, "num_tokens": 2093561920.0, "step": 6433 }, { "epoch": 1.1468805704099823, "grad_norm": 0.19140625, "learning_rate": 9.309689276990504e-06, "loss": 1.0144, "num_tokens": 2099846468.0, "step": 6434 }, { "epoch": 1.1470588235294117, "grad_norm": 0.1923828125, "learning_rate": 9.307137551960843e-06, "loss": 1.011, "num_tokens": 2106109292.0, "step": 6435 }, { "epoch": 1.1472370766488413, "grad_norm": 0.1796875, "learning_rate": 9.304585967997768e-06, "loss": 1.0194, "num_tokens": 2112366137.0, "step": 6436 }, { "epoch": 1.147415329768271, "grad_norm": 0.193359375, "learning_rate": 9.302034525313903e-06, "loss": 0.9885, "num_tokens": 2118650167.0, "step": 6437 }, { "epoch": 1.1475935828877004, "grad_norm": 0.193359375, "learning_rate": 9.299483224121863e-06, "loss": 0.996, "num_tokens": 2124911207.0, "step": 6438 }, { "epoch": 1.14777183600713, "grad_norm": 0.1943359375, "learning_rate": 9.296932064634241e-06, "loss": 0.996, "num_tokens": 2131188315.0, "step": 6439 }, { "epoch": 1.1479500891265597, "grad_norm": 0.1875, "learning_rate": 9.294381047063632e-06, "loss": 1.0068, "num_tokens": 2137433923.0, "step": 6440 }, { "epoch": 1.1481283422459894, "grad_norm": 0.1826171875, "learning_rate": 9.29183017162261e-06, "loss": 1.0312, "num_tokens": 2143709737.0, "step": 6441 }, { "epoch": 1.148306595365419, "grad_norm": 0.1845703125, "learning_rate": 9.289279438523745e-06, "loss": 1.0123, "num_tokens": 2149955264.0, "step": 6442 }, { "epoch": 1.1484848484848484, "grad_norm": 0.19140625, "learning_rate": 9.28672884797958e-06, "loss": 1.0487, "num_tokens": 2156232528.0, "step": 6443 }, { "epoch": 1.148663101604278, "grad_norm": 0.1904296875, "learning_rate": 9.284178400202667e-06, "loss": 1.0349, "num_tokens": 2162486755.0, "step": 6444 }, { "epoch": 1.1488413547237077, "grad_norm": 0.185546875, "learning_rate": 9.281628095405533e-06, "loss": 1.0013, "num_tokens": 2168764240.0, "step": 6445 }, { "epoch": 1.1490196078431372, "grad_norm": 0.1904296875, "learning_rate": 9.27907793380069e-06, "loss": 1.0023, "num_tokens": 2175041601.0, "step": 6446 }, { "epoch": 1.1491978609625668, "grad_norm": 0.185546875, "learning_rate": 9.276527915600646e-06, "loss": 1.0147, "num_tokens": 2181323155.0, "step": 6447 }, { "epoch": 1.1493761140819965, "grad_norm": 0.1865234375, "learning_rate": 9.273978041017895e-06, "loss": 0.9997, "num_tokens": 2187574372.0, "step": 6448 }, { "epoch": 1.1495543672014261, "grad_norm": 0.1796875, "learning_rate": 9.27142831026492e-06, "loss": 1.0145, "num_tokens": 2193831234.0, "step": 6449 }, { "epoch": 1.1497326203208555, "grad_norm": 0.1845703125, "learning_rate": 9.268878723554189e-06, "loss": 1.0345, "num_tokens": 2200108550.0, "step": 6450 }, { "epoch": 1.1499108734402852, "grad_norm": 0.18359375, "learning_rate": 9.26632928109816e-06, "loss": 1.0518, "num_tokens": 2206388777.0, "step": 6451 }, { "epoch": 1.1500891265597148, "grad_norm": 0.17578125, "learning_rate": 9.263779983109278e-06, "loss": 1.0203, "num_tokens": 2212652502.0, "step": 6452 }, { "epoch": 1.1502673796791445, "grad_norm": 0.181640625, "learning_rate": 9.261230829799981e-06, "loss": 1.0093, "num_tokens": 2218903179.0, "step": 6453 }, { "epoch": 1.150445632798574, "grad_norm": 0.185546875, "learning_rate": 9.258681821382685e-06, "loss": 1.0165, "num_tokens": 2225187826.0, "step": 6454 }, { "epoch": 1.1506238859180036, "grad_norm": 0.1884765625, "learning_rate": 9.256132958069799e-06, "loss": 1.0203, "num_tokens": 2231462761.0, "step": 6455 }, { "epoch": 1.1508021390374332, "grad_norm": 0.181640625, "learning_rate": 9.253584240073723e-06, "loss": 1.0392, "num_tokens": 2237714790.0, "step": 6456 }, { "epoch": 1.1509803921568627, "grad_norm": 0.1943359375, "learning_rate": 9.251035667606843e-06, "loss": 0.9877, "num_tokens": 2243997137.0, "step": 6457 }, { "epoch": 1.1511586452762923, "grad_norm": 0.1845703125, "learning_rate": 9.248487240881529e-06, "loss": 1.0116, "num_tokens": 2250279502.0, "step": 6458 }, { "epoch": 1.151336898395722, "grad_norm": 0.1826171875, "learning_rate": 9.245938960110143e-06, "loss": 1.013, "num_tokens": 2256563321.0, "step": 6459 }, { "epoch": 1.1515151515151516, "grad_norm": 0.1826171875, "learning_rate": 9.243390825505039e-06, "loss": 1.0211, "num_tokens": 2262841542.0, "step": 6460 }, { "epoch": 1.151693404634581, "grad_norm": 0.189453125, "learning_rate": 9.240842837278546e-06, "loss": 1.0102, "num_tokens": 2269126869.0, "step": 6461 }, { "epoch": 1.1518716577540107, "grad_norm": 0.19140625, "learning_rate": 9.238294995642989e-06, "loss": 1.0011, "num_tokens": 2275382569.0, "step": 6462 }, { "epoch": 1.1520499108734403, "grad_norm": 0.1875, "learning_rate": 9.235747300810687e-06, "loss": 1.0391, "num_tokens": 2281667853.0, "step": 6463 }, { "epoch": 1.15222816399287, "grad_norm": 0.1787109375, "learning_rate": 9.23319975299393e-06, "loss": 0.996, "num_tokens": 2287951213.0, "step": 6464 }, { "epoch": 1.1524064171122994, "grad_norm": 0.181640625, "learning_rate": 9.230652352405017e-06, "loss": 1.0226, "num_tokens": 2294217301.0, "step": 6465 }, { "epoch": 1.152584670231729, "grad_norm": 0.17578125, "learning_rate": 9.228105099256213e-06, "loss": 1.0244, "num_tokens": 2300475830.0, "step": 6466 }, { "epoch": 1.1527629233511587, "grad_norm": 0.1826171875, "learning_rate": 9.225557993759788e-06, "loss": 1.0214, "num_tokens": 2306759080.0, "step": 6467 }, { "epoch": 1.1529411764705881, "grad_norm": 0.177734375, "learning_rate": 9.223011036127993e-06, "loss": 1.0089, "num_tokens": 2313017629.0, "step": 6468 }, { "epoch": 1.1531194295900178, "grad_norm": 0.1796875, "learning_rate": 9.22046422657306e-06, "loss": 1.0212, "num_tokens": 2319296236.0, "step": 6469 }, { "epoch": 1.1532976827094474, "grad_norm": 0.185546875, "learning_rate": 9.217917565307224e-06, "loss": 1.0329, "num_tokens": 2325581285.0, "step": 6470 }, { "epoch": 1.153475935828877, "grad_norm": 0.1796875, "learning_rate": 9.21537105254269e-06, "loss": 1.0183, "num_tokens": 2331864990.0, "step": 6471 }, { "epoch": 1.1536541889483065, "grad_norm": 0.177734375, "learning_rate": 9.212824688491664e-06, "loss": 1.0229, "num_tokens": 2338150329.0, "step": 6472 }, { "epoch": 1.1538324420677362, "grad_norm": 0.1904296875, "learning_rate": 9.210278473366337e-06, "loss": 0.9962, "num_tokens": 2344387828.0, "step": 6473 }, { "epoch": 1.1540106951871658, "grad_norm": 0.1865234375, "learning_rate": 9.20773240737888e-06, "loss": 1.0459, "num_tokens": 2350626566.0, "step": 6474 }, { "epoch": 1.1541889483065955, "grad_norm": 0.181640625, "learning_rate": 9.205186490741462e-06, "loss": 1.0172, "num_tokens": 2356897414.0, "step": 6475 }, { "epoch": 1.154367201426025, "grad_norm": 0.1826171875, "learning_rate": 9.202640723666236e-06, "loss": 0.9996, "num_tokens": 2363130739.0, "step": 6476 }, { "epoch": 1.1545454545454545, "grad_norm": 0.1806640625, "learning_rate": 9.200095106365337e-06, "loss": 1.016, "num_tokens": 2369368402.0, "step": 6477 }, { "epoch": 1.1547237076648842, "grad_norm": 0.181640625, "learning_rate": 9.19754963905089e-06, "loss": 1.0296, "num_tokens": 2375650379.0, "step": 6478 }, { "epoch": 1.1549019607843136, "grad_norm": 0.1884765625, "learning_rate": 9.195004321935018e-06, "loss": 1.0397, "num_tokens": 2381935360.0, "step": 6479 }, { "epoch": 1.1550802139037433, "grad_norm": 0.1904296875, "learning_rate": 9.192459155229814e-06, "loss": 1.029, "num_tokens": 2388188129.0, "step": 6480 }, { "epoch": 1.155258467023173, "grad_norm": 0.1806640625, "learning_rate": 9.18991413914737e-06, "loss": 1.0221, "num_tokens": 2394457157.0, "step": 6481 }, { "epoch": 1.1554367201426026, "grad_norm": 0.1845703125, "learning_rate": 9.187369273899767e-06, "loss": 0.9705, "num_tokens": 2400741643.0, "step": 6482 }, { "epoch": 1.155614973262032, "grad_norm": 0.1748046875, "learning_rate": 9.184824559699066e-06, "loss": 0.9869, "num_tokens": 2407027021.0, "step": 6483 }, { "epoch": 1.1557932263814616, "grad_norm": 0.1865234375, "learning_rate": 9.182279996757318e-06, "loss": 1.0471, "num_tokens": 2413311977.0, "step": 6484 }, { "epoch": 1.1559714795008913, "grad_norm": 0.1826171875, "learning_rate": 9.17973558528656e-06, "loss": 1.0277, "num_tokens": 2419595768.0, "step": 6485 }, { "epoch": 1.156149732620321, "grad_norm": 0.181640625, "learning_rate": 9.177191325498822e-06, "loss": 0.9819, "num_tokens": 2425880289.0, "step": 6486 }, { "epoch": 1.1563279857397504, "grad_norm": 0.1865234375, "learning_rate": 9.174647217606114e-06, "loss": 1.0487, "num_tokens": 2432138500.0, "step": 6487 }, { "epoch": 1.15650623885918, "grad_norm": 0.1884765625, "learning_rate": 9.172103261820442e-06, "loss": 1.0457, "num_tokens": 2438400087.0, "step": 6488 }, { "epoch": 1.1566844919786097, "grad_norm": 0.1826171875, "learning_rate": 9.169559458353789e-06, "loss": 1.039, "num_tokens": 2444662397.0, "step": 6489 }, { "epoch": 1.156862745098039, "grad_norm": 0.181640625, "learning_rate": 9.167015807418138e-06, "loss": 1.0393, "num_tokens": 2450947770.0, "step": 6490 }, { "epoch": 1.1570409982174688, "grad_norm": 0.1826171875, "learning_rate": 9.164472309225447e-06, "loss": 1.055, "num_tokens": 2457220759.0, "step": 6491 }, { "epoch": 1.1572192513368984, "grad_norm": 0.193359375, "learning_rate": 9.161928963987663e-06, "loss": 1.0037, "num_tokens": 2463470802.0, "step": 6492 }, { "epoch": 1.157397504456328, "grad_norm": 0.19140625, "learning_rate": 9.159385771916729e-06, "loss": 0.9807, "num_tokens": 2469716427.0, "step": 6493 }, { "epoch": 1.1575757575757575, "grad_norm": 0.181640625, "learning_rate": 9.156842733224567e-06, "loss": 0.999, "num_tokens": 2475975440.0, "step": 6494 }, { "epoch": 1.1577540106951871, "grad_norm": 0.1806640625, "learning_rate": 9.154299848123091e-06, "loss": 1.0099, "num_tokens": 2482258064.0, "step": 6495 }, { "epoch": 1.1579322638146168, "grad_norm": 0.19140625, "learning_rate": 9.151757116824199e-06, "loss": 1.0271, "num_tokens": 2488543273.0, "step": 6496 }, { "epoch": 1.1581105169340464, "grad_norm": 0.185546875, "learning_rate": 9.149214539539774e-06, "loss": 1.0394, "num_tokens": 2494816836.0, "step": 6497 }, { "epoch": 1.1582887700534759, "grad_norm": 0.185546875, "learning_rate": 9.146672116481696e-06, "loss": 1.0384, "num_tokens": 2501089699.0, "step": 6498 }, { "epoch": 1.1584670231729055, "grad_norm": 0.1845703125, "learning_rate": 9.144129847861825e-06, "loss": 1.0135, "num_tokens": 2507372300.0, "step": 6499 }, { "epoch": 1.1586452762923352, "grad_norm": 0.197265625, "learning_rate": 9.141587733892004e-06, "loss": 1.0148, "num_tokens": 2513626597.0, "step": 6500 }, { "epoch": 1.1588235294117646, "grad_norm": 0.1904296875, "learning_rate": 9.139045774784067e-06, "loss": 1.0239, "num_tokens": 2519909015.0, "step": 6501 }, { "epoch": 1.1590017825311942, "grad_norm": 0.181640625, "learning_rate": 9.136503970749844e-06, "loss": 1.0288, "num_tokens": 2526193150.0, "step": 6502 }, { "epoch": 1.1591800356506239, "grad_norm": 0.189453125, "learning_rate": 9.133962322001135e-06, "loss": 1.0457, "num_tokens": 2532470134.0, "step": 6503 }, { "epoch": 1.1593582887700535, "grad_norm": 0.1806640625, "learning_rate": 9.131420828749741e-06, "loss": 1.03, "num_tokens": 2538754169.0, "step": 6504 }, { "epoch": 1.1595365418894832, "grad_norm": 0.1904296875, "learning_rate": 9.128879491207448e-06, "loss": 1.0293, "num_tokens": 2545037965.0, "step": 6505 }, { "epoch": 1.1597147950089126, "grad_norm": 0.1865234375, "learning_rate": 9.126338309586024e-06, "loss": 0.9953, "num_tokens": 2551319320.0, "step": 6506 }, { "epoch": 1.1598930481283423, "grad_norm": 0.193359375, "learning_rate": 9.12379728409722e-06, "loss": 1.0351, "num_tokens": 2557565161.0, "step": 6507 }, { "epoch": 1.160071301247772, "grad_norm": 0.1767578125, "learning_rate": 9.121256414952786e-06, "loss": 1.0017, "num_tokens": 2563844285.0, "step": 6508 }, { "epoch": 1.1602495543672013, "grad_norm": 0.19140625, "learning_rate": 9.118715702364454e-06, "loss": 1.0328, "num_tokens": 2570123358.0, "step": 6509 }, { "epoch": 1.160427807486631, "grad_norm": 0.197265625, "learning_rate": 9.116175146543938e-06, "loss": 1.0661, "num_tokens": 2576365808.0, "step": 6510 }, { "epoch": 1.1606060606060606, "grad_norm": 0.189453125, "learning_rate": 9.113634747702947e-06, "loss": 1.0143, "num_tokens": 2582620916.0, "step": 6511 }, { "epoch": 1.1607843137254903, "grad_norm": 0.1884765625, "learning_rate": 9.111094506053168e-06, "loss": 1.0006, "num_tokens": 2588888791.0, "step": 6512 }, { "epoch": 1.1609625668449197, "grad_norm": 0.1865234375, "learning_rate": 9.108554421806286e-06, "loss": 0.9787, "num_tokens": 2595149809.0, "step": 6513 }, { "epoch": 1.1611408199643494, "grad_norm": 0.185546875, "learning_rate": 9.106014495173969e-06, "loss": 1.0191, "num_tokens": 2601419706.0, "step": 6514 }, { "epoch": 1.161319073083779, "grad_norm": 0.1875, "learning_rate": 9.10347472636786e-06, "loss": 0.9956, "num_tokens": 2607704777.0, "step": 6515 }, { "epoch": 1.1614973262032087, "grad_norm": 0.18359375, "learning_rate": 9.100935115599603e-06, "loss": 0.9928, "num_tokens": 2613927944.0, "step": 6516 }, { "epoch": 1.161675579322638, "grad_norm": 0.177734375, "learning_rate": 9.098395663080824e-06, "loss": 0.9862, "num_tokens": 2620185496.0, "step": 6517 }, { "epoch": 1.1618538324420677, "grad_norm": 0.1796875, "learning_rate": 9.095856369023134e-06, "loss": 1.0552, "num_tokens": 2626469846.0, "step": 6518 }, { "epoch": 1.1620320855614974, "grad_norm": 0.19140625, "learning_rate": 9.093317233638141e-06, "loss": 1.0144, "num_tokens": 2632752839.0, "step": 6519 }, { "epoch": 1.1622103386809268, "grad_norm": 0.1943359375, "learning_rate": 9.090778257137423e-06, "loss": 0.9966, "num_tokens": 2639034897.0, "step": 6520 }, { "epoch": 1.1623885918003565, "grad_norm": 0.1826171875, "learning_rate": 9.088239439732556e-06, "loss": 1.0162, "num_tokens": 2645318007.0, "step": 6521 }, { "epoch": 1.1625668449197861, "grad_norm": 0.18359375, "learning_rate": 9.085700781635107e-06, "loss": 1.034, "num_tokens": 2651577342.0, "step": 6522 }, { "epoch": 1.1627450980392158, "grad_norm": 0.197265625, "learning_rate": 9.08316228305661e-06, "loss": 1.0345, "num_tokens": 2657853606.0, "step": 6523 }, { "epoch": 1.1629233511586452, "grad_norm": 0.18359375, "learning_rate": 9.08062394420861e-06, "loss": 1.0194, "num_tokens": 2664140125.0, "step": 6524 }, { "epoch": 1.1631016042780749, "grad_norm": 0.18359375, "learning_rate": 9.078085765302616e-06, "loss": 1.017, "num_tokens": 2670401934.0, "step": 6525 }, { "epoch": 1.1632798573975045, "grad_norm": 0.181640625, "learning_rate": 9.075547746550147e-06, "loss": 1.0367, "num_tokens": 2676660119.0, "step": 6526 }, { "epoch": 1.1634581105169342, "grad_norm": 0.189453125, "learning_rate": 9.07300988816269e-06, "loss": 1.0324, "num_tokens": 2682890784.0, "step": 6527 }, { "epoch": 1.1636363636363636, "grad_norm": 0.181640625, "learning_rate": 9.070472190351724e-06, "loss": 1.0311, "num_tokens": 2689170226.0, "step": 6528 }, { "epoch": 1.1638146167557932, "grad_norm": 0.177734375, "learning_rate": 9.067934653328724e-06, "loss": 1.0284, "num_tokens": 2695426536.0, "step": 6529 }, { "epoch": 1.1639928698752229, "grad_norm": 0.1845703125, "learning_rate": 9.065397277305132e-06, "loss": 1.0337, "num_tokens": 2701687693.0, "step": 6530 }, { "epoch": 1.1641711229946523, "grad_norm": 0.1865234375, "learning_rate": 9.0628600624924e-06, "loss": 0.9981, "num_tokens": 2707968652.0, "step": 6531 }, { "epoch": 1.164349376114082, "grad_norm": 0.1904296875, "learning_rate": 9.060323009101943e-06, "loss": 1.0053, "num_tokens": 2714226406.0, "step": 6532 }, { "epoch": 1.1645276292335116, "grad_norm": 0.1845703125, "learning_rate": 9.057786117345185e-06, "loss": 1.0172, "num_tokens": 2720510609.0, "step": 6533 }, { "epoch": 1.1647058823529413, "grad_norm": 0.1787109375, "learning_rate": 9.055249387433516e-06, "loss": 1.0071, "num_tokens": 2726794010.0, "step": 6534 }, { "epoch": 1.1648841354723707, "grad_norm": 0.1796875, "learning_rate": 9.05271281957833e-06, "loss": 1.0352, "num_tokens": 2733078120.0, "step": 6535 }, { "epoch": 1.1650623885918003, "grad_norm": 0.185546875, "learning_rate": 9.050176413990991e-06, "loss": 1.0394, "num_tokens": 2739360303.0, "step": 6536 }, { "epoch": 1.16524064171123, "grad_norm": 0.1845703125, "learning_rate": 9.047640170882872e-06, "loss": 1.0329, "num_tokens": 2745644258.0, "step": 6537 }, { "epoch": 1.1654188948306596, "grad_norm": 0.18359375, "learning_rate": 9.045104090465306e-06, "loss": 1.0212, "num_tokens": 2751928634.0, "step": 6538 }, { "epoch": 1.165597147950089, "grad_norm": 0.177734375, "learning_rate": 9.042568172949631e-06, "loss": 1.0355, "num_tokens": 2758212563.0, "step": 6539 }, { "epoch": 1.1657754010695187, "grad_norm": 0.1767578125, "learning_rate": 9.040032418547162e-06, "loss": 1.0035, "num_tokens": 2764495833.0, "step": 6540 }, { "epoch": 1.1659536541889484, "grad_norm": 0.1806640625, "learning_rate": 9.037496827469205e-06, "loss": 0.9807, "num_tokens": 2770781963.0, "step": 6541 }, { "epoch": 1.1661319073083778, "grad_norm": 0.1796875, "learning_rate": 9.034961399927056e-06, "loss": 1.0176, "num_tokens": 2777032220.0, "step": 6542 }, { "epoch": 1.1663101604278074, "grad_norm": 0.1845703125, "learning_rate": 9.032426136131987e-06, "loss": 1.0517, "num_tokens": 2783316427.0, "step": 6543 }, { "epoch": 1.166488413547237, "grad_norm": 0.18359375, "learning_rate": 9.029891036295267e-06, "loss": 1.0372, "num_tokens": 2789588468.0, "step": 6544 }, { "epoch": 1.1666666666666667, "grad_norm": 0.1826171875, "learning_rate": 9.027356100628143e-06, "loss": 1.0124, "num_tokens": 2795871896.0, "step": 6545 }, { "epoch": 1.1668449197860962, "grad_norm": 0.1806640625, "learning_rate": 9.024821329341849e-06, "loss": 1.016, "num_tokens": 2802116910.0, "step": 6546 }, { "epoch": 1.1670231729055258, "grad_norm": 0.181640625, "learning_rate": 9.022286722647616e-06, "loss": 1.0078, "num_tokens": 2808402193.0, "step": 6547 }, { "epoch": 1.1672014260249555, "grad_norm": 0.181640625, "learning_rate": 9.019752280756645e-06, "loss": 1.0046, "num_tokens": 2814685097.0, "step": 6548 }, { "epoch": 1.1673796791443851, "grad_norm": 0.1826171875, "learning_rate": 9.017218003880134e-06, "loss": 1.0195, "num_tokens": 2820955346.0, "step": 6549 }, { "epoch": 1.1675579322638145, "grad_norm": 0.1875, "learning_rate": 9.014683892229267e-06, "loss": 1.0088, "num_tokens": 2827193080.0, "step": 6550 }, { "epoch": 1.1677361853832442, "grad_norm": 0.1806640625, "learning_rate": 9.012149946015214e-06, "loss": 1.0372, "num_tokens": 2833474841.0, "step": 6551 }, { "epoch": 1.1679144385026738, "grad_norm": 0.1826171875, "learning_rate": 9.009616165449126e-06, "loss": 1.0036, "num_tokens": 2839729732.0, "step": 6552 }, { "epoch": 1.1680926916221033, "grad_norm": 0.1787109375, "learning_rate": 9.007082550742142e-06, "loss": 1.0193, "num_tokens": 2845982158.0, "step": 6553 }, { "epoch": 1.168270944741533, "grad_norm": 0.181640625, "learning_rate": 9.00454910210539e-06, "loss": 1.015, "num_tokens": 2852223482.0, "step": 6554 }, { "epoch": 1.1684491978609626, "grad_norm": 0.181640625, "learning_rate": 9.002015819749984e-06, "loss": 1.0199, "num_tokens": 2858485317.0, "step": 6555 }, { "epoch": 1.1686274509803922, "grad_norm": 0.1748046875, "learning_rate": 8.999482703887024e-06, "loss": 1.0053, "num_tokens": 2864732531.0, "step": 6556 }, { "epoch": 1.1688057040998217, "grad_norm": 0.1796875, "learning_rate": 8.99694975472759e-06, "loss": 1.0256, "num_tokens": 2871004987.0, "step": 6557 }, { "epoch": 1.1689839572192513, "grad_norm": 0.1787109375, "learning_rate": 8.99441697248276e-06, "loss": 1.033, "num_tokens": 2877288070.0, "step": 6558 }, { "epoch": 1.169162210338681, "grad_norm": 0.189453125, "learning_rate": 8.991884357363586e-06, "loss": 1.0376, "num_tokens": 2883573844.0, "step": 6559 }, { "epoch": 1.1693404634581106, "grad_norm": 0.1806640625, "learning_rate": 8.989351909581116e-06, "loss": 1.0398, "num_tokens": 2889816573.0, "step": 6560 }, { "epoch": 1.16951871657754, "grad_norm": 0.2138671875, "learning_rate": 8.986819629346375e-06, "loss": 1.0212, "num_tokens": 2896102059.0, "step": 6561 }, { "epoch": 1.1696969696969697, "grad_norm": 0.1875, "learning_rate": 8.984287516870378e-06, "loss": 1.0454, "num_tokens": 2902368359.0, "step": 6562 }, { "epoch": 1.1698752228163993, "grad_norm": 0.185546875, "learning_rate": 8.98175557236413e-06, "loss": 1.0285, "num_tokens": 2908628237.0, "step": 6563 }, { "epoch": 1.1700534759358288, "grad_norm": 0.1787109375, "learning_rate": 8.979223796038616e-06, "loss": 1.006, "num_tokens": 2914911573.0, "step": 6564 }, { "epoch": 1.1702317290552584, "grad_norm": 0.1767578125, "learning_rate": 8.97669218810481e-06, "loss": 0.997, "num_tokens": 2921177573.0, "step": 6565 }, { "epoch": 1.170409982174688, "grad_norm": 0.1826171875, "learning_rate": 8.974160748773673e-06, "loss": 1.0235, "num_tokens": 2927412735.0, "step": 6566 }, { "epoch": 1.1705882352941177, "grad_norm": 0.1865234375, "learning_rate": 8.971629478256149e-06, "loss": 0.9832, "num_tokens": 2933696474.0, "step": 6567 }, { "epoch": 1.1707664884135474, "grad_norm": 0.1806640625, "learning_rate": 8.969098376763172e-06, "loss": 1.0248, "num_tokens": 2939937182.0, "step": 6568 }, { "epoch": 1.1709447415329768, "grad_norm": 0.18359375, "learning_rate": 8.966567444505653e-06, "loss": 1.026, "num_tokens": 2946222141.0, "step": 6569 }, { "epoch": 1.1711229946524064, "grad_norm": 0.181640625, "learning_rate": 8.964036681694504e-06, "loss": 1.0362, "num_tokens": 2952505779.0, "step": 6570 }, { "epoch": 1.171301247771836, "grad_norm": 0.17578125, "learning_rate": 8.961506088540603e-06, "loss": 1.0301, "num_tokens": 2958756923.0, "step": 6571 }, { "epoch": 1.1714795008912655, "grad_norm": 0.185546875, "learning_rate": 8.958975665254833e-06, "loss": 1.0254, "num_tokens": 2965040600.0, "step": 6572 }, { "epoch": 1.1716577540106952, "grad_norm": 0.1767578125, "learning_rate": 8.956445412048052e-06, "loss": 1.0395, "num_tokens": 2971322612.0, "step": 6573 }, { "epoch": 1.1718360071301248, "grad_norm": 0.1865234375, "learning_rate": 8.953915329131108e-06, "loss": 1.0391, "num_tokens": 2977604474.0, "step": 6574 }, { "epoch": 1.1720142602495545, "grad_norm": 0.1826171875, "learning_rate": 8.951385416714836e-06, "loss": 1.0219, "num_tokens": 2983888778.0, "step": 6575 }, { "epoch": 1.172192513368984, "grad_norm": 0.177734375, "learning_rate": 8.948855675010044e-06, "loss": 1.0326, "num_tokens": 2990156480.0, "step": 6576 }, { "epoch": 1.1723707664884135, "grad_norm": 0.1826171875, "learning_rate": 8.946326104227545e-06, "loss": 0.9844, "num_tokens": 2996438994.0, "step": 6577 }, { "epoch": 1.1725490196078432, "grad_norm": 0.1787109375, "learning_rate": 8.943796704578124e-06, "loss": 1.0268, "num_tokens": 3002698309.0, "step": 6578 }, { "epoch": 1.1727272727272728, "grad_norm": 0.177734375, "learning_rate": 8.941267476272562e-06, "loss": 1.0045, "num_tokens": 3008949985.0, "step": 6579 }, { "epoch": 1.1729055258467023, "grad_norm": 0.181640625, "learning_rate": 8.938738419521612e-06, "loss": 1.0153, "num_tokens": 3015209460.0, "step": 6580 }, { "epoch": 1.173083778966132, "grad_norm": 0.1865234375, "learning_rate": 8.93620953453603e-06, "loss": 1.0773, "num_tokens": 3021489193.0, "step": 6581 }, { "epoch": 1.1732620320855616, "grad_norm": 0.1904296875, "learning_rate": 8.93368082152654e-06, "loss": 1.0606, "num_tokens": 3027694771.0, "step": 6582 }, { "epoch": 1.173440285204991, "grad_norm": 0.1884765625, "learning_rate": 8.93115228070387e-06, "loss": 0.981, "num_tokens": 3033977155.0, "step": 6583 }, { "epoch": 1.1736185383244206, "grad_norm": 0.1806640625, "learning_rate": 8.928623912278715e-06, "loss": 1.0168, "num_tokens": 3040252539.0, "step": 6584 }, { "epoch": 1.1737967914438503, "grad_norm": 0.1865234375, "learning_rate": 8.926095716461765e-06, "loss": 1.0181, "num_tokens": 3046532297.0, "step": 6585 }, { "epoch": 1.17397504456328, "grad_norm": 0.193359375, "learning_rate": 8.923567693463702e-06, "loss": 1.0085, "num_tokens": 3052814472.0, "step": 6586 }, { "epoch": 1.1741532976827094, "grad_norm": 0.1904296875, "learning_rate": 8.921039843495178e-06, "loss": 1.0492, "num_tokens": 3059083968.0, "step": 6587 }, { "epoch": 1.174331550802139, "grad_norm": 0.1826171875, "learning_rate": 8.918512166766846e-06, "loss": 0.9864, "num_tokens": 3065350129.0, "step": 6588 }, { "epoch": 1.1745098039215687, "grad_norm": 0.1767578125, "learning_rate": 8.915984663489332e-06, "loss": 1.059, "num_tokens": 3071633100.0, "step": 6589 }, { "epoch": 1.1746880570409983, "grad_norm": 0.1865234375, "learning_rate": 8.913457333873262e-06, "loss": 1.027, "num_tokens": 3077918707.0, "step": 6590 }, { "epoch": 1.1748663101604278, "grad_norm": 0.1845703125, "learning_rate": 8.910930178129231e-06, "loss": 1.0172, "num_tokens": 3084174235.0, "step": 6591 }, { "epoch": 1.1750445632798574, "grad_norm": 0.1767578125, "learning_rate": 8.90840319646783e-06, "loss": 1.0076, "num_tokens": 3090432421.0, "step": 6592 }, { "epoch": 1.175222816399287, "grad_norm": 0.1826171875, "learning_rate": 8.905876389099633e-06, "loss": 1.0595, "num_tokens": 3096714325.0, "step": 6593 }, { "epoch": 1.1754010695187165, "grad_norm": 0.185546875, "learning_rate": 8.903349756235199e-06, "loss": 0.9973, "num_tokens": 3102998129.0, "step": 6594 }, { "epoch": 1.1755793226381461, "grad_norm": 0.1865234375, "learning_rate": 8.900823298085073e-06, "loss": 1.0199, "num_tokens": 3109282236.0, "step": 6595 }, { "epoch": 1.1757575757575758, "grad_norm": 0.1826171875, "learning_rate": 8.898297014859785e-06, "loss": 1.022, "num_tokens": 3115541463.0, "step": 6596 }, { "epoch": 1.1759358288770054, "grad_norm": 0.1748046875, "learning_rate": 8.895770906769854e-06, "loss": 1.0313, "num_tokens": 3121798895.0, "step": 6597 }, { "epoch": 1.1761140819964349, "grad_norm": 0.1787109375, "learning_rate": 8.893244974025775e-06, "loss": 1.0192, "num_tokens": 3128083066.0, "step": 6598 }, { "epoch": 1.1762923351158645, "grad_norm": 0.193359375, "learning_rate": 8.890719216838042e-06, "loss": 1.0124, "num_tokens": 3134367081.0, "step": 6599 }, { "epoch": 1.1764705882352942, "grad_norm": 0.17578125, "learning_rate": 8.888193635417119e-06, "loss": 1.0245, "num_tokens": 3140624278.0, "step": 6600 }, { "epoch": 1.1766488413547238, "grad_norm": 0.181640625, "learning_rate": 8.885668229973468e-06, "loss": 1.0488, "num_tokens": 3146907105.0, "step": 6601 }, { "epoch": 1.1768270944741532, "grad_norm": 0.181640625, "learning_rate": 8.88314300071753e-06, "loss": 1.0025, "num_tokens": 3153188978.0, "step": 6602 }, { "epoch": 1.1770053475935829, "grad_norm": 0.1865234375, "learning_rate": 8.880617947859733e-06, "loss": 1.0276, "num_tokens": 3159475692.0, "step": 6603 }, { "epoch": 1.1771836007130125, "grad_norm": 0.1884765625, "learning_rate": 8.878093071610491e-06, "loss": 1.0022, "num_tokens": 3165735864.0, "step": 6604 }, { "epoch": 1.177361853832442, "grad_norm": 0.1865234375, "learning_rate": 8.875568372180203e-06, "loss": 1.0442, "num_tokens": 3172018805.0, "step": 6605 }, { "epoch": 1.1775401069518716, "grad_norm": 0.1748046875, "learning_rate": 8.873043849779256e-06, "loss": 1.0099, "num_tokens": 3178302404.0, "step": 6606 }, { "epoch": 1.1777183600713013, "grad_norm": 0.18359375, "learning_rate": 8.870519504618011e-06, "loss": 1.0267, "num_tokens": 3184558912.0, "step": 6607 }, { "epoch": 1.177896613190731, "grad_norm": 0.1826171875, "learning_rate": 8.867995336906823e-06, "loss": 1.0192, "num_tokens": 3190816957.0, "step": 6608 }, { "epoch": 1.1780748663101603, "grad_norm": 0.1904296875, "learning_rate": 8.865471346856039e-06, "loss": 0.9887, "num_tokens": 3197101690.0, "step": 6609 }, { "epoch": 1.17825311942959, "grad_norm": 0.1806640625, "learning_rate": 8.862947534675975e-06, "loss": 0.9799, "num_tokens": 3203384700.0, "step": 6610 }, { "epoch": 1.1784313725490196, "grad_norm": 0.18359375, "learning_rate": 8.860423900576946e-06, "loss": 1.0159, "num_tokens": 3209669096.0, "step": 6611 }, { "epoch": 1.1786096256684493, "grad_norm": 0.1845703125, "learning_rate": 8.857900444769247e-06, "loss": 1.0665, "num_tokens": 3215951623.0, "step": 6612 }, { "epoch": 1.1787878787878787, "grad_norm": 0.1845703125, "learning_rate": 8.855377167463157e-06, "loss": 1.0533, "num_tokens": 3222211144.0, "step": 6613 }, { "epoch": 1.1789661319073084, "grad_norm": 0.1845703125, "learning_rate": 8.852854068868944e-06, "loss": 1.0123, "num_tokens": 3228496186.0, "step": 6614 }, { "epoch": 1.179144385026738, "grad_norm": 0.1923828125, "learning_rate": 8.850331149196851e-06, "loss": 1.0155, "num_tokens": 3234753405.0, "step": 6615 }, { "epoch": 1.1793226381461674, "grad_norm": 0.181640625, "learning_rate": 8.84780840865712e-06, "loss": 1.009, "num_tokens": 3241036705.0, "step": 6616 }, { "epoch": 1.179500891265597, "grad_norm": 0.1806640625, "learning_rate": 8.845285847459968e-06, "loss": 1.0306, "num_tokens": 3247314682.0, "step": 6617 }, { "epoch": 1.1796791443850267, "grad_norm": 0.1748046875, "learning_rate": 8.842763465815605e-06, "loss": 0.9794, "num_tokens": 3253573791.0, "step": 6618 }, { "epoch": 1.1798573975044564, "grad_norm": 0.1845703125, "learning_rate": 8.840241263934215e-06, "loss": 1.0023, "num_tokens": 3259858619.0, "step": 6619 }, { "epoch": 1.1800356506238858, "grad_norm": 0.1806640625, "learning_rate": 8.837719242025979e-06, "loss": 1.0167, "num_tokens": 3266143400.0, "step": 6620 }, { "epoch": 1.1802139037433155, "grad_norm": 0.1787109375, "learning_rate": 8.835197400301054e-06, "loss": 1.0142, "num_tokens": 3272408684.0, "step": 6621 }, { "epoch": 1.1803921568627451, "grad_norm": 0.1884765625, "learning_rate": 8.832675738969591e-06, "loss": 1.0494, "num_tokens": 3278690918.0, "step": 6622 }, { "epoch": 1.1805704099821748, "grad_norm": 0.177734375, "learning_rate": 8.830154258241716e-06, "loss": 1.0233, "num_tokens": 3284957715.0, "step": 6623 }, { "epoch": 1.1807486631016042, "grad_norm": 0.1787109375, "learning_rate": 8.827632958327543e-06, "loss": 1.0532, "num_tokens": 3291219974.0, "step": 6624 }, { "epoch": 1.1809269162210339, "grad_norm": 0.1796875, "learning_rate": 8.825111839437177e-06, "loss": 1.0301, "num_tokens": 3297503177.0, "step": 6625 }, { "epoch": 1.1811051693404635, "grad_norm": 0.1767578125, "learning_rate": 8.822590901780702e-06, "loss": 0.9979, "num_tokens": 3303788422.0, "step": 6626 }, { "epoch": 1.181283422459893, "grad_norm": 0.17578125, "learning_rate": 8.820070145568187e-06, "loss": 1.0178, "num_tokens": 3310070024.0, "step": 6627 }, { "epoch": 1.1814616755793226, "grad_norm": 0.185546875, "learning_rate": 8.817549571009684e-06, "loss": 1.0357, "num_tokens": 3316341081.0, "step": 6628 }, { "epoch": 1.1816399286987522, "grad_norm": 0.181640625, "learning_rate": 8.815029178315244e-06, "loss": 1.0131, "num_tokens": 3322612851.0, "step": 6629 }, { "epoch": 1.1818181818181819, "grad_norm": 0.177734375, "learning_rate": 8.812508967694882e-06, "loss": 1.0194, "num_tokens": 3328863869.0, "step": 6630 }, { "epoch": 1.1819964349376115, "grad_norm": 0.1826171875, "learning_rate": 8.809988939358608e-06, "loss": 0.988, "num_tokens": 3335141749.0, "step": 6631 }, { "epoch": 1.182174688057041, "grad_norm": 0.1767578125, "learning_rate": 8.80746909351642e-06, "loss": 1.0355, "num_tokens": 3341423658.0, "step": 6632 }, { "epoch": 1.1823529411764706, "grad_norm": 0.1875, "learning_rate": 8.804949430378296e-06, "loss": 1.0057, "num_tokens": 3347675741.0, "step": 6633 }, { "epoch": 1.1825311942959003, "grad_norm": 0.181640625, "learning_rate": 8.802429950154201e-06, "loss": 1.0189, "num_tokens": 3353959714.0, "step": 6634 }, { "epoch": 1.1827094474153297, "grad_norm": 0.1728515625, "learning_rate": 8.79991065305408e-06, "loss": 1.0028, "num_tokens": 3360219711.0, "step": 6635 }, { "epoch": 1.1828877005347593, "grad_norm": 0.1796875, "learning_rate": 8.797391539287873e-06, "loss": 1.0071, "num_tokens": 3366499674.0, "step": 6636 }, { "epoch": 1.183065953654189, "grad_norm": 0.1796875, "learning_rate": 8.794872609065496e-06, "loss": 1.0329, "num_tokens": 3372764158.0, "step": 6637 }, { "epoch": 1.1832442067736186, "grad_norm": 0.1796875, "learning_rate": 8.79235386259685e-06, "loss": 0.9976, "num_tokens": 3379047407.0, "step": 6638 }, { "epoch": 1.183422459893048, "grad_norm": 0.19140625, "learning_rate": 8.789835300091822e-06, "loss": 1.041, "num_tokens": 3385307732.0, "step": 6639 }, { "epoch": 1.1836007130124777, "grad_norm": 0.185546875, "learning_rate": 8.787316921760284e-06, "loss": 0.993, "num_tokens": 3391592547.0, "step": 6640 }, { "epoch": 1.1837789661319074, "grad_norm": 0.1884765625, "learning_rate": 8.784798727812098e-06, "loss": 1.028, "num_tokens": 3397845496.0, "step": 6641 }, { "epoch": 1.183957219251337, "grad_norm": 0.1904296875, "learning_rate": 8.7822807184571e-06, "loss": 1.0422, "num_tokens": 3404125331.0, "step": 6642 }, { "epoch": 1.1841354723707664, "grad_norm": 0.177734375, "learning_rate": 8.779762893905121e-06, "loss": 1.0496, "num_tokens": 3410380670.0, "step": 6643 }, { "epoch": 1.184313725490196, "grad_norm": 0.1787109375, "learning_rate": 8.777245254365967e-06, "loss": 0.9967, "num_tokens": 3416663316.0, "step": 6644 }, { "epoch": 1.1844919786096257, "grad_norm": 0.1787109375, "learning_rate": 8.77472780004944e-06, "loss": 1.0321, "num_tokens": 3422946915.0, "step": 6645 }, { "epoch": 1.1846702317290552, "grad_norm": 0.1884765625, "learning_rate": 8.772210531165314e-06, "loss": 0.9965, "num_tokens": 3429177973.0, "step": 6646 }, { "epoch": 1.1848484848484848, "grad_norm": 0.1875, "learning_rate": 8.769693447923355e-06, "loss": 1.0271, "num_tokens": 3435461212.0, "step": 6647 }, { "epoch": 1.1850267379679145, "grad_norm": 0.1884765625, "learning_rate": 8.767176550533313e-06, "loss": 1.0435, "num_tokens": 3441739681.0, "step": 6648 }, { "epoch": 1.1852049910873441, "grad_norm": 0.1865234375, "learning_rate": 8.764659839204922e-06, "loss": 1.0314, "num_tokens": 3448023945.0, "step": 6649 }, { "epoch": 1.1853832442067735, "grad_norm": 0.1767578125, "learning_rate": 8.7621433141479e-06, "loss": 1.0198, "num_tokens": 3454286679.0, "step": 6650 }, { "epoch": 1.1855614973262032, "grad_norm": 0.181640625, "learning_rate": 8.759626975571945e-06, "loss": 0.9917, "num_tokens": 3460536000.0, "step": 6651 }, { "epoch": 1.1857397504456328, "grad_norm": 0.185546875, "learning_rate": 8.757110823686754e-06, "loss": 1.034, "num_tokens": 3466817592.0, "step": 6652 }, { "epoch": 1.1859180035650625, "grad_norm": 0.1806640625, "learning_rate": 8.754594858701988e-06, "loss": 1.0183, "num_tokens": 3473099314.0, "step": 6653 }, { "epoch": 1.186096256684492, "grad_norm": 0.1826171875, "learning_rate": 8.752079080827307e-06, "loss": 1.0101, "num_tokens": 3479345942.0, "step": 6654 }, { "epoch": 1.1862745098039216, "grad_norm": 0.1826171875, "learning_rate": 8.749563490272352e-06, "loss": 0.9905, "num_tokens": 3485603555.0, "step": 6655 }, { "epoch": 1.1864527629233512, "grad_norm": 0.185546875, "learning_rate": 8.747048087246747e-06, "loss": 1.0448, "num_tokens": 3491885084.0, "step": 6656 }, { "epoch": 1.1866310160427807, "grad_norm": 0.1796875, "learning_rate": 8.744532871960105e-06, "loss": 1.0264, "num_tokens": 3498167793.0, "step": 6657 }, { "epoch": 1.1868092691622103, "grad_norm": 0.1826171875, "learning_rate": 8.74201784462201e-06, "loss": 1.0342, "num_tokens": 3504452227.0, "step": 6658 }, { "epoch": 1.18698752228164, "grad_norm": 0.18359375, "learning_rate": 8.739503005442051e-06, "loss": 1.005, "num_tokens": 3510735518.0, "step": 6659 }, { "epoch": 1.1871657754010696, "grad_norm": 0.1826171875, "learning_rate": 8.736988354629785e-06, "loss": 1.0608, "num_tokens": 3517017761.0, "step": 6660 }, { "epoch": 1.187344028520499, "grad_norm": 0.1884765625, "learning_rate": 8.734473892394755e-06, "loss": 0.9759, "num_tokens": 3523303087.0, "step": 6661 }, { "epoch": 1.1875222816399287, "grad_norm": 0.173828125, "learning_rate": 8.731959618946495e-06, "loss": 0.9859, "num_tokens": 3529587388.0, "step": 6662 }, { "epoch": 1.1877005347593583, "grad_norm": 0.185546875, "learning_rate": 8.729445534494518e-06, "loss": 1.0267, "num_tokens": 3535853087.0, "step": 6663 }, { "epoch": 1.187878787878788, "grad_norm": 0.18359375, "learning_rate": 8.726931639248326e-06, "loss": 1.0037, "num_tokens": 3542136389.0, "step": 6664 }, { "epoch": 1.1880570409982174, "grad_norm": 0.18359375, "learning_rate": 8.724417933417401e-06, "loss": 1.0206, "num_tokens": 3548419136.0, "step": 6665 }, { "epoch": 1.188235294117647, "grad_norm": 0.18359375, "learning_rate": 8.721904417211212e-06, "loss": 1.0295, "num_tokens": 3554686894.0, "step": 6666 }, { "epoch": 1.1884135472370767, "grad_norm": 0.1787109375, "learning_rate": 8.719391090839208e-06, "loss": 1.0231, "num_tokens": 3560970272.0, "step": 6667 }, { "epoch": 1.1885918003565061, "grad_norm": 0.173828125, "learning_rate": 8.716877954510833e-06, "loss": 0.9832, "num_tokens": 3567254705.0, "step": 6668 }, { "epoch": 1.1887700534759358, "grad_norm": 0.17578125, "learning_rate": 8.714365008435494e-06, "loss": 1.0001, "num_tokens": 3573522881.0, "step": 6669 }, { "epoch": 1.1889483065953654, "grad_norm": 0.18359375, "learning_rate": 8.711852252822605e-06, "loss": 1.0079, "num_tokens": 3579805924.0, "step": 6670 }, { "epoch": 1.189126559714795, "grad_norm": 0.1826171875, "learning_rate": 8.70933968788155e-06, "loss": 1.0309, "num_tokens": 3586089797.0, "step": 6671 }, { "epoch": 1.1893048128342245, "grad_norm": 0.181640625, "learning_rate": 8.706827313821702e-06, "loss": 1.0288, "num_tokens": 3592347369.0, "step": 6672 }, { "epoch": 1.1894830659536542, "grad_norm": 0.181640625, "learning_rate": 8.704315130852423e-06, "loss": 1.003, "num_tokens": 3598626394.0, "step": 6673 }, { "epoch": 1.1896613190730838, "grad_norm": 0.1787109375, "learning_rate": 8.701803139183045e-06, "loss": 1.0537, "num_tokens": 3604863633.0, "step": 6674 }, { "epoch": 1.1898395721925135, "grad_norm": 0.1875, "learning_rate": 8.699291339022906e-06, "loss": 0.9956, "num_tokens": 3611119151.0, "step": 6675 }, { "epoch": 1.190017825311943, "grad_norm": 0.185546875, "learning_rate": 8.6967797305813e-06, "loss": 1.0263, "num_tokens": 3617340311.0, "step": 6676 }, { "epoch": 1.1901960784313725, "grad_norm": 0.1806640625, "learning_rate": 8.694268314067526e-06, "loss": 0.9933, "num_tokens": 3623606589.0, "step": 6677 }, { "epoch": 1.1903743315508022, "grad_norm": 0.1884765625, "learning_rate": 8.69175708969086e-06, "loss": 1.0561, "num_tokens": 3629890453.0, "step": 6678 }, { "epoch": 1.1905525846702316, "grad_norm": 0.1845703125, "learning_rate": 8.689246057660566e-06, "loss": 1.0374, "num_tokens": 3636145929.0, "step": 6679 }, { "epoch": 1.1907308377896613, "grad_norm": 0.1767578125, "learning_rate": 8.686735218185884e-06, "loss": 1.0294, "num_tokens": 3642427371.0, "step": 6680 }, { "epoch": 1.190909090909091, "grad_norm": 0.1884765625, "learning_rate": 8.684224571476048e-06, "loss": 1.0165, "num_tokens": 3648710983.0, "step": 6681 }, { "epoch": 1.1910873440285206, "grad_norm": 0.17578125, "learning_rate": 8.681714117740265e-06, "loss": 1.0391, "num_tokens": 3654980435.0, "step": 6682 }, { "epoch": 1.19126559714795, "grad_norm": 0.1796875, "learning_rate": 8.679203857187742e-06, "loss": 1.0246, "num_tokens": 3661236181.0, "step": 6683 }, { "epoch": 1.1914438502673796, "grad_norm": 0.1767578125, "learning_rate": 8.676693790027649e-06, "loss": 1.0178, "num_tokens": 3667520188.0, "step": 6684 }, { "epoch": 1.1916221033868093, "grad_norm": 0.18359375, "learning_rate": 8.67418391646915e-06, "loss": 0.9898, "num_tokens": 3673778542.0, "step": 6685 }, { "epoch": 1.191800356506239, "grad_norm": 0.1884765625, "learning_rate": 8.671674236721401e-06, "loss": 1.0289, "num_tokens": 3680061363.0, "step": 6686 }, { "epoch": 1.1919786096256684, "grad_norm": 0.1748046875, "learning_rate": 8.66916475099353e-06, "loss": 1.0499, "num_tokens": 3686336138.0, "step": 6687 }, { "epoch": 1.192156862745098, "grad_norm": 0.171875, "learning_rate": 8.66665545949465e-06, "loss": 1.0238, "num_tokens": 3692619700.0, "step": 6688 }, { "epoch": 1.1923351158645277, "grad_norm": 0.1845703125, "learning_rate": 8.664146362433865e-06, "loss": 1.0112, "num_tokens": 3698881000.0, "step": 6689 }, { "epoch": 1.192513368983957, "grad_norm": 0.1806640625, "learning_rate": 8.66163746002026e-06, "loss": 1.0078, "num_tokens": 3705132472.0, "step": 6690 }, { "epoch": 1.1926916221033868, "grad_norm": 0.185546875, "learning_rate": 8.659128752462903e-06, "loss": 1.0363, "num_tokens": 3711401262.0, "step": 6691 }, { "epoch": 1.1928698752228164, "grad_norm": 0.185546875, "learning_rate": 8.656620239970837e-06, "loss": 1.0146, "num_tokens": 3717659738.0, "step": 6692 }, { "epoch": 1.193048128342246, "grad_norm": 0.1728515625, "learning_rate": 8.654111922753103e-06, "loss": 1.0343, "num_tokens": 3723931983.0, "step": 6693 }, { "epoch": 1.1932263814616757, "grad_norm": 0.1875, "learning_rate": 8.651603801018718e-06, "loss": 1.0324, "num_tokens": 3730213097.0, "step": 6694 }, { "epoch": 1.1934046345811051, "grad_norm": 0.1767578125, "learning_rate": 8.649095874976687e-06, "loss": 0.9994, "num_tokens": 3736497990.0, "step": 6695 }, { "epoch": 1.1935828877005348, "grad_norm": 0.1875, "learning_rate": 8.64658814483599e-06, "loss": 1.0103, "num_tokens": 3742781985.0, "step": 6696 }, { "epoch": 1.1937611408199644, "grad_norm": 0.1826171875, "learning_rate": 8.644080610805605e-06, "loss": 1.0387, "num_tokens": 3749065993.0, "step": 6697 }, { "epoch": 1.1939393939393939, "grad_norm": 0.1767578125, "learning_rate": 8.641573273094481e-06, "loss": 1.047, "num_tokens": 3755349350.0, "step": 6698 }, { "epoch": 1.1941176470588235, "grad_norm": 0.1748046875, "learning_rate": 8.639066131911553e-06, "loss": 1.0067, "num_tokens": 3761633097.0, "step": 6699 }, { "epoch": 1.1942959001782532, "grad_norm": 0.1904296875, "learning_rate": 8.636559187465743e-06, "loss": 1.05, "num_tokens": 3767818305.0, "step": 6700 }, { "epoch": 1.1944741532976828, "grad_norm": 0.1904296875, "learning_rate": 8.634052439965955e-06, "loss": 1.0453, "num_tokens": 3774103261.0, "step": 6701 }, { "epoch": 1.1946524064171122, "grad_norm": 0.1796875, "learning_rate": 8.63154588962108e-06, "loss": 1.0261, "num_tokens": 3780387623.0, "step": 6702 }, { "epoch": 1.1948306595365419, "grad_norm": 0.18359375, "learning_rate": 8.629039536639982e-06, "loss": 0.9879, "num_tokens": 3786672753.0, "step": 6703 }, { "epoch": 1.1950089126559715, "grad_norm": 0.185546875, "learning_rate": 8.626533381231528e-06, "loss": 1.0131, "num_tokens": 3792911534.0, "step": 6704 }, { "epoch": 1.1951871657754012, "grad_norm": 0.19140625, "learning_rate": 8.624027423604544e-06, "loss": 1.0443, "num_tokens": 3799171291.0, "step": 6705 }, { "epoch": 1.1953654188948306, "grad_norm": 0.1923828125, "learning_rate": 8.62152166396786e-06, "loss": 1.0128, "num_tokens": 3805455277.0, "step": 6706 }, { "epoch": 1.1955436720142603, "grad_norm": 0.185546875, "learning_rate": 8.61901610253028e-06, "loss": 0.9969, "num_tokens": 3811738736.0, "step": 6707 }, { "epoch": 1.19572192513369, "grad_norm": 0.1796875, "learning_rate": 8.616510739500587e-06, "loss": 0.9924, "num_tokens": 3818022910.0, "step": 6708 }, { "epoch": 1.1959001782531193, "grad_norm": 0.1748046875, "learning_rate": 8.614005575087562e-06, "loss": 0.9929, "num_tokens": 3824308914.0, "step": 6709 }, { "epoch": 1.196078431372549, "grad_norm": 0.1904296875, "learning_rate": 8.611500609499956e-06, "loss": 0.9739, "num_tokens": 3830587607.0, "step": 6710 }, { "epoch": 1.1962566844919786, "grad_norm": 0.17578125, "learning_rate": 8.608995842946508e-06, "loss": 1.0348, "num_tokens": 3836844649.0, "step": 6711 }, { "epoch": 1.1964349376114083, "grad_norm": 0.1806640625, "learning_rate": 8.606491275635942e-06, "loss": 0.9888, "num_tokens": 3843128568.0, "step": 6712 }, { "epoch": 1.1966131907308377, "grad_norm": 0.1787109375, "learning_rate": 8.603986907776967e-06, "loss": 1.0075, "num_tokens": 3849403595.0, "step": 6713 }, { "epoch": 1.1967914438502674, "grad_norm": 0.18359375, "learning_rate": 8.601482739578271e-06, "loss": 1.0266, "num_tokens": 3855683741.0, "step": 6714 }, { "epoch": 1.196969696969697, "grad_norm": 0.1806640625, "learning_rate": 8.59897877124852e-06, "loss": 1.013, "num_tokens": 3861968121.0, "step": 6715 }, { "epoch": 1.1971479500891267, "grad_norm": 0.1923828125, "learning_rate": 8.59647500299638e-06, "loss": 1.0123, "num_tokens": 3868251867.0, "step": 6716 }, { "epoch": 1.197326203208556, "grad_norm": 0.1796875, "learning_rate": 8.593971435030484e-06, "loss": 1.013, "num_tokens": 3874534941.0, "step": 6717 }, { "epoch": 1.1975044563279857, "grad_norm": 0.1787109375, "learning_rate": 8.591468067559458e-06, "loss": 1.0137, "num_tokens": 3880817569.0, "step": 6718 }, { "epoch": 1.1976827094474154, "grad_norm": 0.1865234375, "learning_rate": 8.588964900791906e-06, "loss": 1.0596, "num_tokens": 3887073513.0, "step": 6719 }, { "epoch": 1.1978609625668448, "grad_norm": 0.1845703125, "learning_rate": 8.58646193493642e-06, "loss": 0.9792, "num_tokens": 3893356484.0, "step": 6720 }, { "epoch": 1.1980392156862745, "grad_norm": 0.17578125, "learning_rate": 8.58395917020157e-06, "loss": 1.0089, "num_tokens": 3899608284.0, "step": 6721 }, { "epoch": 1.1982174688057041, "grad_norm": 0.19140625, "learning_rate": 8.58145660679592e-06, "loss": 1.0281, "num_tokens": 3905854800.0, "step": 6722 }, { "epoch": 1.1983957219251338, "grad_norm": 0.17578125, "learning_rate": 8.578954244927996e-06, "loss": 1.0156, "num_tokens": 3912137962.0, "step": 6723 }, { "epoch": 1.1985739750445632, "grad_norm": 0.1806640625, "learning_rate": 8.576452084806326e-06, "loss": 1.0011, "num_tokens": 3918380230.0, "step": 6724 }, { "epoch": 1.1987522281639929, "grad_norm": 0.185546875, "learning_rate": 8.57395012663942e-06, "loss": 1.0041, "num_tokens": 3924657662.0, "step": 6725 }, { "epoch": 1.1989304812834225, "grad_norm": 0.1796875, "learning_rate": 8.57144837063576e-06, "loss": 1.0112, "num_tokens": 3930939158.0, "step": 6726 }, { "epoch": 1.1991087344028521, "grad_norm": 0.1767578125, "learning_rate": 8.568946817003821e-06, "loss": 1.0481, "num_tokens": 3937220735.0, "step": 6727 }, { "epoch": 1.1992869875222816, "grad_norm": 0.17578125, "learning_rate": 8.566445465952058e-06, "loss": 1.0341, "num_tokens": 3943486848.0, "step": 6728 }, { "epoch": 1.1994652406417112, "grad_norm": 0.1748046875, "learning_rate": 8.563944317688912e-06, "loss": 1.0184, "num_tokens": 3949750576.0, "step": 6729 }, { "epoch": 1.1996434937611409, "grad_norm": 0.1796875, "learning_rate": 8.561443372422799e-06, "loss": 1.0212, "num_tokens": 3956035092.0, "step": 6730 }, { "epoch": 1.1998217468805703, "grad_norm": 0.1796875, "learning_rate": 8.558942630362126e-06, "loss": 1.0158, "num_tokens": 3962318435.0, "step": 6731 }, { "epoch": 1.2, "grad_norm": 0.177734375, "learning_rate": 8.556442091715277e-06, "loss": 1.0313, "num_tokens": 3968574377.0, "step": 6732 }, { "epoch": 1.2001782531194296, "grad_norm": 0.1845703125, "learning_rate": 8.553941756690629e-06, "loss": 1.0194, "num_tokens": 3974852242.0, "step": 6733 }, { "epoch": 1.2003565062388593, "grad_norm": 0.1796875, "learning_rate": 8.551441625496531e-06, "loss": 1.0275, "num_tokens": 3981137494.0, "step": 6734 }, { "epoch": 1.2005347593582887, "grad_norm": 0.1796875, "learning_rate": 8.548941698341317e-06, "loss": 1.0264, "num_tokens": 3987418588.0, "step": 6735 }, { "epoch": 1.2007130124777183, "grad_norm": 0.177734375, "learning_rate": 8.546441975433314e-06, "loss": 1.017, "num_tokens": 3993700431.0, "step": 6736 }, { "epoch": 1.200891265597148, "grad_norm": 0.1826171875, "learning_rate": 8.54394245698082e-06, "loss": 1.0149, "num_tokens": 3999983929.0, "step": 6737 }, { "epoch": 1.2010695187165776, "grad_norm": 0.1875, "learning_rate": 8.54144314319212e-06, "loss": 1.0199, "num_tokens": 4006266148.0, "step": 6738 }, { "epoch": 1.201247771836007, "grad_norm": 0.18359375, "learning_rate": 8.53894403427548e-06, "loss": 1.007, "num_tokens": 4012536901.0, "step": 6739 }, { "epoch": 1.2014260249554367, "grad_norm": 0.1845703125, "learning_rate": 8.536445130439155e-06, "loss": 1.0333, "num_tokens": 4018798301.0, "step": 6740 }, { "epoch": 1.2016042780748664, "grad_norm": 0.1845703125, "learning_rate": 8.53394643189138e-06, "loss": 1.0174, "num_tokens": 4025079524.0, "step": 6741 }, { "epoch": 1.2017825311942958, "grad_norm": 0.1904296875, "learning_rate": 8.531447938840368e-06, "loss": 1.0045, "num_tokens": 4031363017.0, "step": 6742 }, { "epoch": 1.2019607843137254, "grad_norm": 0.1826171875, "learning_rate": 8.528949651494322e-06, "loss": 1.0053, "num_tokens": 4037625099.0, "step": 6743 }, { "epoch": 1.202139037433155, "grad_norm": 0.1787109375, "learning_rate": 8.526451570061425e-06, "loss": 1.0526, "num_tokens": 4043906889.0, "step": 6744 }, { "epoch": 1.2023172905525847, "grad_norm": 0.177734375, "learning_rate": 8.523953694749845e-06, "loss": 1.0234, "num_tokens": 4050189709.0, "step": 6745 }, { "epoch": 1.2024955436720142, "grad_norm": 0.177734375, "learning_rate": 8.521456025767722e-06, "loss": 1.0341, "num_tokens": 4056426454.0, "step": 6746 }, { "epoch": 1.2026737967914438, "grad_norm": 0.1904296875, "learning_rate": 8.518958563323195e-06, "loss": 1.0192, "num_tokens": 4062711300.0, "step": 6747 }, { "epoch": 1.2028520499108735, "grad_norm": 0.1826171875, "learning_rate": 8.516461307624376e-06, "loss": 1.0345, "num_tokens": 4068993376.0, "step": 6748 }, { "epoch": 1.2030303030303031, "grad_norm": 0.197265625, "learning_rate": 8.513964258879359e-06, "loss": 1.0365, "num_tokens": 4075206553.0, "step": 6749 }, { "epoch": 1.2032085561497325, "grad_norm": 0.1767578125, "learning_rate": 8.511467417296229e-06, "loss": 1.0178, "num_tokens": 4081482810.0, "step": 6750 }, { "epoch": 1.2033868092691622, "grad_norm": 0.1806640625, "learning_rate": 8.508970783083041e-06, "loss": 1.011, "num_tokens": 4087735427.0, "step": 6751 }, { "epoch": 1.2035650623885918, "grad_norm": 0.1806640625, "learning_rate": 8.506474356447852e-06, "loss": 1.0072, "num_tokens": 4094018287.0, "step": 6752 }, { "epoch": 1.2037433155080213, "grad_norm": 0.1806640625, "learning_rate": 8.503978137598676e-06, "loss": 1.0178, "num_tokens": 4100298464.0, "step": 6753 }, { "epoch": 1.203921568627451, "grad_norm": 0.18359375, "learning_rate": 8.50148212674353e-06, "loss": 1.0084, "num_tokens": 4106583047.0, "step": 6754 }, { "epoch": 1.2040998217468806, "grad_norm": 0.177734375, "learning_rate": 8.498986324090407e-06, "loss": 1.0115, "num_tokens": 4112865514.0, "step": 6755 }, { "epoch": 1.2042780748663102, "grad_norm": 0.1845703125, "learning_rate": 8.496490729847284e-06, "loss": 1.0317, "num_tokens": 4119151419.0, "step": 6756 }, { "epoch": 1.2044563279857399, "grad_norm": 0.189453125, "learning_rate": 8.49399534422212e-06, "loss": 1.0351, "num_tokens": 4125426128.0, "step": 6757 }, { "epoch": 1.2046345811051693, "grad_norm": 0.1796875, "learning_rate": 8.49150016742285e-06, "loss": 1.0238, "num_tokens": 4131693687.0, "step": 6758 }, { "epoch": 1.204812834224599, "grad_norm": 0.1826171875, "learning_rate": 8.489005199657404e-06, "loss": 1.0325, "num_tokens": 4137978308.0, "step": 6759 }, { "epoch": 1.2049910873440286, "grad_norm": 0.189453125, "learning_rate": 8.486510441133686e-06, "loss": 0.9982, "num_tokens": 4144262263.0, "step": 6760 }, { "epoch": 1.205169340463458, "grad_norm": 0.1875, "learning_rate": 8.484015892059582e-06, "loss": 1.006, "num_tokens": 4150479158.0, "step": 6761 }, { "epoch": 1.2053475935828877, "grad_norm": 0.1845703125, "learning_rate": 8.481521552642968e-06, "loss": 1.0202, "num_tokens": 4156759168.0, "step": 6762 }, { "epoch": 1.2055258467023173, "grad_norm": 0.18359375, "learning_rate": 8.479027423091695e-06, "loss": 1.0142, "num_tokens": 4163043767.0, "step": 6763 }, { "epoch": 1.205704099821747, "grad_norm": 0.19921875, "learning_rate": 8.4765335036136e-06, "loss": 1.0555, "num_tokens": 4169316428.0, "step": 6764 }, { "epoch": 1.2058823529411764, "grad_norm": 0.1826171875, "learning_rate": 8.4740397944165e-06, "loss": 1.0093, "num_tokens": 4175575453.0, "step": 6765 }, { "epoch": 1.206060606060606, "grad_norm": 0.193359375, "learning_rate": 8.4715462957082e-06, "loss": 0.9851, "num_tokens": 4181828866.0, "step": 6766 }, { "epoch": 1.2062388591800357, "grad_norm": 0.1826171875, "learning_rate": 8.46905300769648e-06, "loss": 1.0133, "num_tokens": 4188113482.0, "step": 6767 }, { "epoch": 1.2064171122994654, "grad_norm": 0.1923828125, "learning_rate": 8.466559930589111e-06, "loss": 1.0333, "num_tokens": 4194396039.0, "step": 6768 }, { "epoch": 1.2065953654188948, "grad_norm": 0.1806640625, "learning_rate": 8.464067064593839e-06, "loss": 0.9921, "num_tokens": 4200617697.0, "step": 6769 }, { "epoch": 1.2067736185383244, "grad_norm": 0.18359375, "learning_rate": 8.461574409918392e-06, "loss": 1.0426, "num_tokens": 4206849129.0, "step": 6770 }, { "epoch": 1.206951871657754, "grad_norm": 0.189453125, "learning_rate": 8.459081966770488e-06, "loss": 0.9925, "num_tokens": 4213128998.0, "step": 6771 }, { "epoch": 1.2071301247771835, "grad_norm": 0.19140625, "learning_rate": 8.456589735357819e-06, "loss": 0.9956, "num_tokens": 4219415161.0, "step": 6772 }, { "epoch": 1.2073083778966132, "grad_norm": 0.1845703125, "learning_rate": 8.454097715888067e-06, "loss": 0.9974, "num_tokens": 4225699652.0, "step": 6773 }, { "epoch": 1.2074866310160428, "grad_norm": 0.1845703125, "learning_rate": 8.451605908568892e-06, "loss": 1.0192, "num_tokens": 4231960931.0, "step": 6774 }, { "epoch": 1.2076648841354725, "grad_norm": 0.181640625, "learning_rate": 8.449114313607935e-06, "loss": 1.0269, "num_tokens": 4238245438.0, "step": 6775 }, { "epoch": 1.2078431372549019, "grad_norm": 0.189453125, "learning_rate": 8.446622931212823e-06, "loss": 1.0287, "num_tokens": 4244509740.0, "step": 6776 }, { "epoch": 1.2080213903743315, "grad_norm": 0.1923828125, "learning_rate": 8.44413176159116e-06, "loss": 1.0255, "num_tokens": 4250749087.0, "step": 6777 }, { "epoch": 1.2081996434937612, "grad_norm": 0.1806640625, "learning_rate": 8.441640804950538e-06, "loss": 1.0262, "num_tokens": 4257030035.0, "step": 6778 }, { "epoch": 1.2083778966131908, "grad_norm": 0.1904296875, "learning_rate": 8.439150061498524e-06, "loss": 1.0133, "num_tokens": 4263310288.0, "step": 6779 }, { "epoch": 1.2085561497326203, "grad_norm": 0.173828125, "learning_rate": 8.436659531442684e-06, "loss": 1.0085, "num_tokens": 4269580286.0, "step": 6780 }, { "epoch": 1.20873440285205, "grad_norm": 0.1904296875, "learning_rate": 8.434169214990545e-06, "loss": 0.9931, "num_tokens": 4275863996.0, "step": 6781 }, { "epoch": 1.2089126559714796, "grad_norm": 0.1787109375, "learning_rate": 8.431679112349627e-06, "loss": 1.0382, "num_tokens": 4282147641.0, "step": 6782 }, { "epoch": 1.209090909090909, "grad_norm": 0.1826171875, "learning_rate": 8.429189223727436e-06, "loss": 0.9967, "num_tokens": 4288433337.0, "step": 6783 }, { "epoch": 1.2092691622103386, "grad_norm": 0.177734375, "learning_rate": 8.426699549331445e-06, "loss": 1.039, "num_tokens": 4294716941.0, "step": 6784 }, { "epoch": 1.2094474153297683, "grad_norm": 0.18359375, "learning_rate": 8.424210089369129e-06, "loss": 0.9988, "num_tokens": 4300999454.0, "step": 6785 }, { "epoch": 1.209625668449198, "grad_norm": 0.1875, "learning_rate": 8.42172084404793e-06, "loss": 1.0243, "num_tokens": 4307271755.0, "step": 6786 }, { "epoch": 1.2098039215686274, "grad_norm": 0.18359375, "learning_rate": 8.419231813575277e-06, "loss": 0.9978, "num_tokens": 4313548302.0, "step": 6787 }, { "epoch": 1.209982174688057, "grad_norm": 0.1748046875, "learning_rate": 8.41674299815858e-06, "loss": 0.9889, "num_tokens": 4319806233.0, "step": 6788 }, { "epoch": 1.2101604278074867, "grad_norm": 0.1845703125, "learning_rate": 8.41425439800524e-06, "loss": 1.0207, "num_tokens": 4326031002.0, "step": 6789 }, { "epoch": 1.2103386809269163, "grad_norm": 0.1796875, "learning_rate": 8.411766013322624e-06, "loss": 1.0007, "num_tokens": 4332284104.0, "step": 6790 }, { "epoch": 1.2105169340463457, "grad_norm": 0.1796875, "learning_rate": 8.4092778443181e-06, "loss": 0.9954, "num_tokens": 4338533449.0, "step": 6791 }, { "epoch": 1.2106951871657754, "grad_norm": 0.1826171875, "learning_rate": 8.406789891198997e-06, "loss": 1.014, "num_tokens": 4344818284.0, "step": 6792 }, { "epoch": 1.210873440285205, "grad_norm": 0.181640625, "learning_rate": 8.404302154172638e-06, "loss": 1.0258, "num_tokens": 4351085578.0, "step": 6793 }, { "epoch": 1.2110516934046345, "grad_norm": 0.1904296875, "learning_rate": 8.401814633446335e-06, "loss": 1.0265, "num_tokens": 4357362128.0, "step": 6794 }, { "epoch": 1.2112299465240641, "grad_norm": 0.18359375, "learning_rate": 8.399327329227363e-06, "loss": 0.9883, "num_tokens": 4363645139.0, "step": 6795 }, { "epoch": 1.2114081996434938, "grad_norm": 0.177734375, "learning_rate": 8.396840241722996e-06, "loss": 0.9999, "num_tokens": 4369903706.0, "step": 6796 }, { "epoch": 1.2115864527629234, "grad_norm": 0.1923828125, "learning_rate": 8.394353371140485e-06, "loss": 1.0227, "num_tokens": 4376186795.0, "step": 6797 }, { "epoch": 1.2117647058823529, "grad_norm": 0.1806640625, "learning_rate": 8.391866717687058e-06, "loss": 1.0399, "num_tokens": 4382470604.0, "step": 6798 }, { "epoch": 1.2119429590017825, "grad_norm": 0.18359375, "learning_rate": 8.389380281569928e-06, "loss": 1.0026, "num_tokens": 4388754238.0, "step": 6799 }, { "epoch": 1.2121212121212122, "grad_norm": 0.181640625, "learning_rate": 8.38689406299629e-06, "loss": 1.0371, "num_tokens": 4395010910.0, "step": 6800 }, { "epoch": 1.2122994652406418, "grad_norm": 0.18359375, "learning_rate": 8.384408062173325e-06, "loss": 1.0253, "num_tokens": 4401294249.0, "step": 6801 }, { "epoch": 1.2124777183600712, "grad_norm": 0.173828125, "learning_rate": 8.381922279308186e-06, "loss": 0.9866, "num_tokens": 4407561063.0, "step": 6802 }, { "epoch": 1.2126559714795009, "grad_norm": 0.1845703125, "learning_rate": 8.379436714608018e-06, "loss": 1.0043, "num_tokens": 4413843960.0, "step": 6803 }, { "epoch": 1.2128342245989305, "grad_norm": 0.1845703125, "learning_rate": 8.376951368279945e-06, "loss": 1.0188, "num_tokens": 4420085267.0, "step": 6804 }, { "epoch": 1.21301247771836, "grad_norm": 0.1875, "learning_rate": 8.374466240531068e-06, "loss": 1.0189, "num_tokens": 4426367257.0, "step": 6805 }, { "epoch": 1.2131907308377896, "grad_norm": 0.181640625, "learning_rate": 8.371981331568476e-06, "loss": 1.0014, "num_tokens": 4432595632.0, "step": 6806 }, { "epoch": 1.2133689839572193, "grad_norm": 0.18359375, "learning_rate": 8.369496641599234e-06, "loss": 1.0138, "num_tokens": 4438879562.0, "step": 6807 }, { "epoch": 1.213547237076649, "grad_norm": 0.1875, "learning_rate": 8.36701217083039e-06, "loss": 1.0028, "num_tokens": 4445107278.0, "step": 6808 }, { "epoch": 1.2137254901960783, "grad_norm": 0.1875, "learning_rate": 8.364527919468984e-06, "loss": 1.0104, "num_tokens": 4451357362.0, "step": 6809 }, { "epoch": 1.213903743315508, "grad_norm": 0.1796875, "learning_rate": 8.36204388772202e-06, "loss": 1.0148, "num_tokens": 4457608625.0, "step": 6810 }, { "epoch": 1.2140819964349376, "grad_norm": 0.1767578125, "learning_rate": 8.359560075796495e-06, "loss": 1.0282, "num_tokens": 4463883309.0, "step": 6811 }, { "epoch": 1.2142602495543673, "grad_norm": 0.181640625, "learning_rate": 8.357076483899392e-06, "loss": 0.9972, "num_tokens": 4470151249.0, "step": 6812 }, { "epoch": 1.2144385026737967, "grad_norm": 0.1875, "learning_rate": 8.354593112237661e-06, "loss": 0.9846, "num_tokens": 4476393982.0, "step": 6813 }, { "epoch": 1.2146167557932264, "grad_norm": 0.181640625, "learning_rate": 8.352109961018251e-06, "loss": 1.017, "num_tokens": 4482646571.0, "step": 6814 }, { "epoch": 1.214795008912656, "grad_norm": 0.18359375, "learning_rate": 8.349627030448076e-06, "loss": 0.998, "num_tokens": 4488902432.0, "step": 6815 }, { "epoch": 1.2149732620320854, "grad_norm": 0.1728515625, "learning_rate": 8.347144320734037e-06, "loss": 1.0138, "num_tokens": 4495187245.0, "step": 6816 }, { "epoch": 1.215151515151515, "grad_norm": 0.1728515625, "learning_rate": 8.344661832083027e-06, "loss": 1.0275, "num_tokens": 4501472005.0, "step": 6817 }, { "epoch": 1.2153297682709447, "grad_norm": 0.185546875, "learning_rate": 8.342179564701904e-06, "loss": 1.063, "num_tokens": 4507718645.0, "step": 6818 }, { "epoch": 1.2155080213903744, "grad_norm": 0.1787109375, "learning_rate": 8.339697518797522e-06, "loss": 0.9905, "num_tokens": 4513991067.0, "step": 6819 }, { "epoch": 1.215686274509804, "grad_norm": 0.1796875, "learning_rate": 8.337215694576708e-06, "loss": 1.0278, "num_tokens": 4520262053.0, "step": 6820 }, { "epoch": 1.2158645276292335, "grad_norm": 0.18359375, "learning_rate": 8.334734092246277e-06, "loss": 1.0221, "num_tokens": 4526519955.0, "step": 6821 }, { "epoch": 1.2160427807486631, "grad_norm": 0.177734375, "learning_rate": 8.332252712013014e-06, "loss": 1.0033, "num_tokens": 4532804361.0, "step": 6822 }, { "epoch": 1.2162210338680928, "grad_norm": 0.1875, "learning_rate": 8.329771554083697e-06, "loss": 1.0165, "num_tokens": 4539088081.0, "step": 6823 }, { "epoch": 1.2163992869875222, "grad_norm": 0.1826171875, "learning_rate": 8.327290618665082e-06, "loss": 1.0101, "num_tokens": 4545355435.0, "step": 6824 }, { "epoch": 1.2165775401069518, "grad_norm": 0.19140625, "learning_rate": 8.324809905963904e-06, "loss": 1.0117, "num_tokens": 4551609296.0, "step": 6825 }, { "epoch": 1.2167557932263815, "grad_norm": 0.1748046875, "learning_rate": 8.322329416186883e-06, "loss": 1.0012, "num_tokens": 4557837523.0, "step": 6826 }, { "epoch": 1.216934046345811, "grad_norm": 0.1806640625, "learning_rate": 8.319849149540716e-06, "loss": 1.0269, "num_tokens": 4564121848.0, "step": 6827 }, { "epoch": 1.2171122994652406, "grad_norm": 0.1884765625, "learning_rate": 8.31736910623209e-06, "loss": 1.0336, "num_tokens": 4570405405.0, "step": 6828 }, { "epoch": 1.2172905525846702, "grad_norm": 0.1943359375, "learning_rate": 8.314889286467663e-06, "loss": 0.9995, "num_tokens": 4576688444.0, "step": 6829 }, { "epoch": 1.2174688057040999, "grad_norm": 0.1796875, "learning_rate": 8.31240969045408e-06, "loss": 0.9971, "num_tokens": 4582954200.0, "step": 6830 }, { "epoch": 1.2176470588235295, "grad_norm": 0.1806640625, "learning_rate": 8.30993031839796e-06, "loss": 1.031, "num_tokens": 4589225931.0, "step": 6831 }, { "epoch": 1.217825311942959, "grad_norm": 0.1845703125, "learning_rate": 8.30745117050592e-06, "loss": 1.0045, "num_tokens": 4595510192.0, "step": 6832 }, { "epoch": 1.2180035650623886, "grad_norm": 0.177734375, "learning_rate": 8.304972246984544e-06, "loss": 1.0008, "num_tokens": 4601792063.0, "step": 6833 }, { "epoch": 1.2181818181818183, "grad_norm": 0.189453125, "learning_rate": 8.302493548040398e-06, "loss": 1.0289, "num_tokens": 4607986699.0, "step": 6834 }, { "epoch": 1.2183600713012477, "grad_norm": 0.1845703125, "learning_rate": 8.300015073880036e-06, "loss": 1.0302, "num_tokens": 4614268875.0, "step": 6835 }, { "epoch": 1.2185383244206773, "grad_norm": 0.1796875, "learning_rate": 8.297536824709988e-06, "loss": 0.9768, "num_tokens": 4620537461.0, "step": 6836 }, { "epoch": 1.218716577540107, "grad_norm": 0.18359375, "learning_rate": 8.295058800736776e-06, "loss": 0.9875, "num_tokens": 4626783389.0, "step": 6837 }, { "epoch": 1.2188948306595366, "grad_norm": 0.1845703125, "learning_rate": 8.292581002166877e-06, "loss": 1.0295, "num_tokens": 4633057862.0, "step": 6838 }, { "epoch": 1.219073083778966, "grad_norm": 0.181640625, "learning_rate": 8.290103429206779e-06, "loss": 0.9928, "num_tokens": 4639310768.0, "step": 6839 }, { "epoch": 1.2192513368983957, "grad_norm": 0.181640625, "learning_rate": 8.287626082062933e-06, "loss": 1.0624, "num_tokens": 4645577635.0, "step": 6840 }, { "epoch": 1.2194295900178254, "grad_norm": 0.177734375, "learning_rate": 8.285148960941785e-06, "loss": 1.006, "num_tokens": 4651822750.0, "step": 6841 }, { "epoch": 1.219607843137255, "grad_norm": 0.177734375, "learning_rate": 8.282672066049742e-06, "loss": 1.0166, "num_tokens": 4658107496.0, "step": 6842 }, { "epoch": 1.2197860962566844, "grad_norm": 0.1806640625, "learning_rate": 8.280195397593212e-06, "loss": 1.0688, "num_tokens": 4664341811.0, "step": 6843 }, { "epoch": 1.219964349376114, "grad_norm": 0.185546875, "learning_rate": 8.277718955778577e-06, "loss": 1.0041, "num_tokens": 4670610531.0, "step": 6844 }, { "epoch": 1.2201426024955437, "grad_norm": 0.1865234375, "learning_rate": 8.275242740812195e-06, "loss": 0.9997, "num_tokens": 4676894270.0, "step": 6845 }, { "epoch": 1.2203208556149732, "grad_norm": 0.1767578125, "learning_rate": 8.272766752900412e-06, "loss": 1.0146, "num_tokens": 4683176460.0, "step": 6846 }, { "epoch": 1.2204991087344028, "grad_norm": 0.1875, "learning_rate": 8.270290992249552e-06, "loss": 0.9996, "num_tokens": 4689459964.0, "step": 6847 }, { "epoch": 1.2206773618538325, "grad_norm": 0.1748046875, "learning_rate": 8.267815459065921e-06, "loss": 0.9942, "num_tokens": 4695744965.0, "step": 6848 }, { "epoch": 1.2208556149732621, "grad_norm": 0.1923828125, "learning_rate": 8.265340153555806e-06, "loss": 1.0086, "num_tokens": 4702030362.0, "step": 6849 }, { "epoch": 1.2210338680926915, "grad_norm": 0.1865234375, "learning_rate": 8.262865075925475e-06, "loss": 1.0166, "num_tokens": 4708315933.0, "step": 6850 }, { "epoch": 1.2212121212121212, "grad_norm": 0.1796875, "learning_rate": 8.260390226381174e-06, "loss": 1.0309, "num_tokens": 4714582326.0, "step": 6851 }, { "epoch": 1.2213903743315508, "grad_norm": 0.1748046875, "learning_rate": 8.257915605129139e-06, "loss": 1.011, "num_tokens": 4720855807.0, "step": 6852 }, { "epoch": 1.2215686274509805, "grad_norm": 0.1796875, "learning_rate": 8.255441212375574e-06, "loss": 1.0295, "num_tokens": 4727133031.0, "step": 6853 }, { "epoch": 1.22174688057041, "grad_norm": 0.17578125, "learning_rate": 8.252967048326672e-06, "loss": 1.0326, "num_tokens": 4733418144.0, "step": 6854 }, { "epoch": 1.2219251336898396, "grad_norm": 0.1826171875, "learning_rate": 8.250493113188607e-06, "loss": 0.9942, "num_tokens": 4739701384.0, "step": 6855 }, { "epoch": 1.2221033868092692, "grad_norm": 0.1787109375, "learning_rate": 8.248019407167533e-06, "loss": 0.969, "num_tokens": 4745947613.0, "step": 6856 }, { "epoch": 1.2222816399286986, "grad_norm": 0.1787109375, "learning_rate": 8.245545930469583e-06, "loss": 1.007, "num_tokens": 4752233627.0, "step": 6857 }, { "epoch": 1.2224598930481283, "grad_norm": 0.1806640625, "learning_rate": 8.243072683300875e-06, "loss": 1.0258, "num_tokens": 4758506602.0, "step": 6858 }, { "epoch": 1.222638146167558, "grad_norm": 0.1728515625, "learning_rate": 8.240599665867502e-06, "loss": 0.9981, "num_tokens": 4764782738.0, "step": 6859 }, { "epoch": 1.2228163992869876, "grad_norm": 0.173828125, "learning_rate": 8.238126878375546e-06, "loss": 0.9966, "num_tokens": 4771009140.0, "step": 6860 }, { "epoch": 1.222994652406417, "grad_norm": 0.1748046875, "learning_rate": 8.235654321031054e-06, "loss": 1.0297, "num_tokens": 4777273686.0, "step": 6861 }, { "epoch": 1.2231729055258467, "grad_norm": 0.1787109375, "learning_rate": 8.233181994040078e-06, "loss": 1.0176, "num_tokens": 4783503143.0, "step": 6862 }, { "epoch": 1.2233511586452763, "grad_norm": 0.1796875, "learning_rate": 8.23070989760863e-06, "loss": 0.9979, "num_tokens": 4789756797.0, "step": 6863 }, { "epoch": 1.223529411764706, "grad_norm": 0.1826171875, "learning_rate": 8.22823803194271e-06, "loss": 1.0057, "num_tokens": 4796022602.0, "step": 6864 }, { "epoch": 1.2237076648841354, "grad_norm": 0.17578125, "learning_rate": 8.225766397248302e-06, "loss": 1.0096, "num_tokens": 4802300251.0, "step": 6865 }, { "epoch": 1.223885918003565, "grad_norm": 0.1845703125, "learning_rate": 8.223294993731368e-06, "loss": 1.0253, "num_tokens": 4808534754.0, "step": 6866 }, { "epoch": 1.2240641711229947, "grad_norm": 0.1806640625, "learning_rate": 8.220823821597848e-06, "loss": 0.9987, "num_tokens": 4814819108.0, "step": 6867 }, { "epoch": 1.2242424242424241, "grad_norm": 0.1845703125, "learning_rate": 8.218352881053671e-06, "loss": 0.9856, "num_tokens": 4821080916.0, "step": 6868 }, { "epoch": 1.2244206773618538, "grad_norm": 0.177734375, "learning_rate": 8.215882172304732e-06, "loss": 1.0451, "num_tokens": 4827364004.0, "step": 6869 }, { "epoch": 1.2245989304812834, "grad_norm": 0.181640625, "learning_rate": 8.21341169555692e-06, "loss": 1.0384, "num_tokens": 4833644877.0, "step": 6870 }, { "epoch": 1.224777183600713, "grad_norm": 0.189453125, "learning_rate": 8.210941451016103e-06, "loss": 1.0134, "num_tokens": 4839929807.0, "step": 6871 }, { "epoch": 1.2249554367201425, "grad_norm": 0.1884765625, "learning_rate": 8.208471438888124e-06, "loss": 1.0405, "num_tokens": 4846215342.0, "step": 6872 }, { "epoch": 1.2251336898395722, "grad_norm": 0.18359375, "learning_rate": 8.20600165937881e-06, "loss": 1.0288, "num_tokens": 4852467510.0, "step": 6873 }, { "epoch": 1.2253119429590018, "grad_norm": 0.1865234375, "learning_rate": 8.203532112693969e-06, "loss": 0.999, "num_tokens": 4858736278.0, "step": 6874 }, { "epoch": 1.2254901960784315, "grad_norm": 0.1796875, "learning_rate": 8.201062799039393e-06, "loss": 1.0418, "num_tokens": 4864997215.0, "step": 6875 }, { "epoch": 1.2256684491978609, "grad_norm": 0.1806640625, "learning_rate": 8.19859371862084e-06, "loss": 1.0359, "num_tokens": 4871281094.0, "step": 6876 }, { "epoch": 1.2258467023172905, "grad_norm": 0.1826171875, "learning_rate": 8.196124871644067e-06, "loss": 1.0122, "num_tokens": 4877564341.0, "step": 6877 }, { "epoch": 1.2260249554367202, "grad_norm": 0.1845703125, "learning_rate": 8.1936562583148e-06, "loss": 0.999, "num_tokens": 4883848180.0, "step": 6878 }, { "epoch": 1.2262032085561496, "grad_norm": 0.18359375, "learning_rate": 8.19118787883875e-06, "loss": 1.0221, "num_tokens": 4890127812.0, "step": 6879 }, { "epoch": 1.2263814616755793, "grad_norm": 0.1796875, "learning_rate": 8.188719733421611e-06, "loss": 1.0354, "num_tokens": 4896411272.0, "step": 6880 }, { "epoch": 1.226559714795009, "grad_norm": 0.185546875, "learning_rate": 8.186251822269047e-06, "loss": 1.0226, "num_tokens": 4902695404.0, "step": 6881 }, { "epoch": 1.2267379679144386, "grad_norm": 0.18359375, "learning_rate": 8.183784145586718e-06, "loss": 1.0189, "num_tokens": 4908947119.0, "step": 6882 }, { "epoch": 1.2269162210338682, "grad_norm": 0.193359375, "learning_rate": 8.181316703580252e-06, "loss": 1.0191, "num_tokens": 4915223354.0, "step": 6883 }, { "epoch": 1.2270944741532976, "grad_norm": 0.1826171875, "learning_rate": 8.178849496455256e-06, "loss": 1.0343, "num_tokens": 4921479017.0, "step": 6884 }, { "epoch": 1.2272727272727273, "grad_norm": 0.1865234375, "learning_rate": 8.176382524417333e-06, "loss": 1.0168, "num_tokens": 4927721446.0, "step": 6885 }, { "epoch": 1.227450980392157, "grad_norm": 0.1865234375, "learning_rate": 8.173915787672047e-06, "loss": 1.0217, "num_tokens": 4933939433.0, "step": 6886 }, { "epoch": 1.2276292335115864, "grad_norm": 0.1787109375, "learning_rate": 8.17144928642496e-06, "loss": 0.9972, "num_tokens": 4940218706.0, "step": 6887 }, { "epoch": 1.227807486631016, "grad_norm": 0.1865234375, "learning_rate": 8.168983020881598e-06, "loss": 1.025, "num_tokens": 4946483744.0, "step": 6888 }, { "epoch": 1.2279857397504457, "grad_norm": 0.185546875, "learning_rate": 8.166516991247484e-06, "loss": 1.0538, "num_tokens": 4952734839.0, "step": 6889 }, { "epoch": 1.228163992869875, "grad_norm": 0.1845703125, "learning_rate": 8.164051197728106e-06, "loss": 1.0153, "num_tokens": 4959017254.0, "step": 6890 }, { "epoch": 1.2283422459893047, "grad_norm": 0.18359375, "learning_rate": 8.161585640528944e-06, "loss": 1.0037, "num_tokens": 4965282829.0, "step": 6891 }, { "epoch": 1.2285204991087344, "grad_norm": 0.1865234375, "learning_rate": 8.159120319855449e-06, "loss": 1.053, "num_tokens": 4971520328.0, "step": 6892 }, { "epoch": 1.228698752228164, "grad_norm": 0.1845703125, "learning_rate": 8.156655235913058e-06, "loss": 1.0079, "num_tokens": 4977788355.0, "step": 6893 }, { "epoch": 1.2288770053475937, "grad_norm": 0.185546875, "learning_rate": 8.154190388907188e-06, "loss": 0.9964, "num_tokens": 4984054808.0, "step": 6894 }, { "epoch": 1.2290552584670231, "grad_norm": 0.185546875, "learning_rate": 8.151725779043234e-06, "loss": 1.0419, "num_tokens": 4990336091.0, "step": 6895 }, { "epoch": 1.2292335115864528, "grad_norm": 0.181640625, "learning_rate": 8.149261406526577e-06, "loss": 1.0294, "num_tokens": 4996601386.0, "step": 6896 }, { "epoch": 1.2294117647058824, "grad_norm": 0.1767578125, "learning_rate": 8.146797271562562e-06, "loss": 1.0443, "num_tokens": 5002885025.0, "step": 6897 }, { "epoch": 1.2295900178253119, "grad_norm": 0.18359375, "learning_rate": 8.144333374356543e-06, "loss": 1.0522, "num_tokens": 5009165783.0, "step": 6898 }, { "epoch": 1.2297682709447415, "grad_norm": 0.193359375, "learning_rate": 8.141869715113825e-06, "loss": 1.046, "num_tokens": 5015452607.0, "step": 6899 }, { "epoch": 1.2299465240641712, "grad_norm": 0.17578125, "learning_rate": 8.139406294039705e-06, "loss": 1.0204, "num_tokens": 5021719923.0, "step": 6900 }, { "epoch": 1.2301247771836008, "grad_norm": 0.1796875, "learning_rate": 8.136943111339466e-06, "loss": 1.0182, "num_tokens": 5027985768.0, "step": 6901 }, { "epoch": 1.2303030303030302, "grad_norm": 0.1767578125, "learning_rate": 8.134480167218361e-06, "loss": 1.0678, "num_tokens": 5034267592.0, "step": 6902 }, { "epoch": 1.2304812834224599, "grad_norm": 0.1865234375, "learning_rate": 8.132017461881632e-06, "loss": 1.0265, "num_tokens": 5040550190.0, "step": 6903 }, { "epoch": 1.2306595365418895, "grad_norm": 0.1767578125, "learning_rate": 8.129554995534493e-06, "loss": 0.996, "num_tokens": 5046762326.0, "step": 6904 }, { "epoch": 1.2308377896613192, "grad_norm": 0.1796875, "learning_rate": 8.127092768382144e-06, "loss": 1.0114, "num_tokens": 5053035355.0, "step": 6905 }, { "epoch": 1.2310160427807486, "grad_norm": 0.1865234375, "learning_rate": 8.124630780629761e-06, "loss": 1.0282, "num_tokens": 5059301583.0, "step": 6906 }, { "epoch": 1.2311942959001783, "grad_norm": 0.185546875, "learning_rate": 8.122169032482502e-06, "loss": 1.0126, "num_tokens": 5065586639.0, "step": 6907 }, { "epoch": 1.231372549019608, "grad_norm": 0.18359375, "learning_rate": 8.119707524145511e-06, "loss": 1.0158, "num_tokens": 5071870867.0, "step": 6908 }, { "epoch": 1.2315508021390373, "grad_norm": 0.1796875, "learning_rate": 8.117246255823896e-06, "loss": 0.9878, "num_tokens": 5078128692.0, "step": 6909 }, { "epoch": 1.231729055258467, "grad_norm": 0.1826171875, "learning_rate": 8.114785227722763e-06, "loss": 1.021, "num_tokens": 5084412628.0, "step": 6910 }, { "epoch": 1.2319073083778966, "grad_norm": 0.18359375, "learning_rate": 8.112324440047186e-06, "loss": 1.0425, "num_tokens": 5090694780.0, "step": 6911 }, { "epoch": 1.2320855614973263, "grad_norm": 0.1865234375, "learning_rate": 8.109863893002225e-06, "loss": 1.0278, "num_tokens": 5096979436.0, "step": 6912 }, { "epoch": 1.2322638146167557, "grad_norm": 0.1767578125, "learning_rate": 8.107403586792914e-06, "loss": 0.9903, "num_tokens": 5103236937.0, "step": 6913 }, { "epoch": 1.2324420677361854, "grad_norm": 0.18359375, "learning_rate": 8.10494352162428e-06, "loss": 0.9684, "num_tokens": 5109500537.0, "step": 6914 }, { "epoch": 1.232620320855615, "grad_norm": 0.189453125, "learning_rate": 8.10248369770131e-06, "loss": 1.0176, "num_tokens": 5115783953.0, "step": 6915 }, { "epoch": 1.2327985739750447, "grad_norm": 0.2001953125, "learning_rate": 8.100024115228986e-06, "loss": 0.9963, "num_tokens": 5122067141.0, "step": 6916 }, { "epoch": 1.232976827094474, "grad_norm": 0.1875, "learning_rate": 8.09756477441227e-06, "loss": 1.0461, "num_tokens": 5128337673.0, "step": 6917 }, { "epoch": 1.2331550802139037, "grad_norm": 0.181640625, "learning_rate": 8.09510567545609e-06, "loss": 1.03, "num_tokens": 5134613733.0, "step": 6918 }, { "epoch": 1.2333333333333334, "grad_norm": 0.1767578125, "learning_rate": 8.092646818565371e-06, "loss": 0.9937, "num_tokens": 5140898518.0, "step": 6919 }, { "epoch": 1.2335115864527628, "grad_norm": 0.177734375, "learning_rate": 8.090188203945006e-06, "loss": 1.0179, "num_tokens": 5147135843.0, "step": 6920 }, { "epoch": 1.2336898395721925, "grad_norm": 0.189453125, "learning_rate": 8.087729831799878e-06, "loss": 1.0465, "num_tokens": 5153418965.0, "step": 6921 }, { "epoch": 1.2338680926916221, "grad_norm": 0.1806640625, "learning_rate": 8.085271702334837e-06, "loss": 0.9898, "num_tokens": 5159701551.0, "step": 6922 }, { "epoch": 1.2340463458110518, "grad_norm": 0.18359375, "learning_rate": 8.082813815754718e-06, "loss": 1.0095, "num_tokens": 5165955071.0, "step": 6923 }, { "epoch": 1.2342245989304812, "grad_norm": 0.1787109375, "learning_rate": 8.080356172264342e-06, "loss": 1.0339, "num_tokens": 5172177104.0, "step": 6924 }, { "epoch": 1.2344028520499108, "grad_norm": 0.185546875, "learning_rate": 8.077898772068504e-06, "loss": 1.045, "num_tokens": 5178437231.0, "step": 6925 }, { "epoch": 1.2345811051693405, "grad_norm": 0.1748046875, "learning_rate": 8.07544161537198e-06, "loss": 1.0528, "num_tokens": 5184680790.0, "step": 6926 }, { "epoch": 1.2347593582887701, "grad_norm": 0.181640625, "learning_rate": 8.072984702379524e-06, "loss": 1.0463, "num_tokens": 5190963668.0, "step": 6927 }, { "epoch": 1.2349376114081996, "grad_norm": 0.1796875, "learning_rate": 8.07052803329587e-06, "loss": 1.0116, "num_tokens": 5197246608.0, "step": 6928 }, { "epoch": 1.2351158645276292, "grad_norm": 0.189453125, "learning_rate": 8.068071608325741e-06, "loss": 1.023, "num_tokens": 5203506304.0, "step": 6929 }, { "epoch": 1.2352941176470589, "grad_norm": 0.18359375, "learning_rate": 8.065615427673819e-06, "loss": 1.026, "num_tokens": 5209763668.0, "step": 6930 }, { "epoch": 1.2354723707664883, "grad_norm": 0.1728515625, "learning_rate": 8.063159491544785e-06, "loss": 1.0111, "num_tokens": 5216046651.0, "step": 6931 }, { "epoch": 1.235650623885918, "grad_norm": 0.177734375, "learning_rate": 8.060703800143293e-06, "loss": 1.0009, "num_tokens": 5222259028.0, "step": 6932 }, { "epoch": 1.2358288770053476, "grad_norm": 0.1865234375, "learning_rate": 8.058248353673972e-06, "loss": 0.9823, "num_tokens": 5228518529.0, "step": 6933 }, { "epoch": 1.2360071301247773, "grad_norm": 0.177734375, "learning_rate": 8.05579315234144e-06, "loss": 1.0507, "num_tokens": 5234785634.0, "step": 6934 }, { "epoch": 1.2361853832442067, "grad_norm": 0.1796875, "learning_rate": 8.05333819635029e-06, "loss": 1.0174, "num_tokens": 5241039472.0, "step": 6935 }, { "epoch": 1.2363636363636363, "grad_norm": 0.1748046875, "learning_rate": 8.050883485905085e-06, "loss": 1.033, "num_tokens": 5247323660.0, "step": 6936 }, { "epoch": 1.236541889483066, "grad_norm": 0.17578125, "learning_rate": 8.04842902121039e-06, "loss": 1.0409, "num_tokens": 5253606247.0, "step": 6937 }, { "epoch": 1.2367201426024956, "grad_norm": 0.1923828125, "learning_rate": 8.045974802470725e-06, "loss": 1.0612, "num_tokens": 5259887349.0, "step": 6938 }, { "epoch": 1.236898395721925, "grad_norm": 0.1767578125, "learning_rate": 8.043520829890603e-06, "loss": 0.9979, "num_tokens": 5266172760.0, "step": 6939 }, { "epoch": 1.2370766488413547, "grad_norm": 0.18359375, "learning_rate": 8.041067103674518e-06, "loss": 1.0302, "num_tokens": 5272425145.0, "step": 6940 }, { "epoch": 1.2372549019607844, "grad_norm": 0.1845703125, "learning_rate": 8.038613624026935e-06, "loss": 1.0358, "num_tokens": 5278707175.0, "step": 6941 }, { "epoch": 1.2374331550802138, "grad_norm": 0.1787109375, "learning_rate": 8.036160391152305e-06, "loss": 1.0151, "num_tokens": 5284966522.0, "step": 6942 }, { "epoch": 1.2376114081996434, "grad_norm": 0.185546875, "learning_rate": 8.033707405255056e-06, "loss": 0.9967, "num_tokens": 5291248763.0, "step": 6943 }, { "epoch": 1.237789661319073, "grad_norm": 0.1796875, "learning_rate": 8.0312546665396e-06, "loss": 1.0216, "num_tokens": 5297502169.0, "step": 6944 }, { "epoch": 1.2379679144385027, "grad_norm": 0.1826171875, "learning_rate": 8.02880217521032e-06, "loss": 1.0276, "num_tokens": 5303722946.0, "step": 6945 }, { "epoch": 1.2381461675579324, "grad_norm": 0.1826171875, "learning_rate": 8.02634993147158e-06, "loss": 1.0363, "num_tokens": 5310005655.0, "step": 6946 }, { "epoch": 1.2383244206773618, "grad_norm": 0.1796875, "learning_rate": 8.02389793552773e-06, "loss": 1.0451, "num_tokens": 5316280946.0, "step": 6947 }, { "epoch": 1.2385026737967915, "grad_norm": 0.17578125, "learning_rate": 8.021446187583095e-06, "loss": 0.9841, "num_tokens": 5322565253.0, "step": 6948 }, { "epoch": 1.2386809269162211, "grad_norm": 0.17578125, "learning_rate": 8.01899468784198e-06, "loss": 1.017, "num_tokens": 5328848579.0, "step": 6949 }, { "epoch": 1.2388591800356505, "grad_norm": 0.17578125, "learning_rate": 8.016543436508666e-06, "loss": 1.0165, "num_tokens": 5335132447.0, "step": 6950 }, { "epoch": 1.2390374331550802, "grad_norm": 0.1826171875, "learning_rate": 8.014092433787419e-06, "loss": 1.0278, "num_tokens": 5341379701.0, "step": 6951 }, { "epoch": 1.2392156862745098, "grad_norm": 0.177734375, "learning_rate": 8.011641679882484e-06, "loss": 1.0154, "num_tokens": 5347662791.0, "step": 6952 }, { "epoch": 1.2393939393939393, "grad_norm": 0.1826171875, "learning_rate": 8.009191174998077e-06, "loss": 0.9994, "num_tokens": 5353944994.0, "step": 6953 }, { "epoch": 1.239572192513369, "grad_norm": 0.1875, "learning_rate": 8.0067409193384e-06, "loss": 0.9762, "num_tokens": 5360213842.0, "step": 6954 }, { "epoch": 1.2397504456327986, "grad_norm": 0.1767578125, "learning_rate": 8.004290913107638e-06, "loss": 1.0139, "num_tokens": 5366443704.0, "step": 6955 }, { "epoch": 1.2399286987522282, "grad_norm": 0.1875, "learning_rate": 8.001841156509948e-06, "loss": 0.9894, "num_tokens": 5372726839.0, "step": 6956 }, { "epoch": 1.2401069518716579, "grad_norm": 0.1767578125, "learning_rate": 7.999391649749465e-06, "loss": 0.9838, "num_tokens": 5379011325.0, "step": 6957 }, { "epoch": 1.2402852049910873, "grad_norm": 0.177734375, "learning_rate": 7.996942393030314e-06, "loss": 0.9674, "num_tokens": 5385296796.0, "step": 6958 }, { "epoch": 1.240463458110517, "grad_norm": 0.181640625, "learning_rate": 7.994493386556588e-06, "loss": 1.0298, "num_tokens": 5391556141.0, "step": 6959 }, { "epoch": 1.2406417112299466, "grad_norm": 0.1787109375, "learning_rate": 7.992044630532365e-06, "loss": 1.0078, "num_tokens": 5397796154.0, "step": 6960 }, { "epoch": 1.240819964349376, "grad_norm": 0.1787109375, "learning_rate": 7.989596125161698e-06, "loss": 1.0381, "num_tokens": 5404078559.0, "step": 6961 }, { "epoch": 1.2409982174688057, "grad_norm": 0.1748046875, "learning_rate": 7.98714787064862e-06, "loss": 1.0169, "num_tokens": 5410362399.0, "step": 6962 }, { "epoch": 1.2411764705882353, "grad_norm": 0.1826171875, "learning_rate": 7.98469986719715e-06, "loss": 1.0342, "num_tokens": 5416645045.0, "step": 6963 }, { "epoch": 1.241354723707665, "grad_norm": 0.1826171875, "learning_rate": 7.982252115011276e-06, "loss": 0.9979, "num_tokens": 5422928585.0, "step": 6964 }, { "epoch": 1.2415329768270944, "grad_norm": 0.181640625, "learning_rate": 7.97980461429497e-06, "loss": 1.034, "num_tokens": 5429212656.0, "step": 6965 }, { "epoch": 1.241711229946524, "grad_norm": 0.18359375, "learning_rate": 7.977357365252187e-06, "loss": 1.0328, "num_tokens": 5435492091.0, "step": 6966 }, { "epoch": 1.2418894830659537, "grad_norm": 0.1826171875, "learning_rate": 7.974910368086857e-06, "loss": 1.0063, "num_tokens": 5441755526.0, "step": 6967 }, { "epoch": 1.2420677361853834, "grad_norm": 0.177734375, "learning_rate": 7.972463623002881e-06, "loss": 1.0303, "num_tokens": 5448007398.0, "step": 6968 }, { "epoch": 1.2422459893048128, "grad_norm": 0.1865234375, "learning_rate": 7.970017130204153e-06, "loss": 1.0224, "num_tokens": 5454244884.0, "step": 6969 }, { "epoch": 1.2424242424242424, "grad_norm": 0.1796875, "learning_rate": 7.967570889894538e-06, "loss": 1.0053, "num_tokens": 5460487973.0, "step": 6970 }, { "epoch": 1.242602495543672, "grad_norm": 0.1806640625, "learning_rate": 7.965124902277881e-06, "loss": 0.9955, "num_tokens": 5466768591.0, "step": 6971 }, { "epoch": 1.2427807486631015, "grad_norm": 0.1865234375, "learning_rate": 7.96267916755801e-06, "loss": 1.0128, "num_tokens": 5473028665.0, "step": 6972 }, { "epoch": 1.2429590017825312, "grad_norm": 0.1826171875, "learning_rate": 7.960233685938723e-06, "loss": 1.0156, "num_tokens": 5479313626.0, "step": 6973 }, { "epoch": 1.2431372549019608, "grad_norm": 0.1767578125, "learning_rate": 7.957788457623808e-06, "loss": 1.0293, "num_tokens": 5485573404.0, "step": 6974 }, { "epoch": 1.2433155080213905, "grad_norm": 0.1767578125, "learning_rate": 7.95534348281703e-06, "loss": 1.005, "num_tokens": 5491838528.0, "step": 6975 }, { "epoch": 1.2434937611408199, "grad_norm": 0.18359375, "learning_rate": 7.952898761722114e-06, "loss": 0.9992, "num_tokens": 5498122371.0, "step": 6976 }, { "epoch": 1.2436720142602495, "grad_norm": 0.181640625, "learning_rate": 7.950454294542794e-06, "loss": 1.001, "num_tokens": 5504404995.0, "step": 6977 }, { "epoch": 1.2438502673796792, "grad_norm": 0.1875, "learning_rate": 7.948010081482758e-06, "loss": 0.9996, "num_tokens": 5510690780.0, "step": 6978 }, { "epoch": 1.2440285204991088, "grad_norm": 0.181640625, "learning_rate": 7.945566122745692e-06, "loss": 1.0085, "num_tokens": 5516936976.0, "step": 6979 }, { "epoch": 1.2442067736185383, "grad_norm": 0.1787109375, "learning_rate": 7.943122418535243e-06, "loss": 1.0081, "num_tokens": 5523207122.0, "step": 6980 }, { "epoch": 1.244385026737968, "grad_norm": 0.1884765625, "learning_rate": 7.940678969055052e-06, "loss": 1.0185, "num_tokens": 5529490341.0, "step": 6981 }, { "epoch": 1.2445632798573976, "grad_norm": 0.17578125, "learning_rate": 7.938235774508726e-06, "loss": 0.9939, "num_tokens": 5535748818.0, "step": 6982 }, { "epoch": 1.244741532976827, "grad_norm": 0.173828125, "learning_rate": 7.935792835099867e-06, "loss": 1.0025, "num_tokens": 5542005671.0, "step": 6983 }, { "epoch": 1.2449197860962566, "grad_norm": 0.177734375, "learning_rate": 7.933350151032038e-06, "loss": 1.0083, "num_tokens": 5548262757.0, "step": 6984 }, { "epoch": 1.2450980392156863, "grad_norm": 0.189453125, "learning_rate": 7.930907722508785e-06, "loss": 1.0242, "num_tokens": 5554545020.0, "step": 6985 }, { "epoch": 1.245276292335116, "grad_norm": 0.1865234375, "learning_rate": 7.928465549733644e-06, "loss": 1.0343, "num_tokens": 5560822168.0, "step": 6986 }, { "epoch": 1.2454545454545454, "grad_norm": 0.19140625, "learning_rate": 7.926023632910114e-06, "loss": 1.0238, "num_tokens": 5567104350.0, "step": 6987 }, { "epoch": 1.245632798573975, "grad_norm": 0.17578125, "learning_rate": 7.923581972241689e-06, "loss": 1.0244, "num_tokens": 5573386437.0, "step": 6988 }, { "epoch": 1.2458110516934047, "grad_norm": 0.18359375, "learning_rate": 7.921140567931826e-06, "loss": 1.0346, "num_tokens": 5579671632.0, "step": 6989 }, { "epoch": 1.2459893048128343, "grad_norm": 0.197265625, "learning_rate": 7.918699420183973e-06, "loss": 1.0345, "num_tokens": 5585934637.0, "step": 6990 }, { "epoch": 1.2461675579322637, "grad_norm": 0.1904296875, "learning_rate": 7.91625852920155e-06, "loss": 1.0053, "num_tokens": 5592219162.0, "step": 6991 }, { "epoch": 1.2463458110516934, "grad_norm": 0.1796875, "learning_rate": 7.913817895187951e-06, "loss": 1.0242, "num_tokens": 5598496977.0, "step": 6992 }, { "epoch": 1.246524064171123, "grad_norm": 0.19140625, "learning_rate": 7.911377518346566e-06, "loss": 1.008, "num_tokens": 5604779564.0, "step": 6993 }, { "epoch": 1.2467023172905525, "grad_norm": 0.1875, "learning_rate": 7.90893739888074e-06, "loss": 1.0351, "num_tokens": 5611063996.0, "step": 6994 }, { "epoch": 1.2468805704099821, "grad_norm": 0.18359375, "learning_rate": 7.906497536993816e-06, "loss": 1.0239, "num_tokens": 5617309887.0, "step": 6995 }, { "epoch": 1.2470588235294118, "grad_norm": 0.1962890625, "learning_rate": 7.904057932889106e-06, "loss": 1.0368, "num_tokens": 5623568167.0, "step": 6996 }, { "epoch": 1.2472370766488414, "grad_norm": 0.1865234375, "learning_rate": 7.901618586769904e-06, "loss": 1.014, "num_tokens": 5629821323.0, "step": 6997 }, { "epoch": 1.2474153297682709, "grad_norm": 0.173828125, "learning_rate": 7.899179498839485e-06, "loss": 0.9925, "num_tokens": 5636102228.0, "step": 6998 }, { "epoch": 1.2475935828877005, "grad_norm": 0.1806640625, "learning_rate": 7.896740669301089e-06, "loss": 1.0168, "num_tokens": 5642381011.0, "step": 6999 }, { "epoch": 1.2477718360071302, "grad_norm": 0.1796875, "learning_rate": 7.894302098357949e-06, "loss": 1.0219, "num_tokens": 5648635875.0, "step": 7000 }, { "epoch": 1.2479500891265598, "grad_norm": 0.1796875, "learning_rate": 7.891863786213271e-06, "loss": 1.0358, "num_tokens": 5654918780.0, "step": 7001 }, { "epoch": 1.2481283422459892, "grad_norm": 0.1787109375, "learning_rate": 7.889425733070244e-06, "loss": 0.9935, "num_tokens": 5661202570.0, "step": 7002 }, { "epoch": 1.2483065953654189, "grad_norm": 0.1787109375, "learning_rate": 7.886987939132027e-06, "loss": 1.0142, "num_tokens": 5667481690.0, "step": 7003 }, { "epoch": 1.2484848484848485, "grad_norm": 0.1767578125, "learning_rate": 7.884550404601763e-06, "loss": 1.0205, "num_tokens": 5673763626.0, "step": 7004 }, { "epoch": 1.248663101604278, "grad_norm": 0.185546875, "learning_rate": 7.882113129682571e-06, "loss": 1.0176, "num_tokens": 5680030386.0, "step": 7005 }, { "epoch": 1.2488413547237076, "grad_norm": 0.181640625, "learning_rate": 7.879676114577558e-06, "loss": 1.0277, "num_tokens": 5686315671.0, "step": 7006 }, { "epoch": 1.2490196078431373, "grad_norm": 0.1806640625, "learning_rate": 7.87723935948979e-06, "loss": 1.0149, "num_tokens": 5692598655.0, "step": 7007 }, { "epoch": 1.249197860962567, "grad_norm": 0.1748046875, "learning_rate": 7.874802864622327e-06, "loss": 1.0236, "num_tokens": 5698880211.0, "step": 7008 }, { "epoch": 1.2493761140819966, "grad_norm": 0.1884765625, "learning_rate": 7.872366630178201e-06, "loss": 1.0059, "num_tokens": 5705165284.0, "step": 7009 }, { "epoch": 1.249554367201426, "grad_norm": 0.177734375, "learning_rate": 7.869930656360424e-06, "loss": 1.0369, "num_tokens": 5711427017.0, "step": 7010 }, { "epoch": 1.2497326203208556, "grad_norm": 0.1806640625, "learning_rate": 7.86749494337199e-06, "loss": 1.0414, "num_tokens": 5717711612.0, "step": 7011 }, { "epoch": 1.2499108734402853, "grad_norm": 0.1748046875, "learning_rate": 7.865059491415864e-06, "loss": 1.0057, "num_tokens": 5723992875.0, "step": 7012 }, { "epoch": 1.2500891265597147, "grad_norm": 0.1796875, "learning_rate": 7.862624300694993e-06, "loss": 1.0076, "num_tokens": 5730267414.0, "step": 7013 }, { "epoch": 1.2502673796791444, "grad_norm": 0.1767578125, "learning_rate": 7.860189371412305e-06, "loss": 1.0181, "num_tokens": 5736521275.0, "step": 7014 }, { "epoch": 1.250445632798574, "grad_norm": 0.1806640625, "learning_rate": 7.8577547037707e-06, "loss": 1.0072, "num_tokens": 5742778925.0, "step": 7015 }, { "epoch": 1.2506238859180034, "grad_norm": 0.1748046875, "learning_rate": 7.855320297973057e-06, "loss": 1.0291, "num_tokens": 5749060866.0, "step": 7016 }, { "epoch": 1.250802139037433, "grad_norm": 0.177734375, "learning_rate": 7.85288615422224e-06, "loss": 1.0045, "num_tokens": 5755345121.0, "step": 7017 }, { "epoch": 1.2509803921568627, "grad_norm": 0.1787109375, "learning_rate": 7.850452272721088e-06, "loss": 1.0209, "num_tokens": 5761628924.0, "step": 7018 }, { "epoch": 1.2511586452762924, "grad_norm": 0.1796875, "learning_rate": 7.848018653672411e-06, "loss": 1.0169, "num_tokens": 5767889832.0, "step": 7019 }, { "epoch": 1.251336898395722, "grad_norm": 0.1787109375, "learning_rate": 7.845585297279005e-06, "loss": 1.0115, "num_tokens": 5774170803.0, "step": 7020 }, { "epoch": 1.2515151515151515, "grad_norm": 0.1796875, "learning_rate": 7.843152203743648e-06, "loss": 1.012, "num_tokens": 5780455565.0, "step": 7021 }, { "epoch": 1.2516934046345811, "grad_norm": 0.1767578125, "learning_rate": 7.840719373269086e-06, "loss": 1.0294, "num_tokens": 5786678742.0, "step": 7022 }, { "epoch": 1.2518716577540108, "grad_norm": 0.1875, "learning_rate": 7.838286806058043e-06, "loss": 1.0057, "num_tokens": 5792957062.0, "step": 7023 }, { "epoch": 1.2520499108734402, "grad_norm": 0.1923828125, "learning_rate": 7.83585450231323e-06, "loss": 1.0112, "num_tokens": 5799216936.0, "step": 7024 }, { "epoch": 1.2522281639928698, "grad_norm": 0.18359375, "learning_rate": 7.83342246223733e-06, "loss": 1.0025, "num_tokens": 5805501848.0, "step": 7025 }, { "epoch": 1.2524064171122995, "grad_norm": 0.1796875, "learning_rate": 7.830990686033007e-06, "loss": 1.003, "num_tokens": 5811746786.0, "step": 7026 }, { "epoch": 1.252584670231729, "grad_norm": 0.1728515625, "learning_rate": 7.828559173902897e-06, "loss": 0.9894, "num_tokens": 5818029732.0, "step": 7027 }, { "epoch": 1.2527629233511586, "grad_norm": 0.1865234375, "learning_rate": 7.826127926049625e-06, "loss": 1.0203, "num_tokens": 5824314569.0, "step": 7028 }, { "epoch": 1.2529411764705882, "grad_norm": 0.1806640625, "learning_rate": 7.823696942675788e-06, "loss": 1.0193, "num_tokens": 5830599644.0, "step": 7029 }, { "epoch": 1.2531194295900179, "grad_norm": 0.185546875, "learning_rate": 7.82126622398395e-06, "loss": 1.0479, "num_tokens": 5836871091.0, "step": 7030 }, { "epoch": 1.2532976827094475, "grad_norm": 0.1787109375, "learning_rate": 7.818835770176672e-06, "loss": 1.0323, "num_tokens": 5843154572.0, "step": 7031 }, { "epoch": 1.253475935828877, "grad_norm": 0.18359375, "learning_rate": 7.81640558145648e-06, "loss": 1.0305, "num_tokens": 5849439303.0, "step": 7032 }, { "epoch": 1.2536541889483066, "grad_norm": 0.1875, "learning_rate": 7.813975658025887e-06, "loss": 1.0129, "num_tokens": 5855691820.0, "step": 7033 }, { "epoch": 1.2538324420677363, "grad_norm": 0.181640625, "learning_rate": 7.811546000087375e-06, "loss": 1.0494, "num_tokens": 5861975520.0, "step": 7034 }, { "epoch": 1.2540106951871657, "grad_norm": 0.1767578125, "learning_rate": 7.80911660784341e-06, "loss": 0.9737, "num_tokens": 5868249322.0, "step": 7035 }, { "epoch": 1.2541889483065953, "grad_norm": 0.177734375, "learning_rate": 7.806687481496432e-06, "loss": 1.02, "num_tokens": 5874526456.0, "step": 7036 }, { "epoch": 1.254367201426025, "grad_norm": 0.181640625, "learning_rate": 7.804258621248864e-06, "loss": 1.0092, "num_tokens": 5880811287.0, "step": 7037 }, { "epoch": 1.2545454545454544, "grad_norm": 0.1845703125, "learning_rate": 7.801830027303098e-06, "loss": 1.0268, "num_tokens": 5887066823.0, "step": 7038 }, { "epoch": 1.254723707664884, "grad_norm": 0.1875, "learning_rate": 7.799401699861513e-06, "loss": 1.0317, "num_tokens": 5893349895.0, "step": 7039 }, { "epoch": 1.2549019607843137, "grad_norm": 0.181640625, "learning_rate": 7.796973639126462e-06, "loss": 1.0101, "num_tokens": 5899617156.0, "step": 7040 }, { "epoch": 1.2550802139037434, "grad_norm": 0.181640625, "learning_rate": 7.794545845300271e-06, "loss": 1.0114, "num_tokens": 5905900190.0, "step": 7041 }, { "epoch": 1.255258467023173, "grad_norm": 0.17578125, "learning_rate": 7.792118318585258e-06, "loss": 1.0276, "num_tokens": 5912184639.0, "step": 7042 }, { "epoch": 1.2554367201426024, "grad_norm": 0.18359375, "learning_rate": 7.7896910591837e-06, "loss": 1.0168, "num_tokens": 5918467928.0, "step": 7043 }, { "epoch": 1.255614973262032, "grad_norm": 0.1728515625, "learning_rate": 7.787264067297868e-06, "loss": 1.015, "num_tokens": 5924723859.0, "step": 7044 }, { "epoch": 1.2557932263814617, "grad_norm": 0.181640625, "learning_rate": 7.784837343130001e-06, "loss": 1.0469, "num_tokens": 5931008886.0, "step": 7045 }, { "epoch": 1.2559714795008912, "grad_norm": 0.1767578125, "learning_rate": 7.782410886882315e-06, "loss": 1.0345, "num_tokens": 5937292512.0, "step": 7046 }, { "epoch": 1.2561497326203208, "grad_norm": 0.1884765625, "learning_rate": 7.779984698757012e-06, "loss": 1.0235, "num_tokens": 5943573599.0, "step": 7047 }, { "epoch": 1.2563279857397505, "grad_norm": 0.1865234375, "learning_rate": 7.777558778956264e-06, "loss": 1.0185, "num_tokens": 5949858987.0, "step": 7048 }, { "epoch": 1.2565062388591801, "grad_norm": 0.1796875, "learning_rate": 7.775133127682226e-06, "loss": 1.0501, "num_tokens": 5956144045.0, "step": 7049 }, { "epoch": 1.2566844919786098, "grad_norm": 0.1748046875, "learning_rate": 7.772707745137025e-06, "loss": 1.0236, "num_tokens": 5962400607.0, "step": 7050 }, { "epoch": 1.2568627450980392, "grad_norm": 0.1796875, "learning_rate": 7.770282631522773e-06, "loss": 1.0542, "num_tokens": 5968683933.0, "step": 7051 }, { "epoch": 1.2570409982174688, "grad_norm": 0.18359375, "learning_rate": 7.767857787041552e-06, "loss": 1.0263, "num_tokens": 5974968638.0, "step": 7052 }, { "epoch": 1.2572192513368985, "grad_norm": 0.1806640625, "learning_rate": 7.765433211895421e-06, "loss": 1.0023, "num_tokens": 5981243759.0, "step": 7053 }, { "epoch": 1.257397504456328, "grad_norm": 0.1806640625, "learning_rate": 7.76300890628643e-06, "loss": 1.033, "num_tokens": 5987501152.0, "step": 7054 }, { "epoch": 1.2575757575757576, "grad_norm": 0.1806640625, "learning_rate": 7.760584870416585e-06, "loss": 1.022, "num_tokens": 5993774098.0, "step": 7055 }, { "epoch": 1.2577540106951872, "grad_norm": 0.1806640625, "learning_rate": 7.758161104487894e-06, "loss": 1.0346, "num_tokens": 6000059090.0, "step": 7056 }, { "epoch": 1.2579322638146166, "grad_norm": 0.18359375, "learning_rate": 7.755737608702318e-06, "loss": 1.0031, "num_tokens": 6006255236.0, "step": 7057 }, { "epoch": 1.2581105169340463, "grad_norm": 0.1826171875, "learning_rate": 7.753314383261818e-06, "loss": 1.0089, "num_tokens": 6012519354.0, "step": 7058 }, { "epoch": 1.258288770053476, "grad_norm": 0.1875, "learning_rate": 7.750891428368315e-06, "loss": 1.0022, "num_tokens": 6018778035.0, "step": 7059 }, { "epoch": 1.2584670231729056, "grad_norm": 0.189453125, "learning_rate": 7.748468744223718e-06, "loss": 1.029, "num_tokens": 6025034747.0, "step": 7060 }, { "epoch": 1.2586452762923352, "grad_norm": 0.18359375, "learning_rate": 7.746046331029906e-06, "loss": 1.0, "num_tokens": 6031319234.0, "step": 7061 }, { "epoch": 1.2588235294117647, "grad_norm": 0.1845703125, "learning_rate": 7.743624188988743e-06, "loss": 1.0105, "num_tokens": 6037574482.0, "step": 7062 }, { "epoch": 1.2590017825311943, "grad_norm": 0.181640625, "learning_rate": 7.741202318302063e-06, "loss": 1.0333, "num_tokens": 6043804896.0, "step": 7063 }, { "epoch": 1.259180035650624, "grad_norm": 0.181640625, "learning_rate": 7.738780719171684e-06, "loss": 1.0362, "num_tokens": 6050087379.0, "step": 7064 }, { "epoch": 1.2593582887700534, "grad_norm": 0.1806640625, "learning_rate": 7.7363593917994e-06, "loss": 1.0316, "num_tokens": 6056369835.0, "step": 7065 }, { "epoch": 1.259536541889483, "grad_norm": 0.1796875, "learning_rate": 7.733938336386973e-06, "loss": 1.0237, "num_tokens": 6062622852.0, "step": 7066 }, { "epoch": 1.2597147950089127, "grad_norm": 0.1845703125, "learning_rate": 7.73151755313616e-06, "loss": 1.0482, "num_tokens": 6068856030.0, "step": 7067 }, { "epoch": 1.2598930481283421, "grad_norm": 0.193359375, "learning_rate": 7.729097042248679e-06, "loss": 0.9965, "num_tokens": 6075141057.0, "step": 7068 }, { "epoch": 1.2600713012477718, "grad_norm": 0.1904296875, "learning_rate": 7.726676803926232e-06, "loss": 1.0518, "num_tokens": 6081366868.0, "step": 7069 }, { "epoch": 1.2602495543672014, "grad_norm": 0.181640625, "learning_rate": 7.724256838370498e-06, "loss": 1.0116, "num_tokens": 6087651611.0, "step": 7070 }, { "epoch": 1.260427807486631, "grad_norm": 0.1806640625, "learning_rate": 7.721837145783138e-06, "loss": 1.0378, "num_tokens": 6093854564.0, "step": 7071 }, { "epoch": 1.2606060606060607, "grad_norm": 0.18359375, "learning_rate": 7.719417726365781e-06, "loss": 1.035, "num_tokens": 6100137278.0, "step": 7072 }, { "epoch": 1.2607843137254902, "grad_norm": 0.1884765625, "learning_rate": 7.716998580320036e-06, "loss": 1.0443, "num_tokens": 6106420170.0, "step": 7073 }, { "epoch": 1.2609625668449198, "grad_norm": 0.1845703125, "learning_rate": 7.714579707847495e-06, "loss": 1.0285, "num_tokens": 6112704349.0, "step": 7074 }, { "epoch": 1.2611408199643495, "grad_norm": 0.1806640625, "learning_rate": 7.712161109149725e-06, "loss": 1.0183, "num_tokens": 6118926768.0, "step": 7075 }, { "epoch": 1.2613190730837789, "grad_norm": 0.1806640625, "learning_rate": 7.709742784428263e-06, "loss": 1.0137, "num_tokens": 6125208521.0, "step": 7076 }, { "epoch": 1.2614973262032085, "grad_norm": 0.17578125, "learning_rate": 7.70732473388463e-06, "loss": 0.9955, "num_tokens": 6131463381.0, "step": 7077 }, { "epoch": 1.2616755793226382, "grad_norm": 0.19140625, "learning_rate": 7.704906957720322e-06, "loss": 1.0138, "num_tokens": 6137731077.0, "step": 7078 }, { "epoch": 1.2618538324420676, "grad_norm": 0.1826171875, "learning_rate": 7.702489456136815e-06, "loss": 1.0248, "num_tokens": 6144016200.0, "step": 7079 }, { "epoch": 1.2620320855614973, "grad_norm": 0.1767578125, "learning_rate": 7.700072229335557e-06, "loss": 1.0451, "num_tokens": 6150294691.0, "step": 7080 }, { "epoch": 1.262210338680927, "grad_norm": 0.1875, "learning_rate": 7.697655277517981e-06, "loss": 1.0427, "num_tokens": 6156574807.0, "step": 7081 }, { "epoch": 1.2623885918003566, "grad_norm": 0.18359375, "learning_rate": 7.695238600885486e-06, "loss": 1.052, "num_tokens": 6162859408.0, "step": 7082 }, { "epoch": 1.2625668449197862, "grad_norm": 0.1865234375, "learning_rate": 7.692822199639464e-06, "loss": 1.016, "num_tokens": 6169144749.0, "step": 7083 }, { "epoch": 1.2627450980392156, "grad_norm": 0.1845703125, "learning_rate": 7.690406073981265e-06, "loss": 1.0177, "num_tokens": 6175415817.0, "step": 7084 }, { "epoch": 1.2629233511586453, "grad_norm": 0.1865234375, "learning_rate": 7.687990224112223e-06, "loss": 1.0183, "num_tokens": 6181697932.0, "step": 7085 }, { "epoch": 1.263101604278075, "grad_norm": 0.1826171875, "learning_rate": 7.68557465023366e-06, "loss": 1.0013, "num_tokens": 6187973201.0, "step": 7086 }, { "epoch": 1.2632798573975044, "grad_norm": 0.1845703125, "learning_rate": 7.68315935254686e-06, "loss": 1.033, "num_tokens": 6194255662.0, "step": 7087 }, { "epoch": 1.263458110516934, "grad_norm": 0.1865234375, "learning_rate": 7.680744331253094e-06, "loss": 0.9949, "num_tokens": 6200540684.0, "step": 7088 }, { "epoch": 1.2636363636363637, "grad_norm": 0.1796875, "learning_rate": 7.678329586553604e-06, "loss": 1.0242, "num_tokens": 6206816404.0, "step": 7089 }, { "epoch": 1.263814616755793, "grad_norm": 0.181640625, "learning_rate": 7.675915118649615e-06, "loss": 1.0217, "num_tokens": 6213056923.0, "step": 7090 }, { "epoch": 1.2639928698752227, "grad_norm": 0.185546875, "learning_rate": 7.673500927742321e-06, "loss": 1.0589, "num_tokens": 6219300231.0, "step": 7091 }, { "epoch": 1.2641711229946524, "grad_norm": 0.17578125, "learning_rate": 7.671087014032896e-06, "loss": 1.0407, "num_tokens": 6225584339.0, "step": 7092 }, { "epoch": 1.264349376114082, "grad_norm": 0.1767578125, "learning_rate": 7.668673377722497e-06, "loss": 1.0192, "num_tokens": 6231867231.0, "step": 7093 }, { "epoch": 1.2645276292335117, "grad_norm": 0.18359375, "learning_rate": 7.666260019012248e-06, "loss": 1.0188, "num_tokens": 6238152294.0, "step": 7094 }, { "epoch": 1.2647058823529411, "grad_norm": 0.189453125, "learning_rate": 7.663846938103257e-06, "loss": 0.9922, "num_tokens": 6244436077.0, "step": 7095 }, { "epoch": 1.2648841354723708, "grad_norm": 0.1806640625, "learning_rate": 7.661434135196606e-06, "loss": 1.0037, "num_tokens": 6250719062.0, "step": 7096 }, { "epoch": 1.2650623885918004, "grad_norm": 0.185546875, "learning_rate": 7.659021610493354e-06, "loss": 1.0012, "num_tokens": 6256996665.0, "step": 7097 }, { "epoch": 1.2652406417112299, "grad_norm": 0.18359375, "learning_rate": 7.65660936419454e-06, "loss": 1.0291, "num_tokens": 6263269047.0, "step": 7098 }, { "epoch": 1.2654188948306595, "grad_norm": 0.1796875, "learning_rate": 7.654197396501173e-06, "loss": 1.0058, "num_tokens": 6269530325.0, "step": 7099 }, { "epoch": 1.2655971479500892, "grad_norm": 0.1982421875, "learning_rate": 7.651785707614246e-06, "loss": 1.0692, "num_tokens": 6275813139.0, "step": 7100 }, { "epoch": 1.2657754010695186, "grad_norm": 0.189453125, "learning_rate": 7.649374297734723e-06, "loss": 1.0078, "num_tokens": 6282096602.0, "step": 7101 }, { "epoch": 1.2659536541889482, "grad_norm": 0.1875, "learning_rate": 7.646963167063546e-06, "loss": 0.9915, "num_tokens": 6288379655.0, "step": 7102 }, { "epoch": 1.2661319073083779, "grad_norm": 0.169921875, "learning_rate": 7.644552315801638e-06, "loss": 1.0052, "num_tokens": 6294662323.0, "step": 7103 }, { "epoch": 1.2663101604278075, "grad_norm": 0.177734375, "learning_rate": 7.642141744149898e-06, "loss": 1.0051, "num_tokens": 6300946635.0, "step": 7104 }, { "epoch": 1.2664884135472372, "grad_norm": 0.1767578125, "learning_rate": 7.639731452309194e-06, "loss": 1.0266, "num_tokens": 6307230558.0, "step": 7105 }, { "epoch": 1.2666666666666666, "grad_norm": 0.185546875, "learning_rate": 7.637321440480383e-06, "loss": 0.9956, "num_tokens": 6313514866.0, "step": 7106 }, { "epoch": 1.2668449197860963, "grad_norm": 0.18359375, "learning_rate": 7.634911708864286e-06, "loss": 1.0092, "num_tokens": 6319774686.0, "step": 7107 }, { "epoch": 1.267023172905526, "grad_norm": 0.1748046875, "learning_rate": 7.632502257661704e-06, "loss": 1.0277, "num_tokens": 6326044905.0, "step": 7108 }, { "epoch": 1.2672014260249553, "grad_norm": 0.1748046875, "learning_rate": 7.630093087073424e-06, "loss": 1.0031, "num_tokens": 6332312880.0, "step": 7109 }, { "epoch": 1.267379679144385, "grad_norm": 0.18359375, "learning_rate": 7.6276841973002e-06, "loss": 1.0123, "num_tokens": 6338596634.0, "step": 7110 }, { "epoch": 1.2675579322638146, "grad_norm": 0.1748046875, "learning_rate": 7.625275588542764e-06, "loss": 1.0204, "num_tokens": 6344878159.0, "step": 7111 }, { "epoch": 1.2677361853832443, "grad_norm": 0.18359375, "learning_rate": 7.622867261001827e-06, "loss": 1.0285, "num_tokens": 6351146817.0, "step": 7112 }, { "epoch": 1.267914438502674, "grad_norm": 0.1845703125, "learning_rate": 7.620459214878077e-06, "loss": 1.0008, "num_tokens": 6357428866.0, "step": 7113 }, { "epoch": 1.2680926916221034, "grad_norm": 0.173828125, "learning_rate": 7.618051450372175e-06, "loss": 0.997, "num_tokens": 6363681940.0, "step": 7114 }, { "epoch": 1.268270944741533, "grad_norm": 0.1845703125, "learning_rate": 7.615643967684758e-06, "loss": 1.0111, "num_tokens": 6369951679.0, "step": 7115 }, { "epoch": 1.2684491978609627, "grad_norm": 0.177734375, "learning_rate": 7.61323676701645e-06, "loss": 1.0294, "num_tokens": 6376230641.0, "step": 7116 }, { "epoch": 1.268627450980392, "grad_norm": 0.1884765625, "learning_rate": 7.610829848567833e-06, "loss": 1.0183, "num_tokens": 6382513589.0, "step": 7117 }, { "epoch": 1.2688057040998217, "grad_norm": 0.18359375, "learning_rate": 7.608423212539484e-06, "loss": 1.036, "num_tokens": 6388775318.0, "step": 7118 }, { "epoch": 1.2689839572192514, "grad_norm": 0.1796875, "learning_rate": 7.606016859131944e-06, "loss": 1.037, "num_tokens": 6395023615.0, "step": 7119 }, { "epoch": 1.2691622103386808, "grad_norm": 0.181640625, "learning_rate": 7.603610788545739e-06, "loss": 1.0159, "num_tokens": 6401270676.0, "step": 7120 }, { "epoch": 1.2693404634581105, "grad_norm": 0.171875, "learning_rate": 7.601205000981369e-06, "loss": 0.9628, "num_tokens": 6407526507.0, "step": 7121 }, { "epoch": 1.2695187165775401, "grad_norm": 0.1826171875, "learning_rate": 7.5987994966393e-06, "loss": 1.0243, "num_tokens": 6413808375.0, "step": 7122 }, { "epoch": 1.2696969696969698, "grad_norm": 0.17578125, "learning_rate": 7.596394275719989e-06, "loss": 1.0138, "num_tokens": 6420062599.0, "step": 7123 }, { "epoch": 1.2698752228163994, "grad_norm": 0.1806640625, "learning_rate": 7.593989338423862e-06, "loss": 1.0299, "num_tokens": 6426346219.0, "step": 7124 }, { "epoch": 1.2700534759358288, "grad_norm": 0.1728515625, "learning_rate": 7.591584684951325e-06, "loss": 1.0272, "num_tokens": 6432629225.0, "step": 7125 }, { "epoch": 1.2702317290552585, "grad_norm": 0.1796875, "learning_rate": 7.5891803155027555e-06, "loss": 1.0294, "num_tokens": 6438901153.0, "step": 7126 }, { "epoch": 1.2704099821746881, "grad_norm": 0.1796875, "learning_rate": 7.586776230278514e-06, "loss": 1.006, "num_tokens": 6445187061.0, "step": 7127 }, { "epoch": 1.2705882352941176, "grad_norm": 0.1806640625, "learning_rate": 7.584372429478928e-06, "loss": 1.0234, "num_tokens": 6451470862.0, "step": 7128 }, { "epoch": 1.2707664884135472, "grad_norm": 0.1826171875, "learning_rate": 7.581968913304316e-06, "loss": 1.0293, "num_tokens": 6457753932.0, "step": 7129 }, { "epoch": 1.2709447415329769, "grad_norm": 0.1826171875, "learning_rate": 7.579565681954952e-06, "loss": 1.0167, "num_tokens": 6464037148.0, "step": 7130 }, { "epoch": 1.2711229946524063, "grad_norm": 0.1904296875, "learning_rate": 7.577162735631103e-06, "loss": 1.0275, "num_tokens": 6470302581.0, "step": 7131 }, { "epoch": 1.271301247771836, "grad_norm": 0.1806640625, "learning_rate": 7.5747600745330096e-06, "loss": 1.0319, "num_tokens": 6476586942.0, "step": 7132 }, { "epoch": 1.2714795008912656, "grad_norm": 0.1796875, "learning_rate": 7.572357698860881e-06, "loss": 1.0117, "num_tokens": 6482872119.0, "step": 7133 }, { "epoch": 1.2716577540106953, "grad_norm": 0.1806640625, "learning_rate": 7.569955608814912e-06, "loss": 0.9724, "num_tokens": 6489122734.0, "step": 7134 }, { "epoch": 1.271836007130125, "grad_norm": 0.173828125, "learning_rate": 7.567553804595267e-06, "loss": 1.0235, "num_tokens": 6495364539.0, "step": 7135 }, { "epoch": 1.2720142602495543, "grad_norm": 0.189453125, "learning_rate": 7.565152286402089e-06, "loss": 1.0209, "num_tokens": 6501627870.0, "step": 7136 }, { "epoch": 1.272192513368984, "grad_norm": 0.177734375, "learning_rate": 7.5627510544355e-06, "loss": 1.0063, "num_tokens": 6507910864.0, "step": 7137 }, { "epoch": 1.2723707664884136, "grad_norm": 0.181640625, "learning_rate": 7.560350108895589e-06, "loss": 1.0299, "num_tokens": 6514190686.0, "step": 7138 }, { "epoch": 1.272549019607843, "grad_norm": 0.1806640625, "learning_rate": 7.557949449982434e-06, "loss": 1.0239, "num_tokens": 6520455563.0, "step": 7139 }, { "epoch": 1.2727272727272727, "grad_norm": 0.181640625, "learning_rate": 7.555549077896076e-06, "loss": 1.0211, "num_tokens": 6526739450.0, "step": 7140 }, { "epoch": 1.2729055258467024, "grad_norm": 0.181640625, "learning_rate": 7.553148992836543e-06, "loss": 1.0351, "num_tokens": 6533021830.0, "step": 7141 }, { "epoch": 1.2730837789661318, "grad_norm": 0.177734375, "learning_rate": 7.550749195003832e-06, "loss": 1.0067, "num_tokens": 6539288363.0, "step": 7142 }, { "epoch": 1.2732620320855614, "grad_norm": 0.185546875, "learning_rate": 7.548349684597924e-06, "loss": 1.0627, "num_tokens": 6545570840.0, "step": 7143 }, { "epoch": 1.273440285204991, "grad_norm": 0.1796875, "learning_rate": 7.5459504618187675e-06, "loss": 1.0174, "num_tokens": 6551852912.0, "step": 7144 }, { "epoch": 1.2736185383244207, "grad_norm": 0.173828125, "learning_rate": 7.543551526866284e-06, "loss": 1.0066, "num_tokens": 6558120459.0, "step": 7145 }, { "epoch": 1.2737967914438504, "grad_norm": 0.1845703125, "learning_rate": 7.541152879940386e-06, "loss": 1.0235, "num_tokens": 6564390736.0, "step": 7146 }, { "epoch": 1.2739750445632798, "grad_norm": 0.1845703125, "learning_rate": 7.53875452124095e-06, "loss": 0.9902, "num_tokens": 6570675684.0, "step": 7147 }, { "epoch": 1.2741532976827095, "grad_norm": 0.18359375, "learning_rate": 7.53635645096783e-06, "loss": 1.056, "num_tokens": 6576960069.0, "step": 7148 }, { "epoch": 1.2743315508021391, "grad_norm": 0.185546875, "learning_rate": 7.5339586693208596e-06, "loss": 1.0335, "num_tokens": 6583220613.0, "step": 7149 }, { "epoch": 1.2745098039215685, "grad_norm": 0.1767578125, "learning_rate": 7.531561176499848e-06, "loss": 0.9964, "num_tokens": 6589490800.0, "step": 7150 }, { "epoch": 1.2746880570409982, "grad_norm": 0.171875, "learning_rate": 7.529163972704574e-06, "loss": 1.0396, "num_tokens": 6595729667.0, "step": 7151 }, { "epoch": 1.2748663101604278, "grad_norm": 0.1767578125, "learning_rate": 7.526767058134807e-06, "loss": 1.0141, "num_tokens": 6601985934.0, "step": 7152 }, { "epoch": 1.2750445632798573, "grad_norm": 0.181640625, "learning_rate": 7.524370432990271e-06, "loss": 1.0166, "num_tokens": 6608269812.0, "step": 7153 }, { "epoch": 1.275222816399287, "grad_norm": 0.181640625, "learning_rate": 7.5219740974706815e-06, "loss": 1.0312, "num_tokens": 6614553085.0, "step": 7154 }, { "epoch": 1.2754010695187166, "grad_norm": 0.1826171875, "learning_rate": 7.519578051775726e-06, "loss": 1.0252, "num_tokens": 6620812967.0, "step": 7155 }, { "epoch": 1.2755793226381462, "grad_norm": 0.1875, "learning_rate": 7.517182296105066e-06, "loss": 1.0028, "num_tokens": 6627097971.0, "step": 7156 }, { "epoch": 1.2757575757575759, "grad_norm": 0.1796875, "learning_rate": 7.514786830658345e-06, "loss": 1.0264, "num_tokens": 6633383356.0, "step": 7157 }, { "epoch": 1.2759358288770053, "grad_norm": 0.185546875, "learning_rate": 7.5123916556351716e-06, "loss": 1.0091, "num_tokens": 6639667771.0, "step": 7158 }, { "epoch": 1.276114081996435, "grad_norm": 0.1728515625, "learning_rate": 7.509996771235142e-06, "loss": 1.0383, "num_tokens": 6645928651.0, "step": 7159 }, { "epoch": 1.2762923351158646, "grad_norm": 0.181640625, "learning_rate": 7.50760217765782e-06, "loss": 1.0163, "num_tokens": 6652212059.0, "step": 7160 }, { "epoch": 1.276470588235294, "grad_norm": 0.18359375, "learning_rate": 7.505207875102745e-06, "loss": 1.0561, "num_tokens": 6658440563.0, "step": 7161 }, { "epoch": 1.2766488413547237, "grad_norm": 0.173828125, "learning_rate": 7.502813863769436e-06, "loss": 1.0251, "num_tokens": 6664667520.0, "step": 7162 }, { "epoch": 1.2768270944741533, "grad_norm": 0.17578125, "learning_rate": 7.50042014385739e-06, "loss": 1.0133, "num_tokens": 6670916602.0, "step": 7163 }, { "epoch": 1.2770053475935828, "grad_norm": 0.181640625, "learning_rate": 7.498026715566071e-06, "loss": 1.0423, "num_tokens": 6677201273.0, "step": 7164 }, { "epoch": 1.2771836007130124, "grad_norm": 0.1796875, "learning_rate": 7.495633579094925e-06, "loss": 1.0065, "num_tokens": 6683486316.0, "step": 7165 }, { "epoch": 1.277361853832442, "grad_norm": 0.1884765625, "learning_rate": 7.4932407346433785e-06, "loss": 1.0502, "num_tokens": 6689771478.0, "step": 7166 }, { "epoch": 1.2775401069518717, "grad_norm": 0.177734375, "learning_rate": 7.490848182410824e-06, "loss": 1.0189, "num_tokens": 6696035046.0, "step": 7167 }, { "epoch": 1.2777183600713014, "grad_norm": 0.177734375, "learning_rate": 7.488455922596629e-06, "loss": 1.0436, "num_tokens": 6702319271.0, "step": 7168 }, { "epoch": 1.2778966131907308, "grad_norm": 0.171875, "learning_rate": 7.486063955400144e-06, "loss": 1.0297, "num_tokens": 6708571015.0, "step": 7169 }, { "epoch": 1.2780748663101604, "grad_norm": 0.1796875, "learning_rate": 7.483672281020695e-06, "loss": 1.0212, "num_tokens": 6714836866.0, "step": 7170 }, { "epoch": 1.27825311942959, "grad_norm": 0.1767578125, "learning_rate": 7.481280899657577e-06, "loss": 1.0251, "num_tokens": 6721110174.0, "step": 7171 }, { "epoch": 1.2784313725490195, "grad_norm": 0.1826171875, "learning_rate": 7.478889811510068e-06, "loss": 1.0184, "num_tokens": 6727394283.0, "step": 7172 }, { "epoch": 1.2786096256684492, "grad_norm": 0.177734375, "learning_rate": 7.476499016777412e-06, "loss": 1.016, "num_tokens": 6733676350.0, "step": 7173 }, { "epoch": 1.2787878787878788, "grad_norm": 0.1826171875, "learning_rate": 7.47410851565884e-06, "loss": 1.0178, "num_tokens": 6739893730.0, "step": 7174 }, { "epoch": 1.2789661319073085, "grad_norm": 0.1796875, "learning_rate": 7.471718308353555e-06, "loss": 1.0175, "num_tokens": 6746161522.0, "step": 7175 }, { "epoch": 1.279144385026738, "grad_norm": 0.173828125, "learning_rate": 7.469328395060722e-06, "loss": 1.0262, "num_tokens": 6752444962.0, "step": 7176 }, { "epoch": 1.2793226381461675, "grad_norm": 0.18359375, "learning_rate": 7.4669387759795065e-06, "loss": 0.9954, "num_tokens": 6758707461.0, "step": 7177 }, { "epoch": 1.2795008912655972, "grad_norm": 0.1708984375, "learning_rate": 7.464549451309027e-06, "loss": 1.0188, "num_tokens": 6764992712.0, "step": 7178 }, { "epoch": 1.2796791443850268, "grad_norm": 0.1787109375, "learning_rate": 7.46216042124839e-06, "loss": 1.0193, "num_tokens": 6771278043.0, "step": 7179 }, { "epoch": 1.2798573975044563, "grad_norm": 0.1806640625, "learning_rate": 7.459771685996673e-06, "loss": 1.0204, "num_tokens": 6777548377.0, "step": 7180 }, { "epoch": 1.280035650623886, "grad_norm": 0.1787109375, "learning_rate": 7.457383245752929e-06, "loss": 1.0233, "num_tokens": 6783832788.0, "step": 7181 }, { "epoch": 1.2802139037433156, "grad_norm": 0.1767578125, "learning_rate": 7.4549951007161894e-06, "loss": 1.0217, "num_tokens": 6790117450.0, "step": 7182 }, { "epoch": 1.280392156862745, "grad_norm": 0.1796875, "learning_rate": 7.452607251085462e-06, "loss": 1.0391, "num_tokens": 6796391061.0, "step": 7183 }, { "epoch": 1.2805704099821746, "grad_norm": 0.1748046875, "learning_rate": 7.450219697059719e-06, "loss": 1.0272, "num_tokens": 6802639732.0, "step": 7184 }, { "epoch": 1.2807486631016043, "grad_norm": 0.181640625, "learning_rate": 7.447832438837918e-06, "loss": 1.0008, "num_tokens": 6808920781.0, "step": 7185 }, { "epoch": 1.280926916221034, "grad_norm": 0.185546875, "learning_rate": 7.445445476618993e-06, "loss": 1.0049, "num_tokens": 6815191026.0, "step": 7186 }, { "epoch": 1.2811051693404636, "grad_norm": 0.17578125, "learning_rate": 7.443058810601847e-06, "loss": 1.0233, "num_tokens": 6821408371.0, "step": 7187 }, { "epoch": 1.281283422459893, "grad_norm": 0.177734375, "learning_rate": 7.440672440985362e-06, "loss": 1.0225, "num_tokens": 6827638065.0, "step": 7188 }, { "epoch": 1.2814616755793227, "grad_norm": 0.1787109375, "learning_rate": 7.438286367968395e-06, "loss": 1.0122, "num_tokens": 6833895479.0, "step": 7189 }, { "epoch": 1.2816399286987523, "grad_norm": 0.1826171875, "learning_rate": 7.435900591749783e-06, "loss": 0.9793, "num_tokens": 6840154550.0, "step": 7190 }, { "epoch": 1.2818181818181817, "grad_norm": 0.1826171875, "learning_rate": 7.4335151125283265e-06, "loss": 1.0327, "num_tokens": 6846409641.0, "step": 7191 }, { "epoch": 1.2819964349376114, "grad_norm": 0.181640625, "learning_rate": 7.431129930502805e-06, "loss": 1.038, "num_tokens": 6852682781.0, "step": 7192 }, { "epoch": 1.282174688057041, "grad_norm": 0.177734375, "learning_rate": 7.428745045871986e-06, "loss": 1.0378, "num_tokens": 6858908809.0, "step": 7193 }, { "epoch": 1.2823529411764705, "grad_norm": 0.1826171875, "learning_rate": 7.4263604588345935e-06, "loss": 1.0266, "num_tokens": 6865195131.0, "step": 7194 }, { "epoch": 1.2825311942959001, "grad_norm": 0.1826171875, "learning_rate": 7.423976169589344e-06, "loss": 1.0312, "num_tokens": 6871480145.0, "step": 7195 }, { "epoch": 1.2827094474153298, "grad_norm": 0.1796875, "learning_rate": 7.421592178334913e-06, "loss": 0.9856, "num_tokens": 6877748334.0, "step": 7196 }, { "epoch": 1.2828877005347594, "grad_norm": 0.177734375, "learning_rate": 7.4192084852699666e-06, "loss": 1.0142, "num_tokens": 6884032676.0, "step": 7197 }, { "epoch": 1.283065953654189, "grad_norm": 0.1875, "learning_rate": 7.416825090593136e-06, "loss": 1.0094, "num_tokens": 6890316463.0, "step": 7198 }, { "epoch": 1.2832442067736185, "grad_norm": 0.1845703125, "learning_rate": 7.414441994503021e-06, "loss": 1.0162, "num_tokens": 6896599782.0, "step": 7199 }, { "epoch": 1.2834224598930482, "grad_norm": 0.17578125, "learning_rate": 7.412059197198218e-06, "loss": 1.0316, "num_tokens": 6902882247.0, "step": 7200 }, { "epoch": 1.2836007130124778, "grad_norm": 0.1728515625, "learning_rate": 7.4096766988772775e-06, "loss": 1.0663, "num_tokens": 6909160893.0, "step": 7201 }, { "epoch": 1.2837789661319072, "grad_norm": 0.1767578125, "learning_rate": 7.40729449973874e-06, "loss": 1.0407, "num_tokens": 6915440610.0, "step": 7202 }, { "epoch": 1.2839572192513369, "grad_norm": 0.17578125, "learning_rate": 7.404912599981111e-06, "loss": 1.0702, "num_tokens": 6921722060.0, "step": 7203 }, { "epoch": 1.2841354723707665, "grad_norm": 0.177734375, "learning_rate": 7.402530999802875e-06, "loss": 1.0305, "num_tokens": 6927966739.0, "step": 7204 }, { "epoch": 1.284313725490196, "grad_norm": 0.1875, "learning_rate": 7.40014969940249e-06, "loss": 1.0176, "num_tokens": 6934238297.0, "step": 7205 }, { "epoch": 1.2844919786096256, "grad_norm": 0.1806640625, "learning_rate": 7.397768698978394e-06, "loss": 1.0008, "num_tokens": 6940522643.0, "step": 7206 }, { "epoch": 1.2846702317290553, "grad_norm": 0.173828125, "learning_rate": 7.395387998728996e-06, "loss": 1.0319, "num_tokens": 6946805889.0, "step": 7207 }, { "epoch": 1.284848484848485, "grad_norm": 0.18359375, "learning_rate": 7.3930075988526725e-06, "loss": 1.0458, "num_tokens": 6953079233.0, "step": 7208 }, { "epoch": 1.2850267379679146, "grad_norm": 0.18359375, "learning_rate": 7.390627499547793e-06, "loss": 0.9894, "num_tokens": 6959336994.0, "step": 7209 }, { "epoch": 1.285204991087344, "grad_norm": 0.1845703125, "learning_rate": 7.388247701012685e-06, "loss": 1.0384, "num_tokens": 6965620926.0, "step": 7210 }, { "epoch": 1.2853832442067736, "grad_norm": 0.201171875, "learning_rate": 7.385868203445663e-06, "loss": 1.0702, "num_tokens": 6971903591.0, "step": 7211 }, { "epoch": 1.2855614973262033, "grad_norm": 0.1748046875, "learning_rate": 7.383489007045006e-06, "loss": 0.9989, "num_tokens": 6978174456.0, "step": 7212 }, { "epoch": 1.2857397504456327, "grad_norm": 0.1826171875, "learning_rate": 7.381110112008976e-06, "loss": 1.0087, "num_tokens": 6984457534.0, "step": 7213 }, { "epoch": 1.2859180035650624, "grad_norm": 0.1982421875, "learning_rate": 7.378731518535805e-06, "loss": 1.0091, "num_tokens": 6990715842.0, "step": 7214 }, { "epoch": 1.286096256684492, "grad_norm": 0.1875, "learning_rate": 7.376353226823703e-06, "loss": 1.0285, "num_tokens": 6996998439.0, "step": 7215 }, { "epoch": 1.2862745098039214, "grad_norm": 0.1845703125, "learning_rate": 7.373975237070852e-06, "loss": 0.9862, "num_tokens": 7003254375.0, "step": 7216 }, { "epoch": 1.286452762923351, "grad_norm": 0.189453125, "learning_rate": 7.371597549475411e-06, "loss": 1.023, "num_tokens": 7009525051.0, "step": 7217 }, { "epoch": 1.2866310160427807, "grad_norm": 0.1875, "learning_rate": 7.369220164235513e-06, "loss": 0.9932, "num_tokens": 7015808881.0, "step": 7218 }, { "epoch": 1.2868092691622104, "grad_norm": 0.1767578125, "learning_rate": 7.366843081549266e-06, "loss": 1.0191, "num_tokens": 7022063106.0, "step": 7219 }, { "epoch": 1.28698752228164, "grad_norm": 0.1884765625, "learning_rate": 7.364466301614757e-06, "loss": 1.0105, "num_tokens": 7028337271.0, "step": 7220 }, { "epoch": 1.2871657754010695, "grad_norm": 0.1875, "learning_rate": 7.362089824630039e-06, "loss": 1.0196, "num_tokens": 7034596381.0, "step": 7221 }, { "epoch": 1.2873440285204991, "grad_norm": 0.1796875, "learning_rate": 7.359713650793143e-06, "loss": 0.9967, "num_tokens": 7040881110.0, "step": 7222 }, { "epoch": 1.2875222816399288, "grad_norm": 0.177734375, "learning_rate": 7.357337780302081e-06, "loss": 1.0246, "num_tokens": 7047163133.0, "step": 7223 }, { "epoch": 1.2877005347593582, "grad_norm": 0.18359375, "learning_rate": 7.354962213354828e-06, "loss": 1.0161, "num_tokens": 7053447633.0, "step": 7224 }, { "epoch": 1.2878787878787878, "grad_norm": 0.181640625, "learning_rate": 7.35258695014935e-06, "loss": 0.9804, "num_tokens": 7059679194.0, "step": 7225 }, { "epoch": 1.2880570409982175, "grad_norm": 0.1767578125, "learning_rate": 7.350211990883566e-06, "loss": 1.0093, "num_tokens": 7065963681.0, "step": 7226 }, { "epoch": 1.288235294117647, "grad_norm": 0.1796875, "learning_rate": 7.347837335755395e-06, "loss": 1.0231, "num_tokens": 7072248656.0, "step": 7227 }, { "epoch": 1.2884135472370766, "grad_norm": 0.1904296875, "learning_rate": 7.345462984962708e-06, "loss": 1.0189, "num_tokens": 7078507105.0, "step": 7228 }, { "epoch": 1.2885918003565062, "grad_norm": 0.1767578125, "learning_rate": 7.343088938703366e-06, "loss": 1.0184, "num_tokens": 7084792683.0, "step": 7229 }, { "epoch": 1.2887700534759359, "grad_norm": 0.1787109375, "learning_rate": 7.340715197175194e-06, "loss": 1.0224, "num_tokens": 7091058016.0, "step": 7230 }, { "epoch": 1.2889483065953655, "grad_norm": 0.181640625, "learning_rate": 7.338341760575998e-06, "loss": 1.0161, "num_tokens": 7097300550.0, "step": 7231 }, { "epoch": 1.289126559714795, "grad_norm": 0.1787109375, "learning_rate": 7.33596862910356e-06, "loss": 1.008, "num_tokens": 7103557146.0, "step": 7232 }, { "epoch": 1.2893048128342246, "grad_norm": 0.173828125, "learning_rate": 7.333595802955629e-06, "loss": 1.0016, "num_tokens": 7109837891.0, "step": 7233 }, { "epoch": 1.2894830659536543, "grad_norm": 0.177734375, "learning_rate": 7.331223282329936e-06, "loss": 1.0179, "num_tokens": 7116123273.0, "step": 7234 }, { "epoch": 1.2896613190730837, "grad_norm": 0.1787109375, "learning_rate": 7.3288510674241804e-06, "loss": 0.9952, "num_tokens": 7122386634.0, "step": 7235 }, { "epoch": 1.2898395721925133, "grad_norm": 0.177734375, "learning_rate": 7.326479158436046e-06, "loss": 1.0029, "num_tokens": 7128648384.0, "step": 7236 }, { "epoch": 1.290017825311943, "grad_norm": 0.181640625, "learning_rate": 7.324107555563178e-06, "loss": 1.0212, "num_tokens": 7134850703.0, "step": 7237 }, { "epoch": 1.2901960784313726, "grad_norm": 0.1796875, "learning_rate": 7.321736259003205e-06, "loss": 1.0105, "num_tokens": 7141105277.0, "step": 7238 }, { "epoch": 1.290374331550802, "grad_norm": 0.17578125, "learning_rate": 7.3193652689537265e-06, "loss": 0.996, "num_tokens": 7147391017.0, "step": 7239 }, { "epoch": 1.2905525846702317, "grad_norm": 0.181640625, "learning_rate": 7.316994585612318e-06, "loss": 1.0171, "num_tokens": 7153675888.0, "step": 7240 }, { "epoch": 1.2907308377896614, "grad_norm": 0.1728515625, "learning_rate": 7.31462420917653e-06, "loss": 1.0172, "num_tokens": 7159959183.0, "step": 7241 }, { "epoch": 1.290909090909091, "grad_norm": 0.1787109375, "learning_rate": 7.312254139843886e-06, "loss": 0.9929, "num_tokens": 7166221740.0, "step": 7242 }, { "epoch": 1.2910873440285204, "grad_norm": 0.1748046875, "learning_rate": 7.309884377811887e-06, "loss": 1.0398, "num_tokens": 7172486893.0, "step": 7243 }, { "epoch": 1.29126559714795, "grad_norm": 0.1826171875, "learning_rate": 7.307514923278003e-06, "loss": 0.9865, "num_tokens": 7178738917.0, "step": 7244 }, { "epoch": 1.2914438502673797, "grad_norm": 0.1845703125, "learning_rate": 7.305145776439679e-06, "loss": 1.0329, "num_tokens": 7184997585.0, "step": 7245 }, { "epoch": 1.2916221033868092, "grad_norm": 0.1806640625, "learning_rate": 7.3027769374943406e-06, "loss": 1.023, "num_tokens": 7191261463.0, "step": 7246 }, { "epoch": 1.2918003565062388, "grad_norm": 0.1845703125, "learning_rate": 7.300408406639378e-06, "loss": 1.0083, "num_tokens": 7197545972.0, "step": 7247 }, { "epoch": 1.2919786096256685, "grad_norm": 0.1787109375, "learning_rate": 7.29804018407217e-06, "loss": 1.0247, "num_tokens": 7203810163.0, "step": 7248 }, { "epoch": 1.2921568627450981, "grad_norm": 0.1806640625, "learning_rate": 7.295672269990053e-06, "loss": 1.0469, "num_tokens": 7210092972.0, "step": 7249 }, { "epoch": 1.2923351158645278, "grad_norm": 0.1767578125, "learning_rate": 7.293304664590352e-06, "loss": 0.9962, "num_tokens": 7216357180.0, "step": 7250 }, { "epoch": 1.2925133689839572, "grad_norm": 0.1806640625, "learning_rate": 7.2909373680703545e-06, "loss": 1.0221, "num_tokens": 7222635201.0, "step": 7251 }, { "epoch": 1.2926916221033868, "grad_norm": 0.1787109375, "learning_rate": 7.2885703806273335e-06, "loss": 1.0249, "num_tokens": 7228918389.0, "step": 7252 }, { "epoch": 1.2928698752228165, "grad_norm": 0.1884765625, "learning_rate": 7.286203702458528e-06, "loss": 1.0331, "num_tokens": 7235192562.0, "step": 7253 }, { "epoch": 1.293048128342246, "grad_norm": 0.18359375, "learning_rate": 7.28383733376115e-06, "loss": 1.0086, "num_tokens": 7241475333.0, "step": 7254 }, { "epoch": 1.2932263814616756, "grad_norm": 0.177734375, "learning_rate": 7.281471274732395e-06, "loss": 1.0303, "num_tokens": 7247733783.0, "step": 7255 }, { "epoch": 1.2934046345811052, "grad_norm": 0.17578125, "learning_rate": 7.279105525569423e-06, "loss": 1.0076, "num_tokens": 7254016088.0, "step": 7256 }, { "epoch": 1.2935828877005346, "grad_norm": 0.17578125, "learning_rate": 7.276740086469376e-06, "loss": 1.0386, "num_tokens": 7260265688.0, "step": 7257 }, { "epoch": 1.2937611408199643, "grad_norm": 0.1826171875, "learning_rate": 7.274374957629362e-06, "loss": 1.0266, "num_tokens": 7266549573.0, "step": 7258 }, { "epoch": 1.293939393939394, "grad_norm": 0.1826171875, "learning_rate": 7.272010139246475e-06, "loss": 1.0106, "num_tokens": 7272768619.0, "step": 7259 }, { "epoch": 1.2941176470588236, "grad_norm": 0.1796875, "learning_rate": 7.2696456315177725e-06, "loss": 1.0096, "num_tokens": 7279012175.0, "step": 7260 }, { "epoch": 1.2942959001782532, "grad_norm": 0.181640625, "learning_rate": 7.267281434640285e-06, "loss": 1.0388, "num_tokens": 7285270215.0, "step": 7261 }, { "epoch": 1.2944741532976827, "grad_norm": 0.17578125, "learning_rate": 7.264917548811026e-06, "loss": 1.0364, "num_tokens": 7291554266.0, "step": 7262 }, { "epoch": 1.2946524064171123, "grad_norm": 0.1767578125, "learning_rate": 7.2625539742269776e-06, "loss": 1.0151, "num_tokens": 7297839677.0, "step": 7263 }, { "epoch": 1.294830659536542, "grad_norm": 0.181640625, "learning_rate": 7.260190711085097e-06, "loss": 1.0343, "num_tokens": 7304123056.0, "step": 7264 }, { "epoch": 1.2950089126559714, "grad_norm": 0.1826171875, "learning_rate": 7.257827759582315e-06, "loss": 1.0012, "num_tokens": 7310407975.0, "step": 7265 }, { "epoch": 1.295187165775401, "grad_norm": 0.1826171875, "learning_rate": 7.255465119915537e-06, "loss": 1.051, "num_tokens": 7316691793.0, "step": 7266 }, { "epoch": 1.2953654188948307, "grad_norm": 0.181640625, "learning_rate": 7.253102792281645e-06, "loss": 0.9956, "num_tokens": 7322975595.0, "step": 7267 }, { "epoch": 1.2955436720142601, "grad_norm": 0.17578125, "learning_rate": 7.250740776877489e-06, "loss": 1.0419, "num_tokens": 7329252267.0, "step": 7268 }, { "epoch": 1.2957219251336898, "grad_norm": 0.1904296875, "learning_rate": 7.2483790738998934e-06, "loss": 1.0099, "num_tokens": 7335486490.0, "step": 7269 }, { "epoch": 1.2959001782531194, "grad_norm": 0.1845703125, "learning_rate": 7.246017683545665e-06, "loss": 0.9974, "num_tokens": 7341772547.0, "step": 7270 }, { "epoch": 1.296078431372549, "grad_norm": 0.1845703125, "learning_rate": 7.243656606011577e-06, "loss": 1.0472, "num_tokens": 7348053816.0, "step": 7271 }, { "epoch": 1.2962566844919787, "grad_norm": 0.177734375, "learning_rate": 7.24129584149438e-06, "loss": 1.0137, "num_tokens": 7354337414.0, "step": 7272 }, { "epoch": 1.2964349376114082, "grad_norm": 0.1748046875, "learning_rate": 7.238935390190791e-06, "loss": 1.0193, "num_tokens": 7360621521.0, "step": 7273 }, { "epoch": 1.2966131907308378, "grad_norm": 0.1787109375, "learning_rate": 7.236575252297515e-06, "loss": 0.9733, "num_tokens": 7366889001.0, "step": 7274 }, { "epoch": 1.2967914438502675, "grad_norm": 0.1796875, "learning_rate": 7.234215428011222e-06, "loss": 1.0072, "num_tokens": 7373142243.0, "step": 7275 }, { "epoch": 1.2969696969696969, "grad_norm": 0.1845703125, "learning_rate": 7.231855917528547e-06, "loss": 1.0043, "num_tokens": 7379374866.0, "step": 7276 }, { "epoch": 1.2971479500891265, "grad_norm": 0.1826171875, "learning_rate": 7.229496721046119e-06, "loss": 1.0138, "num_tokens": 7385629197.0, "step": 7277 }, { "epoch": 1.2973262032085562, "grad_norm": 0.181640625, "learning_rate": 7.227137838760527e-06, "loss": 1.019, "num_tokens": 7391911176.0, "step": 7278 }, { "epoch": 1.2975044563279856, "grad_norm": 0.18359375, "learning_rate": 7.224779270868336e-06, "loss": 1.0197, "num_tokens": 7398179257.0, "step": 7279 }, { "epoch": 1.2976827094474153, "grad_norm": 0.1728515625, "learning_rate": 7.222421017566086e-06, "loss": 1.038, "num_tokens": 7404447512.0, "step": 7280 }, { "epoch": 1.297860962566845, "grad_norm": 0.1728515625, "learning_rate": 7.220063079050293e-06, "loss": 1.0271, "num_tokens": 7410731666.0, "step": 7281 }, { "epoch": 1.2980392156862746, "grad_norm": 0.173828125, "learning_rate": 7.217705455517441e-06, "loss": 1.0002, "num_tokens": 7416985849.0, "step": 7282 }, { "epoch": 1.2982174688057042, "grad_norm": 0.1796875, "learning_rate": 7.215348147163996e-06, "loss": 1.022, "num_tokens": 7423270342.0, "step": 7283 }, { "epoch": 1.2983957219251336, "grad_norm": 0.1787109375, "learning_rate": 7.212991154186388e-06, "loss": 1.0317, "num_tokens": 7429521579.0, "step": 7284 }, { "epoch": 1.2985739750445633, "grad_norm": 0.173828125, "learning_rate": 7.210634476781031e-06, "loss": 1.0184, "num_tokens": 7435778657.0, "step": 7285 }, { "epoch": 1.298752228163993, "grad_norm": 0.18359375, "learning_rate": 7.208278115144303e-06, "loss": 1.0211, "num_tokens": 7442061839.0, "step": 7286 }, { "epoch": 1.2989304812834224, "grad_norm": 0.1826171875, "learning_rate": 7.205922069472557e-06, "loss": 1.0637, "num_tokens": 7448327025.0, "step": 7287 }, { "epoch": 1.299108734402852, "grad_norm": 0.171875, "learning_rate": 7.203566339962131e-06, "loss": 1.0297, "num_tokens": 7454605955.0, "step": 7288 }, { "epoch": 1.2992869875222817, "grad_norm": 0.1748046875, "learning_rate": 7.201210926809325e-06, "loss": 1.0218, "num_tokens": 7460889693.0, "step": 7289 }, { "epoch": 1.299465240641711, "grad_norm": 0.17578125, "learning_rate": 7.198855830210418e-06, "loss": 1.0197, "num_tokens": 7467167006.0, "step": 7290 }, { "epoch": 1.2996434937611407, "grad_norm": 0.185546875, "learning_rate": 7.1965010503616575e-06, "loss": 1.0357, "num_tokens": 7473450803.0, "step": 7291 }, { "epoch": 1.2998217468805704, "grad_norm": 0.1787109375, "learning_rate": 7.194146587459267e-06, "loss": 1.0491, "num_tokens": 7479732931.0, "step": 7292 }, { "epoch": 1.3, "grad_norm": 0.1826171875, "learning_rate": 7.19179244169945e-06, "loss": 1.048, "num_tokens": 7486000447.0, "step": 7293 }, { "epoch": 1.3001782531194297, "grad_norm": 0.177734375, "learning_rate": 7.1894386132783676e-06, "loss": 1.0213, "num_tokens": 7492268018.0, "step": 7294 }, { "epoch": 1.3003565062388591, "grad_norm": 0.17578125, "learning_rate": 7.187085102392175e-06, "loss": 1.0402, "num_tokens": 7498551634.0, "step": 7295 }, { "epoch": 1.3005347593582888, "grad_norm": 0.177734375, "learning_rate": 7.184731909236988e-06, "loss": 1.032, "num_tokens": 7504796522.0, "step": 7296 }, { "epoch": 1.3007130124777184, "grad_norm": 0.169921875, "learning_rate": 7.182379034008897e-06, "loss": 1.0373, "num_tokens": 7511078779.0, "step": 7297 }, { "epoch": 1.3008912655971479, "grad_norm": 0.1748046875, "learning_rate": 7.1800264769039694e-06, "loss": 1.0194, "num_tokens": 7517362048.0, "step": 7298 }, { "epoch": 1.3010695187165775, "grad_norm": 0.173828125, "learning_rate": 7.177674238118244e-06, "loss": 1.0403, "num_tokens": 7523628730.0, "step": 7299 }, { "epoch": 1.3012477718360071, "grad_norm": 0.1767578125, "learning_rate": 7.17532231784773e-06, "loss": 1.0471, "num_tokens": 7529912831.0, "step": 7300 }, { "epoch": 1.3014260249554368, "grad_norm": 0.1748046875, "learning_rate": 7.172970716288416e-06, "loss": 0.9978, "num_tokens": 7536194613.0, "step": 7301 }, { "epoch": 1.3016042780748662, "grad_norm": 0.181640625, "learning_rate": 7.170619433636263e-06, "loss": 1.0131, "num_tokens": 7542476827.0, "step": 7302 }, { "epoch": 1.3017825311942959, "grad_norm": 0.1767578125, "learning_rate": 7.168268470087203e-06, "loss": 1.0022, "num_tokens": 7548753398.0, "step": 7303 }, { "epoch": 1.3019607843137255, "grad_norm": 0.1796875, "learning_rate": 7.1659178258371405e-06, "loss": 1.0293, "num_tokens": 7554980528.0, "step": 7304 }, { "epoch": 1.3021390374331552, "grad_norm": 0.169921875, "learning_rate": 7.1635675010819564e-06, "loss": 1.0391, "num_tokens": 7561264272.0, "step": 7305 }, { "epoch": 1.3023172905525846, "grad_norm": 0.177734375, "learning_rate": 7.161217496017507e-06, "loss": 1.0083, "num_tokens": 7567541927.0, "step": 7306 }, { "epoch": 1.3024955436720143, "grad_norm": 0.171875, "learning_rate": 7.158867810839614e-06, "loss": 1.0262, "num_tokens": 7573822021.0, "step": 7307 }, { "epoch": 1.302673796791444, "grad_norm": 0.173828125, "learning_rate": 7.156518445744076e-06, "loss": 1.0022, "num_tokens": 7580094486.0, "step": 7308 }, { "epoch": 1.3028520499108733, "grad_norm": 0.177734375, "learning_rate": 7.154169400926669e-06, "loss": 1.0149, "num_tokens": 7586362002.0, "step": 7309 }, { "epoch": 1.303030303030303, "grad_norm": 0.17578125, "learning_rate": 7.1518206765831414e-06, "loss": 1.0308, "num_tokens": 7592646534.0, "step": 7310 }, { "epoch": 1.3032085561497326, "grad_norm": 0.173828125, "learning_rate": 7.14947227290921e-06, "loss": 1.0173, "num_tokens": 7598880056.0, "step": 7311 }, { "epoch": 1.3033868092691623, "grad_norm": 0.1796875, "learning_rate": 7.147124190100567e-06, "loss": 0.9886, "num_tokens": 7605150526.0, "step": 7312 }, { "epoch": 1.303565062388592, "grad_norm": 0.1806640625, "learning_rate": 7.144776428352884e-06, "loss": 1.0175, "num_tokens": 7611432374.0, "step": 7313 }, { "epoch": 1.3037433155080214, "grad_norm": 0.17578125, "learning_rate": 7.142428987861794e-06, "loss": 1.0164, "num_tokens": 7617717284.0, "step": 7314 }, { "epoch": 1.303921568627451, "grad_norm": 0.17578125, "learning_rate": 7.14008186882291e-06, "loss": 1.0286, "num_tokens": 7623988939.0, "step": 7315 }, { "epoch": 1.3040998217468807, "grad_norm": 0.173828125, "learning_rate": 7.137735071431822e-06, "loss": 1.0248, "num_tokens": 7630245808.0, "step": 7316 }, { "epoch": 1.30427807486631, "grad_norm": 0.173828125, "learning_rate": 7.1353885958840895e-06, "loss": 1.0148, "num_tokens": 7636530815.0, "step": 7317 }, { "epoch": 1.3044563279857397, "grad_norm": 0.1787109375, "learning_rate": 7.1330424423752396e-06, "loss": 0.9969, "num_tokens": 7642814337.0, "step": 7318 }, { "epoch": 1.3046345811051694, "grad_norm": 0.1796875, "learning_rate": 7.130696611100778e-06, "loss": 0.9793, "num_tokens": 7649066781.0, "step": 7319 }, { "epoch": 1.3048128342245988, "grad_norm": 0.1953125, "learning_rate": 7.128351102256193e-06, "loss": 1.029, "num_tokens": 7655348626.0, "step": 7320 }, { "epoch": 1.3049910873440285, "grad_norm": 0.1806640625, "learning_rate": 7.126005916036929e-06, "loss": 1.0381, "num_tokens": 7661632029.0, "step": 7321 }, { "epoch": 1.3051693404634581, "grad_norm": 0.1806640625, "learning_rate": 7.123661052638411e-06, "loss": 1.0245, "num_tokens": 7667887787.0, "step": 7322 }, { "epoch": 1.3053475935828878, "grad_norm": 0.17578125, "learning_rate": 7.1213165122560315e-06, "loss": 1.0187, "num_tokens": 7674148390.0, "step": 7323 }, { "epoch": 1.3055258467023174, "grad_norm": 0.1796875, "learning_rate": 7.118972295085172e-06, "loss": 1.0021, "num_tokens": 7680432447.0, "step": 7324 }, { "epoch": 1.3057040998217468, "grad_norm": 0.1728515625, "learning_rate": 7.116628401321171e-06, "loss": 1.026, "num_tokens": 7686716238.0, "step": 7325 }, { "epoch": 1.3058823529411765, "grad_norm": 0.1767578125, "learning_rate": 7.114284831159342e-06, "loss": 1.0216, "num_tokens": 7692975617.0, "step": 7326 }, { "epoch": 1.3060606060606061, "grad_norm": 0.1904296875, "learning_rate": 7.111941584794983e-06, "loss": 1.0136, "num_tokens": 7699211605.0, "step": 7327 }, { "epoch": 1.3062388591800356, "grad_norm": 0.177734375, "learning_rate": 7.109598662423358e-06, "loss": 1.0139, "num_tokens": 7705480626.0, "step": 7328 }, { "epoch": 1.3064171122994652, "grad_norm": 0.19140625, "learning_rate": 7.1072560642397006e-06, "loss": 1.0244, "num_tokens": 7711718588.0, "step": 7329 }, { "epoch": 1.3065953654188949, "grad_norm": 0.1767578125, "learning_rate": 7.104913790439209e-06, "loss": 1.0083, "num_tokens": 7717992464.0, "step": 7330 }, { "epoch": 1.3067736185383243, "grad_norm": 0.177734375, "learning_rate": 7.102571841217083e-06, "loss": 1.0356, "num_tokens": 7724266107.0, "step": 7331 }, { "epoch": 1.306951871657754, "grad_norm": 0.1806640625, "learning_rate": 7.100230216768467e-06, "loss": 1.0142, "num_tokens": 7730482154.0, "step": 7332 }, { "epoch": 1.3071301247771836, "grad_norm": 0.1748046875, "learning_rate": 7.097888917288493e-06, "loss": 1.0335, "num_tokens": 7736766662.0, "step": 7333 }, { "epoch": 1.3073083778966132, "grad_norm": 0.1787109375, "learning_rate": 7.095547942972256e-06, "loss": 0.993, "num_tokens": 7743025691.0, "step": 7334 }, { "epoch": 1.307486631016043, "grad_norm": 0.1796875, "learning_rate": 7.093207294014839e-06, "loss": 1.0148, "num_tokens": 7749307860.0, "step": 7335 }, { "epoch": 1.3076648841354723, "grad_norm": 0.1787109375, "learning_rate": 7.090866970611288e-06, "loss": 1.013, "num_tokens": 7755590988.0, "step": 7336 }, { "epoch": 1.307843137254902, "grad_norm": 0.1923828125, "learning_rate": 7.088526972956611e-06, "loss": 1.02, "num_tokens": 7761850096.0, "step": 7337 }, { "epoch": 1.3080213903743316, "grad_norm": 0.1767578125, "learning_rate": 7.086187301245813e-06, "loss": 1.0153, "num_tokens": 7768104286.0, "step": 7338 }, { "epoch": 1.308199643493761, "grad_norm": 0.181640625, "learning_rate": 7.083847955673855e-06, "loss": 1.0206, "num_tokens": 7774387108.0, "step": 7339 }, { "epoch": 1.3083778966131907, "grad_norm": 0.1767578125, "learning_rate": 7.081508936435675e-06, "loss": 1.0439, "num_tokens": 7780652368.0, "step": 7340 }, { "epoch": 1.3085561497326204, "grad_norm": 0.177734375, "learning_rate": 7.079170243726179e-06, "loss": 1.0269, "num_tokens": 7786895285.0, "step": 7341 }, { "epoch": 1.3087344028520498, "grad_norm": 0.177734375, "learning_rate": 7.076831877740262e-06, "loss": 1.0261, "num_tokens": 7793152469.0, "step": 7342 }, { "epoch": 1.3089126559714794, "grad_norm": 0.1845703125, "learning_rate": 7.074493838672772e-06, "loss": 1.0258, "num_tokens": 7799436686.0, "step": 7343 }, { "epoch": 1.309090909090909, "grad_norm": 0.1806640625, "learning_rate": 7.0721561267185416e-06, "loss": 1.0082, "num_tokens": 7805701952.0, "step": 7344 }, { "epoch": 1.3092691622103387, "grad_norm": 0.181640625, "learning_rate": 7.069818742072372e-06, "loss": 1.0256, "num_tokens": 7811975800.0, "step": 7345 }, { "epoch": 1.3094474153297684, "grad_norm": 0.1904296875, "learning_rate": 7.067481684929038e-06, "loss": 0.9954, "num_tokens": 7818259344.0, "step": 7346 }, { "epoch": 1.3096256684491978, "grad_norm": 0.1865234375, "learning_rate": 7.065144955483285e-06, "loss": 1.0128, "num_tokens": 7824529302.0, "step": 7347 }, { "epoch": 1.3098039215686275, "grad_norm": 0.189453125, "learning_rate": 7.062808553929834e-06, "loss": 1.05, "num_tokens": 7830792563.0, "step": 7348 }, { "epoch": 1.309982174688057, "grad_norm": 0.1875, "learning_rate": 7.060472480463381e-06, "loss": 1.0031, "num_tokens": 7837075786.0, "step": 7349 }, { "epoch": 1.3101604278074865, "grad_norm": 0.171875, "learning_rate": 7.058136735278589e-06, "loss": 1.0226, "num_tokens": 7843357700.0, "step": 7350 }, { "epoch": 1.3103386809269162, "grad_norm": 0.1767578125, "learning_rate": 7.055801318570099e-06, "loss": 1.0087, "num_tokens": 7849641668.0, "step": 7351 }, { "epoch": 1.3105169340463458, "grad_norm": 0.1748046875, "learning_rate": 7.053466230532518e-06, "loss": 1.0138, "num_tokens": 7855903690.0, "step": 7352 }, { "epoch": 1.3106951871657753, "grad_norm": 0.1796875, "learning_rate": 7.051131471360428e-06, "loss": 1.0117, "num_tokens": 7862157166.0, "step": 7353 }, { "epoch": 1.310873440285205, "grad_norm": 0.181640625, "learning_rate": 7.04879704124839e-06, "loss": 1.0162, "num_tokens": 7868423669.0, "step": 7354 }, { "epoch": 1.3110516934046346, "grad_norm": 0.171875, "learning_rate": 7.046462940390926e-06, "loss": 1.022, "num_tokens": 7874704179.0, "step": 7355 }, { "epoch": 1.3112299465240642, "grad_norm": 0.17578125, "learning_rate": 7.0441291689825455e-06, "loss": 1.0302, "num_tokens": 7880943496.0, "step": 7356 }, { "epoch": 1.3114081996434939, "grad_norm": 0.1708984375, "learning_rate": 7.0417957272177175e-06, "loss": 1.0119, "num_tokens": 7887207039.0, "step": 7357 }, { "epoch": 1.3115864527629233, "grad_norm": 0.1884765625, "learning_rate": 7.039462615290889e-06, "loss": 1.0267, "num_tokens": 7893464116.0, "step": 7358 }, { "epoch": 1.311764705882353, "grad_norm": 0.197265625, "learning_rate": 7.037129833396477e-06, "loss": 1.0189, "num_tokens": 7899707932.0, "step": 7359 }, { "epoch": 1.3119429590017826, "grad_norm": 0.177734375, "learning_rate": 7.034797381728875e-06, "loss": 1.0097, "num_tokens": 7905990412.0, "step": 7360 }, { "epoch": 1.312121212121212, "grad_norm": 0.1767578125, "learning_rate": 7.0324652604824486e-06, "loss": 1.0196, "num_tokens": 7912241226.0, "step": 7361 }, { "epoch": 1.3122994652406417, "grad_norm": 0.1845703125, "learning_rate": 7.030133469851523e-06, "loss": 1.0147, "num_tokens": 7918527181.0, "step": 7362 }, { "epoch": 1.3124777183600713, "grad_norm": 0.17578125, "learning_rate": 7.02780201003042e-06, "loss": 1.0227, "num_tokens": 7924761989.0, "step": 7363 }, { "epoch": 1.312655971479501, "grad_norm": 0.1826171875, "learning_rate": 7.025470881213416e-06, "loss": 1.0352, "num_tokens": 7931044559.0, "step": 7364 }, { "epoch": 1.3128342245989304, "grad_norm": 0.177734375, "learning_rate": 7.023140083594764e-06, "loss": 1.0197, "num_tokens": 7937296511.0, "step": 7365 }, { "epoch": 1.31301247771836, "grad_norm": 0.171875, "learning_rate": 7.0208096173686866e-06, "loss": 1.0504, "num_tokens": 7943561848.0, "step": 7366 }, { "epoch": 1.3131907308377897, "grad_norm": 0.177734375, "learning_rate": 7.0184794827293925e-06, "loss": 0.9824, "num_tokens": 7949846506.0, "step": 7367 }, { "epoch": 1.3133689839572193, "grad_norm": 0.1796875, "learning_rate": 7.01614967987104e-06, "loss": 1.0035, "num_tokens": 7956128725.0, "step": 7368 }, { "epoch": 1.3135472370766488, "grad_norm": 0.173828125, "learning_rate": 7.013820208987776e-06, "loss": 1.0294, "num_tokens": 7962397958.0, "step": 7369 }, { "epoch": 1.3137254901960784, "grad_norm": 0.17578125, "learning_rate": 7.011491070273719e-06, "loss": 1.0217, "num_tokens": 7968646517.0, "step": 7370 }, { "epoch": 1.313903743315508, "grad_norm": 0.1728515625, "learning_rate": 7.009162263922955e-06, "loss": 1.0311, "num_tokens": 7974930310.0, "step": 7371 }, { "epoch": 1.3140819964349375, "grad_norm": 0.181640625, "learning_rate": 7.006833790129544e-06, "loss": 1.0232, "num_tokens": 7981199908.0, "step": 7372 }, { "epoch": 1.3142602495543672, "grad_norm": 0.18359375, "learning_rate": 7.004505649087514e-06, "loss": 1.0066, "num_tokens": 7987453764.0, "step": 7373 }, { "epoch": 1.3144385026737968, "grad_norm": 0.173828125, "learning_rate": 7.002177840990876e-06, "loss": 1.0396, "num_tokens": 7993731245.0, "step": 7374 }, { "epoch": 1.3146167557932265, "grad_norm": 0.173828125, "learning_rate": 6.999850366033607e-06, "loss": 0.9964, "num_tokens": 8000009630.0, "step": 7375 }, { "epoch": 1.314795008912656, "grad_norm": 0.1826171875, "learning_rate": 6.997523224409648e-06, "loss": 1.0617, "num_tokens": 8006246986.0, "step": 7376 }, { "epoch": 1.3149732620320855, "grad_norm": 0.1787109375, "learning_rate": 6.995196416312928e-06, "loss": 0.9995, "num_tokens": 8012502540.0, "step": 7377 }, { "epoch": 1.3151515151515152, "grad_norm": 0.1845703125, "learning_rate": 6.9928699419373365e-06, "loss": 0.9985, "num_tokens": 8018785350.0, "step": 7378 }, { "epoch": 1.3153297682709448, "grad_norm": 0.185546875, "learning_rate": 6.990543801476742e-06, "loss": 0.986, "num_tokens": 8025010443.0, "step": 7379 }, { "epoch": 1.3155080213903743, "grad_norm": 0.181640625, "learning_rate": 6.988217995124975e-06, "loss": 0.9951, "num_tokens": 8031294599.0, "step": 7380 }, { "epoch": 1.315686274509804, "grad_norm": 0.1767578125, "learning_rate": 6.985892523075855e-06, "loss": 1.0401, "num_tokens": 8037522544.0, "step": 7381 }, { "epoch": 1.3158645276292336, "grad_norm": 0.185546875, "learning_rate": 6.983567385523164e-06, "loss": 1.0178, "num_tokens": 8043804605.0, "step": 7382 }, { "epoch": 1.316042780748663, "grad_norm": 0.1845703125, "learning_rate": 6.981242582660646e-06, "loss": 1.0186, "num_tokens": 8050089823.0, "step": 7383 }, { "epoch": 1.3162210338680926, "grad_norm": 0.173828125, "learning_rate": 6.978918114682035e-06, "loss": 1.0059, "num_tokens": 8056372432.0, "step": 7384 }, { "epoch": 1.3163992869875223, "grad_norm": 0.1787109375, "learning_rate": 6.976593981781028e-06, "loss": 0.9974, "num_tokens": 8062631940.0, "step": 7385 }, { "epoch": 1.316577540106952, "grad_norm": 0.17578125, "learning_rate": 6.974270184151298e-06, "loss": 1.066, "num_tokens": 8068872582.0, "step": 7386 }, { "epoch": 1.3167557932263816, "grad_norm": 0.181640625, "learning_rate": 6.971946721986478e-06, "loss": 1.0206, "num_tokens": 8075125266.0, "step": 7387 }, { "epoch": 1.316934046345811, "grad_norm": 0.173828125, "learning_rate": 6.969623595480195e-06, "loss": 1.0182, "num_tokens": 8081408760.0, "step": 7388 }, { "epoch": 1.3171122994652407, "grad_norm": 0.193359375, "learning_rate": 6.96730080482603e-06, "loss": 1.0178, "num_tokens": 8087682531.0, "step": 7389 }, { "epoch": 1.3172905525846703, "grad_norm": 0.1826171875, "learning_rate": 6.9649783502175425e-06, "loss": 1.0562, "num_tokens": 8093964322.0, "step": 7390 }, { "epoch": 1.3174688057040997, "grad_norm": 0.1748046875, "learning_rate": 6.9626562318482616e-06, "loss": 0.9862, "num_tokens": 8100248511.0, "step": 7391 }, { "epoch": 1.3176470588235294, "grad_norm": 0.1875, "learning_rate": 6.9603344499116895e-06, "loss": 1.0463, "num_tokens": 8106509156.0, "step": 7392 }, { "epoch": 1.317825311942959, "grad_norm": 0.1787109375, "learning_rate": 6.958013004601303e-06, "loss": 1.0029, "num_tokens": 8112780749.0, "step": 7393 }, { "epoch": 1.3180035650623885, "grad_norm": 0.1767578125, "learning_rate": 6.955691896110546e-06, "loss": 1.0422, "num_tokens": 8119064328.0, "step": 7394 }, { "epoch": 1.3181818181818181, "grad_norm": 0.1748046875, "learning_rate": 6.9533711246328396e-06, "loss": 0.9806, "num_tokens": 8125327160.0, "step": 7395 }, { "epoch": 1.3183600713012478, "grad_norm": 0.177734375, "learning_rate": 6.951050690361574e-06, "loss": 0.9986, "num_tokens": 8131581206.0, "step": 7396 }, { "epoch": 1.3185383244206774, "grad_norm": 0.1796875, "learning_rate": 6.9487305934901114e-06, "loss": 1.0189, "num_tokens": 8137786946.0, "step": 7397 }, { "epoch": 1.318716577540107, "grad_norm": 0.17578125, "learning_rate": 6.946410834211784e-06, "loss": 1.0318, "num_tokens": 8144065283.0, "step": 7398 }, { "epoch": 1.3188948306595365, "grad_norm": 0.1845703125, "learning_rate": 6.9440914127199e-06, "loss": 1.0384, "num_tokens": 8150349055.0, "step": 7399 }, { "epoch": 1.3190730837789661, "grad_norm": 0.1796875, "learning_rate": 6.941772329207737e-06, "loss": 1.025, "num_tokens": 8156632096.0, "step": 7400 }, { "epoch": 1.3192513368983958, "grad_norm": 0.1796875, "learning_rate": 6.939453583868538e-06, "loss": 1.0345, "num_tokens": 8162884967.0, "step": 7401 }, { "epoch": 1.3194295900178252, "grad_norm": 0.1806640625, "learning_rate": 6.9371351768955355e-06, "loss": 0.9789, "num_tokens": 8169163974.0, "step": 7402 }, { "epoch": 1.3196078431372549, "grad_norm": 0.1796875, "learning_rate": 6.934817108481919e-06, "loss": 1.0123, "num_tokens": 8175410630.0, "step": 7403 }, { "epoch": 1.3197860962566845, "grad_norm": 0.1787109375, "learning_rate": 6.9324993788208505e-06, "loss": 1.013, "num_tokens": 8181694436.0, "step": 7404 }, { "epoch": 1.319964349376114, "grad_norm": 0.1748046875, "learning_rate": 6.930181988105468e-06, "loss": 1.0224, "num_tokens": 8187980114.0, "step": 7405 }, { "epoch": 1.3201426024955436, "grad_norm": 0.1728515625, "learning_rate": 6.927864936528885e-06, "loss": 1.0263, "num_tokens": 8194248380.0, "step": 7406 }, { "epoch": 1.3203208556149733, "grad_norm": 0.1865234375, "learning_rate": 6.925548224284177e-06, "loss": 1.0354, "num_tokens": 8200516361.0, "step": 7407 }, { "epoch": 1.320499108734403, "grad_norm": 0.177734375, "learning_rate": 6.923231851564394e-06, "loss": 0.9848, "num_tokens": 8206795866.0, "step": 7408 }, { "epoch": 1.3206773618538326, "grad_norm": 0.1796875, "learning_rate": 6.920915818562563e-06, "loss": 1.0196, "num_tokens": 8213055139.0, "step": 7409 }, { "epoch": 1.320855614973262, "grad_norm": 0.1767578125, "learning_rate": 6.918600125471684e-06, "loss": 0.9973, "num_tokens": 8219339354.0, "step": 7410 }, { "epoch": 1.3210338680926916, "grad_norm": 0.177734375, "learning_rate": 6.916284772484718e-06, "loss": 1.0037, "num_tokens": 8225578377.0, "step": 7411 }, { "epoch": 1.3212121212121213, "grad_norm": 0.17578125, "learning_rate": 6.913969759794601e-06, "loss": 1.0162, "num_tokens": 8231806526.0, "step": 7412 }, { "epoch": 1.3213903743315507, "grad_norm": 0.1796875, "learning_rate": 6.911655087594256e-06, "loss": 1.0041, "num_tokens": 8238037353.0, "step": 7413 }, { "epoch": 1.3215686274509804, "grad_norm": 0.177734375, "learning_rate": 6.909340756076552e-06, "loss": 0.9814, "num_tokens": 8244314412.0, "step": 7414 }, { "epoch": 1.32174688057041, "grad_norm": 0.1845703125, "learning_rate": 6.907026765434346e-06, "loss": 1.047, "num_tokens": 8250597956.0, "step": 7415 }, { "epoch": 1.3219251336898394, "grad_norm": 0.18359375, "learning_rate": 6.904713115860468e-06, "loss": 1.0403, "num_tokens": 8256882599.0, "step": 7416 }, { "epoch": 1.322103386809269, "grad_norm": 0.1767578125, "learning_rate": 6.902399807547713e-06, "loss": 1.0489, "num_tokens": 8263167221.0, "step": 7417 }, { "epoch": 1.3222816399286987, "grad_norm": 0.17578125, "learning_rate": 6.900086840688849e-06, "loss": 1.0365, "num_tokens": 8269446728.0, "step": 7418 }, { "epoch": 1.3224598930481284, "grad_norm": 0.177734375, "learning_rate": 6.8977742154766115e-06, "loss": 1.0409, "num_tokens": 8275707799.0, "step": 7419 }, { "epoch": 1.322638146167558, "grad_norm": 0.1748046875, "learning_rate": 6.89546193210372e-06, "loss": 1.0099, "num_tokens": 8281942832.0, "step": 7420 }, { "epoch": 1.3228163992869875, "grad_norm": 0.189453125, "learning_rate": 6.893149990762856e-06, "loss": 1.0491, "num_tokens": 8288225540.0, "step": 7421 }, { "epoch": 1.3229946524064171, "grad_norm": 0.1826171875, "learning_rate": 6.890838391646666e-06, "loss": 1.0312, "num_tokens": 8294478955.0, "step": 7422 }, { "epoch": 1.3231729055258468, "grad_norm": 0.16796875, "learning_rate": 6.888527134947785e-06, "loss": 1.0058, "num_tokens": 8300741502.0, "step": 7423 }, { "epoch": 1.3233511586452762, "grad_norm": 0.1787109375, "learning_rate": 6.886216220858807e-06, "loss": 1.0279, "num_tokens": 8307025579.0, "step": 7424 }, { "epoch": 1.3235294117647058, "grad_norm": 0.1796875, "learning_rate": 6.883905649572302e-06, "loss": 1.0192, "num_tokens": 8313308723.0, "step": 7425 }, { "epoch": 1.3237076648841355, "grad_norm": 0.1767578125, "learning_rate": 6.881595421280806e-06, "loss": 1.0168, "num_tokens": 8319591873.0, "step": 7426 }, { "epoch": 1.3238859180035651, "grad_norm": 0.1767578125, "learning_rate": 6.879285536176839e-06, "loss": 1.0466, "num_tokens": 8325876268.0, "step": 7427 }, { "epoch": 1.3240641711229946, "grad_norm": 0.1767578125, "learning_rate": 6.876975994452877e-06, "loss": 1.0749, "num_tokens": 8332159761.0, "step": 7428 }, { "epoch": 1.3242424242424242, "grad_norm": 0.173828125, "learning_rate": 6.874666796301379e-06, "loss": 1.0245, "num_tokens": 8338439949.0, "step": 7429 }, { "epoch": 1.3244206773618539, "grad_norm": 0.177734375, "learning_rate": 6.8723579419147704e-06, "loss": 1.0262, "num_tokens": 8344695533.0, "step": 7430 }, { "epoch": 1.3245989304812835, "grad_norm": 0.189453125, "learning_rate": 6.870049431485446e-06, "loss": 1.0304, "num_tokens": 8350951142.0, "step": 7431 }, { "epoch": 1.324777183600713, "grad_norm": 0.1787109375, "learning_rate": 6.867741265205776e-06, "loss": 1.0351, "num_tokens": 8357191281.0, "step": 7432 }, { "epoch": 1.3249554367201426, "grad_norm": 0.1826171875, "learning_rate": 6.865433443268097e-06, "loss": 1.0108, "num_tokens": 8363475261.0, "step": 7433 }, { "epoch": 1.3251336898395722, "grad_norm": 0.17578125, "learning_rate": 6.863125965864728e-06, "loss": 0.9952, "num_tokens": 8369698012.0, "step": 7434 }, { "epoch": 1.3253119429590017, "grad_norm": 0.1748046875, "learning_rate": 6.860818833187945e-06, "loss": 0.9976, "num_tokens": 8375981999.0, "step": 7435 }, { "epoch": 1.3254901960784313, "grad_norm": 0.1767578125, "learning_rate": 6.858512045430006e-06, "loss": 1.0415, "num_tokens": 8382256967.0, "step": 7436 }, { "epoch": 1.325668449197861, "grad_norm": 0.1796875, "learning_rate": 6.856205602783134e-06, "loss": 1.0347, "num_tokens": 8388540109.0, "step": 7437 }, { "epoch": 1.3258467023172906, "grad_norm": 0.1728515625, "learning_rate": 6.853899505439523e-06, "loss": 1.0459, "num_tokens": 8394824156.0, "step": 7438 }, { "epoch": 1.3260249554367203, "grad_norm": 0.1796875, "learning_rate": 6.851593753591345e-06, "loss": 1.0014, "num_tokens": 8401107293.0, "step": 7439 }, { "epoch": 1.3262032085561497, "grad_norm": 0.1767578125, "learning_rate": 6.849288347430734e-06, "loss": 1.0221, "num_tokens": 8407379615.0, "step": 7440 }, { "epoch": 1.3263814616755794, "grad_norm": 0.1748046875, "learning_rate": 6.846983287149806e-06, "loss": 1.0362, "num_tokens": 8413609480.0, "step": 7441 }, { "epoch": 1.326559714795009, "grad_norm": 0.1806640625, "learning_rate": 6.844678572940637e-06, "loss": 1.025, "num_tokens": 8419893446.0, "step": 7442 }, { "epoch": 1.3267379679144384, "grad_norm": 0.1787109375, "learning_rate": 6.842374204995283e-06, "loss": 1.0256, "num_tokens": 8426168255.0, "step": 7443 }, { "epoch": 1.326916221033868, "grad_norm": 0.18359375, "learning_rate": 6.840070183505767e-06, "loss": 1.0027, "num_tokens": 8432450739.0, "step": 7444 }, { "epoch": 1.3270944741532977, "grad_norm": 0.1767578125, "learning_rate": 6.837766508664081e-06, "loss": 1.0326, "num_tokens": 8438728928.0, "step": 7445 }, { "epoch": 1.3272727272727272, "grad_norm": 0.1796875, "learning_rate": 6.835463180662192e-06, "loss": 1.0214, "num_tokens": 8444990570.0, "step": 7446 }, { "epoch": 1.3274509803921568, "grad_norm": 0.177734375, "learning_rate": 6.833160199692033e-06, "loss": 1.0136, "num_tokens": 8451241272.0, "step": 7447 }, { "epoch": 1.3276292335115865, "grad_norm": 0.181640625, "learning_rate": 6.83085756594552e-06, "loss": 1.028, "num_tokens": 8457475104.0, "step": 7448 }, { "epoch": 1.327807486631016, "grad_norm": 0.1806640625, "learning_rate": 6.828555279614526e-06, "loss": 1.0354, "num_tokens": 8463756397.0, "step": 7449 }, { "epoch": 1.3279857397504458, "grad_norm": 0.1806640625, "learning_rate": 6.826253340890906e-06, "loss": 1.0124, "num_tokens": 8470038434.0, "step": 7450 }, { "epoch": 1.3281639928698752, "grad_norm": 0.1796875, "learning_rate": 6.823951749966471e-06, "loss": 1.0303, "num_tokens": 8476318289.0, "step": 7451 }, { "epoch": 1.3283422459893048, "grad_norm": 0.181640625, "learning_rate": 6.821650507033029e-06, "loss": 1.0187, "num_tokens": 8482572876.0, "step": 7452 }, { "epoch": 1.3285204991087345, "grad_norm": 0.1806640625, "learning_rate": 6.819349612282329e-06, "loss": 1.0081, "num_tokens": 8488826021.0, "step": 7453 }, { "epoch": 1.328698752228164, "grad_norm": 0.17578125, "learning_rate": 6.817049065906107e-06, "loss": 1.0211, "num_tokens": 8495046444.0, "step": 7454 }, { "epoch": 1.3288770053475936, "grad_norm": 0.177734375, "learning_rate": 6.814748868096076e-06, "loss": 1.0334, "num_tokens": 8501289207.0, "step": 7455 }, { "epoch": 1.3290552584670232, "grad_norm": 0.177734375, "learning_rate": 6.812449019043905e-06, "loss": 1.0223, "num_tokens": 8507560540.0, "step": 7456 }, { "epoch": 1.3292335115864526, "grad_norm": 0.181640625, "learning_rate": 6.810149518941244e-06, "loss": 0.9727, "num_tokens": 8513815985.0, "step": 7457 }, { "epoch": 1.3294117647058823, "grad_norm": 0.1728515625, "learning_rate": 6.807850367979705e-06, "loss": 1.0214, "num_tokens": 8520101952.0, "step": 7458 }, { "epoch": 1.329590017825312, "grad_norm": 0.177734375, "learning_rate": 6.80555156635089e-06, "loss": 1.0249, "num_tokens": 8526367526.0, "step": 7459 }, { "epoch": 1.3297682709447416, "grad_norm": 0.185546875, "learning_rate": 6.803253114246344e-06, "loss": 1.0198, "num_tokens": 8532633490.0, "step": 7460 }, { "epoch": 1.3299465240641712, "grad_norm": 0.185546875, "learning_rate": 6.800955011857603e-06, "loss": 1.0299, "num_tokens": 8538886888.0, "step": 7461 }, { "epoch": 1.3301247771836007, "grad_norm": 0.1748046875, "learning_rate": 6.7986572593761714e-06, "loss": 1.0103, "num_tokens": 8545164146.0, "step": 7462 }, { "epoch": 1.3303030303030303, "grad_norm": 0.1728515625, "learning_rate": 6.796359856993517e-06, "loss": 1.0161, "num_tokens": 8551448106.0, "step": 7463 }, { "epoch": 1.33048128342246, "grad_norm": 0.185546875, "learning_rate": 6.7940628049010856e-06, "loss": 1.0435, "num_tokens": 8557732293.0, "step": 7464 }, { "epoch": 1.3306595365418894, "grad_norm": 0.1884765625, "learning_rate": 6.7917661032902895e-06, "loss": 0.9872, "num_tokens": 8564015238.0, "step": 7465 }, { "epoch": 1.330837789661319, "grad_norm": 0.1884765625, "learning_rate": 6.789469752352514e-06, "loss": 1.0094, "num_tokens": 8570299620.0, "step": 7466 }, { "epoch": 1.3310160427807487, "grad_norm": 0.193359375, "learning_rate": 6.787173752279119e-06, "loss": 1.0208, "num_tokens": 8576530943.0, "step": 7467 }, { "epoch": 1.3311942959001781, "grad_norm": 0.17578125, "learning_rate": 6.7848781032614205e-06, "loss": 1.0169, "num_tokens": 8582797707.0, "step": 7468 }, { "epoch": 1.3313725490196078, "grad_norm": 0.19140625, "learning_rate": 6.782582805490723e-06, "loss": 1.0283, "num_tokens": 8589056881.0, "step": 7469 }, { "epoch": 1.3315508021390374, "grad_norm": 0.177734375, "learning_rate": 6.780287859158292e-06, "loss": 1.0437, "num_tokens": 8595342552.0, "step": 7470 }, { "epoch": 1.331729055258467, "grad_norm": 0.1865234375, "learning_rate": 6.777993264455367e-06, "loss": 1.0024, "num_tokens": 8601578316.0, "step": 7471 }, { "epoch": 1.3319073083778967, "grad_norm": 0.1943359375, "learning_rate": 6.775699021573152e-06, "loss": 1.0211, "num_tokens": 8607861983.0, "step": 7472 }, { "epoch": 1.3320855614973262, "grad_norm": 0.17578125, "learning_rate": 6.773405130702836e-06, "loss": 0.9911, "num_tokens": 8614141881.0, "step": 7473 }, { "epoch": 1.3322638146167558, "grad_norm": 0.189453125, "learning_rate": 6.771111592035564e-06, "loss": 1.0423, "num_tokens": 8620421463.0, "step": 7474 }, { "epoch": 1.3324420677361855, "grad_norm": 0.1796875, "learning_rate": 6.768818405762455e-06, "loss": 1.0215, "num_tokens": 8626703938.0, "step": 7475 }, { "epoch": 1.3326203208556149, "grad_norm": 0.1748046875, "learning_rate": 6.766525572074606e-06, "loss": 1.0101, "num_tokens": 8632955627.0, "step": 7476 }, { "epoch": 1.3327985739750445, "grad_norm": 0.1796875, "learning_rate": 6.764233091163075e-06, "loss": 1.0016, "num_tokens": 8639208846.0, "step": 7477 }, { "epoch": 1.3329768270944742, "grad_norm": 0.1689453125, "learning_rate": 6.761940963218896e-06, "loss": 1.0043, "num_tokens": 8645493658.0, "step": 7478 }, { "epoch": 1.3331550802139036, "grad_norm": 0.1796875, "learning_rate": 6.75964918843307e-06, "loss": 1.0063, "num_tokens": 8651779455.0, "step": 7479 }, { "epoch": 1.3333333333333333, "grad_norm": 0.1826171875, "learning_rate": 6.757357766996578e-06, "loss": 1.004, "num_tokens": 8658062873.0, "step": 7480 }, { "epoch": 1.333511586452763, "grad_norm": 0.1796875, "learning_rate": 6.7550666991003585e-06, "loss": 1.0307, "num_tokens": 8664347281.0, "step": 7481 }, { "epoch": 1.3336898395721926, "grad_norm": 0.177734375, "learning_rate": 6.752775984935334e-06, "loss": 1.0018, "num_tokens": 8670620995.0, "step": 7482 }, { "epoch": 1.3338680926916222, "grad_norm": 0.1806640625, "learning_rate": 6.750485624692377e-06, "loss": 1.0108, "num_tokens": 8676873272.0, "step": 7483 }, { "epoch": 1.3340463458110516, "grad_norm": 0.1787109375, "learning_rate": 6.748195618562355e-06, "loss": 1.0405, "num_tokens": 8683156392.0, "step": 7484 }, { "epoch": 1.3342245989304813, "grad_norm": 0.1923828125, "learning_rate": 6.745905966736093e-06, "loss": 1.0512, "num_tokens": 8689375222.0, "step": 7485 }, { "epoch": 1.334402852049911, "grad_norm": 0.1865234375, "learning_rate": 6.743616669404384e-06, "loss": 1.0152, "num_tokens": 8695629652.0, "step": 7486 }, { "epoch": 1.3345811051693404, "grad_norm": 0.1748046875, "learning_rate": 6.7413277267579955e-06, "loss": 1.0089, "num_tokens": 8701904618.0, "step": 7487 }, { "epoch": 1.33475935828877, "grad_norm": 0.1787109375, "learning_rate": 6.739039138987673e-06, "loss": 1.0159, "num_tokens": 8708168085.0, "step": 7488 }, { "epoch": 1.3349376114081997, "grad_norm": 0.173828125, "learning_rate": 6.736750906284119e-06, "loss": 1.0066, "num_tokens": 8714422895.0, "step": 7489 }, { "epoch": 1.3351158645276293, "grad_norm": 0.185546875, "learning_rate": 6.734463028838012e-06, "loss": 1.0431, "num_tokens": 8720686977.0, "step": 7490 }, { "epoch": 1.3352941176470587, "grad_norm": 0.17578125, "learning_rate": 6.7321755068400055e-06, "loss": 1.0237, "num_tokens": 8726970706.0, "step": 7491 }, { "epoch": 1.3354723707664884, "grad_norm": 0.185546875, "learning_rate": 6.729888340480716e-06, "loss": 1.0216, "num_tokens": 8733252551.0, "step": 7492 }, { "epoch": 1.335650623885918, "grad_norm": 0.17578125, "learning_rate": 6.727601529950733e-06, "loss": 1.0146, "num_tokens": 8739534345.0, "step": 7493 }, { "epoch": 1.3358288770053477, "grad_norm": 0.1865234375, "learning_rate": 6.725315075440616e-06, "loss": 1.0332, "num_tokens": 8745791449.0, "step": 7494 }, { "epoch": 1.3360071301247771, "grad_norm": 0.171875, "learning_rate": 6.7230289771409e-06, "loss": 1.0192, "num_tokens": 8752031483.0, "step": 7495 }, { "epoch": 1.3361853832442068, "grad_norm": 0.1796875, "learning_rate": 6.720743235242085e-06, "loss": 1.0021, "num_tokens": 8758313174.0, "step": 7496 }, { "epoch": 1.3363636363636364, "grad_norm": 0.1796875, "learning_rate": 6.718457849934641e-06, "loss": 1.0214, "num_tokens": 8764595522.0, "step": 7497 }, { "epoch": 1.3365418894830658, "grad_norm": 0.173828125, "learning_rate": 6.716172821409008e-06, "loss": 1.0152, "num_tokens": 8770861678.0, "step": 7498 }, { "epoch": 1.3367201426024955, "grad_norm": 0.1787109375, "learning_rate": 6.713888149855603e-06, "loss": 1.0089, "num_tokens": 8777117845.0, "step": 7499 }, { "epoch": 1.3368983957219251, "grad_norm": 0.1796875, "learning_rate": 6.711603835464802e-06, "loss": 1.0275, "num_tokens": 8783401557.0, "step": 7500 }, { "epoch": 1.3370766488413548, "grad_norm": 0.17578125, "learning_rate": 6.7093198784269594e-06, "loss": 0.9995, "num_tokens": 8789685741.0, "step": 7501 }, { "epoch": 1.3372549019607844, "grad_norm": 0.1787109375, "learning_rate": 6.7070362789323995e-06, "loss": 1.0405, "num_tokens": 8795967763.0, "step": 7502 }, { "epoch": 1.3374331550802139, "grad_norm": 0.1796875, "learning_rate": 6.704753037171416e-06, "loss": 1.0236, "num_tokens": 8802250647.0, "step": 7503 }, { "epoch": 1.3376114081996435, "grad_norm": 0.177734375, "learning_rate": 6.702470153334271e-06, "loss": 1.0173, "num_tokens": 8808512032.0, "step": 7504 }, { "epoch": 1.3377896613190732, "grad_norm": 0.185546875, "learning_rate": 6.700187627611197e-06, "loss": 1.0197, "num_tokens": 8814794473.0, "step": 7505 }, { "epoch": 1.3379679144385026, "grad_norm": 0.189453125, "learning_rate": 6.697905460192394e-06, "loss": 0.9998, "num_tokens": 8821079171.0, "step": 7506 }, { "epoch": 1.3381461675579323, "grad_norm": 0.185546875, "learning_rate": 6.695623651268041e-06, "loss": 1.0039, "num_tokens": 8827361921.0, "step": 7507 }, { "epoch": 1.338324420677362, "grad_norm": 0.1806640625, "learning_rate": 6.693342201028278e-06, "loss": 0.9942, "num_tokens": 8833646780.0, "step": 7508 }, { "epoch": 1.3385026737967913, "grad_norm": 0.1875, "learning_rate": 6.6910611096632205e-06, "loss": 1.0311, "num_tokens": 8839923807.0, "step": 7509 }, { "epoch": 1.338680926916221, "grad_norm": 0.1767578125, "learning_rate": 6.688780377362954e-06, "loss": 1.0145, "num_tokens": 8846207884.0, "step": 7510 }, { "epoch": 1.3388591800356506, "grad_norm": 0.169921875, "learning_rate": 6.68650000431753e-06, "loss": 0.9924, "num_tokens": 8852471819.0, "step": 7511 }, { "epoch": 1.3390374331550803, "grad_norm": 0.17578125, "learning_rate": 6.6842199907169695e-06, "loss": 1.0256, "num_tokens": 8858754499.0, "step": 7512 }, { "epoch": 1.33921568627451, "grad_norm": 0.177734375, "learning_rate": 6.681940336751276e-06, "loss": 1.0359, "num_tokens": 8865037931.0, "step": 7513 }, { "epoch": 1.3393939393939394, "grad_norm": 0.1845703125, "learning_rate": 6.679661042610405e-06, "loss": 1.0099, "num_tokens": 8871322781.0, "step": 7514 }, { "epoch": 1.339572192513369, "grad_norm": 0.17578125, "learning_rate": 6.677382108484289e-06, "loss": 1.0423, "num_tokens": 8877575890.0, "step": 7515 }, { "epoch": 1.3397504456327987, "grad_norm": 0.181640625, "learning_rate": 6.675103534562839e-06, "loss": 1.0016, "num_tokens": 8883822656.0, "step": 7516 }, { "epoch": 1.339928698752228, "grad_norm": 0.1806640625, "learning_rate": 6.6728253210359275e-06, "loss": 0.9905, "num_tokens": 8890106512.0, "step": 7517 }, { "epoch": 1.3401069518716577, "grad_norm": 0.1787109375, "learning_rate": 6.670547468093396e-06, "loss": 1.0249, "num_tokens": 8896391612.0, "step": 7518 }, { "epoch": 1.3402852049910874, "grad_norm": 0.1708984375, "learning_rate": 6.668269975925056e-06, "loss": 1.0084, "num_tokens": 8902617539.0, "step": 7519 }, { "epoch": 1.3404634581105168, "grad_norm": 0.1767578125, "learning_rate": 6.665992844720699e-06, "loss": 1.0087, "num_tokens": 8908901423.0, "step": 7520 }, { "epoch": 1.3406417112299465, "grad_norm": 0.1796875, "learning_rate": 6.663716074670078e-06, "loss": 1.016, "num_tokens": 8915181807.0, "step": 7521 }, { "epoch": 1.3408199643493761, "grad_norm": 0.1826171875, "learning_rate": 6.6614396659629076e-06, "loss": 1.0277, "num_tokens": 8921464063.0, "step": 7522 }, { "epoch": 1.3409982174688058, "grad_norm": 0.189453125, "learning_rate": 6.659163618788889e-06, "loss": 0.9978, "num_tokens": 8927719109.0, "step": 7523 }, { "epoch": 1.3411764705882354, "grad_norm": 0.1787109375, "learning_rate": 6.656887933337686e-06, "loss": 1.0083, "num_tokens": 8934002294.0, "step": 7524 }, { "epoch": 1.3413547237076648, "grad_norm": 0.1796875, "learning_rate": 6.654612609798929e-06, "loss": 1.0033, "num_tokens": 8940269901.0, "step": 7525 }, { "epoch": 1.3415329768270945, "grad_norm": 0.181640625, "learning_rate": 6.652337648362219e-06, "loss": 1.0115, "num_tokens": 8946535878.0, "step": 7526 }, { "epoch": 1.3417112299465241, "grad_norm": 0.177734375, "learning_rate": 6.6500630492171345e-06, "loss": 1.01, "num_tokens": 8952777308.0, "step": 7527 }, { "epoch": 1.3418894830659536, "grad_norm": 0.1767578125, "learning_rate": 6.647788812553217e-06, "loss": 1.0006, "num_tokens": 8959035331.0, "step": 7528 }, { "epoch": 1.3420677361853832, "grad_norm": 0.1796875, "learning_rate": 6.64551493855998e-06, "loss": 1.0402, "num_tokens": 8965287574.0, "step": 7529 }, { "epoch": 1.3422459893048129, "grad_norm": 0.1787109375, "learning_rate": 6.643241427426901e-06, "loss": 1.0248, "num_tokens": 8971564494.0, "step": 7530 }, { "epoch": 1.3424242424242423, "grad_norm": 0.181640625, "learning_rate": 6.640968279343438e-06, "loss": 1.0474, "num_tokens": 8977839814.0, "step": 7531 }, { "epoch": 1.342602495543672, "grad_norm": 0.18359375, "learning_rate": 6.638695494499008e-06, "loss": 1.0389, "num_tokens": 8984110986.0, "step": 7532 }, { "epoch": 1.3427807486631016, "grad_norm": 0.1865234375, "learning_rate": 6.636423073083002e-06, "loss": 1.0261, "num_tokens": 8990396058.0, "step": 7533 }, { "epoch": 1.3429590017825312, "grad_norm": 0.189453125, "learning_rate": 6.6341510152847875e-06, "loss": 1.0353, "num_tokens": 8996680336.0, "step": 7534 }, { "epoch": 1.343137254901961, "grad_norm": 0.1767578125, "learning_rate": 6.631879321293692e-06, "loss": 1.0307, "num_tokens": 9002963695.0, "step": 7535 }, { "epoch": 1.3433155080213903, "grad_norm": 0.1748046875, "learning_rate": 6.629607991299016e-06, "loss": 1.0096, "num_tokens": 9009246195.0, "step": 7536 }, { "epoch": 1.34349376114082, "grad_norm": 0.177734375, "learning_rate": 6.627337025490032e-06, "loss": 1.0183, "num_tokens": 9015513232.0, "step": 7537 }, { "epoch": 1.3436720142602496, "grad_norm": 0.1767578125, "learning_rate": 6.625066424055978e-06, "loss": 0.9908, "num_tokens": 9021783475.0, "step": 7538 }, { "epoch": 1.343850267379679, "grad_norm": 0.1689453125, "learning_rate": 6.6227961871860645e-06, "loss": 1.0274, "num_tokens": 9028066111.0, "step": 7539 }, { "epoch": 1.3440285204991087, "grad_norm": 0.1748046875, "learning_rate": 6.620526315069466e-06, "loss": 1.0291, "num_tokens": 9034349894.0, "step": 7540 }, { "epoch": 1.3442067736185384, "grad_norm": 0.185546875, "learning_rate": 6.6182568078953404e-06, "loss": 1.0202, "num_tokens": 9040628907.0, "step": 7541 }, { "epoch": 1.3443850267379678, "grad_norm": 0.177734375, "learning_rate": 6.6159876658528e-06, "loss": 1.0038, "num_tokens": 9046914287.0, "step": 7542 }, { "epoch": 1.3445632798573974, "grad_norm": 0.185546875, "learning_rate": 6.613718889130935e-06, "loss": 1.0243, "num_tokens": 9053192307.0, "step": 7543 }, { "epoch": 1.344741532976827, "grad_norm": 0.1728515625, "learning_rate": 6.611450477918804e-06, "loss": 1.012, "num_tokens": 9059449164.0, "step": 7544 }, { "epoch": 1.3449197860962567, "grad_norm": 0.1787109375, "learning_rate": 6.609182432405432e-06, "loss": 0.9702, "num_tokens": 9065685081.0, "step": 7545 }, { "epoch": 1.3450980392156864, "grad_norm": 0.18359375, "learning_rate": 6.606914752779816e-06, "loss": 1.0253, "num_tokens": 9071945485.0, "step": 7546 }, { "epoch": 1.3452762923351158, "grad_norm": 0.189453125, "learning_rate": 6.604647439230918e-06, "loss": 1.0103, "num_tokens": 9078193929.0, "step": 7547 }, { "epoch": 1.3454545454545455, "grad_norm": 0.1787109375, "learning_rate": 6.602380491947684e-06, "loss": 1.0331, "num_tokens": 9084477573.0, "step": 7548 }, { "epoch": 1.345632798573975, "grad_norm": 0.185546875, "learning_rate": 6.600113911119014e-06, "loss": 1.0252, "num_tokens": 9090705537.0, "step": 7549 }, { "epoch": 1.3458110516934045, "grad_norm": 0.177734375, "learning_rate": 6.597847696933781e-06, "loss": 0.9848, "num_tokens": 9096988205.0, "step": 7550 }, { "epoch": 1.3459893048128342, "grad_norm": 0.181640625, "learning_rate": 6.595581849580828e-06, "loss": 1.0302, "num_tokens": 9103272590.0, "step": 7551 }, { "epoch": 1.3461675579322638, "grad_norm": 0.1826171875, "learning_rate": 6.5933163692489765e-06, "loss": 1.039, "num_tokens": 9109544566.0, "step": 7552 }, { "epoch": 1.3463458110516935, "grad_norm": 0.17578125, "learning_rate": 6.591051256127e-06, "loss": 0.9974, "num_tokens": 9115827282.0, "step": 7553 }, { "epoch": 1.346524064171123, "grad_norm": 0.177734375, "learning_rate": 6.588786510403653e-06, "loss": 1.0576, "num_tokens": 9122111723.0, "step": 7554 }, { "epoch": 1.3467023172905526, "grad_norm": 0.1748046875, "learning_rate": 6.586522132267663e-06, "loss": 0.9998, "num_tokens": 9128395455.0, "step": 7555 }, { "epoch": 1.3468805704099822, "grad_norm": 0.1728515625, "learning_rate": 6.584258121907714e-06, "loss": 1.0127, "num_tokens": 9134678892.0, "step": 7556 }, { "epoch": 1.3470588235294119, "grad_norm": 0.1767578125, "learning_rate": 6.5819944795124725e-06, "loss": 1.0172, "num_tokens": 9140962584.0, "step": 7557 }, { "epoch": 1.3472370766488413, "grad_norm": 0.1826171875, "learning_rate": 6.579731205270561e-06, "loss": 0.9862, "num_tokens": 9147226569.0, "step": 7558 }, { "epoch": 1.347415329768271, "grad_norm": 0.1826171875, "learning_rate": 6.577468299370589e-06, "loss": 1.0016, "num_tokens": 9153508899.0, "step": 7559 }, { "epoch": 1.3475935828877006, "grad_norm": 0.173828125, "learning_rate": 6.575205762001115e-06, "loss": 0.9932, "num_tokens": 9159793007.0, "step": 7560 }, { "epoch": 1.34777183600713, "grad_norm": 0.1787109375, "learning_rate": 6.57294359335068e-06, "loss": 1.0257, "num_tokens": 9166078202.0, "step": 7561 }, { "epoch": 1.3479500891265597, "grad_norm": 0.181640625, "learning_rate": 6.5706817936077935e-06, "loss": 1.013, "num_tokens": 9172341499.0, "step": 7562 }, { "epoch": 1.3481283422459893, "grad_norm": 0.17578125, "learning_rate": 6.56842036296093e-06, "loss": 1.0136, "num_tokens": 9178603698.0, "step": 7563 }, { "epoch": 1.348306595365419, "grad_norm": 0.1767578125, "learning_rate": 6.566159301598535e-06, "loss": 1.0144, "num_tokens": 9184841850.0, "step": 7564 }, { "epoch": 1.3484848484848486, "grad_norm": 0.177734375, "learning_rate": 6.563898609709019e-06, "loss": 1.0019, "num_tokens": 9191106293.0, "step": 7565 }, { "epoch": 1.348663101604278, "grad_norm": 0.17578125, "learning_rate": 6.561638287480774e-06, "loss": 1.0073, "num_tokens": 9197374362.0, "step": 7566 }, { "epoch": 1.3488413547237077, "grad_norm": 0.177734375, "learning_rate": 6.559378335102152e-06, "loss": 1.0201, "num_tokens": 9203608674.0, "step": 7567 }, { "epoch": 1.3490196078431373, "grad_norm": 0.171875, "learning_rate": 6.557118752761466e-06, "loss": 1.0087, "num_tokens": 9209890692.0, "step": 7568 }, { "epoch": 1.3491978609625668, "grad_norm": 0.1728515625, "learning_rate": 6.554859540647019e-06, "loss": 1.0072, "num_tokens": 9216171596.0, "step": 7569 }, { "epoch": 1.3493761140819964, "grad_norm": 0.173828125, "learning_rate": 6.552600698947064e-06, "loss": 1.0083, "num_tokens": 9222450518.0, "step": 7570 }, { "epoch": 1.349554367201426, "grad_norm": 0.181640625, "learning_rate": 6.550342227849833e-06, "loss": 1.03, "num_tokens": 9228731450.0, "step": 7571 }, { "epoch": 1.3497326203208555, "grad_norm": 0.1787109375, "learning_rate": 6.548084127543524e-06, "loss": 1.0037, "num_tokens": 9234981097.0, "step": 7572 }, { "epoch": 1.3499108734402852, "grad_norm": 0.1806640625, "learning_rate": 6.545826398216308e-06, "loss": 1.0072, "num_tokens": 9241255589.0, "step": 7573 }, { "epoch": 1.3500891265597148, "grad_norm": 0.171875, "learning_rate": 6.543569040056322e-06, "loss": 1.0308, "num_tokens": 9247487226.0, "step": 7574 }, { "epoch": 1.3502673796791445, "grad_norm": 0.1787109375, "learning_rate": 6.541312053251669e-06, "loss": 1.0245, "num_tokens": 9253769792.0, "step": 7575 }, { "epoch": 1.350445632798574, "grad_norm": 0.1708984375, "learning_rate": 6.5390554379904244e-06, "loss": 1.0102, "num_tokens": 9260054699.0, "step": 7576 }, { "epoch": 1.3506238859180035, "grad_norm": 0.1767578125, "learning_rate": 6.536799194460636e-06, "loss": 1.0116, "num_tokens": 9266321549.0, "step": 7577 }, { "epoch": 1.3508021390374332, "grad_norm": 0.1748046875, "learning_rate": 6.5345433228503145e-06, "loss": 1.0451, "num_tokens": 9272581354.0, "step": 7578 }, { "epoch": 1.3509803921568628, "grad_norm": 0.1806640625, "learning_rate": 6.5322878233474365e-06, "loss": 1.0368, "num_tokens": 9278832772.0, "step": 7579 }, { "epoch": 1.3511586452762923, "grad_norm": 0.18359375, "learning_rate": 6.530032696139964e-06, "loss": 0.979, "num_tokens": 9285117088.0, "step": 7580 }, { "epoch": 1.351336898395722, "grad_norm": 0.18359375, "learning_rate": 6.527777941415812e-06, "loss": 1.0401, "num_tokens": 9291391879.0, "step": 7581 }, { "epoch": 1.3515151515151516, "grad_norm": 0.1748046875, "learning_rate": 6.525523559362871e-06, "loss": 1.0196, "num_tokens": 9297663537.0, "step": 7582 }, { "epoch": 1.351693404634581, "grad_norm": 0.1826171875, "learning_rate": 6.523269550168997e-06, "loss": 1.0236, "num_tokens": 9303920548.0, "step": 7583 }, { "epoch": 1.3518716577540106, "grad_norm": 0.1806640625, "learning_rate": 6.521015914022018e-06, "loss": 0.9984, "num_tokens": 9310204843.0, "step": 7584 }, { "epoch": 1.3520499108734403, "grad_norm": 0.173828125, "learning_rate": 6.518762651109729e-06, "loss": 1.0235, "num_tokens": 9316468132.0, "step": 7585 }, { "epoch": 1.35222816399287, "grad_norm": 0.1748046875, "learning_rate": 6.5165097616198955e-06, "loss": 1.0199, "num_tokens": 9322752628.0, "step": 7586 }, { "epoch": 1.3524064171122996, "grad_norm": 0.1728515625, "learning_rate": 6.5142572457402546e-06, "loss": 0.9884, "num_tokens": 9329019512.0, "step": 7587 }, { "epoch": 1.352584670231729, "grad_norm": 0.1748046875, "learning_rate": 6.512005103658503e-06, "loss": 1.0387, "num_tokens": 9335303967.0, "step": 7588 }, { "epoch": 1.3527629233511587, "grad_norm": 0.1796875, "learning_rate": 6.509753335562319e-06, "loss": 1.0201, "num_tokens": 9341545840.0, "step": 7589 }, { "epoch": 1.3529411764705883, "grad_norm": 0.1845703125, "learning_rate": 6.507501941639337e-06, "loss": 1.0398, "num_tokens": 9347815803.0, "step": 7590 }, { "epoch": 1.3531194295900177, "grad_norm": 0.1826171875, "learning_rate": 6.505250922077169e-06, "loss": 1.0066, "num_tokens": 9354099838.0, "step": 7591 }, { "epoch": 1.3532976827094474, "grad_norm": 0.173828125, "learning_rate": 6.503000277063391e-06, "loss": 1.0172, "num_tokens": 9360354104.0, "step": 7592 }, { "epoch": 1.353475935828877, "grad_norm": 0.17578125, "learning_rate": 6.50075000678555e-06, "loss": 1.0097, "num_tokens": 9366634405.0, "step": 7593 }, { "epoch": 1.3536541889483065, "grad_norm": 0.1796875, "learning_rate": 6.498500111431164e-06, "loss": 1.0388, "num_tokens": 9372890708.0, "step": 7594 }, { "epoch": 1.3538324420677361, "grad_norm": 0.1767578125, "learning_rate": 6.496250591187716e-06, "loss": 1.0232, "num_tokens": 9379145082.0, "step": 7595 }, { "epoch": 1.3540106951871658, "grad_norm": 0.1796875, "learning_rate": 6.494001446242659e-06, "loss": 1.041, "num_tokens": 9385428769.0, "step": 7596 }, { "epoch": 1.3541889483065954, "grad_norm": 0.1826171875, "learning_rate": 6.491752676783411e-06, "loss": 1.0342, "num_tokens": 9391686849.0, "step": 7597 }, { "epoch": 1.354367201426025, "grad_norm": 0.181640625, "learning_rate": 6.489504282997372e-06, "loss": 1.0147, "num_tokens": 9397970414.0, "step": 7598 }, { "epoch": 1.3545454545454545, "grad_norm": 0.1767578125, "learning_rate": 6.4872562650718925e-06, "loss": 1.045, "num_tokens": 9404233042.0, "step": 7599 }, { "epoch": 1.3547237076648841, "grad_norm": 0.177734375, "learning_rate": 6.4850086231942986e-06, "loss": 1.0454, "num_tokens": 9410518819.0, "step": 7600 }, { "epoch": 1.3549019607843138, "grad_norm": 0.177734375, "learning_rate": 6.482761357551896e-06, "loss": 0.9884, "num_tokens": 9416780752.0, "step": 7601 }, { "epoch": 1.3550802139037432, "grad_norm": 0.1767578125, "learning_rate": 6.480514468331943e-06, "loss": 0.9978, "num_tokens": 9423063184.0, "step": 7602 }, { "epoch": 1.3552584670231729, "grad_norm": 0.177734375, "learning_rate": 6.4782679557216765e-06, "loss": 1.031, "num_tokens": 9429345980.0, "step": 7603 }, { "epoch": 1.3554367201426025, "grad_norm": 0.173828125, "learning_rate": 6.476021819908294e-06, "loss": 1.0336, "num_tokens": 9435630125.0, "step": 7604 }, { "epoch": 1.355614973262032, "grad_norm": 0.181640625, "learning_rate": 6.473776061078976e-06, "loss": 1.0321, "num_tokens": 9441870856.0, "step": 7605 }, { "epoch": 1.3557932263814616, "grad_norm": 0.1748046875, "learning_rate": 6.471530679420852e-06, "loss": 1.0132, "num_tokens": 9448069003.0, "step": 7606 }, { "epoch": 1.3559714795008913, "grad_norm": 0.17578125, "learning_rate": 6.469285675121033e-06, "loss": 1.0114, "num_tokens": 9454354060.0, "step": 7607 }, { "epoch": 1.356149732620321, "grad_norm": 0.1826171875, "learning_rate": 6.467041048366598e-06, "loss": 1.0155, "num_tokens": 9460636797.0, "step": 7608 }, { "epoch": 1.3563279857397506, "grad_norm": 0.1767578125, "learning_rate": 6.464796799344593e-06, "loss": 1.0203, "num_tokens": 9466884703.0, "step": 7609 }, { "epoch": 1.35650623885918, "grad_norm": 0.1806640625, "learning_rate": 6.462552928242027e-06, "loss": 0.9972, "num_tokens": 9473134524.0, "step": 7610 }, { "epoch": 1.3566844919786096, "grad_norm": 0.1806640625, "learning_rate": 6.4603094352458854e-06, "loss": 1.0278, "num_tokens": 9479407023.0, "step": 7611 }, { "epoch": 1.3568627450980393, "grad_norm": 0.1806640625, "learning_rate": 6.458066320543119e-06, "loss": 0.9761, "num_tokens": 9485691667.0, "step": 7612 }, { "epoch": 1.3570409982174687, "grad_norm": 0.1806640625, "learning_rate": 6.455823584320653e-06, "loss": 1.0157, "num_tokens": 9491974369.0, "step": 7613 }, { "epoch": 1.3572192513368984, "grad_norm": 0.1806640625, "learning_rate": 6.453581226765359e-06, "loss": 1.0378, "num_tokens": 9498230489.0, "step": 7614 }, { "epoch": 1.357397504456328, "grad_norm": 0.177734375, "learning_rate": 6.451339248064108e-06, "loss": 1.0432, "num_tokens": 9504513770.0, "step": 7615 }, { "epoch": 1.3575757575757577, "grad_norm": 0.1708984375, "learning_rate": 6.449097648403719e-06, "loss": 1.0367, "num_tokens": 9510798637.0, "step": 7616 }, { "epoch": 1.357754010695187, "grad_norm": 0.1796875, "learning_rate": 6.446856427970986e-06, "loss": 0.9932, "num_tokens": 9517061866.0, "step": 7617 }, { "epoch": 1.3579322638146167, "grad_norm": 0.1796875, "learning_rate": 6.444615586952665e-06, "loss": 1.012, "num_tokens": 9523344768.0, "step": 7618 }, { "epoch": 1.3581105169340464, "grad_norm": 0.17578125, "learning_rate": 6.442375125535495e-06, "loss": 1.0065, "num_tokens": 9529604535.0, "step": 7619 }, { "epoch": 1.358288770053476, "grad_norm": 0.1806640625, "learning_rate": 6.440135043906168e-06, "loss": 1.0335, "num_tokens": 9535846316.0, "step": 7620 }, { "epoch": 1.3584670231729055, "grad_norm": 0.1728515625, "learning_rate": 6.4378953422513545e-06, "loss": 1.0221, "num_tokens": 9542122745.0, "step": 7621 }, { "epoch": 1.3586452762923351, "grad_norm": 0.181640625, "learning_rate": 6.4356560207576856e-06, "loss": 1.0476, "num_tokens": 9548386493.0, "step": 7622 }, { "epoch": 1.3588235294117648, "grad_norm": 0.18359375, "learning_rate": 6.433417079611767e-06, "loss": 1.0291, "num_tokens": 9554648817.0, "step": 7623 }, { "epoch": 1.3590017825311942, "grad_norm": 0.1796875, "learning_rate": 6.431178519000168e-06, "loss": 1.064, "num_tokens": 9560906649.0, "step": 7624 }, { "epoch": 1.3591800356506238, "grad_norm": 0.1796875, "learning_rate": 6.428940339109425e-06, "loss": 1.0164, "num_tokens": 9567167773.0, "step": 7625 }, { "epoch": 1.3593582887700535, "grad_norm": 0.1796875, "learning_rate": 6.4267025401260556e-06, "loss": 1.0728, "num_tokens": 9573435212.0, "step": 7626 }, { "epoch": 1.3595365418894831, "grad_norm": 0.17578125, "learning_rate": 6.424465122236529e-06, "loss": 1.0186, "num_tokens": 9579660850.0, "step": 7627 }, { "epoch": 1.3597147950089128, "grad_norm": 0.1728515625, "learning_rate": 6.4222280856272925e-06, "loss": 0.9974, "num_tokens": 9585929814.0, "step": 7628 }, { "epoch": 1.3598930481283422, "grad_norm": 0.1787109375, "learning_rate": 6.419991430484757e-06, "loss": 1.0023, "num_tokens": 9592186041.0, "step": 7629 }, { "epoch": 1.3600713012477719, "grad_norm": 0.1708984375, "learning_rate": 6.4177551569953045e-06, "loss": 1.0514, "num_tokens": 9598469322.0, "step": 7630 }, { "epoch": 1.3602495543672015, "grad_norm": 0.1767578125, "learning_rate": 6.415519265345283e-06, "loss": 1.0513, "num_tokens": 9604634705.0, "step": 7631 }, { "epoch": 1.360427807486631, "grad_norm": 0.1767578125, "learning_rate": 6.413283755721009e-06, "loss": 1.0126, "num_tokens": 9610915489.0, "step": 7632 }, { "epoch": 1.3606060606060606, "grad_norm": 0.1767578125, "learning_rate": 6.411048628308771e-06, "loss": 1.0359, "num_tokens": 9617135486.0, "step": 7633 }, { "epoch": 1.3607843137254902, "grad_norm": 0.1728515625, "learning_rate": 6.408813883294824e-06, "loss": 1.0067, "num_tokens": 9623417289.0, "step": 7634 }, { "epoch": 1.3609625668449197, "grad_norm": 0.1845703125, "learning_rate": 6.406579520865386e-06, "loss": 1.0517, "num_tokens": 9629669270.0, "step": 7635 }, { "epoch": 1.3611408199643493, "grad_norm": 0.1826171875, "learning_rate": 6.404345541206647e-06, "loss": 1.008, "num_tokens": 9635909486.0, "step": 7636 }, { "epoch": 1.361319073083779, "grad_norm": 0.1748046875, "learning_rate": 6.402111944504767e-06, "loss": 1.0156, "num_tokens": 9642193296.0, "step": 7637 }, { "epoch": 1.3614973262032086, "grad_norm": 0.1767578125, "learning_rate": 6.39987873094587e-06, "loss": 1.0314, "num_tokens": 9648444681.0, "step": 7638 }, { "epoch": 1.3616755793226383, "grad_norm": 0.169921875, "learning_rate": 6.397645900716047e-06, "loss": 0.9902, "num_tokens": 9654728741.0, "step": 7639 }, { "epoch": 1.3618538324420677, "grad_norm": 0.1767578125, "learning_rate": 6.39541345400137e-06, "loss": 1.011, "num_tokens": 9661012635.0, "step": 7640 }, { "epoch": 1.3620320855614974, "grad_norm": 0.1806640625, "learning_rate": 6.393181390987864e-06, "loss": 1.0011, "num_tokens": 9667296586.0, "step": 7641 }, { "epoch": 1.362210338680927, "grad_norm": 0.1875, "learning_rate": 6.3909497118615245e-06, "loss": 1.0296, "num_tokens": 9673581315.0, "step": 7642 }, { "epoch": 1.3623885918003564, "grad_norm": 0.171875, "learning_rate": 6.388718416808319e-06, "loss": 1.0124, "num_tokens": 9679864133.0, "step": 7643 }, { "epoch": 1.362566844919786, "grad_norm": 0.17578125, "learning_rate": 6.386487506014189e-06, "loss": 1.0267, "num_tokens": 9686146059.0, "step": 7644 }, { "epoch": 1.3627450980392157, "grad_norm": 0.1787109375, "learning_rate": 6.384256979665027e-06, "loss": 0.9999, "num_tokens": 9692430874.0, "step": 7645 }, { "epoch": 1.3629233511586452, "grad_norm": 0.181640625, "learning_rate": 6.382026837946707e-06, "loss": 0.9997, "num_tokens": 9698685937.0, "step": 7646 }, { "epoch": 1.3631016042780748, "grad_norm": 0.1787109375, "learning_rate": 6.379797081045064e-06, "loss": 1.0221, "num_tokens": 9704955456.0, "step": 7647 }, { "epoch": 1.3632798573975045, "grad_norm": 0.1826171875, "learning_rate": 6.377567709145911e-06, "loss": 1.0061, "num_tokens": 9711190168.0, "step": 7648 }, { "epoch": 1.363458110516934, "grad_norm": 0.1787109375, "learning_rate": 6.375338722435019e-06, "loss": 1.034, "num_tokens": 9717468754.0, "step": 7649 }, { "epoch": 1.3636363636363638, "grad_norm": 0.1787109375, "learning_rate": 6.373110121098125e-06, "loss": 1.0142, "num_tokens": 9723753263.0, "step": 7650 }, { "epoch": 1.3638146167557932, "grad_norm": 0.17578125, "learning_rate": 6.37088190532095e-06, "loss": 1.0447, "num_tokens": 9730014380.0, "step": 7651 }, { "epoch": 1.3639928698752228, "grad_norm": 0.1806640625, "learning_rate": 6.368654075289163e-06, "loss": 1.0364, "num_tokens": 9736299256.0, "step": 7652 }, { "epoch": 1.3641711229946525, "grad_norm": 0.1767578125, "learning_rate": 6.366426631188408e-06, "loss": 1.0036, "num_tokens": 9742551158.0, "step": 7653 }, { "epoch": 1.364349376114082, "grad_norm": 0.185546875, "learning_rate": 6.3641995732043e-06, "loss": 1.0424, "num_tokens": 9748812056.0, "step": 7654 }, { "epoch": 1.3645276292335116, "grad_norm": 0.1787109375, "learning_rate": 6.361972901522426e-06, "loss": 1.026, "num_tokens": 9755092362.0, "step": 7655 }, { "epoch": 1.3647058823529412, "grad_norm": 0.17578125, "learning_rate": 6.35974661632833e-06, "loss": 1.0355, "num_tokens": 9761367702.0, "step": 7656 }, { "epoch": 1.3648841354723706, "grad_norm": 0.1865234375, "learning_rate": 6.357520717807533e-06, "loss": 1.0068, "num_tokens": 9767627760.0, "step": 7657 }, { "epoch": 1.3650623885918003, "grad_norm": 0.1865234375, "learning_rate": 6.35529520614551e-06, "loss": 1.0329, "num_tokens": 9773910760.0, "step": 7658 }, { "epoch": 1.36524064171123, "grad_norm": 0.1806640625, "learning_rate": 6.353070081527726e-06, "loss": 1.0263, "num_tokens": 9780173884.0, "step": 7659 }, { "epoch": 1.3654188948306596, "grad_norm": 0.1796875, "learning_rate": 6.350845344139593e-06, "loss": 1.0632, "num_tokens": 9786456602.0, "step": 7660 }, { "epoch": 1.3655971479500892, "grad_norm": 0.1904296875, "learning_rate": 6.348620994166496e-06, "loss": 1.0085, "num_tokens": 9792713862.0, "step": 7661 }, { "epoch": 1.3657754010695187, "grad_norm": 0.177734375, "learning_rate": 6.346397031793801e-06, "loss": 0.9824, "num_tokens": 9798996423.0, "step": 7662 }, { "epoch": 1.3659536541889483, "grad_norm": 0.1806640625, "learning_rate": 6.344173457206823e-06, "loss": 1.0124, "num_tokens": 9805278116.0, "step": 7663 }, { "epoch": 1.366131907308378, "grad_norm": 0.18359375, "learning_rate": 6.341950270590856e-06, "loss": 1.0101, "num_tokens": 9811552180.0, "step": 7664 }, { "epoch": 1.3663101604278074, "grad_norm": 0.181640625, "learning_rate": 6.3397274721311565e-06, "loss": 1.0311, "num_tokens": 9817833189.0, "step": 7665 }, { "epoch": 1.366488413547237, "grad_norm": 0.1787109375, "learning_rate": 6.337505062012953e-06, "loss": 1.0175, "num_tokens": 9824115547.0, "step": 7666 }, { "epoch": 1.3666666666666667, "grad_norm": 0.1728515625, "learning_rate": 6.3352830404214425e-06, "loss": 1.0219, "num_tokens": 9830395998.0, "step": 7667 }, { "epoch": 1.3668449197860961, "grad_norm": 0.1884765625, "learning_rate": 6.333061407541778e-06, "loss": 0.9951, "num_tokens": 9836681676.0, "step": 7668 }, { "epoch": 1.3670231729055258, "grad_norm": 0.1708984375, "learning_rate": 6.330840163559096e-06, "loss": 0.9876, "num_tokens": 9842961819.0, "step": 7669 }, { "epoch": 1.3672014260249554, "grad_norm": 0.1806640625, "learning_rate": 6.32861930865849e-06, "loss": 1.0178, "num_tokens": 9849233956.0, "step": 7670 }, { "epoch": 1.367379679144385, "grad_norm": 0.1748046875, "learning_rate": 6.326398843025026e-06, "loss": 1.0137, "num_tokens": 9855514498.0, "step": 7671 }, { "epoch": 1.3675579322638147, "grad_norm": 0.1787109375, "learning_rate": 6.324178766843729e-06, "loss": 1.0086, "num_tokens": 9861799498.0, "step": 7672 }, { "epoch": 1.3677361853832442, "grad_norm": 0.1767578125, "learning_rate": 6.321959080299611e-06, "loss": 1.0584, "num_tokens": 9868045922.0, "step": 7673 }, { "epoch": 1.3679144385026738, "grad_norm": 0.1806640625, "learning_rate": 6.31973978357763e-06, "loss": 1.0235, "num_tokens": 9874305198.0, "step": 7674 }, { "epoch": 1.3680926916221035, "grad_norm": 0.1767578125, "learning_rate": 6.3175208768627225e-06, "loss": 1.0349, "num_tokens": 9880567533.0, "step": 7675 }, { "epoch": 1.3682709447415329, "grad_norm": 0.173828125, "learning_rate": 6.315302360339791e-06, "loss": 1.0397, "num_tokens": 9886843341.0, "step": 7676 }, { "epoch": 1.3684491978609625, "grad_norm": 0.169921875, "learning_rate": 6.3130842341937045e-06, "loss": 1.0315, "num_tokens": 9893115627.0, "step": 7677 }, { "epoch": 1.3686274509803922, "grad_norm": 0.173828125, "learning_rate": 6.310866498609301e-06, "loss": 1.0397, "num_tokens": 9899375475.0, "step": 7678 }, { "epoch": 1.3688057040998218, "grad_norm": 0.17578125, "learning_rate": 6.308649153771379e-06, "loss": 0.9964, "num_tokens": 9905642921.0, "step": 7679 }, { "epoch": 1.3689839572192513, "grad_norm": 0.171875, "learning_rate": 6.306432199864721e-06, "loss": 1.0107, "num_tokens": 9911928146.0, "step": 7680 }, { "epoch": 1.369162210338681, "grad_norm": 0.1865234375, "learning_rate": 6.304215637074058e-06, "loss": 1.0173, "num_tokens": 9918172866.0, "step": 7681 }, { "epoch": 1.3693404634581106, "grad_norm": 0.1826171875, "learning_rate": 6.301999465584099e-06, "loss": 1.0444, "num_tokens": 9924457280.0, "step": 7682 }, { "epoch": 1.3695187165775402, "grad_norm": 0.1728515625, "learning_rate": 6.299783685579519e-06, "loss": 1.0019, "num_tokens": 9930727300.0, "step": 7683 }, { "epoch": 1.3696969696969696, "grad_norm": 0.1806640625, "learning_rate": 6.297568297244958e-06, "loss": 1.0174, "num_tokens": 9937011005.0, "step": 7684 }, { "epoch": 1.3698752228163993, "grad_norm": 0.173828125, "learning_rate": 6.295353300765026e-06, "loss": 1.0149, "num_tokens": 9943293786.0, "step": 7685 }, { "epoch": 1.370053475935829, "grad_norm": 0.1923828125, "learning_rate": 6.293138696324295e-06, "loss": 1.0237, "num_tokens": 9949548243.0, "step": 7686 }, { "epoch": 1.3702317290552584, "grad_norm": 0.1826171875, "learning_rate": 6.2909244841073145e-06, "loss": 1.0149, "num_tokens": 9955830801.0, "step": 7687 }, { "epoch": 1.370409982174688, "grad_norm": 0.17578125, "learning_rate": 6.288710664298592e-06, "loss": 1.0157, "num_tokens": 9962064819.0, "step": 7688 }, { "epoch": 1.3705882352941177, "grad_norm": 0.1767578125, "learning_rate": 6.2864972370826085e-06, "loss": 1.0244, "num_tokens": 9968347799.0, "step": 7689 }, { "epoch": 1.3707664884135473, "grad_norm": 0.17578125, "learning_rate": 6.2842842026438045e-06, "loss": 1.021, "num_tokens": 9974597758.0, "step": 7690 }, { "epoch": 1.370944741532977, "grad_norm": 0.1767578125, "learning_rate": 6.282071561166596e-06, "loss": 1.0099, "num_tokens": 9980880370.0, "step": 7691 }, { "epoch": 1.3711229946524064, "grad_norm": 0.1787109375, "learning_rate": 6.279859312835364e-06, "loss": 1.0094, "num_tokens": 9987145271.0, "step": 7692 }, { "epoch": 1.371301247771836, "grad_norm": 0.1787109375, "learning_rate": 6.277647457834448e-06, "loss": 1.0078, "num_tokens": 9993404014.0, "step": 7693 }, { "epoch": 1.3714795008912657, "grad_norm": 0.1806640625, "learning_rate": 6.275435996348173e-06, "loss": 0.9978, "num_tokens": 9999686943.0, "step": 7694 }, { "epoch": 1.3716577540106951, "grad_norm": 0.1767578125, "learning_rate": 6.273224928560815e-06, "loss": 1.0262, "num_tokens": 10005970073.0, "step": 7695 }, { "epoch": 1.3718360071301248, "grad_norm": 0.1845703125, "learning_rate": 6.2710142546566225e-06, "loss": 1.0359, "num_tokens": 10012252769.0, "step": 7696 }, { "epoch": 1.3720142602495544, "grad_norm": 0.185546875, "learning_rate": 6.2688039748198096e-06, "loss": 1.0115, "num_tokens": 10018493948.0, "step": 7697 }, { "epoch": 1.3721925133689838, "grad_norm": 0.181640625, "learning_rate": 6.2665940892345675e-06, "loss": 1.0399, "num_tokens": 10024767539.0, "step": 7698 }, { "epoch": 1.3723707664884135, "grad_norm": 0.1728515625, "learning_rate": 6.26438459808504e-06, "loss": 1.0062, "num_tokens": 10031035486.0, "step": 7699 }, { "epoch": 1.3725490196078431, "grad_norm": 0.1748046875, "learning_rate": 6.262175501555341e-06, "loss": 1.0036, "num_tokens": 10037320473.0, "step": 7700 }, { "epoch": 1.3727272727272728, "grad_norm": 0.1748046875, "learning_rate": 6.259966799829562e-06, "loss": 0.995, "num_tokens": 10043605019.0, "step": 7701 }, { "epoch": 1.3729055258467024, "grad_norm": 0.1748046875, "learning_rate": 6.257758493091754e-06, "loss": 1.0169, "num_tokens": 10049876152.0, "step": 7702 }, { "epoch": 1.3730837789661319, "grad_norm": 0.181640625, "learning_rate": 6.255550581525933e-06, "loss": 0.9915, "num_tokens": 10056158192.0, "step": 7703 }, { "epoch": 1.3732620320855615, "grad_norm": 0.1748046875, "learning_rate": 6.253343065316079e-06, "loss": 0.9943, "num_tokens": 10062411977.0, "step": 7704 }, { "epoch": 1.3734402852049912, "grad_norm": 0.17578125, "learning_rate": 6.251135944646163e-06, "loss": 1.0462, "num_tokens": 10068695528.0, "step": 7705 }, { "epoch": 1.3736185383244206, "grad_norm": 0.1787109375, "learning_rate": 6.248929219700086e-06, "loss": 1.0233, "num_tokens": 10074978959.0, "step": 7706 }, { "epoch": 1.3737967914438503, "grad_norm": 0.1845703125, "learning_rate": 6.246722890661741e-06, "loss": 0.9943, "num_tokens": 10081262244.0, "step": 7707 }, { "epoch": 1.37397504456328, "grad_norm": 0.17578125, "learning_rate": 6.2445169577149836e-06, "loss": 1.0193, "num_tokens": 10087449707.0, "step": 7708 }, { "epoch": 1.3741532976827093, "grad_norm": 0.1748046875, "learning_rate": 6.242311421043634e-06, "loss": 1.0235, "num_tokens": 10093725524.0, "step": 7709 }, { "epoch": 1.374331550802139, "grad_norm": 0.1708984375, "learning_rate": 6.240106280831482e-06, "loss": 0.9864, "num_tokens": 10100010408.0, "step": 7710 }, { "epoch": 1.3745098039215686, "grad_norm": 0.1787109375, "learning_rate": 6.237901537262276e-06, "loss": 1.0031, "num_tokens": 10106296127.0, "step": 7711 }, { "epoch": 1.3746880570409983, "grad_norm": 0.1708984375, "learning_rate": 6.235697190519745e-06, "loss": 1.02, "num_tokens": 10112546890.0, "step": 7712 }, { "epoch": 1.374866310160428, "grad_norm": 0.1748046875, "learning_rate": 6.233493240787578e-06, "loss": 1.0029, "num_tokens": 10118830939.0, "step": 7713 }, { "epoch": 1.3750445632798574, "grad_norm": 0.18359375, "learning_rate": 6.23128968824942e-06, "loss": 0.9972, "num_tokens": 10125114156.0, "step": 7714 }, { "epoch": 1.375222816399287, "grad_norm": 0.1767578125, "learning_rate": 6.229086533088906e-06, "loss": 1.0427, "num_tokens": 10131367120.0, "step": 7715 }, { "epoch": 1.3754010695187167, "grad_norm": 0.1796875, "learning_rate": 6.226883775489619e-06, "loss": 1.0163, "num_tokens": 10137619438.0, "step": 7716 }, { "epoch": 1.375579322638146, "grad_norm": 0.171875, "learning_rate": 6.224681415635116e-06, "loss": 1.0085, "num_tokens": 10143894506.0, "step": 7717 }, { "epoch": 1.3757575757575757, "grad_norm": 0.1845703125, "learning_rate": 6.222479453708918e-06, "loss": 1.0231, "num_tokens": 10150146869.0, "step": 7718 }, { "epoch": 1.3759358288770054, "grad_norm": 0.173828125, "learning_rate": 6.220277889894521e-06, "loss": 1.0002, "num_tokens": 10156429865.0, "step": 7719 }, { "epoch": 1.3761140819964348, "grad_norm": 0.1787109375, "learning_rate": 6.218076724375377e-06, "loss": 1.0073, "num_tokens": 10162713213.0, "step": 7720 }, { "epoch": 1.3762923351158645, "grad_norm": 0.17578125, "learning_rate": 6.215875957334912e-06, "loss": 1.0084, "num_tokens": 10168987762.0, "step": 7721 }, { "epoch": 1.3764705882352941, "grad_norm": 0.1767578125, "learning_rate": 6.213675588956516e-06, "loss": 1.021, "num_tokens": 10175250181.0, "step": 7722 }, { "epoch": 1.3766488413547238, "grad_norm": 0.1767578125, "learning_rate": 6.211475619423544e-06, "loss": 0.9887, "num_tokens": 10181533661.0, "step": 7723 }, { "epoch": 1.3768270944741534, "grad_norm": 0.181640625, "learning_rate": 6.209276048919322e-06, "loss": 1.0349, "num_tokens": 10187817150.0, "step": 7724 }, { "epoch": 1.3770053475935828, "grad_norm": 0.1787109375, "learning_rate": 6.2070768776271365e-06, "loss": 1.0283, "num_tokens": 10194074432.0, "step": 7725 }, { "epoch": 1.3771836007130125, "grad_norm": 0.1787109375, "learning_rate": 6.204878105730251e-06, "loss": 1.0315, "num_tokens": 10200323592.0, "step": 7726 }, { "epoch": 1.3773618538324421, "grad_norm": 0.177734375, "learning_rate": 6.202679733411887e-06, "loss": 1.0017, "num_tokens": 10206565208.0, "step": 7727 }, { "epoch": 1.3775401069518716, "grad_norm": 0.1767578125, "learning_rate": 6.200481760855237e-06, "loss": 1.0358, "num_tokens": 10212832342.0, "step": 7728 }, { "epoch": 1.3777183600713012, "grad_norm": 0.1826171875, "learning_rate": 6.198284188243455e-06, "loss": 1.0269, "num_tokens": 10219100772.0, "step": 7729 }, { "epoch": 1.3778966131907309, "grad_norm": 0.17578125, "learning_rate": 6.196087015759669e-06, "loss": 1.0162, "num_tokens": 10225325117.0, "step": 7730 }, { "epoch": 1.3780748663101603, "grad_norm": 0.1748046875, "learning_rate": 6.193890243586967e-06, "loss": 1.0312, "num_tokens": 10231605599.0, "step": 7731 }, { "epoch": 1.37825311942959, "grad_norm": 0.181640625, "learning_rate": 6.191693871908404e-06, "loss": 1.0352, "num_tokens": 10237890167.0, "step": 7732 }, { "epoch": 1.3784313725490196, "grad_norm": 0.1806640625, "learning_rate": 6.189497900907011e-06, "loss": 0.9966, "num_tokens": 10244172142.0, "step": 7733 }, { "epoch": 1.3786096256684492, "grad_norm": 0.177734375, "learning_rate": 6.187302330765774e-06, "loss": 1.0035, "num_tokens": 10250457513.0, "step": 7734 }, { "epoch": 1.378787878787879, "grad_norm": 0.1748046875, "learning_rate": 6.185107161667654e-06, "loss": 1.0224, "num_tokens": 10256742975.0, "step": 7735 }, { "epoch": 1.3789661319073083, "grad_norm": 0.1767578125, "learning_rate": 6.182912393795571e-06, "loss": 1.0066, "num_tokens": 10263002383.0, "step": 7736 }, { "epoch": 1.379144385026738, "grad_norm": 0.1796875, "learning_rate": 6.1807180273324175e-06, "loss": 1.0221, "num_tokens": 10269286546.0, "step": 7737 }, { "epoch": 1.3793226381461676, "grad_norm": 0.1748046875, "learning_rate": 6.178524062461049e-06, "loss": 1.0129, "num_tokens": 10275542737.0, "step": 7738 }, { "epoch": 1.379500891265597, "grad_norm": 0.185546875, "learning_rate": 6.176330499364287e-06, "loss": 1.0398, "num_tokens": 10281780020.0, "step": 7739 }, { "epoch": 1.3796791443850267, "grad_norm": 0.177734375, "learning_rate": 6.174137338224928e-06, "loss": 1.0124, "num_tokens": 10288057117.0, "step": 7740 }, { "epoch": 1.3798573975044564, "grad_norm": 0.17578125, "learning_rate": 6.171944579225726e-06, "loss": 1.0158, "num_tokens": 10294342531.0, "step": 7741 }, { "epoch": 1.380035650623886, "grad_norm": 0.17578125, "learning_rate": 6.169752222549404e-06, "loss": 1.0269, "num_tokens": 10300627941.0, "step": 7742 }, { "epoch": 1.3802139037433154, "grad_norm": 0.16796875, "learning_rate": 6.167560268378646e-06, "loss": 1.0013, "num_tokens": 10306911849.0, "step": 7743 }, { "epoch": 1.380392156862745, "grad_norm": 0.177734375, "learning_rate": 6.165368716896119e-06, "loss": 1.0161, "num_tokens": 10313197311.0, "step": 7744 }, { "epoch": 1.3805704099821747, "grad_norm": 0.173828125, "learning_rate": 6.1631775682844375e-06, "loss": 1.0531, "num_tokens": 10319435264.0, "step": 7745 }, { "epoch": 1.3807486631016044, "grad_norm": 0.1787109375, "learning_rate": 6.160986822726188e-06, "loss": 1.0285, "num_tokens": 10325704004.0, "step": 7746 }, { "epoch": 1.3809269162210338, "grad_norm": 0.173828125, "learning_rate": 6.158796480403932e-06, "loss": 1.0028, "num_tokens": 10331979315.0, "step": 7747 }, { "epoch": 1.3811051693404635, "grad_norm": 0.17578125, "learning_rate": 6.156606541500192e-06, "loss": 1.0146, "num_tokens": 10338247020.0, "step": 7748 }, { "epoch": 1.381283422459893, "grad_norm": 0.1806640625, "learning_rate": 6.154417006197453e-06, "loss": 1.0029, "num_tokens": 10344486998.0, "step": 7749 }, { "epoch": 1.3814616755793225, "grad_norm": 0.17578125, "learning_rate": 6.152227874678164e-06, "loss": 1.0027, "num_tokens": 10350770812.0, "step": 7750 }, { "epoch": 1.3816399286987522, "grad_norm": 0.171875, "learning_rate": 6.150039147124759e-06, "loss": 0.9762, "num_tokens": 10357053861.0, "step": 7751 }, { "epoch": 1.3818181818181818, "grad_norm": 0.1796875, "learning_rate": 6.147850823719615e-06, "loss": 1.0125, "num_tokens": 10363319173.0, "step": 7752 }, { "epoch": 1.3819964349376115, "grad_norm": 0.177734375, "learning_rate": 6.145662904645082e-06, "loss": 1.0502, "num_tokens": 10369601787.0, "step": 7753 }, { "epoch": 1.3821746880570411, "grad_norm": 0.181640625, "learning_rate": 6.143475390083493e-06, "loss": 1.0147, "num_tokens": 10375855434.0, "step": 7754 }, { "epoch": 1.3823529411764706, "grad_norm": 0.166015625, "learning_rate": 6.141288280217124e-06, "loss": 1.0318, "num_tokens": 10382120800.0, "step": 7755 }, { "epoch": 1.3825311942959002, "grad_norm": 0.177734375, "learning_rate": 6.139101575228231e-06, "loss": 1.0275, "num_tokens": 10388391617.0, "step": 7756 }, { "epoch": 1.3827094474153299, "grad_norm": 0.17578125, "learning_rate": 6.136915275299029e-06, "loss": 1.0224, "num_tokens": 10394675176.0, "step": 7757 }, { "epoch": 1.3828877005347593, "grad_norm": 0.1904296875, "learning_rate": 6.134729380611708e-06, "loss": 1.0355, "num_tokens": 10400945240.0, "step": 7758 }, { "epoch": 1.383065953654189, "grad_norm": 0.173828125, "learning_rate": 6.132543891348421e-06, "loss": 1.0137, "num_tokens": 10407149965.0, "step": 7759 }, { "epoch": 1.3832442067736186, "grad_norm": 0.1806640625, "learning_rate": 6.130358807691274e-06, "loss": 0.9906, "num_tokens": 10413403584.0, "step": 7760 }, { "epoch": 1.383422459893048, "grad_norm": 0.171875, "learning_rate": 6.128174129822359e-06, "loss": 0.9992, "num_tokens": 10419688754.0, "step": 7761 }, { "epoch": 1.3836007130124777, "grad_norm": 0.1796875, "learning_rate": 6.125989857923726e-06, "loss": 1.0145, "num_tokens": 10425961326.0, "step": 7762 }, { "epoch": 1.3837789661319073, "grad_norm": 0.177734375, "learning_rate": 6.1238059921773885e-06, "loss": 1.0217, "num_tokens": 10432244044.0, "step": 7763 }, { "epoch": 1.383957219251337, "grad_norm": 0.1748046875, "learning_rate": 6.1216225327653256e-06, "loss": 1.0257, "num_tokens": 10438528883.0, "step": 7764 }, { "epoch": 1.3841354723707666, "grad_norm": 0.173828125, "learning_rate": 6.119439479869491e-06, "loss": 1.0159, "num_tokens": 10444805928.0, "step": 7765 }, { "epoch": 1.384313725490196, "grad_norm": 0.1767578125, "learning_rate": 6.117256833671798e-06, "loss": 0.9829, "num_tokens": 10451088474.0, "step": 7766 }, { "epoch": 1.3844919786096257, "grad_norm": 0.1767578125, "learning_rate": 6.115074594354127e-06, "loss": 0.9948, "num_tokens": 10457337145.0, "step": 7767 }, { "epoch": 1.3846702317290553, "grad_norm": 0.177734375, "learning_rate": 6.1128927620983195e-06, "loss": 1.0331, "num_tokens": 10463595705.0, "step": 7768 }, { "epoch": 1.3848484848484848, "grad_norm": 0.1787109375, "learning_rate": 6.110711337086195e-06, "loss": 1.0219, "num_tokens": 10469876180.0, "step": 7769 }, { "epoch": 1.3850267379679144, "grad_norm": 0.185546875, "learning_rate": 6.108530319499527e-06, "loss": 1.0199, "num_tokens": 10476136185.0, "step": 7770 }, { "epoch": 1.385204991087344, "grad_norm": 0.173828125, "learning_rate": 6.106349709520059e-06, "loss": 1.0078, "num_tokens": 10482408856.0, "step": 7771 }, { "epoch": 1.3853832442067735, "grad_norm": 0.169921875, "learning_rate": 6.104169507329508e-06, "loss": 1.0175, "num_tokens": 10488690343.0, "step": 7772 }, { "epoch": 1.3855614973262032, "grad_norm": 0.1806640625, "learning_rate": 6.101989713109546e-06, "loss": 0.9912, "num_tokens": 10494973243.0, "step": 7773 }, { "epoch": 1.3857397504456328, "grad_norm": 0.171875, "learning_rate": 6.09981032704182e-06, "loss": 1.0351, "num_tokens": 10501257702.0, "step": 7774 }, { "epoch": 1.3859180035650625, "grad_norm": 0.171875, "learning_rate": 6.0976313493079346e-06, "loss": 1.0184, "num_tokens": 10507521539.0, "step": 7775 }, { "epoch": 1.386096256684492, "grad_norm": 0.1796875, "learning_rate": 6.095452780089463e-06, "loss": 1.0005, "num_tokens": 10513805066.0, "step": 7776 }, { "epoch": 1.3862745098039215, "grad_norm": 0.1865234375, "learning_rate": 6.093274619567953e-06, "loss": 1.0212, "num_tokens": 10520087902.0, "step": 7777 }, { "epoch": 1.3864527629233512, "grad_norm": 0.1845703125, "learning_rate": 6.0910968679249004e-06, "loss": 1.0167, "num_tokens": 10526349901.0, "step": 7778 }, { "epoch": 1.3866310160427808, "grad_norm": 0.171875, "learning_rate": 6.088919525341788e-06, "loss": 0.9716, "num_tokens": 10532582369.0, "step": 7779 }, { "epoch": 1.3868092691622103, "grad_norm": 0.177734375, "learning_rate": 6.08674259200005e-06, "loss": 1.0299, "num_tokens": 10538845834.0, "step": 7780 }, { "epoch": 1.38698752228164, "grad_norm": 0.17578125, "learning_rate": 6.08456606808109e-06, "loss": 1.0482, "num_tokens": 10545113037.0, "step": 7781 }, { "epoch": 1.3871657754010696, "grad_norm": 0.173828125, "learning_rate": 6.082389953766281e-06, "loss": 1.0595, "num_tokens": 10551389887.0, "step": 7782 }, { "epoch": 1.387344028520499, "grad_norm": 0.181640625, "learning_rate": 6.080214249236955e-06, "loss": 1.0156, "num_tokens": 10557636437.0, "step": 7783 }, { "epoch": 1.3875222816399286, "grad_norm": 0.1748046875, "learning_rate": 6.0780389546744175e-06, "loss": 1.0349, "num_tokens": 10563896944.0, "step": 7784 }, { "epoch": 1.3877005347593583, "grad_norm": 0.1767578125, "learning_rate": 6.0758640702599316e-06, "loss": 1.002, "num_tokens": 10570180634.0, "step": 7785 }, { "epoch": 1.387878787878788, "grad_norm": 0.1728515625, "learning_rate": 6.073689596174737e-06, "loss": 1.0496, "num_tokens": 10576385198.0, "step": 7786 }, { "epoch": 1.3880570409982176, "grad_norm": 0.17578125, "learning_rate": 6.07151553260003e-06, "loss": 1.0163, "num_tokens": 10582652159.0, "step": 7787 }, { "epoch": 1.388235294117647, "grad_norm": 0.1796875, "learning_rate": 6.069341879716977e-06, "loss": 1.03, "num_tokens": 10588876775.0, "step": 7788 }, { "epoch": 1.3884135472370767, "grad_norm": 0.1669921875, "learning_rate": 6.067168637706704e-06, "loss": 0.9907, "num_tokens": 10595159899.0, "step": 7789 }, { "epoch": 1.3885918003565063, "grad_norm": 0.173828125, "learning_rate": 6.064995806750318e-06, "loss": 1.004, "num_tokens": 10601443338.0, "step": 7790 }, { "epoch": 1.3887700534759357, "grad_norm": 0.1767578125, "learning_rate": 6.062823387028871e-06, "loss": 1.0225, "num_tokens": 10607700675.0, "step": 7791 }, { "epoch": 1.3889483065953654, "grad_norm": 0.171875, "learning_rate": 6.060651378723393e-06, "loss": 1.0299, "num_tokens": 10613964853.0, "step": 7792 }, { "epoch": 1.389126559714795, "grad_norm": 0.17578125, "learning_rate": 6.0584797820148835e-06, "loss": 1.0085, "num_tokens": 10620222408.0, "step": 7793 }, { "epoch": 1.3893048128342245, "grad_norm": 0.1826171875, "learning_rate": 6.0563085970843e-06, "loss": 1.0207, "num_tokens": 10626487961.0, "step": 7794 }, { "epoch": 1.3894830659536541, "grad_norm": 0.173828125, "learning_rate": 6.054137824112565e-06, "loss": 1.0092, "num_tokens": 10632772116.0, "step": 7795 }, { "epoch": 1.3896613190730838, "grad_norm": 0.17578125, "learning_rate": 6.051967463280568e-06, "loss": 1.0473, "num_tokens": 10639031787.0, "step": 7796 }, { "epoch": 1.3898395721925134, "grad_norm": 0.1806640625, "learning_rate": 6.049797514769171e-06, "loss": 1.023, "num_tokens": 10645287595.0, "step": 7797 }, { "epoch": 1.390017825311943, "grad_norm": 0.1728515625, "learning_rate": 6.047627978759201e-06, "loss": 1.0132, "num_tokens": 10651570991.0, "step": 7798 }, { "epoch": 1.3901960784313725, "grad_norm": 0.173828125, "learning_rate": 6.045458855431429e-06, "loss": 1.0064, "num_tokens": 10657824294.0, "step": 7799 }, { "epoch": 1.3903743315508021, "grad_norm": 0.17578125, "learning_rate": 6.043290144966622e-06, "loss": 1.0026, "num_tokens": 10664106536.0, "step": 7800 }, { "epoch": 1.3905525846702318, "grad_norm": 0.1767578125, "learning_rate": 6.041121847545496e-06, "loss": 1.027, "num_tokens": 10670389236.0, "step": 7801 }, { "epoch": 1.3907308377896612, "grad_norm": 0.1748046875, "learning_rate": 6.0389539633487346e-06, "loss": 1.0114, "num_tokens": 10676672354.0, "step": 7802 }, { "epoch": 1.3909090909090909, "grad_norm": 0.177734375, "learning_rate": 6.036786492556985e-06, "loss": 1.0427, "num_tokens": 10682929206.0, "step": 7803 }, { "epoch": 1.3910873440285205, "grad_norm": 0.1748046875, "learning_rate": 6.0346194353508725e-06, "loss": 1.0155, "num_tokens": 10689211332.0, "step": 7804 }, { "epoch": 1.39126559714795, "grad_norm": 0.1728515625, "learning_rate": 6.032452791910974e-06, "loss": 0.9909, "num_tokens": 10695496975.0, "step": 7805 }, { "epoch": 1.3914438502673796, "grad_norm": 0.171875, "learning_rate": 6.030286562417831e-06, "loss": 1.0233, "num_tokens": 10701744860.0, "step": 7806 }, { "epoch": 1.3916221033868093, "grad_norm": 0.1787109375, "learning_rate": 6.028120747051958e-06, "loss": 1.022, "num_tokens": 10708025823.0, "step": 7807 }, { "epoch": 1.391800356506239, "grad_norm": 0.1748046875, "learning_rate": 6.025955345993837e-06, "loss": 0.9861, "num_tokens": 10714311925.0, "step": 7808 }, { "epoch": 1.3919786096256686, "grad_norm": 0.18359375, "learning_rate": 6.023790359423909e-06, "loss": 1.0193, "num_tokens": 10720568493.0, "step": 7809 }, { "epoch": 1.392156862745098, "grad_norm": 0.1708984375, "learning_rate": 6.021625787522578e-06, "loss": 1.0048, "num_tokens": 10726851517.0, "step": 7810 }, { "epoch": 1.3923351158645276, "grad_norm": 0.1728515625, "learning_rate": 6.019461630470229e-06, "loss": 1.0104, "num_tokens": 10733135055.0, "step": 7811 }, { "epoch": 1.3925133689839573, "grad_norm": 0.1796875, "learning_rate": 6.017297888447194e-06, "loss": 1.0251, "num_tokens": 10739403546.0, "step": 7812 }, { "epoch": 1.3926916221033867, "grad_norm": 0.1767578125, "learning_rate": 6.0151345616337845e-06, "loss": 1.0273, "num_tokens": 10745686516.0, "step": 7813 }, { "epoch": 1.3928698752228164, "grad_norm": 0.1787109375, "learning_rate": 6.0129716502102574e-06, "loss": 1.0329, "num_tokens": 10751952834.0, "step": 7814 }, { "epoch": 1.393048128342246, "grad_norm": 0.17578125, "learning_rate": 6.010809154356863e-06, "loss": 1.0082, "num_tokens": 10758166955.0, "step": 7815 }, { "epoch": 1.3932263814616757, "grad_norm": 0.173828125, "learning_rate": 6.008647074253795e-06, "loss": 1.0171, "num_tokens": 10764423631.0, "step": 7816 }, { "epoch": 1.3934046345811053, "grad_norm": 0.1767578125, "learning_rate": 6.006485410081222e-06, "loss": 1.0185, "num_tokens": 10770669547.0, "step": 7817 }, { "epoch": 1.3935828877005347, "grad_norm": 0.1787109375, "learning_rate": 6.004324162019273e-06, "loss": 1.0388, "num_tokens": 10776945852.0, "step": 7818 }, { "epoch": 1.3937611408199644, "grad_norm": 0.1748046875, "learning_rate": 6.002163330248052e-06, "loss": 1.0372, "num_tokens": 10783228868.0, "step": 7819 }, { "epoch": 1.393939393939394, "grad_norm": 0.1708984375, "learning_rate": 6.000002914947617e-06, "loss": 1.0095, "num_tokens": 10789491826.0, "step": 7820 }, { "epoch": 1.3941176470588235, "grad_norm": 0.1845703125, "learning_rate": 5.997842916297995e-06, "loss": 0.9734, "num_tokens": 10795724456.0, "step": 7821 }, { "epoch": 1.3942959001782531, "grad_norm": 0.1845703125, "learning_rate": 5.995683334479183e-06, "loss": 1.0297, "num_tokens": 10802007995.0, "step": 7822 }, { "epoch": 1.3944741532976828, "grad_norm": 0.1875, "learning_rate": 5.993524169671137e-06, "loss": 1.0278, "num_tokens": 10808291897.0, "step": 7823 }, { "epoch": 1.3946524064171122, "grad_norm": 0.1787109375, "learning_rate": 5.991365422053781e-06, "loss": 1.0239, "num_tokens": 10814533652.0, "step": 7824 }, { "epoch": 1.3948306595365418, "grad_norm": 0.173828125, "learning_rate": 5.9892070918069985e-06, "loss": 1.0221, "num_tokens": 10820817746.0, "step": 7825 }, { "epoch": 1.3950089126559715, "grad_norm": 0.1796875, "learning_rate": 5.987049179110653e-06, "loss": 1.012, "num_tokens": 10827102114.0, "step": 7826 }, { "epoch": 1.3951871657754011, "grad_norm": 0.1767578125, "learning_rate": 5.9848916841445606e-06, "loss": 1.0381, "num_tokens": 10833384188.0, "step": 7827 }, { "epoch": 1.3953654188948308, "grad_norm": 0.1767578125, "learning_rate": 5.9827346070885024e-06, "loss": 1.0177, "num_tokens": 10839667409.0, "step": 7828 }, { "epoch": 1.3955436720142602, "grad_norm": 0.1767578125, "learning_rate": 5.980577948122233e-06, "loss": 1.0193, "num_tokens": 10845916716.0, "step": 7829 }, { "epoch": 1.3957219251336899, "grad_norm": 0.1728515625, "learning_rate": 5.978421707425463e-06, "loss": 1.0217, "num_tokens": 10852200384.0, "step": 7830 }, { "epoch": 1.3959001782531195, "grad_norm": 0.1689453125, "learning_rate": 5.976265885177874e-06, "loss": 0.9857, "num_tokens": 10858483906.0, "step": 7831 }, { "epoch": 1.396078431372549, "grad_norm": 0.1796875, "learning_rate": 5.97411048155911e-06, "loss": 1.0011, "num_tokens": 10864769292.0, "step": 7832 }, { "epoch": 1.3962566844919786, "grad_norm": 0.1796875, "learning_rate": 5.971955496748784e-06, "loss": 1.0215, "num_tokens": 10871031837.0, "step": 7833 }, { "epoch": 1.3964349376114082, "grad_norm": 0.17578125, "learning_rate": 5.96980093092647e-06, "loss": 0.9907, "num_tokens": 10877314171.0, "step": 7834 }, { "epoch": 1.3966131907308377, "grad_norm": 0.1806640625, "learning_rate": 5.967646784271709e-06, "loss": 1.0438, "num_tokens": 10883568307.0, "step": 7835 }, { "epoch": 1.3967914438502673, "grad_norm": 0.17578125, "learning_rate": 5.965493056964005e-06, "loss": 1.0141, "num_tokens": 10889841204.0, "step": 7836 }, { "epoch": 1.396969696969697, "grad_norm": 0.1708984375, "learning_rate": 5.963339749182829e-06, "loss": 1.0016, "num_tokens": 10896096427.0, "step": 7837 }, { "epoch": 1.3971479500891266, "grad_norm": 0.171875, "learning_rate": 5.9611868611076175e-06, "loss": 1.0143, "num_tokens": 10902380945.0, "step": 7838 }, { "epoch": 1.3973262032085563, "grad_norm": 0.1806640625, "learning_rate": 5.959034392917766e-06, "loss": 1.02, "num_tokens": 10908655496.0, "step": 7839 }, { "epoch": 1.3975044563279857, "grad_norm": 0.1748046875, "learning_rate": 5.956882344792649e-06, "loss": 1.0131, "num_tokens": 10914940549.0, "step": 7840 }, { "epoch": 1.3976827094474154, "grad_norm": 0.1826171875, "learning_rate": 5.954730716911595e-06, "loss": 1.0186, "num_tokens": 10921223180.0, "step": 7841 }, { "epoch": 1.397860962566845, "grad_norm": 0.177734375, "learning_rate": 5.952579509453894e-06, "loss": 1.0202, "num_tokens": 10927464399.0, "step": 7842 }, { "epoch": 1.3980392156862744, "grad_norm": 0.17578125, "learning_rate": 5.950428722598807e-06, "loss": 0.9874, "num_tokens": 10933748641.0, "step": 7843 }, { "epoch": 1.398217468805704, "grad_norm": 0.1796875, "learning_rate": 5.94827835652557e-06, "loss": 1.0427, "num_tokens": 10940015826.0, "step": 7844 }, { "epoch": 1.3983957219251337, "grad_norm": 0.17578125, "learning_rate": 5.946128411413362e-06, "loss": 1.0083, "num_tokens": 10946280113.0, "step": 7845 }, { "epoch": 1.3985739750445632, "grad_norm": 0.17578125, "learning_rate": 5.943978887441339e-06, "loss": 1.0155, "num_tokens": 10952565817.0, "step": 7846 }, { "epoch": 1.3987522281639928, "grad_norm": 0.177734375, "learning_rate": 5.941829784788628e-06, "loss": 1.003, "num_tokens": 10958820258.0, "step": 7847 }, { "epoch": 1.3989304812834225, "grad_norm": 0.1767578125, "learning_rate": 5.9396811036343114e-06, "loss": 1.045, "num_tokens": 10965104465.0, "step": 7848 }, { "epoch": 1.399108734402852, "grad_norm": 0.181640625, "learning_rate": 5.9375328441574385e-06, "loss": 1.0172, "num_tokens": 10971389420.0, "step": 7849 }, { "epoch": 1.3992869875222818, "grad_norm": 0.171875, "learning_rate": 5.93538500653702e-06, "loss": 0.9984, "num_tokens": 10977674693.0, "step": 7850 }, { "epoch": 1.3994652406417112, "grad_norm": 0.1806640625, "learning_rate": 5.933237590952048e-06, "loss": 1.0536, "num_tokens": 10983932019.0, "step": 7851 }, { "epoch": 1.3996434937611408, "grad_norm": 0.181640625, "learning_rate": 5.931090597581455e-06, "loss": 1.0277, "num_tokens": 10990215166.0, "step": 7852 }, { "epoch": 1.3998217468805705, "grad_norm": 0.1748046875, "learning_rate": 5.928944026604151e-06, "loss": 1.0128, "num_tokens": 10996497655.0, "step": 7853 }, { "epoch": 1.4, "grad_norm": 0.1796875, "learning_rate": 5.926797878199018e-06, "loss": 1.0119, "num_tokens": 11002778783.0, "step": 7854 }, { "epoch": 1.4001782531194296, "grad_norm": 0.177734375, "learning_rate": 5.92465215254489e-06, "loss": 1.0008, "num_tokens": 11009063420.0, "step": 7855 }, { "epoch": 1.4003565062388592, "grad_norm": 0.1845703125, "learning_rate": 5.922506849820571e-06, "loss": 1.0078, "num_tokens": 11015320399.0, "step": 7856 }, { "epoch": 1.4005347593582886, "grad_norm": 0.181640625, "learning_rate": 5.920361970204828e-06, "loss": 1.0105, "num_tokens": 11021603071.0, "step": 7857 }, { "epoch": 1.4007130124777183, "grad_norm": 0.19140625, "learning_rate": 5.9182175138763986e-06, "loss": 1.0128, "num_tokens": 11027863102.0, "step": 7858 }, { "epoch": 1.400891265597148, "grad_norm": 0.19140625, "learning_rate": 5.916073481013982e-06, "loss": 1.0194, "num_tokens": 11034117049.0, "step": 7859 }, { "epoch": 1.4010695187165776, "grad_norm": 0.1923828125, "learning_rate": 5.913929871796231e-06, "loss": 0.9963, "num_tokens": 11040346806.0, "step": 7860 }, { "epoch": 1.4012477718360072, "grad_norm": 0.181640625, "learning_rate": 5.9117866864017825e-06, "loss": 1.0202, "num_tokens": 11046599635.0, "step": 7861 }, { "epoch": 1.4014260249554367, "grad_norm": 0.1748046875, "learning_rate": 5.909643925009225e-06, "loss": 0.9844, "num_tokens": 11052848852.0, "step": 7862 }, { "epoch": 1.4016042780748663, "grad_norm": 0.1787109375, "learning_rate": 5.907501587797115e-06, "loss": 1.0007, "num_tokens": 11059102156.0, "step": 7863 }, { "epoch": 1.401782531194296, "grad_norm": 0.177734375, "learning_rate": 5.905359674943974e-06, "loss": 1.0067, "num_tokens": 11065353212.0, "step": 7864 }, { "epoch": 1.4019607843137254, "grad_norm": 0.17578125, "learning_rate": 5.90321818662829e-06, "loss": 1.0221, "num_tokens": 11071632523.0, "step": 7865 }, { "epoch": 1.402139037433155, "grad_norm": 0.1796875, "learning_rate": 5.901077123028514e-06, "loss": 1.0197, "num_tokens": 11077916770.0, "step": 7866 }, { "epoch": 1.4023172905525847, "grad_norm": 0.169921875, "learning_rate": 5.898936484323059e-06, "loss": 0.9971, "num_tokens": 11084185167.0, "step": 7867 }, { "epoch": 1.4024955436720141, "grad_norm": 0.1787109375, "learning_rate": 5.896796270690305e-06, "loss": 1.0036, "num_tokens": 11090453259.0, "step": 7868 }, { "epoch": 1.4026737967914438, "grad_norm": 0.177734375, "learning_rate": 5.894656482308596e-06, "loss": 1.0149, "num_tokens": 11096685607.0, "step": 7869 }, { "epoch": 1.4028520499108734, "grad_norm": 0.1728515625, "learning_rate": 5.892517119356245e-06, "loss": 1.0204, "num_tokens": 11102922608.0, "step": 7870 }, { "epoch": 1.403030303030303, "grad_norm": 0.17578125, "learning_rate": 5.8903781820115165e-06, "loss": 1.0228, "num_tokens": 11109190891.0, "step": 7871 }, { "epoch": 1.4032085561497327, "grad_norm": 0.1708984375, "learning_rate": 5.88823967045266e-06, "loss": 0.9909, "num_tokens": 11115463022.0, "step": 7872 }, { "epoch": 1.4033868092691621, "grad_norm": 0.173828125, "learning_rate": 5.886101584857872e-06, "loss": 1.0288, "num_tokens": 11121716771.0, "step": 7873 }, { "epoch": 1.4035650623885918, "grad_norm": 0.1767578125, "learning_rate": 5.883963925405321e-06, "loss": 0.9797, "num_tokens": 11128001267.0, "step": 7874 }, { "epoch": 1.4037433155080214, "grad_norm": 0.1708984375, "learning_rate": 5.881826692273136e-06, "loss": 1.0243, "num_tokens": 11134281787.0, "step": 7875 }, { "epoch": 1.4039215686274509, "grad_norm": 0.1806640625, "learning_rate": 5.879689885639417e-06, "loss": 1.0268, "num_tokens": 11140564464.0, "step": 7876 }, { "epoch": 1.4040998217468805, "grad_norm": 0.1796875, "learning_rate": 5.877553505682222e-06, "loss": 1.0247, "num_tokens": 11146848730.0, "step": 7877 }, { "epoch": 1.4042780748663102, "grad_norm": 0.1806640625, "learning_rate": 5.8754175525795745e-06, "loss": 1.0384, "num_tokens": 11153132700.0, "step": 7878 }, { "epoch": 1.4044563279857398, "grad_norm": 0.169921875, "learning_rate": 5.873282026509468e-06, "loss": 1.0001, "num_tokens": 11159392686.0, "step": 7879 }, { "epoch": 1.4046345811051695, "grad_norm": 0.1767578125, "learning_rate": 5.871146927649856e-06, "loss": 1.0314, "num_tokens": 11165673946.0, "step": 7880 }, { "epoch": 1.404812834224599, "grad_norm": 0.1767578125, "learning_rate": 5.869012256178653e-06, "loss": 1.0177, "num_tokens": 11171937206.0, "step": 7881 }, { "epoch": 1.4049910873440286, "grad_norm": 0.177734375, "learning_rate": 5.866878012273747e-06, "loss": 0.9669, "num_tokens": 11178219876.0, "step": 7882 }, { "epoch": 1.4051693404634582, "grad_norm": 0.1767578125, "learning_rate": 5.8647441961129795e-06, "loss": 1.0544, "num_tokens": 11184498152.0, "step": 7883 }, { "epoch": 1.4053475935828876, "grad_norm": 0.1796875, "learning_rate": 5.862610807874163e-06, "loss": 1.0777, "num_tokens": 11190755497.0, "step": 7884 }, { "epoch": 1.4055258467023173, "grad_norm": 0.173828125, "learning_rate": 5.860477847735073e-06, "loss": 1.0216, "num_tokens": 11197039461.0, "step": 7885 }, { "epoch": 1.405704099821747, "grad_norm": 0.1748046875, "learning_rate": 5.858345315873455e-06, "loss": 1.0293, "num_tokens": 11203295774.0, "step": 7886 }, { "epoch": 1.4058823529411764, "grad_norm": 0.1708984375, "learning_rate": 5.856213212467008e-06, "loss": 1.0401, "num_tokens": 11209546181.0, "step": 7887 }, { "epoch": 1.406060606060606, "grad_norm": 0.1748046875, "learning_rate": 5.854081537693401e-06, "loss": 0.9821, "num_tokens": 11215828770.0, "step": 7888 }, { "epoch": 1.4062388591800357, "grad_norm": 0.177734375, "learning_rate": 5.851950291730266e-06, "loss": 0.9803, "num_tokens": 11222090151.0, "step": 7889 }, { "epoch": 1.4064171122994653, "grad_norm": 0.173828125, "learning_rate": 5.849819474755207e-06, "loss": 1.0171, "num_tokens": 11228312442.0, "step": 7890 }, { "epoch": 1.406595365418895, "grad_norm": 0.1708984375, "learning_rate": 5.847689086945778e-06, "loss": 1.0224, "num_tokens": 11234568033.0, "step": 7891 }, { "epoch": 1.4067736185383244, "grad_norm": 0.171875, "learning_rate": 5.845559128479504e-06, "loss": 0.9762, "num_tokens": 11240852082.0, "step": 7892 }, { "epoch": 1.406951871657754, "grad_norm": 0.173828125, "learning_rate": 5.843429599533881e-06, "loss": 1.0253, "num_tokens": 11247083221.0, "step": 7893 }, { "epoch": 1.4071301247771837, "grad_norm": 0.17578125, "learning_rate": 5.84130050028636e-06, "loss": 1.0133, "num_tokens": 11253335741.0, "step": 7894 }, { "epoch": 1.4073083778966131, "grad_norm": 0.1650390625, "learning_rate": 5.839171830914359e-06, "loss": 1.0025, "num_tokens": 11259593289.0, "step": 7895 }, { "epoch": 1.4074866310160428, "grad_norm": 0.1826171875, "learning_rate": 5.837043591595257e-06, "loss": 1.0001, "num_tokens": 11265850506.0, "step": 7896 }, { "epoch": 1.4076648841354724, "grad_norm": 0.1728515625, "learning_rate": 5.834915782506412e-06, "loss": 0.9916, "num_tokens": 11272130430.0, "step": 7897 }, { "epoch": 1.4078431372549018, "grad_norm": 0.1748046875, "learning_rate": 5.832788403825123e-06, "loss": 1.0246, "num_tokens": 11278412533.0, "step": 7898 }, { "epoch": 1.4080213903743315, "grad_norm": 0.171875, "learning_rate": 5.830661455728666e-06, "loss": 1.0398, "num_tokens": 11284697694.0, "step": 7899 }, { "epoch": 1.4081996434937611, "grad_norm": 0.1796875, "learning_rate": 5.828534938394287e-06, "loss": 1.0136, "num_tokens": 11290951328.0, "step": 7900 }, { "epoch": 1.4083778966131908, "grad_norm": 0.1708984375, "learning_rate": 5.826408851999185e-06, "loss": 1.0099, "num_tokens": 11297236157.0, "step": 7901 }, { "epoch": 1.4085561497326204, "grad_norm": 0.1787109375, "learning_rate": 5.8242831967205285e-06, "loss": 1.0303, "num_tokens": 11303497704.0, "step": 7902 }, { "epoch": 1.4087344028520499, "grad_norm": 0.1767578125, "learning_rate": 5.822157972735443e-06, "loss": 1.0074, "num_tokens": 11309750380.0, "step": 7903 }, { "epoch": 1.4089126559714795, "grad_norm": 0.173828125, "learning_rate": 5.820033180221032e-06, "loss": 1.0053, "num_tokens": 11316026403.0, "step": 7904 }, { "epoch": 1.4090909090909092, "grad_norm": 0.177734375, "learning_rate": 5.817908819354355e-06, "loss": 1.0163, "num_tokens": 11322310682.0, "step": 7905 }, { "epoch": 1.4092691622103386, "grad_norm": 0.171875, "learning_rate": 5.815784890312426e-06, "loss": 1.0319, "num_tokens": 11328577806.0, "step": 7906 }, { "epoch": 1.4094474153297682, "grad_norm": 0.1708984375, "learning_rate": 5.8136613932722406e-06, "loss": 1.0076, "num_tokens": 11334844237.0, "step": 7907 }, { "epoch": 1.409625668449198, "grad_norm": 0.1748046875, "learning_rate": 5.8115383284107494e-06, "loss": 1.0317, "num_tokens": 11341124901.0, "step": 7908 }, { "epoch": 1.4098039215686273, "grad_norm": 0.171875, "learning_rate": 5.809415695904866e-06, "loss": 1.0478, "num_tokens": 11347407576.0, "step": 7909 }, { "epoch": 1.409982174688057, "grad_norm": 0.18359375, "learning_rate": 5.807293495931468e-06, "loss": 1.0168, "num_tokens": 11353690508.0, "step": 7910 }, { "epoch": 1.4101604278074866, "grad_norm": 0.1787109375, "learning_rate": 5.8051717286674025e-06, "loss": 1.0219, "num_tokens": 11359973088.0, "step": 7911 }, { "epoch": 1.4103386809269163, "grad_norm": 0.1748046875, "learning_rate": 5.8030503942894776e-06, "loss": 1.0449, "num_tokens": 11366257548.0, "step": 7912 }, { "epoch": 1.410516934046346, "grad_norm": 0.181640625, "learning_rate": 5.800929492974462e-06, "loss": 1.0251, "num_tokens": 11372508395.0, "step": 7913 }, { "epoch": 1.4106951871657754, "grad_norm": 0.17578125, "learning_rate": 5.79880902489909e-06, "loss": 1.0484, "num_tokens": 11378772039.0, "step": 7914 }, { "epoch": 1.410873440285205, "grad_norm": 0.1728515625, "learning_rate": 5.796688990240063e-06, "loss": 1.0058, "num_tokens": 11385053748.0, "step": 7915 }, { "epoch": 1.4110516934046347, "grad_norm": 0.1787109375, "learning_rate": 5.794569389174043e-06, "loss": 1.0048, "num_tokens": 11391306286.0, "step": 7916 }, { "epoch": 1.411229946524064, "grad_norm": 0.173828125, "learning_rate": 5.792450221877652e-06, "loss": 1.0106, "num_tokens": 11397534959.0, "step": 7917 }, { "epoch": 1.4114081996434937, "grad_norm": 0.1708984375, "learning_rate": 5.79033148852749e-06, "loss": 0.9944, "num_tokens": 11403818326.0, "step": 7918 }, { "epoch": 1.4115864527629234, "grad_norm": 0.169921875, "learning_rate": 5.788213189300107e-06, "loss": 1.0172, "num_tokens": 11410102358.0, "step": 7919 }, { "epoch": 1.4117647058823528, "grad_norm": 0.177734375, "learning_rate": 5.786095324372021e-06, "loss": 0.9704, "num_tokens": 11416372717.0, "step": 7920 }, { "epoch": 1.4119429590017825, "grad_norm": 0.1845703125, "learning_rate": 5.783977893919712e-06, "loss": 1.0253, "num_tokens": 11422641789.0, "step": 7921 }, { "epoch": 1.412121212121212, "grad_norm": 0.17578125, "learning_rate": 5.7818608981196314e-06, "loss": 1.0066, "num_tokens": 11428926317.0, "step": 7922 }, { "epoch": 1.4122994652406418, "grad_norm": 0.1767578125, "learning_rate": 5.7797443371481835e-06, "loss": 1.0153, "num_tokens": 11435195365.0, "step": 7923 }, { "epoch": 1.4124777183600714, "grad_norm": 0.1728515625, "learning_rate": 5.777628211181743e-06, "loss": 1.0319, "num_tokens": 11441455392.0, "step": 7924 }, { "epoch": 1.4126559714795008, "grad_norm": 0.171875, "learning_rate": 5.775512520396649e-06, "loss": 1.0108, "num_tokens": 11447738799.0, "step": 7925 }, { "epoch": 1.4128342245989305, "grad_norm": 0.177734375, "learning_rate": 5.773397264969203e-06, "loss": 1.0161, "num_tokens": 11453993467.0, "step": 7926 }, { "epoch": 1.4130124777183601, "grad_norm": 0.1708984375, "learning_rate": 5.771282445075668e-06, "loss": 0.975, "num_tokens": 11460278207.0, "step": 7927 }, { "epoch": 1.4131907308377896, "grad_norm": 0.1767578125, "learning_rate": 5.7691680608922725e-06, "loss": 1.0249, "num_tokens": 11466561007.0, "step": 7928 }, { "epoch": 1.4133689839572192, "grad_norm": 0.1728515625, "learning_rate": 5.767054112595208e-06, "loss": 1.0205, "num_tokens": 11472831397.0, "step": 7929 }, { "epoch": 1.4135472370766489, "grad_norm": 0.1728515625, "learning_rate": 5.764940600360632e-06, "loss": 1.0135, "num_tokens": 11479116129.0, "step": 7930 }, { "epoch": 1.4137254901960783, "grad_norm": 0.1748046875, "learning_rate": 5.76282752436466e-06, "loss": 1.0021, "num_tokens": 11485321846.0, "step": 7931 }, { "epoch": 1.413903743315508, "grad_norm": 0.17578125, "learning_rate": 5.76071488478338e-06, "loss": 1.0173, "num_tokens": 11491601647.0, "step": 7932 }, { "epoch": 1.4140819964349376, "grad_norm": 0.1748046875, "learning_rate": 5.758602681792837e-06, "loss": 1.009, "num_tokens": 11497826077.0, "step": 7933 }, { "epoch": 1.4142602495543672, "grad_norm": 0.1767578125, "learning_rate": 5.756490915569042e-06, "loss": 1.0262, "num_tokens": 11504098472.0, "step": 7934 }, { "epoch": 1.414438502673797, "grad_norm": 0.1845703125, "learning_rate": 5.754379586287964e-06, "loss": 1.0133, "num_tokens": 11510379446.0, "step": 7935 }, { "epoch": 1.4146167557932263, "grad_norm": 0.1689453125, "learning_rate": 5.7522686941255516e-06, "loss": 1.0072, "num_tokens": 11516632573.0, "step": 7936 }, { "epoch": 1.414795008912656, "grad_norm": 0.171875, "learning_rate": 5.750158239257693e-06, "loss": 1.011, "num_tokens": 11522917211.0, "step": 7937 }, { "epoch": 1.4149732620320856, "grad_norm": 0.1767578125, "learning_rate": 5.748048221860257e-06, "loss": 1.0072, "num_tokens": 11529136218.0, "step": 7938 }, { "epoch": 1.415151515151515, "grad_norm": 0.169921875, "learning_rate": 5.745938642109077e-06, "loss": 1.0057, "num_tokens": 11535407947.0, "step": 7939 }, { "epoch": 1.4153297682709447, "grad_norm": 0.171875, "learning_rate": 5.74382950017994e-06, "loss": 1.007, "num_tokens": 11541679740.0, "step": 7940 }, { "epoch": 1.4155080213903743, "grad_norm": 0.17578125, "learning_rate": 5.741720796248604e-06, "loss": 1.0173, "num_tokens": 11547962498.0, "step": 7941 }, { "epoch": 1.415686274509804, "grad_norm": 0.173828125, "learning_rate": 5.739612530490782e-06, "loss": 0.9893, "num_tokens": 11554246209.0, "step": 7942 }, { "epoch": 1.4158645276292336, "grad_norm": 0.1708984375, "learning_rate": 5.737504703082164e-06, "loss": 1.0391, "num_tokens": 11560528504.0, "step": 7943 }, { "epoch": 1.416042780748663, "grad_norm": 0.1767578125, "learning_rate": 5.735397314198394e-06, "loss": 0.9766, "num_tokens": 11566811109.0, "step": 7944 }, { "epoch": 1.4162210338680927, "grad_norm": 0.1748046875, "learning_rate": 5.733290364015074e-06, "loss": 1.0434, "num_tokens": 11573080684.0, "step": 7945 }, { "epoch": 1.4163992869875224, "grad_norm": 0.17578125, "learning_rate": 5.731183852707785e-06, "loss": 1.0118, "num_tokens": 11579365022.0, "step": 7946 }, { "epoch": 1.4165775401069518, "grad_norm": 0.171875, "learning_rate": 5.729077780452059e-06, "loss": 0.9884, "num_tokens": 11585618101.0, "step": 7947 }, { "epoch": 1.4167557932263815, "grad_norm": 0.1767578125, "learning_rate": 5.7269721474233965e-06, "loss": 1.033, "num_tokens": 11591897237.0, "step": 7948 }, { "epoch": 1.416934046345811, "grad_norm": 0.1767578125, "learning_rate": 5.724866953797257e-06, "loss": 1.02, "num_tokens": 11598166742.0, "step": 7949 }, { "epoch": 1.4171122994652405, "grad_norm": 0.1767578125, "learning_rate": 5.722762199749073e-06, "loss": 1.0275, "num_tokens": 11604449795.0, "step": 7950 }, { "epoch": 1.4172905525846702, "grad_norm": 0.17578125, "learning_rate": 5.720657885454235e-06, "loss": 1.0222, "num_tokens": 11610729151.0, "step": 7951 }, { "epoch": 1.4174688057040998, "grad_norm": 0.1689453125, "learning_rate": 5.7185540110880846e-06, "loss": 1.0307, "num_tokens": 11617013030.0, "step": 7952 }, { "epoch": 1.4176470588235295, "grad_norm": 0.1796875, "learning_rate": 5.716450576825949e-06, "loss": 1.0196, "num_tokens": 11623243235.0, "step": 7953 }, { "epoch": 1.4178253119429591, "grad_norm": 0.17578125, "learning_rate": 5.714347582843104e-06, "loss": 1.0393, "num_tokens": 11629497941.0, "step": 7954 }, { "epoch": 1.4180035650623886, "grad_norm": 0.171875, "learning_rate": 5.712245029314792e-06, "loss": 1.0085, "num_tokens": 11635782146.0, "step": 7955 }, { "epoch": 1.4181818181818182, "grad_norm": 0.1708984375, "learning_rate": 5.710142916416219e-06, "loss": 0.9913, "num_tokens": 11642004960.0, "step": 7956 }, { "epoch": 1.4183600713012479, "grad_norm": 0.1728515625, "learning_rate": 5.7080412443225564e-06, "loss": 1.0121, "num_tokens": 11648280745.0, "step": 7957 }, { "epoch": 1.4185383244206773, "grad_norm": 0.171875, "learning_rate": 5.705940013208937e-06, "loss": 1.0209, "num_tokens": 11654546177.0, "step": 7958 }, { "epoch": 1.418716577540107, "grad_norm": 0.1796875, "learning_rate": 5.703839223250456e-06, "loss": 1.0299, "num_tokens": 11660831859.0, "step": 7959 }, { "epoch": 1.4188948306595366, "grad_norm": 0.181640625, "learning_rate": 5.701738874622171e-06, "loss": 1.0424, "num_tokens": 11667084285.0, "step": 7960 }, { "epoch": 1.419073083778966, "grad_norm": 0.173828125, "learning_rate": 5.699638967499109e-06, "loss": 1.0237, "num_tokens": 11673332078.0, "step": 7961 }, { "epoch": 1.4192513368983957, "grad_norm": 0.181640625, "learning_rate": 5.697539502056251e-06, "loss": 1.0353, "num_tokens": 11679615045.0, "step": 7962 }, { "epoch": 1.4194295900178253, "grad_norm": 0.1787109375, "learning_rate": 5.695440478468546e-06, "loss": 1.0392, "num_tokens": 11685894942.0, "step": 7963 }, { "epoch": 1.419607843137255, "grad_norm": 0.1826171875, "learning_rate": 5.693341896910909e-06, "loss": 0.9842, "num_tokens": 11692179027.0, "step": 7964 }, { "epoch": 1.4197860962566846, "grad_norm": 0.177734375, "learning_rate": 5.691243757558215e-06, "loss": 1.0048, "num_tokens": 11698462405.0, "step": 7965 }, { "epoch": 1.419964349376114, "grad_norm": 0.1796875, "learning_rate": 5.689146060585301e-06, "loss": 1.0632, "num_tokens": 11704732252.0, "step": 7966 }, { "epoch": 1.4201426024955437, "grad_norm": 0.171875, "learning_rate": 5.687048806166969e-06, "loss": 0.9908, "num_tokens": 11711015938.0, "step": 7967 }, { "epoch": 1.4203208556149733, "grad_norm": 0.1787109375, "learning_rate": 5.684951994477983e-06, "loss": 1.0193, "num_tokens": 11717288407.0, "step": 7968 }, { "epoch": 1.4204991087344028, "grad_norm": 0.1845703125, "learning_rate": 5.6828556256930725e-06, "loss": 1.0028, "num_tokens": 11723573294.0, "step": 7969 }, { "epoch": 1.4206773618538324, "grad_norm": 0.1826171875, "learning_rate": 5.680759699986924e-06, "loss": 1.0076, "num_tokens": 11729856535.0, "step": 7970 }, { "epoch": 1.420855614973262, "grad_norm": 0.18359375, "learning_rate": 5.678664217534198e-06, "loss": 0.9993, "num_tokens": 11736133533.0, "step": 7971 }, { "epoch": 1.4210338680926915, "grad_norm": 0.177734375, "learning_rate": 5.676569178509505e-06, "loss": 1.006, "num_tokens": 11742414420.0, "step": 7972 }, { "epoch": 1.4212121212121211, "grad_norm": 0.17578125, "learning_rate": 5.674474583087433e-06, "loss": 0.9975, "num_tokens": 11748685346.0, "step": 7973 }, { "epoch": 1.4213903743315508, "grad_norm": 0.171875, "learning_rate": 5.672380431442517e-06, "loss": 1.0272, "num_tokens": 11754952065.0, "step": 7974 }, { "epoch": 1.4215686274509804, "grad_norm": 0.1943359375, "learning_rate": 5.6702867237492675e-06, "loss": 1.0561, "num_tokens": 11761236065.0, "step": 7975 }, { "epoch": 1.42174688057041, "grad_norm": 0.171875, "learning_rate": 5.668193460182152e-06, "loss": 0.9907, "num_tokens": 11767521333.0, "step": 7976 }, { "epoch": 1.4219251336898395, "grad_norm": 0.1767578125, "learning_rate": 5.666100640915604e-06, "loss": 1.0621, "num_tokens": 11773750711.0, "step": 7977 }, { "epoch": 1.4221033868092692, "grad_norm": 0.181640625, "learning_rate": 5.664008266124014e-06, "loss": 1.0049, "num_tokens": 11780014643.0, "step": 7978 }, { "epoch": 1.4222816399286988, "grad_norm": 0.177734375, "learning_rate": 5.6619163359817455e-06, "loss": 1.0093, "num_tokens": 11786279704.0, "step": 7979 }, { "epoch": 1.4224598930481283, "grad_norm": 0.1728515625, "learning_rate": 5.659824850663119e-06, "loss": 0.9913, "num_tokens": 11792536315.0, "step": 7980 }, { "epoch": 1.422638146167558, "grad_norm": 0.185546875, "learning_rate": 5.657733810342414e-06, "loss": 1.0344, "num_tokens": 11798789018.0, "step": 7981 }, { "epoch": 1.4228163992869876, "grad_norm": 0.1826171875, "learning_rate": 5.655643215193887e-06, "loss": 1.0024, "num_tokens": 11805052539.0, "step": 7982 }, { "epoch": 1.422994652406417, "grad_norm": 0.1787109375, "learning_rate": 5.653553065391737e-06, "loss": 1.0118, "num_tokens": 11811310230.0, "step": 7983 }, { "epoch": 1.4231729055258466, "grad_norm": 0.1787109375, "learning_rate": 5.651463361110141e-06, "loss": 1.0034, "num_tokens": 11817594481.0, "step": 7984 }, { "epoch": 1.4233511586452763, "grad_norm": 0.1728515625, "learning_rate": 5.649374102523232e-06, "loss": 1.0211, "num_tokens": 11823877955.0, "step": 7985 }, { "epoch": 1.423529411764706, "grad_norm": 0.1796875, "learning_rate": 5.647285289805111e-06, "loss": 1.0231, "num_tokens": 11830124544.0, "step": 7986 }, { "epoch": 1.4237076648841356, "grad_norm": 0.17578125, "learning_rate": 5.6451969231298405e-06, "loss": 1.0455, "num_tokens": 11836391867.0, "step": 7987 }, { "epoch": 1.423885918003565, "grad_norm": 0.169921875, "learning_rate": 5.643109002671445e-06, "loss": 1.0122, "num_tokens": 11842675437.0, "step": 7988 }, { "epoch": 1.4240641711229947, "grad_norm": 0.1845703125, "learning_rate": 5.641021528603905e-06, "loss": 1.0559, "num_tokens": 11848947388.0, "step": 7989 }, { "epoch": 1.4242424242424243, "grad_norm": 0.1796875, "learning_rate": 5.638934501101179e-06, "loss": 1.0126, "num_tokens": 11855229963.0, "step": 7990 }, { "epoch": 1.4244206773618537, "grad_norm": 0.1787109375, "learning_rate": 5.636847920337172e-06, "loss": 1.0261, "num_tokens": 11861487315.0, "step": 7991 }, { "epoch": 1.4245989304812834, "grad_norm": 0.1796875, "learning_rate": 5.634761786485761e-06, "loss": 1.0104, "num_tokens": 11867750261.0, "step": 7992 }, { "epoch": 1.424777183600713, "grad_norm": 0.185546875, "learning_rate": 5.632676099720786e-06, "loss": 1.018, "num_tokens": 11874033623.0, "step": 7993 }, { "epoch": 1.4249554367201425, "grad_norm": 0.1787109375, "learning_rate": 5.630590860216048e-06, "loss": 0.9971, "num_tokens": 11880274180.0, "step": 7994 }, { "epoch": 1.4251336898395721, "grad_norm": 0.1806640625, "learning_rate": 5.628506068145308e-06, "loss": 0.9971, "num_tokens": 11886541537.0, "step": 7995 }, { "epoch": 1.4253119429590018, "grad_norm": 0.181640625, "learning_rate": 5.6264217236822894e-06, "loss": 0.9792, "num_tokens": 11892826016.0, "step": 7996 }, { "epoch": 1.4254901960784314, "grad_norm": 0.1767578125, "learning_rate": 5.6243378270006935e-06, "loss": 1.0179, "num_tokens": 11899091061.0, "step": 7997 }, { "epoch": 1.425668449197861, "grad_norm": 0.177734375, "learning_rate": 5.622254378274158e-06, "loss": 1.0248, "num_tokens": 11905345229.0, "step": 7998 }, { "epoch": 1.4258467023172905, "grad_norm": 0.17578125, "learning_rate": 5.620171377676301e-06, "loss": 1.0121, "num_tokens": 11911586154.0, "step": 7999 }, { "epoch": 1.4260249554367201, "grad_norm": 0.1826171875, "learning_rate": 5.618088825380703e-06, "loss": 1.032, "num_tokens": 11917847346.0, "step": 8000 }, { "epoch": 1.4262032085561498, "grad_norm": 0.17578125, "learning_rate": 5.616006721560903e-06, "loss": 1.004, "num_tokens": 11924128442.0, "step": 8001 }, { "epoch": 1.4263814616755792, "grad_norm": 0.1865234375, "learning_rate": 5.6139250663904e-06, "loss": 1.0091, "num_tokens": 11930392623.0, "step": 8002 }, { "epoch": 1.4265597147950089, "grad_norm": 0.1728515625, "learning_rate": 5.611843860042657e-06, "loss": 1.0307, "num_tokens": 11936676615.0, "step": 8003 }, { "epoch": 1.4267379679144385, "grad_norm": 0.1787109375, "learning_rate": 5.609763102691108e-06, "loss": 1.0133, "num_tokens": 11942941712.0, "step": 8004 }, { "epoch": 1.4269162210338682, "grad_norm": 0.1708984375, "learning_rate": 5.607682794509144e-06, "loss": 0.9965, "num_tokens": 11949228447.0, "step": 8005 }, { "epoch": 1.4270944741532978, "grad_norm": 0.1748046875, "learning_rate": 5.605602935670106e-06, "loss": 1.0287, "num_tokens": 11955494174.0, "step": 8006 }, { "epoch": 1.4272727272727272, "grad_norm": 0.1796875, "learning_rate": 5.603523526347319e-06, "loss": 1.0024, "num_tokens": 11961751755.0, "step": 8007 }, { "epoch": 1.427450980392157, "grad_norm": 0.169921875, "learning_rate": 5.6014445667140585e-06, "loss": 0.9944, "num_tokens": 11968035568.0, "step": 8008 }, { "epoch": 1.4276292335115865, "grad_norm": 0.173828125, "learning_rate": 5.599366056943562e-06, "loss": 1.0242, "num_tokens": 11974302866.0, "step": 8009 }, { "epoch": 1.427807486631016, "grad_norm": 0.17578125, "learning_rate": 5.5972879972090335e-06, "loss": 1.0323, "num_tokens": 11980576183.0, "step": 8010 }, { "epoch": 1.4279857397504456, "grad_norm": 0.177734375, "learning_rate": 5.595210387683642e-06, "loss": 1.023, "num_tokens": 11986807841.0, "step": 8011 }, { "epoch": 1.4281639928698753, "grad_norm": 0.1787109375, "learning_rate": 5.59313322854051e-06, "loss": 0.9814, "num_tokens": 11993064331.0, "step": 8012 }, { "epoch": 1.4283422459893047, "grad_norm": 0.1904296875, "learning_rate": 5.591056519952731e-06, "loss": 1.0403, "num_tokens": 11999345981.0, "step": 8013 }, { "epoch": 1.4285204991087344, "grad_norm": 0.1767578125, "learning_rate": 5.588980262093356e-06, "loss": 0.9965, "num_tokens": 12005629580.0, "step": 8014 }, { "epoch": 1.428698752228164, "grad_norm": 0.1806640625, "learning_rate": 5.586904455135398e-06, "loss": 0.9912, "num_tokens": 12011911199.0, "step": 8015 }, { "epoch": 1.4288770053475937, "grad_norm": 0.173828125, "learning_rate": 5.584829099251839e-06, "loss": 1.0248, "num_tokens": 12018194075.0, "step": 8016 }, { "epoch": 1.4290552584670233, "grad_norm": 0.17578125, "learning_rate": 5.582754194615612e-06, "loss": 1.006, "num_tokens": 12024476962.0, "step": 8017 }, { "epoch": 1.4292335115864527, "grad_norm": 0.1787109375, "learning_rate": 5.580679741399628e-06, "loss": 1.0402, "num_tokens": 12030729193.0, "step": 8018 }, { "epoch": 1.4294117647058824, "grad_norm": 0.1748046875, "learning_rate": 5.578605739776745e-06, "loss": 1.0381, "num_tokens": 12036993107.0, "step": 8019 }, { "epoch": 1.429590017825312, "grad_norm": 0.171875, "learning_rate": 5.5765321899197944e-06, "loss": 1.0275, "num_tokens": 12043273116.0, "step": 8020 }, { "epoch": 1.4297682709447415, "grad_norm": 0.177734375, "learning_rate": 5.574459092001563e-06, "loss": 1.018, "num_tokens": 12049510468.0, "step": 8021 }, { "epoch": 1.429946524064171, "grad_norm": 0.17578125, "learning_rate": 5.572386446194803e-06, "loss": 1.0053, "num_tokens": 12055793978.0, "step": 8022 }, { "epoch": 1.4301247771836008, "grad_norm": 0.1796875, "learning_rate": 5.570314252672228e-06, "loss": 1.0221, "num_tokens": 12062077101.0, "step": 8023 }, { "epoch": 1.4303030303030302, "grad_norm": 0.1845703125, "learning_rate": 5.568242511606512e-06, "loss": 1.0282, "num_tokens": 12068356205.0, "step": 8024 }, { "epoch": 1.4304812834224598, "grad_norm": 0.1767578125, "learning_rate": 5.566171223170298e-06, "loss": 1.0215, "num_tokens": 12074640460.0, "step": 8025 }, { "epoch": 1.4306595365418895, "grad_norm": 0.173828125, "learning_rate": 5.564100387536185e-06, "loss": 1.017, "num_tokens": 12080925636.0, "step": 8026 }, { "epoch": 1.4308377896613191, "grad_norm": 0.17578125, "learning_rate": 5.562030004876737e-06, "loss": 1.0128, "num_tokens": 12087190108.0, "step": 8027 }, { "epoch": 1.4310160427807488, "grad_norm": 0.1796875, "learning_rate": 5.55996007536448e-06, "loss": 0.9905, "num_tokens": 12093456560.0, "step": 8028 }, { "epoch": 1.4311942959001782, "grad_norm": 0.17578125, "learning_rate": 5.557890599171899e-06, "loss": 1.0284, "num_tokens": 12099703451.0, "step": 8029 }, { "epoch": 1.4313725490196079, "grad_norm": 0.173828125, "learning_rate": 5.5558215764714456e-06, "loss": 1.0331, "num_tokens": 12105960607.0, "step": 8030 }, { "epoch": 1.4315508021390375, "grad_norm": 0.1767578125, "learning_rate": 5.553753007435529e-06, "loss": 1.0146, "num_tokens": 12112243440.0, "step": 8031 }, { "epoch": 1.431729055258467, "grad_norm": 0.177734375, "learning_rate": 5.551684892236528e-06, "loss": 0.9892, "num_tokens": 12118518788.0, "step": 8032 }, { "epoch": 1.4319073083778966, "grad_norm": 0.1767578125, "learning_rate": 5.549617231046779e-06, "loss": 1.0214, "num_tokens": 12124802199.0, "step": 8033 }, { "epoch": 1.4320855614973262, "grad_norm": 0.177734375, "learning_rate": 5.547550024038577e-06, "loss": 1.0246, "num_tokens": 12131067289.0, "step": 8034 }, { "epoch": 1.4322638146167557, "grad_norm": 0.1767578125, "learning_rate": 5.5454832713841825e-06, "loss": 1.0054, "num_tokens": 12137327771.0, "step": 8035 }, { "epoch": 1.4324420677361853, "grad_norm": 0.1748046875, "learning_rate": 5.543416973255827e-06, "loss": 1.0484, "num_tokens": 12143583654.0, "step": 8036 }, { "epoch": 1.432620320855615, "grad_norm": 0.1728515625, "learning_rate": 5.541351129825685e-06, "loss": 1.0223, "num_tokens": 12149850366.0, "step": 8037 }, { "epoch": 1.4327985739750446, "grad_norm": 0.1787109375, "learning_rate": 5.539285741265905e-06, "loss": 1.047, "num_tokens": 12156074767.0, "step": 8038 }, { "epoch": 1.4329768270944743, "grad_norm": 0.1826171875, "learning_rate": 5.5372208077486025e-06, "loss": 1.0574, "num_tokens": 12162354628.0, "step": 8039 }, { "epoch": 1.4331550802139037, "grad_norm": 0.177734375, "learning_rate": 5.535156329445845e-06, "loss": 1.04, "num_tokens": 12168637349.0, "step": 8040 }, { "epoch": 1.4333333333333333, "grad_norm": 0.1767578125, "learning_rate": 5.533092306529666e-06, "loss": 1.0115, "num_tokens": 12174908389.0, "step": 8041 }, { "epoch": 1.433511586452763, "grad_norm": 0.1875, "learning_rate": 5.531028739172059e-06, "loss": 0.9974, "num_tokens": 12181190702.0, "step": 8042 }, { "epoch": 1.4336898395721924, "grad_norm": 0.1787109375, "learning_rate": 5.5289656275449885e-06, "loss": 1.0194, "num_tokens": 12187451509.0, "step": 8043 }, { "epoch": 1.433868092691622, "grad_norm": 0.1787109375, "learning_rate": 5.526902971820368e-06, "loss": 1.0279, "num_tokens": 12193735474.0, "step": 8044 }, { "epoch": 1.4340463458110517, "grad_norm": 0.1748046875, "learning_rate": 5.524840772170077e-06, "loss": 1.0382, "num_tokens": 12200012246.0, "step": 8045 }, { "epoch": 1.4342245989304812, "grad_norm": 0.177734375, "learning_rate": 5.522779028765965e-06, "loss": 1.0329, "num_tokens": 12206286477.0, "step": 8046 }, { "epoch": 1.4344028520499108, "grad_norm": 0.1826171875, "learning_rate": 5.520717741779834e-06, "loss": 0.9872, "num_tokens": 12212524166.0, "step": 8047 }, { "epoch": 1.4345811051693405, "grad_norm": 0.1748046875, "learning_rate": 5.518656911383455e-06, "loss": 1.0405, "num_tokens": 12218804740.0, "step": 8048 }, { "epoch": 1.43475935828877, "grad_norm": 0.181640625, "learning_rate": 5.516596537748549e-06, "loss": 1.0238, "num_tokens": 12225056246.0, "step": 8049 }, { "epoch": 1.4349376114081998, "grad_norm": 0.1796875, "learning_rate": 5.514536621046816e-06, "loss": 1.0082, "num_tokens": 12231313097.0, "step": 8050 }, { "epoch": 1.4351158645276292, "grad_norm": 0.1806640625, "learning_rate": 5.5124771614499115e-06, "loss": 1.0028, "num_tokens": 12237584746.0, "step": 8051 }, { "epoch": 1.4352941176470588, "grad_norm": 0.185546875, "learning_rate": 5.510418159129438e-06, "loss": 1.0147, "num_tokens": 12243869261.0, "step": 8052 }, { "epoch": 1.4354723707664885, "grad_norm": 0.173828125, "learning_rate": 5.508359614256984e-06, "loss": 1.0011, "num_tokens": 12250154556.0, "step": 8053 }, { "epoch": 1.435650623885918, "grad_norm": 0.171875, "learning_rate": 5.5063015270040855e-06, "loss": 1.0082, "num_tokens": 12256437865.0, "step": 8054 }, { "epoch": 1.4358288770053476, "grad_norm": 0.17578125, "learning_rate": 5.504243897542242e-06, "loss": 1.0405, "num_tokens": 12262660708.0, "step": 8055 }, { "epoch": 1.4360071301247772, "grad_norm": 0.17578125, "learning_rate": 5.502186726042913e-06, "loss": 1.0343, "num_tokens": 12268930110.0, "step": 8056 }, { "epoch": 1.4361853832442066, "grad_norm": 0.1845703125, "learning_rate": 5.500130012677532e-06, "loss": 0.9901, "num_tokens": 12275164058.0, "step": 8057 }, { "epoch": 1.4363636363636363, "grad_norm": 0.17578125, "learning_rate": 5.498073757617477e-06, "loss": 0.9906, "num_tokens": 12281416414.0, "step": 8058 }, { "epoch": 1.436541889483066, "grad_norm": 0.173828125, "learning_rate": 5.496017961034101e-06, "loss": 1.0224, "num_tokens": 12287701974.0, "step": 8059 }, { "epoch": 1.4367201426024956, "grad_norm": 0.1767578125, "learning_rate": 5.493962623098713e-06, "loss": 1.028, "num_tokens": 12293986057.0, "step": 8060 }, { "epoch": 1.4368983957219252, "grad_norm": 0.1767578125, "learning_rate": 5.491907743982583e-06, "loss": 1.0212, "num_tokens": 12300270247.0, "step": 8061 }, { "epoch": 1.4370766488413547, "grad_norm": 0.1728515625, "learning_rate": 5.4898533238569465e-06, "loss": 1.0271, "num_tokens": 12306528523.0, "step": 8062 }, { "epoch": 1.4372549019607843, "grad_norm": 0.1796875, "learning_rate": 5.487799362892994e-06, "loss": 1.0515, "num_tokens": 12312813880.0, "step": 8063 }, { "epoch": 1.437433155080214, "grad_norm": 0.171875, "learning_rate": 5.48574586126189e-06, "loss": 0.9999, "num_tokens": 12319070570.0, "step": 8064 }, { "epoch": 1.4376114081996434, "grad_norm": 0.1689453125, "learning_rate": 5.483692819134749e-06, "loss": 0.9999, "num_tokens": 12325325897.0, "step": 8065 }, { "epoch": 1.437789661319073, "grad_norm": 0.17578125, "learning_rate": 5.481640236682653e-06, "loss": 1.0055, "num_tokens": 12331609333.0, "step": 8066 }, { "epoch": 1.4379679144385027, "grad_norm": 0.1708984375, "learning_rate": 5.479588114076642e-06, "loss": 1.0315, "num_tokens": 12337893427.0, "step": 8067 }, { "epoch": 1.4381461675579323, "grad_norm": 0.1708984375, "learning_rate": 5.477536451487721e-06, "loss": 1.0194, "num_tokens": 12344176541.0, "step": 8068 }, { "epoch": 1.438324420677362, "grad_norm": 0.1806640625, "learning_rate": 5.475485249086856e-06, "loss": 1.0032, "num_tokens": 12350458091.0, "step": 8069 }, { "epoch": 1.4385026737967914, "grad_norm": 0.181640625, "learning_rate": 5.4734345070449706e-06, "loss": 1.0333, "num_tokens": 12356743621.0, "step": 8070 }, { "epoch": 1.438680926916221, "grad_norm": 0.1748046875, "learning_rate": 5.471384225532959e-06, "loss": 1.0059, "num_tokens": 12363013770.0, "step": 8071 }, { "epoch": 1.4388591800356507, "grad_norm": 0.1826171875, "learning_rate": 5.469334404721669e-06, "loss": 0.9984, "num_tokens": 12369251178.0, "step": 8072 }, { "epoch": 1.4390374331550801, "grad_norm": 0.1689453125, "learning_rate": 5.467285044781914e-06, "loss": 1.0149, "num_tokens": 12375536009.0, "step": 8073 }, { "epoch": 1.4392156862745098, "grad_norm": 0.1767578125, "learning_rate": 5.465236145884464e-06, "loss": 1.0115, "num_tokens": 12381766263.0, "step": 8074 }, { "epoch": 1.4393939393939394, "grad_norm": 0.171875, "learning_rate": 5.463187708200058e-06, "loss": 1.014, "num_tokens": 12388010823.0, "step": 8075 }, { "epoch": 1.4395721925133689, "grad_norm": 0.169921875, "learning_rate": 5.461139731899391e-06, "loss": 1.0348, "num_tokens": 12394267374.0, "step": 8076 }, { "epoch": 1.4397504456327985, "grad_norm": 0.1796875, "learning_rate": 5.459092217153118e-06, "loss": 1.0191, "num_tokens": 12400552524.0, "step": 8077 }, { "epoch": 1.4399286987522282, "grad_norm": 0.1748046875, "learning_rate": 5.457045164131866e-06, "loss": 1.0297, "num_tokens": 12406836175.0, "step": 8078 }, { "epoch": 1.4401069518716578, "grad_norm": 0.18359375, "learning_rate": 5.454998573006211e-06, "loss": 1.0015, "num_tokens": 12413093998.0, "step": 8079 }, { "epoch": 1.4402852049910875, "grad_norm": 0.177734375, "learning_rate": 5.4529524439467005e-06, "loss": 1.0026, "num_tokens": 12419373658.0, "step": 8080 }, { "epoch": 1.440463458110517, "grad_norm": 0.1865234375, "learning_rate": 5.450906777123832e-06, "loss": 1.0276, "num_tokens": 12425657139.0, "step": 8081 }, { "epoch": 1.4406417112299466, "grad_norm": 0.1796875, "learning_rate": 5.448861572708081e-06, "loss": 0.9893, "num_tokens": 12431941943.0, "step": 8082 }, { "epoch": 1.4408199643493762, "grad_norm": 0.17578125, "learning_rate": 5.446816830869865e-06, "loss": 1.0222, "num_tokens": 12438219933.0, "step": 8083 }, { "epoch": 1.4409982174688056, "grad_norm": 0.173828125, "learning_rate": 5.444772551779576e-06, "loss": 1.0122, "num_tokens": 12444504527.0, "step": 8084 }, { "epoch": 1.4411764705882353, "grad_norm": 0.1748046875, "learning_rate": 5.4427287356075676e-06, "loss": 1.0227, "num_tokens": 12450762408.0, "step": 8085 }, { "epoch": 1.441354723707665, "grad_norm": 0.17578125, "learning_rate": 5.440685382524148e-06, "loss": 1.0127, "num_tokens": 12457046544.0, "step": 8086 }, { "epoch": 1.4415329768270944, "grad_norm": 0.1767578125, "learning_rate": 5.438642492699591e-06, "loss": 1.0112, "num_tokens": 12463333007.0, "step": 8087 }, { "epoch": 1.441711229946524, "grad_norm": 0.17578125, "learning_rate": 5.436600066304129e-06, "loss": 1.0048, "num_tokens": 12469587295.0, "step": 8088 }, { "epoch": 1.4418894830659537, "grad_norm": 0.17578125, "learning_rate": 5.434558103507963e-06, "loss": 1.0159, "num_tokens": 12475871044.0, "step": 8089 }, { "epoch": 1.4420677361853833, "grad_norm": 0.1796875, "learning_rate": 5.432516604481249e-06, "loss": 1.0233, "num_tokens": 12482131947.0, "step": 8090 }, { "epoch": 1.442245989304813, "grad_norm": 0.1884765625, "learning_rate": 5.430475569394099e-06, "loss": 1.0229, "num_tokens": 12488414655.0, "step": 8091 }, { "epoch": 1.4424242424242424, "grad_norm": 0.173828125, "learning_rate": 5.428434998416602e-06, "loss": 0.9869, "num_tokens": 12494665787.0, "step": 8092 }, { "epoch": 1.442602495543672, "grad_norm": 0.1748046875, "learning_rate": 5.426394891718793e-06, "loss": 0.9923, "num_tokens": 12500949879.0, "step": 8093 }, { "epoch": 1.4427807486631017, "grad_norm": 0.177734375, "learning_rate": 5.424355249470677e-06, "loss": 1.0132, "num_tokens": 12507233600.0, "step": 8094 }, { "epoch": 1.4429590017825311, "grad_norm": 0.1728515625, "learning_rate": 5.422316071842214e-06, "loss": 1.0342, "num_tokens": 12513486271.0, "step": 8095 }, { "epoch": 1.4431372549019608, "grad_norm": 0.17578125, "learning_rate": 5.420277359003335e-06, "loss": 1.0089, "num_tokens": 12519771116.0, "step": 8096 }, { "epoch": 1.4433155080213904, "grad_norm": 0.173828125, "learning_rate": 5.418239111123927e-06, "loss": 0.994, "num_tokens": 12526055488.0, "step": 8097 }, { "epoch": 1.4434937611408198, "grad_norm": 0.1767578125, "learning_rate": 5.416201328373829e-06, "loss": 1.0374, "num_tokens": 12532340656.0, "step": 8098 }, { "epoch": 1.4436720142602495, "grad_norm": 0.173828125, "learning_rate": 5.414164010922858e-06, "loss": 1.0066, "num_tokens": 12538623275.0, "step": 8099 }, { "epoch": 1.4438502673796791, "grad_norm": 0.1787109375, "learning_rate": 5.412127158940781e-06, "loss": 1.0183, "num_tokens": 12544878342.0, "step": 8100 }, { "epoch": 1.4440285204991088, "grad_norm": 0.171875, "learning_rate": 5.410090772597329e-06, "loss": 1.0219, "num_tokens": 12551139875.0, "step": 8101 }, { "epoch": 1.4442067736185384, "grad_norm": 0.169921875, "learning_rate": 5.408054852062193e-06, "loss": 1.0255, "num_tokens": 12557424682.0, "step": 8102 }, { "epoch": 1.4443850267379679, "grad_norm": 0.1767578125, "learning_rate": 5.40601939750503e-06, "loss": 0.9716, "num_tokens": 12563661275.0, "step": 8103 }, { "epoch": 1.4445632798573975, "grad_norm": 0.1787109375, "learning_rate": 5.4039844090954555e-06, "loss": 1.0101, "num_tokens": 12569925319.0, "step": 8104 }, { "epoch": 1.4447415329768272, "grad_norm": 0.1806640625, "learning_rate": 5.401949887003044e-06, "loss": 1.0195, "num_tokens": 12576183376.0, "step": 8105 }, { "epoch": 1.4449197860962566, "grad_norm": 0.1796875, "learning_rate": 5.399915831397331e-06, "loss": 1.0086, "num_tokens": 12582467918.0, "step": 8106 }, { "epoch": 1.4450980392156862, "grad_norm": 0.1865234375, "learning_rate": 5.397882242447816e-06, "loss": 1.0599, "num_tokens": 12588735277.0, "step": 8107 }, { "epoch": 1.445276292335116, "grad_norm": 0.177734375, "learning_rate": 5.395849120323959e-06, "loss": 0.9938, "num_tokens": 12595010229.0, "step": 8108 }, { "epoch": 1.4454545454545453, "grad_norm": 0.171875, "learning_rate": 5.393816465195177e-06, "loss": 1.0452, "num_tokens": 12601276408.0, "step": 8109 }, { "epoch": 1.445632798573975, "grad_norm": 0.1728515625, "learning_rate": 5.391784277230858e-06, "loss": 1.0562, "num_tokens": 12607561148.0, "step": 8110 }, { "epoch": 1.4458110516934046, "grad_norm": 0.1767578125, "learning_rate": 5.3897525566003404e-06, "loss": 1.0365, "num_tokens": 12613845300.0, "step": 8111 }, { "epoch": 1.4459893048128343, "grad_norm": 0.1748046875, "learning_rate": 5.387721303472931e-06, "loss": 1.0359, "num_tokens": 12620108199.0, "step": 8112 }, { "epoch": 1.446167557932264, "grad_norm": 0.1806640625, "learning_rate": 5.38569051801789e-06, "loss": 1.0057, "num_tokens": 12626350337.0, "step": 8113 }, { "epoch": 1.4463458110516934, "grad_norm": 0.169921875, "learning_rate": 5.383660200404446e-06, "loss": 1.0183, "num_tokens": 12632635156.0, "step": 8114 }, { "epoch": 1.446524064171123, "grad_norm": 0.1787109375, "learning_rate": 5.381630350801785e-06, "loss": 1.0013, "num_tokens": 12638920248.0, "step": 8115 }, { "epoch": 1.4467023172905527, "grad_norm": 0.1728515625, "learning_rate": 5.379600969379052e-06, "loss": 1.0142, "num_tokens": 12645172132.0, "step": 8116 }, { "epoch": 1.446880570409982, "grad_norm": 0.177734375, "learning_rate": 5.377572056305362e-06, "loss": 1.0523, "num_tokens": 12651430322.0, "step": 8117 }, { "epoch": 1.4470588235294117, "grad_norm": 0.1845703125, "learning_rate": 5.375543611749783e-06, "loss": 1.0113, "num_tokens": 12657670873.0, "step": 8118 }, { "epoch": 1.4472370766488414, "grad_norm": 0.181640625, "learning_rate": 5.373515635881342e-06, "loss": 1.0273, "num_tokens": 12663930908.0, "step": 8119 }, { "epoch": 1.4474153297682708, "grad_norm": 0.173828125, "learning_rate": 5.371488128869035e-06, "loss": 1.0614, "num_tokens": 12670191047.0, "step": 8120 }, { "epoch": 1.4475935828877005, "grad_norm": 0.18359375, "learning_rate": 5.369461090881811e-06, "loss": 1.0401, "num_tokens": 12676474652.0, "step": 8121 }, { "epoch": 1.44777183600713, "grad_norm": 0.1650390625, "learning_rate": 5.367434522088586e-06, "loss": 0.9785, "num_tokens": 12682752061.0, "step": 8122 }, { "epoch": 1.4479500891265598, "grad_norm": 0.177734375, "learning_rate": 5.365408422658231e-06, "loss": 1.0114, "num_tokens": 12689005273.0, "step": 8123 }, { "epoch": 1.4481283422459894, "grad_norm": 0.1748046875, "learning_rate": 5.363382792759585e-06, "loss": 1.0282, "num_tokens": 12695288743.0, "step": 8124 }, { "epoch": 1.4483065953654188, "grad_norm": 0.1787109375, "learning_rate": 5.361357632561444e-06, "loss": 1.0043, "num_tokens": 12701563595.0, "step": 8125 }, { "epoch": 1.4484848484848485, "grad_norm": 0.1708984375, "learning_rate": 5.359332942232565e-06, "loss": 1.0326, "num_tokens": 12707804908.0, "step": 8126 }, { "epoch": 1.4486631016042781, "grad_norm": 0.1748046875, "learning_rate": 5.357308721941659e-06, "loss": 1.0283, "num_tokens": 12714088130.0, "step": 8127 }, { "epoch": 1.4488413547237076, "grad_norm": 0.177734375, "learning_rate": 5.355284971857419e-06, "loss": 1.0427, "num_tokens": 12720368257.0, "step": 8128 }, { "epoch": 1.4490196078431372, "grad_norm": 0.1767578125, "learning_rate": 5.353261692148473e-06, "loss": 1.0431, "num_tokens": 12726652888.0, "step": 8129 }, { "epoch": 1.4491978609625669, "grad_norm": 0.1787109375, "learning_rate": 5.35123888298342e-06, "loss": 1.0247, "num_tokens": 12732928776.0, "step": 8130 }, { "epoch": 1.4493761140819965, "grad_norm": 0.1845703125, "learning_rate": 5.34921654453083e-06, "loss": 1.0092, "num_tokens": 12739180438.0, "step": 8131 }, { "epoch": 1.449554367201426, "grad_norm": 0.17578125, "learning_rate": 5.34719467695922e-06, "loss": 0.999, "num_tokens": 12745432701.0, "step": 8132 }, { "epoch": 1.4497326203208556, "grad_norm": 0.177734375, "learning_rate": 5.345173280437073e-06, "loss": 1.008, "num_tokens": 12751705308.0, "step": 8133 }, { "epoch": 1.4499108734402852, "grad_norm": 0.171875, "learning_rate": 5.343152355132828e-06, "loss": 1.0332, "num_tokens": 12757954434.0, "step": 8134 }, { "epoch": 1.450089126559715, "grad_norm": 0.173828125, "learning_rate": 5.341131901214897e-06, "loss": 1.0458, "num_tokens": 12764237402.0, "step": 8135 }, { "epoch": 1.4502673796791443, "grad_norm": 0.17578125, "learning_rate": 5.339111918851646e-06, "loss": 1.0287, "num_tokens": 12770520786.0, "step": 8136 }, { "epoch": 1.450445632798574, "grad_norm": 0.17578125, "learning_rate": 5.337092408211393e-06, "loss": 1.0064, "num_tokens": 12776751089.0, "step": 8137 }, { "epoch": 1.4506238859180036, "grad_norm": 0.1748046875, "learning_rate": 5.335073369462423e-06, "loss": 1.0101, "num_tokens": 12783031363.0, "step": 8138 }, { "epoch": 1.450802139037433, "grad_norm": 0.181640625, "learning_rate": 5.33305480277299e-06, "loss": 1.0127, "num_tokens": 12789315467.0, "step": 8139 }, { "epoch": 1.4509803921568627, "grad_norm": 0.1767578125, "learning_rate": 5.331036708311299e-06, "loss": 1.0149, "num_tokens": 12795573257.0, "step": 8140 }, { "epoch": 1.4511586452762923, "grad_norm": 0.177734375, "learning_rate": 5.329019086245519e-06, "loss": 1.0484, "num_tokens": 12801855074.0, "step": 8141 }, { "epoch": 1.451336898395722, "grad_norm": 0.1787109375, "learning_rate": 5.327001936743774e-06, "loss": 1.0076, "num_tokens": 12808117893.0, "step": 8142 }, { "epoch": 1.4515151515151516, "grad_norm": 0.1748046875, "learning_rate": 5.324985259974165e-06, "loss": 1.0042, "num_tokens": 12814370825.0, "step": 8143 }, { "epoch": 1.451693404634581, "grad_norm": 0.1767578125, "learning_rate": 5.322969056104729e-06, "loss": 1.0375, "num_tokens": 12820655423.0, "step": 8144 }, { "epoch": 1.4518716577540107, "grad_norm": 0.1796875, "learning_rate": 5.3209533253034796e-06, "loss": 1.0144, "num_tokens": 12826938209.0, "step": 8145 }, { "epoch": 1.4520499108734404, "grad_norm": 0.16796875, "learning_rate": 5.318938067738395e-06, "loss": 1.0309, "num_tokens": 12833222752.0, "step": 8146 }, { "epoch": 1.4522281639928698, "grad_norm": 0.1748046875, "learning_rate": 5.316923283577401e-06, "loss": 1.0367, "num_tokens": 12839490107.0, "step": 8147 }, { "epoch": 1.4524064171122995, "grad_norm": 0.181640625, "learning_rate": 5.314908972988393e-06, "loss": 1.0249, "num_tokens": 12845733542.0, "step": 8148 }, { "epoch": 1.452584670231729, "grad_norm": 0.1787109375, "learning_rate": 5.312895136139218e-06, "loss": 1.0108, "num_tokens": 12852017464.0, "step": 8149 }, { "epoch": 1.4527629233511585, "grad_norm": 0.181640625, "learning_rate": 5.310881773197699e-06, "loss": 1.0194, "num_tokens": 12858272485.0, "step": 8150 }, { "epoch": 1.4529411764705882, "grad_norm": 0.1796875, "learning_rate": 5.308868884331607e-06, "loss": 0.9986, "num_tokens": 12864522197.0, "step": 8151 }, { "epoch": 1.4531194295900178, "grad_norm": 0.1708984375, "learning_rate": 5.306856469708669e-06, "loss": 1.036, "num_tokens": 12870753916.0, "step": 8152 }, { "epoch": 1.4532976827094475, "grad_norm": 0.17578125, "learning_rate": 5.304844529496586e-06, "loss": 0.9997, "num_tokens": 12877001026.0, "step": 8153 }, { "epoch": 1.4534759358288771, "grad_norm": 0.173828125, "learning_rate": 5.302833063863017e-06, "loss": 0.9996, "num_tokens": 12883286178.0, "step": 8154 }, { "epoch": 1.4536541889483066, "grad_norm": 0.185546875, "learning_rate": 5.300822072975571e-06, "loss": 1.0106, "num_tokens": 12889563938.0, "step": 8155 }, { "epoch": 1.4538324420677362, "grad_norm": 0.181640625, "learning_rate": 5.298811557001824e-06, "loss": 1.0218, "num_tokens": 12895806616.0, "step": 8156 }, { "epoch": 1.4540106951871659, "grad_norm": 0.173828125, "learning_rate": 5.2968015161093204e-06, "loss": 1.0245, "num_tokens": 12902089683.0, "step": 8157 }, { "epoch": 1.4541889483065953, "grad_norm": 0.169921875, "learning_rate": 5.294791950465551e-06, "loss": 1.0095, "num_tokens": 12908373437.0, "step": 8158 }, { "epoch": 1.454367201426025, "grad_norm": 0.17578125, "learning_rate": 5.292782860237975e-06, "loss": 0.9913, "num_tokens": 12914651624.0, "step": 8159 }, { "epoch": 1.4545454545454546, "grad_norm": 0.1728515625, "learning_rate": 5.2907742455940105e-06, "loss": 1.0348, "num_tokens": 12920909891.0, "step": 8160 }, { "epoch": 1.454723707664884, "grad_norm": 0.1728515625, "learning_rate": 5.2887661067010355e-06, "loss": 1.0208, "num_tokens": 12927177005.0, "step": 8161 }, { "epoch": 1.4549019607843137, "grad_norm": 0.1865234375, "learning_rate": 5.2867584437263876e-06, "loss": 0.9916, "num_tokens": 12933423627.0, "step": 8162 }, { "epoch": 1.4550802139037433, "grad_norm": 0.1787109375, "learning_rate": 5.284751256837366e-06, "loss": 1.0469, "num_tokens": 12939707530.0, "step": 8163 }, { "epoch": 1.455258467023173, "grad_norm": 0.1748046875, "learning_rate": 5.282744546201233e-06, "loss": 1.0233, "num_tokens": 12945959167.0, "step": 8164 }, { "epoch": 1.4554367201426026, "grad_norm": 0.171875, "learning_rate": 5.280738311985205e-06, "loss": 0.9882, "num_tokens": 12952242264.0, "step": 8165 }, { "epoch": 1.455614973262032, "grad_norm": 0.173828125, "learning_rate": 5.278732554356465e-06, "loss": 1.0425, "num_tokens": 12958525488.0, "step": 8166 }, { "epoch": 1.4557932263814617, "grad_norm": 0.17578125, "learning_rate": 5.27672727348215e-06, "loss": 1.0145, "num_tokens": 12964796323.0, "step": 8167 }, { "epoch": 1.4559714795008913, "grad_norm": 0.1728515625, "learning_rate": 5.274722469529361e-06, "loss": 1.0125, "num_tokens": 12971079676.0, "step": 8168 }, { "epoch": 1.4561497326203208, "grad_norm": 0.177734375, "learning_rate": 5.272718142665161e-06, "loss": 1.0007, "num_tokens": 12977318284.0, "step": 8169 }, { "epoch": 1.4563279857397504, "grad_norm": 0.1826171875, "learning_rate": 5.270714293056564e-06, "loss": 1.0462, "num_tokens": 12983603350.0, "step": 8170 }, { "epoch": 1.45650623885918, "grad_norm": 0.1826171875, "learning_rate": 5.268710920870563e-06, "loss": 1.02, "num_tokens": 12989833667.0, "step": 8171 }, { "epoch": 1.4566844919786095, "grad_norm": 0.1748046875, "learning_rate": 5.26670802627409e-06, "loss": 1.0384, "num_tokens": 12996094198.0, "step": 8172 }, { "epoch": 1.4568627450980391, "grad_norm": 0.1796875, "learning_rate": 5.264705609434051e-06, "loss": 1.0451, "num_tokens": 13002379011.0, "step": 8173 }, { "epoch": 1.4570409982174688, "grad_norm": 0.171875, "learning_rate": 5.262703670517306e-06, "loss": 1.0124, "num_tokens": 13008648480.0, "step": 8174 }, { "epoch": 1.4572192513368984, "grad_norm": 0.181640625, "learning_rate": 5.260702209690678e-06, "loss": 0.9995, "num_tokens": 13014901820.0, "step": 8175 }, { "epoch": 1.457397504456328, "grad_norm": 0.171875, "learning_rate": 5.258701227120949e-06, "loss": 1.0083, "num_tokens": 13021163897.0, "step": 8176 }, { "epoch": 1.4575757575757575, "grad_norm": 0.181640625, "learning_rate": 5.256700722974857e-06, "loss": 1.0064, "num_tokens": 13027448944.0, "step": 8177 }, { "epoch": 1.4577540106951872, "grad_norm": 0.1787109375, "learning_rate": 5.254700697419113e-06, "loss": 1.0275, "num_tokens": 13033732324.0, "step": 8178 }, { "epoch": 1.4579322638146168, "grad_norm": 0.1767578125, "learning_rate": 5.252701150620373e-06, "loss": 1.0505, "num_tokens": 13039992891.0, "step": 8179 }, { "epoch": 1.4581105169340463, "grad_norm": 0.1787109375, "learning_rate": 5.250702082745263e-06, "loss": 1.0213, "num_tokens": 13046276009.0, "step": 8180 }, { "epoch": 1.458288770053476, "grad_norm": 0.1748046875, "learning_rate": 5.248703493960361e-06, "loss": 0.9963, "num_tokens": 13052560951.0, "step": 8181 }, { "epoch": 1.4584670231729056, "grad_norm": 0.1748046875, "learning_rate": 5.246705384432218e-06, "loss": 1.0238, "num_tokens": 13058822900.0, "step": 8182 }, { "epoch": 1.458645276292335, "grad_norm": 0.181640625, "learning_rate": 5.24470775432733e-06, "loss": 1.0508, "num_tokens": 13065106768.0, "step": 8183 }, { "epoch": 1.4588235294117646, "grad_norm": 0.1787109375, "learning_rate": 5.2427106038121575e-06, "loss": 1.019, "num_tokens": 13071338883.0, "step": 8184 }, { "epoch": 1.4590017825311943, "grad_norm": 0.1708984375, "learning_rate": 5.240713933053133e-06, "loss": 1.0126, "num_tokens": 13077604180.0, "step": 8185 }, { "epoch": 1.459180035650624, "grad_norm": 0.1728515625, "learning_rate": 5.238717742216634e-06, "loss": 1.0323, "num_tokens": 13083888041.0, "step": 8186 }, { "epoch": 1.4593582887700536, "grad_norm": 0.171875, "learning_rate": 5.236722031469004e-06, "loss": 1.0347, "num_tokens": 13090172418.0, "step": 8187 }, { "epoch": 1.459536541889483, "grad_norm": 0.1787109375, "learning_rate": 5.2347268009765424e-06, "loss": 1.0036, "num_tokens": 13096456138.0, "step": 8188 }, { "epoch": 1.4597147950089127, "grad_norm": 0.177734375, "learning_rate": 5.232732050905522e-06, "loss": 0.9916, "num_tokens": 13102726450.0, "step": 8189 }, { "epoch": 1.4598930481283423, "grad_norm": 0.1767578125, "learning_rate": 5.230737781422157e-06, "loss": 1.013, "num_tokens": 13108995869.0, "step": 8190 }, { "epoch": 1.4600713012477717, "grad_norm": 0.173828125, "learning_rate": 5.228743992692629e-06, "loss": 1.022, "num_tokens": 13115277739.0, "step": 8191 }, { "epoch": 1.4602495543672014, "grad_norm": 0.169921875, "learning_rate": 5.2267506848830885e-06, "loss": 1.0568, "num_tokens": 13121542754.0, "step": 8192 }, { "epoch": 1.460427807486631, "grad_norm": 0.173828125, "learning_rate": 5.224757858159633e-06, "loss": 1.0428, "num_tokens": 13127826406.0, "step": 8193 }, { "epoch": 1.4606060606060607, "grad_norm": 0.173828125, "learning_rate": 5.222765512688328e-06, "loss": 0.9896, "num_tokens": 13134064903.0, "step": 8194 }, { "epoch": 1.4607843137254901, "grad_norm": 0.1796875, "learning_rate": 5.220773648635191e-06, "loss": 0.9992, "num_tokens": 13140320446.0, "step": 8195 }, { "epoch": 1.4609625668449198, "grad_norm": 0.173828125, "learning_rate": 5.218782266166211e-06, "loss": 1.0054, "num_tokens": 13146605402.0, "step": 8196 }, { "epoch": 1.4611408199643494, "grad_norm": 0.181640625, "learning_rate": 5.216791365447331e-06, "loss": 1.0542, "num_tokens": 13152889830.0, "step": 8197 }, { "epoch": 1.461319073083779, "grad_norm": 0.17578125, "learning_rate": 5.2148009466444424e-06, "loss": 1.0205, "num_tokens": 13159171594.0, "step": 8198 }, { "epoch": 1.4614973262032085, "grad_norm": 0.1767578125, "learning_rate": 5.2128110099234174e-06, "loss": 1.0192, "num_tokens": 13165456034.0, "step": 8199 }, { "epoch": 1.4616755793226381, "grad_norm": 0.16796875, "learning_rate": 5.2108215554500766e-06, "loss": 1.0008, "num_tokens": 13171719653.0, "step": 8200 }, { "epoch": 1.4618538324420678, "grad_norm": 0.1845703125, "learning_rate": 5.2088325833901986e-06, "loss": 1.0319, "num_tokens": 13178000779.0, "step": 8201 }, { "epoch": 1.4620320855614972, "grad_norm": 0.177734375, "learning_rate": 5.206844093909523e-06, "loss": 1.0079, "num_tokens": 13184284814.0, "step": 8202 }, { "epoch": 1.4622103386809269, "grad_norm": 0.169921875, "learning_rate": 5.204856087173757e-06, "loss": 0.9836, "num_tokens": 13190568019.0, "step": 8203 }, { "epoch": 1.4623885918003565, "grad_norm": 0.1806640625, "learning_rate": 5.202868563348562e-06, "loss": 1.0042, "num_tokens": 13196853018.0, "step": 8204 }, { "epoch": 1.4625668449197862, "grad_norm": 0.17578125, "learning_rate": 5.200881522599551e-06, "loss": 1.0019, "num_tokens": 13203136468.0, "step": 8205 }, { "epoch": 1.4627450980392158, "grad_norm": 0.1787109375, "learning_rate": 5.198894965092312e-06, "loss": 1.0543, "num_tokens": 13209420702.0, "step": 8206 }, { "epoch": 1.4629233511586452, "grad_norm": 0.1796875, "learning_rate": 5.196908890992383e-06, "loss": 1.0256, "num_tokens": 13215693568.0, "step": 8207 }, { "epoch": 1.463101604278075, "grad_norm": 0.1796875, "learning_rate": 5.194923300465261e-06, "loss": 1.0453, "num_tokens": 13221976337.0, "step": 8208 }, { "epoch": 1.4632798573975045, "grad_norm": 0.1728515625, "learning_rate": 5.1929381936764064e-06, "loss": 0.9988, "num_tokens": 13228232978.0, "step": 8209 }, { "epoch": 1.463458110516934, "grad_norm": 0.17578125, "learning_rate": 5.190953570791243e-06, "loss": 1.0173, "num_tokens": 13234486381.0, "step": 8210 }, { "epoch": 1.4636363636363636, "grad_norm": 0.1728515625, "learning_rate": 5.188969431975146e-06, "loss": 1.0326, "num_tokens": 13240765362.0, "step": 8211 }, { "epoch": 1.4638146167557933, "grad_norm": 0.173828125, "learning_rate": 5.186985777393456e-06, "loss": 1.02, "num_tokens": 13247049555.0, "step": 8212 }, { "epoch": 1.4639928698752227, "grad_norm": 0.169921875, "learning_rate": 5.185002607211471e-06, "loss": 1.018, "num_tokens": 13253314234.0, "step": 8213 }, { "epoch": 1.4641711229946524, "grad_norm": 0.17578125, "learning_rate": 5.183019921594448e-06, "loss": 1.0151, "num_tokens": 13259568948.0, "step": 8214 }, { "epoch": 1.464349376114082, "grad_norm": 0.1865234375, "learning_rate": 5.181037720707607e-06, "loss": 1.0235, "num_tokens": 13265822688.0, "step": 8215 }, { "epoch": 1.4645276292335117, "grad_norm": 0.1787109375, "learning_rate": 5.1790560047161166e-06, "loss": 1.0263, "num_tokens": 13272049948.0, "step": 8216 }, { "epoch": 1.4647058823529413, "grad_norm": 0.169921875, "learning_rate": 5.1770747737851265e-06, "loss": 1.0526, "num_tokens": 13278332584.0, "step": 8217 }, { "epoch": 1.4648841354723707, "grad_norm": 0.1748046875, "learning_rate": 5.175094028079726e-06, "loss": 1.0313, "num_tokens": 13284615901.0, "step": 8218 }, { "epoch": 1.4650623885918004, "grad_norm": 0.1787109375, "learning_rate": 5.1731137677649725e-06, "loss": 1.0112, "num_tokens": 13290900036.0, "step": 8219 }, { "epoch": 1.46524064171123, "grad_norm": 0.1767578125, "learning_rate": 5.17113399300588e-06, "loss": 1.0414, "num_tokens": 13297184645.0, "step": 8220 }, { "epoch": 1.4654188948306595, "grad_norm": 0.1728515625, "learning_rate": 5.169154703967425e-06, "loss": 1.0223, "num_tokens": 13303446806.0, "step": 8221 }, { "epoch": 1.465597147950089, "grad_norm": 0.1728515625, "learning_rate": 5.1671759008145404e-06, "loss": 1.0076, "num_tokens": 13309730351.0, "step": 8222 }, { "epoch": 1.4657754010695188, "grad_norm": 0.173828125, "learning_rate": 5.1651975837121204e-06, "loss": 1.0523, "num_tokens": 13316006412.0, "step": 8223 }, { "epoch": 1.4659536541889482, "grad_norm": 0.1767578125, "learning_rate": 5.163219752825022e-06, "loss": 1.0038, "num_tokens": 13322254735.0, "step": 8224 }, { "epoch": 1.4661319073083778, "grad_norm": 0.181640625, "learning_rate": 5.161242408318057e-06, "loss": 1.0284, "num_tokens": 13328489349.0, "step": 8225 }, { "epoch": 1.4663101604278075, "grad_norm": 0.1767578125, "learning_rate": 5.159265550355997e-06, "loss": 1.0144, "num_tokens": 13334773380.0, "step": 8226 }, { "epoch": 1.4664884135472371, "grad_norm": 0.169921875, "learning_rate": 5.157289179103569e-06, "loss": 1.0369, "num_tokens": 13341033879.0, "step": 8227 }, { "epoch": 1.4666666666666668, "grad_norm": 0.1728515625, "learning_rate": 5.155313294725476e-06, "loss": 1.0425, "num_tokens": 13347318278.0, "step": 8228 }, { "epoch": 1.4668449197860962, "grad_norm": 0.1767578125, "learning_rate": 5.153337897386359e-06, "loss": 1.0146, "num_tokens": 13353601768.0, "step": 8229 }, { "epoch": 1.4670231729055259, "grad_norm": 0.1806640625, "learning_rate": 5.151362987250829e-06, "loss": 1.0463, "num_tokens": 13359884678.0, "step": 8230 }, { "epoch": 1.4672014260249555, "grad_norm": 0.1728515625, "learning_rate": 5.149388564483462e-06, "loss": 1.0273, "num_tokens": 13366153932.0, "step": 8231 }, { "epoch": 1.467379679144385, "grad_norm": 0.1787109375, "learning_rate": 5.147414629248782e-06, "loss": 1.038, "num_tokens": 13372437743.0, "step": 8232 }, { "epoch": 1.4675579322638146, "grad_norm": 0.1748046875, "learning_rate": 5.145441181711279e-06, "loss": 1.0286, "num_tokens": 13378720193.0, "step": 8233 }, { "epoch": 1.4677361853832442, "grad_norm": 0.177734375, "learning_rate": 5.1434682220353985e-06, "loss": 1.0365, "num_tokens": 13384994443.0, "step": 8234 }, { "epoch": 1.4679144385026737, "grad_norm": 0.17578125, "learning_rate": 5.1414957503855515e-06, "loss": 1.0075, "num_tokens": 13391276706.0, "step": 8235 }, { "epoch": 1.4680926916221033, "grad_norm": 0.1748046875, "learning_rate": 5.139523766926106e-06, "loss": 0.9854, "num_tokens": 13397562082.0, "step": 8236 }, { "epoch": 1.468270944741533, "grad_norm": 0.1767578125, "learning_rate": 5.137552271821378e-06, "loss": 0.996, "num_tokens": 13403836163.0, "step": 8237 }, { "epoch": 1.4684491978609626, "grad_norm": 0.173828125, "learning_rate": 5.135581265235663e-06, "loss": 1.0339, "num_tokens": 13410118224.0, "step": 8238 }, { "epoch": 1.4686274509803923, "grad_norm": 0.1767578125, "learning_rate": 5.1336107473331995e-06, "loss": 1.0257, "num_tokens": 13416400905.0, "step": 8239 }, { "epoch": 1.4688057040998217, "grad_norm": 0.1708984375, "learning_rate": 5.131640718278194e-06, "loss": 1.0178, "num_tokens": 13422658619.0, "step": 8240 }, { "epoch": 1.4689839572192513, "grad_norm": 0.1748046875, "learning_rate": 5.129671178234806e-06, "loss": 1.001, "num_tokens": 13428942212.0, "step": 8241 }, { "epoch": 1.469162210338681, "grad_norm": 0.1767578125, "learning_rate": 5.127702127367162e-06, "loss": 1.0197, "num_tokens": 13435209818.0, "step": 8242 }, { "epoch": 1.4693404634581104, "grad_norm": 0.1748046875, "learning_rate": 5.125733565839346e-06, "loss": 1.0438, "num_tokens": 13441493420.0, "step": 8243 }, { "epoch": 1.46951871657754, "grad_norm": 0.1728515625, "learning_rate": 5.123765493815388e-06, "loss": 1.0159, "num_tokens": 13447775656.0, "step": 8244 }, { "epoch": 1.4696969696969697, "grad_norm": 0.166015625, "learning_rate": 5.121797911459296e-06, "loss": 1.0141, "num_tokens": 13454058644.0, "step": 8245 }, { "epoch": 1.4698752228163992, "grad_norm": 0.173828125, "learning_rate": 5.11983081893503e-06, "loss": 1.0194, "num_tokens": 13460343764.0, "step": 8246 }, { "epoch": 1.4700534759358288, "grad_norm": 0.1669921875, "learning_rate": 5.1178642164065025e-06, "loss": 1.021, "num_tokens": 13466626492.0, "step": 8247 }, { "epoch": 1.4702317290552585, "grad_norm": 0.1728515625, "learning_rate": 5.115898104037593e-06, "loss": 1.0239, "num_tokens": 13472909179.0, "step": 8248 }, { "epoch": 1.470409982174688, "grad_norm": 0.17578125, "learning_rate": 5.11393248199214e-06, "loss": 1.0205, "num_tokens": 13479190287.0, "step": 8249 }, { "epoch": 1.4705882352941178, "grad_norm": 0.1845703125, "learning_rate": 5.11196735043394e-06, "loss": 1.0495, "num_tokens": 13485474271.0, "step": 8250 }, { "epoch": 1.4707664884135472, "grad_norm": 0.173828125, "learning_rate": 5.110002709526745e-06, "loss": 1.0206, "num_tokens": 13491745306.0, "step": 8251 }, { "epoch": 1.4709447415329768, "grad_norm": 0.1708984375, "learning_rate": 5.1080385594342686e-06, "loss": 1.0325, "num_tokens": 13498015729.0, "step": 8252 }, { "epoch": 1.4711229946524065, "grad_norm": 0.1787109375, "learning_rate": 5.106074900320186e-06, "loss": 1.002, "num_tokens": 13504300049.0, "step": 8253 }, { "epoch": 1.471301247771836, "grad_norm": 0.169921875, "learning_rate": 5.1041117323481294e-06, "loss": 1.0252, "num_tokens": 13510546970.0, "step": 8254 }, { "epoch": 1.4714795008912656, "grad_norm": 0.21484375, "learning_rate": 5.102149055681684e-06, "loss": 1.0111, "num_tokens": 13516801037.0, "step": 8255 }, { "epoch": 1.4716577540106952, "grad_norm": 0.177734375, "learning_rate": 5.1001868704844104e-06, "loss": 1.0215, "num_tokens": 13523085414.0, "step": 8256 }, { "epoch": 1.4718360071301249, "grad_norm": 0.173828125, "learning_rate": 5.098225176919811e-06, "loss": 1.0301, "num_tokens": 13529359631.0, "step": 8257 }, { "epoch": 1.4720142602495543, "grad_norm": 0.173828125, "learning_rate": 5.096263975151356e-06, "loss": 1.0149, "num_tokens": 13535643627.0, "step": 8258 }, { "epoch": 1.472192513368984, "grad_norm": 0.177734375, "learning_rate": 5.094303265342473e-06, "loss": 0.9939, "num_tokens": 13541927607.0, "step": 8259 }, { "epoch": 1.4723707664884136, "grad_norm": 0.1728515625, "learning_rate": 5.0923430476565474e-06, "loss": 1.0275, "num_tokens": 13548212323.0, "step": 8260 }, { "epoch": 1.4725490196078432, "grad_norm": 0.1748046875, "learning_rate": 5.0903833222569244e-06, "loss": 1.0172, "num_tokens": 13554492490.0, "step": 8261 }, { "epoch": 1.4727272727272727, "grad_norm": 0.1787109375, "learning_rate": 5.088424089306907e-06, "loss": 0.9996, "num_tokens": 13560775957.0, "step": 8262 }, { "epoch": 1.4729055258467023, "grad_norm": 0.1806640625, "learning_rate": 5.086465348969764e-06, "loss": 1.0502, "num_tokens": 13567037351.0, "step": 8263 }, { "epoch": 1.473083778966132, "grad_norm": 0.1796875, "learning_rate": 5.084507101408713e-06, "loss": 1.0123, "num_tokens": 13573319710.0, "step": 8264 }, { "epoch": 1.4732620320855614, "grad_norm": 0.1689453125, "learning_rate": 5.082549346786938e-06, "loss": 1.0164, "num_tokens": 13579544147.0, "step": 8265 }, { "epoch": 1.473440285204991, "grad_norm": 0.17578125, "learning_rate": 5.080592085267575e-06, "loss": 0.9811, "num_tokens": 13585825975.0, "step": 8266 }, { "epoch": 1.4736185383244207, "grad_norm": 0.1787109375, "learning_rate": 5.078635317013727e-06, "loss": 1.0212, "num_tokens": 13592095792.0, "step": 8267 }, { "epoch": 1.4737967914438503, "grad_norm": 0.1806640625, "learning_rate": 5.0766790421884505e-06, "loss": 1.0084, "num_tokens": 13598378825.0, "step": 8268 }, { "epoch": 1.47397504456328, "grad_norm": 0.177734375, "learning_rate": 5.074723260954757e-06, "loss": 1.0485, "num_tokens": 13604649608.0, "step": 8269 }, { "epoch": 1.4741532976827094, "grad_norm": 0.1728515625, "learning_rate": 5.072767973475632e-06, "loss": 1.0021, "num_tokens": 13610933982.0, "step": 8270 }, { "epoch": 1.474331550802139, "grad_norm": 0.1787109375, "learning_rate": 5.070813179914005e-06, "loss": 1.019, "num_tokens": 13617204979.0, "step": 8271 }, { "epoch": 1.4745098039215687, "grad_norm": 0.17578125, "learning_rate": 5.068858880432768e-06, "loss": 1.0241, "num_tokens": 13623488842.0, "step": 8272 }, { "epoch": 1.4746880570409981, "grad_norm": 0.171875, "learning_rate": 5.066905075194775e-06, "loss": 1.0055, "num_tokens": 13629772706.0, "step": 8273 }, { "epoch": 1.4748663101604278, "grad_norm": 0.1748046875, "learning_rate": 5.06495176436284e-06, "loss": 1.0131, "num_tokens": 13636056634.0, "step": 8274 }, { "epoch": 1.4750445632798574, "grad_norm": 0.1708984375, "learning_rate": 5.0629989480997284e-06, "loss": 0.9993, "num_tokens": 13642308388.0, "step": 8275 }, { "epoch": 1.4752228163992869, "grad_norm": 0.1884765625, "learning_rate": 5.061046626568167e-06, "loss": 1.0099, "num_tokens": 13648577869.0, "step": 8276 }, { "epoch": 1.4754010695187165, "grad_norm": 0.1748046875, "learning_rate": 5.059094799930848e-06, "loss": 1.0199, "num_tokens": 13654828198.0, "step": 8277 }, { "epoch": 1.4755793226381462, "grad_norm": 0.1748046875, "learning_rate": 5.057143468350416e-06, "loss": 1.0164, "num_tokens": 13661078691.0, "step": 8278 }, { "epoch": 1.4757575757575758, "grad_norm": 0.1767578125, "learning_rate": 5.055192631989477e-06, "loss": 0.999, "num_tokens": 13667363169.0, "step": 8279 }, { "epoch": 1.4759358288770055, "grad_norm": 0.171875, "learning_rate": 5.053242291010589e-06, "loss": 1.0055, "num_tokens": 13673646524.0, "step": 8280 }, { "epoch": 1.476114081996435, "grad_norm": 0.171875, "learning_rate": 5.051292445576281e-06, "loss": 1.001, "num_tokens": 13679926892.0, "step": 8281 }, { "epoch": 1.4762923351158646, "grad_norm": 0.1806640625, "learning_rate": 5.0493430958490364e-06, "loss": 1.014, "num_tokens": 13686210581.0, "step": 8282 }, { "epoch": 1.4764705882352942, "grad_norm": 0.1767578125, "learning_rate": 5.047394241991283e-06, "loss": 1.016, "num_tokens": 13692467994.0, "step": 8283 }, { "epoch": 1.4766488413547236, "grad_norm": 0.17578125, "learning_rate": 5.045445884165429e-06, "loss": 1.0142, "num_tokens": 13698725852.0, "step": 8284 }, { "epoch": 1.4768270944741533, "grad_norm": 0.1796875, "learning_rate": 5.043498022533829e-06, "loss": 1.0227, "num_tokens": 13705008889.0, "step": 8285 }, { "epoch": 1.477005347593583, "grad_norm": 0.1708984375, "learning_rate": 5.041550657258798e-06, "loss": 1.0171, "num_tokens": 13711293461.0, "step": 8286 }, { "epoch": 1.4771836007130124, "grad_norm": 0.17578125, "learning_rate": 5.03960378850261e-06, "loss": 1.0262, "num_tokens": 13717559058.0, "step": 8287 }, { "epoch": 1.477361853832442, "grad_norm": 0.1708984375, "learning_rate": 5.037657416427499e-06, "loss": 1.0093, "num_tokens": 13723843502.0, "step": 8288 }, { "epoch": 1.4775401069518717, "grad_norm": 0.1767578125, "learning_rate": 5.035711541195661e-06, "loss": 1.04, "num_tokens": 13730094042.0, "step": 8289 }, { "epoch": 1.4777183600713013, "grad_norm": 0.1767578125, "learning_rate": 5.033766162969238e-06, "loss": 1.0267, "num_tokens": 13736354005.0, "step": 8290 }, { "epoch": 1.477896613190731, "grad_norm": 0.169921875, "learning_rate": 5.0318212819103405e-06, "loss": 1.0251, "num_tokens": 13742639045.0, "step": 8291 }, { "epoch": 1.4780748663101604, "grad_norm": 0.169921875, "learning_rate": 5.029876898181041e-06, "loss": 0.9824, "num_tokens": 13748924520.0, "step": 8292 }, { "epoch": 1.47825311942959, "grad_norm": 0.1689453125, "learning_rate": 5.02793301194336e-06, "loss": 1.0095, "num_tokens": 13755186922.0, "step": 8293 }, { "epoch": 1.4784313725490197, "grad_norm": 0.17578125, "learning_rate": 5.025989623359282e-06, "loss": 1.0246, "num_tokens": 13761442263.0, "step": 8294 }, { "epoch": 1.4786096256684491, "grad_norm": 0.173828125, "learning_rate": 5.024046732590755e-06, "loss": 1.0318, "num_tokens": 13767707907.0, "step": 8295 }, { "epoch": 1.4787878787878788, "grad_norm": 0.1689453125, "learning_rate": 5.022104339799678e-06, "loss": 1.0377, "num_tokens": 13773981610.0, "step": 8296 }, { "epoch": 1.4789661319073084, "grad_norm": 0.1708984375, "learning_rate": 5.020162445147914e-06, "loss": 1.0065, "num_tokens": 13780266648.0, "step": 8297 }, { "epoch": 1.4791443850267378, "grad_norm": 0.177734375, "learning_rate": 5.018221048797273e-06, "loss": 1.0245, "num_tokens": 13786542169.0, "step": 8298 }, { "epoch": 1.4793226381461675, "grad_norm": 0.1767578125, "learning_rate": 5.016280150909538e-06, "loss": 1.0031, "num_tokens": 13792826200.0, "step": 8299 }, { "epoch": 1.4795008912655971, "grad_norm": 0.171875, "learning_rate": 5.014339751646444e-06, "loss": 1.0012, "num_tokens": 13799109769.0, "step": 8300 }, { "epoch": 1.4796791443850268, "grad_norm": 0.169921875, "learning_rate": 5.0123998511696845e-06, "loss": 0.9915, "num_tokens": 13805390816.0, "step": 8301 }, { "epoch": 1.4798573975044564, "grad_norm": 0.1728515625, "learning_rate": 5.010460449640909e-06, "loss": 1.0423, "num_tokens": 13811674030.0, "step": 8302 }, { "epoch": 1.4800356506238859, "grad_norm": 0.173828125, "learning_rate": 5.0085215472217345e-06, "loss": 1.0123, "num_tokens": 13817959194.0, "step": 8303 }, { "epoch": 1.4802139037433155, "grad_norm": 0.173828125, "learning_rate": 5.006583144073725e-06, "loss": 0.9846, "num_tokens": 13824243553.0, "step": 8304 }, { "epoch": 1.4803921568627452, "grad_norm": 0.1748046875, "learning_rate": 5.00464524035841e-06, "loss": 1.0114, "num_tokens": 13830484923.0, "step": 8305 }, { "epoch": 1.4805704099821746, "grad_norm": 0.1708984375, "learning_rate": 5.002707836237275e-06, "loss": 1.0236, "num_tokens": 13836768173.0, "step": 8306 }, { "epoch": 1.4807486631016042, "grad_norm": 0.171875, "learning_rate": 5.000770931871766e-06, "loss": 1.0484, "num_tokens": 13843052304.0, "step": 8307 }, { "epoch": 1.480926916221034, "grad_norm": 0.1689453125, "learning_rate": 4.998834527423283e-06, "loss": 1.0217, "num_tokens": 13849332896.0, "step": 8308 }, { "epoch": 1.4811051693404633, "grad_norm": 0.1708984375, "learning_rate": 4.996898623053184e-06, "loss": 0.9851, "num_tokens": 13855618392.0, "step": 8309 }, { "epoch": 1.481283422459893, "grad_norm": 0.1787109375, "learning_rate": 4.9949632189227965e-06, "loss": 1.0305, "num_tokens": 13861884881.0, "step": 8310 }, { "epoch": 1.4814616755793226, "grad_norm": 0.177734375, "learning_rate": 4.993028315193393e-06, "loss": 1.0361, "num_tokens": 13868132732.0, "step": 8311 }, { "epoch": 1.4816399286987523, "grad_norm": 0.1767578125, "learning_rate": 4.991093912026212e-06, "loss": 1.0252, "num_tokens": 13874402157.0, "step": 8312 }, { "epoch": 1.481818181818182, "grad_norm": 0.1728515625, "learning_rate": 4.989160009582445e-06, "loss": 0.9969, "num_tokens": 13880658705.0, "step": 8313 }, { "epoch": 1.4819964349376114, "grad_norm": 0.1748046875, "learning_rate": 4.9872266080232445e-06, "loss": 1.0512, "num_tokens": 13886913208.0, "step": 8314 }, { "epoch": 1.482174688057041, "grad_norm": 0.173828125, "learning_rate": 4.985293707509724e-06, "loss": 1.0128, "num_tokens": 13893196632.0, "step": 8315 }, { "epoch": 1.4823529411764707, "grad_norm": 0.169921875, "learning_rate": 4.983361308202948e-06, "loss": 0.998, "num_tokens": 13899479524.0, "step": 8316 }, { "epoch": 1.4825311942959, "grad_norm": 0.177734375, "learning_rate": 4.981429410263948e-06, "loss": 1.0413, "num_tokens": 13905762271.0, "step": 8317 }, { "epoch": 1.4827094474153297, "grad_norm": 0.1728515625, "learning_rate": 4.979498013853709e-06, "loss": 1.0074, "num_tokens": 13912046311.0, "step": 8318 }, { "epoch": 1.4828877005347594, "grad_norm": 0.17578125, "learning_rate": 4.977567119133173e-06, "loss": 1.0286, "num_tokens": 13918311840.0, "step": 8319 }, { "epoch": 1.483065953654189, "grad_norm": 0.177734375, "learning_rate": 4.975636726263244e-06, "loss": 1.0003, "num_tokens": 13924574185.0, "step": 8320 }, { "epoch": 1.4832442067736185, "grad_norm": 0.16796875, "learning_rate": 4.973706835404781e-06, "loss": 1.0262, "num_tokens": 13930827217.0, "step": 8321 }, { "epoch": 1.483422459893048, "grad_norm": 0.1748046875, "learning_rate": 4.971777446718601e-06, "loss": 0.9948, "num_tokens": 13937111470.0, "step": 8322 }, { "epoch": 1.4836007130124778, "grad_norm": 0.16796875, "learning_rate": 4.969848560365478e-06, "loss": 1.038, "num_tokens": 13943395414.0, "step": 8323 }, { "epoch": 1.4837789661319074, "grad_norm": 0.1748046875, "learning_rate": 4.967920176506153e-06, "loss": 1.0355, "num_tokens": 13949678898.0, "step": 8324 }, { "epoch": 1.4839572192513368, "grad_norm": 0.1728515625, "learning_rate": 4.965992295301315e-06, "loss": 0.9872, "num_tokens": 13955927139.0, "step": 8325 }, { "epoch": 1.4841354723707665, "grad_norm": 0.1748046875, "learning_rate": 4.964064916911615e-06, "loss": 1.0014, "num_tokens": 13962212187.0, "step": 8326 }, { "epoch": 1.4843137254901961, "grad_norm": 0.181640625, "learning_rate": 4.96213804149766e-06, "loss": 1.0269, "num_tokens": 13968443265.0, "step": 8327 }, { "epoch": 1.4844919786096256, "grad_norm": 0.1826171875, "learning_rate": 4.960211669220024e-06, "loss": 1.0012, "num_tokens": 13974718561.0, "step": 8328 }, { "epoch": 1.4846702317290552, "grad_norm": 0.1796875, "learning_rate": 4.958285800239224e-06, "loss": 1.0406, "num_tokens": 13980956178.0, "step": 8329 }, { "epoch": 1.4848484848484849, "grad_norm": 0.1796875, "learning_rate": 4.956360434715744e-06, "loss": 1.0288, "num_tokens": 13987239046.0, "step": 8330 }, { "epoch": 1.4850267379679145, "grad_norm": 0.1728515625, "learning_rate": 4.95443557281003e-06, "loss": 1.0169, "num_tokens": 13993521854.0, "step": 8331 }, { "epoch": 1.4852049910873442, "grad_norm": 0.1748046875, "learning_rate": 4.952511214682477e-06, "loss": 1.0455, "num_tokens": 13999807286.0, "step": 8332 }, { "epoch": 1.4853832442067736, "grad_norm": 0.173828125, "learning_rate": 4.950587360493444e-06, "loss": 1.0215, "num_tokens": 14006068234.0, "step": 8333 }, { "epoch": 1.4855614973262032, "grad_norm": 0.1787109375, "learning_rate": 4.948664010403243e-06, "loss": 1.0053, "num_tokens": 14012321594.0, "step": 8334 }, { "epoch": 1.485739750445633, "grad_norm": 0.1826171875, "learning_rate": 4.946741164572152e-06, "loss": 1.0417, "num_tokens": 14018605226.0, "step": 8335 }, { "epoch": 1.4859180035650623, "grad_norm": 0.169921875, "learning_rate": 4.944818823160403e-06, "loss": 1.0325, "num_tokens": 14024889176.0, "step": 8336 }, { "epoch": 1.486096256684492, "grad_norm": 0.17578125, "learning_rate": 4.9428969863281766e-06, "loss": 1.0244, "num_tokens": 14031153961.0, "step": 8337 }, { "epoch": 1.4862745098039216, "grad_norm": 0.17578125, "learning_rate": 4.940975654235628e-06, "loss": 1.012, "num_tokens": 14037427922.0, "step": 8338 }, { "epoch": 1.486452762923351, "grad_norm": 0.1748046875, "learning_rate": 4.9390548270428594e-06, "loss": 0.9998, "num_tokens": 14043712503.0, "step": 8339 }, { "epoch": 1.4866310160427807, "grad_norm": 0.1796875, "learning_rate": 4.937134504909935e-06, "loss": 1.0091, "num_tokens": 14049994011.0, "step": 8340 }, { "epoch": 1.4868092691622103, "grad_norm": 0.1806640625, "learning_rate": 4.935214687996871e-06, "loss": 1.0297, "num_tokens": 14056275697.0, "step": 8341 }, { "epoch": 1.48698752228164, "grad_norm": 0.177734375, "learning_rate": 4.933295376463652e-06, "loss": 1.0083, "num_tokens": 14062540513.0, "step": 8342 }, { "epoch": 1.4871657754010696, "grad_norm": 0.173828125, "learning_rate": 4.9313765704702145e-06, "loss": 1.0469, "num_tokens": 14068801479.0, "step": 8343 }, { "epoch": 1.487344028520499, "grad_norm": 0.1748046875, "learning_rate": 4.929458270176447e-06, "loss": 0.9926, "num_tokens": 14075036964.0, "step": 8344 }, { "epoch": 1.4875222816399287, "grad_norm": 0.16796875, "learning_rate": 4.927540475742208e-06, "loss": 1.0285, "num_tokens": 14081320062.0, "step": 8345 }, { "epoch": 1.4877005347593584, "grad_norm": 0.1806640625, "learning_rate": 4.925623187327304e-06, "loss": 1.0334, "num_tokens": 14087602489.0, "step": 8346 }, { "epoch": 1.4878787878787878, "grad_norm": 0.17578125, "learning_rate": 4.923706405091506e-06, "loss": 1.0283, "num_tokens": 14093865867.0, "step": 8347 }, { "epoch": 1.4880570409982175, "grad_norm": 0.1787109375, "learning_rate": 4.921790129194534e-06, "loss": 1.0103, "num_tokens": 14100150392.0, "step": 8348 }, { "epoch": 1.488235294117647, "grad_norm": 0.173828125, "learning_rate": 4.91987435979608e-06, "loss": 1.0357, "num_tokens": 14106434432.0, "step": 8349 }, { "epoch": 1.4884135472370765, "grad_norm": 0.1767578125, "learning_rate": 4.917959097055782e-06, "loss": 1.0139, "num_tokens": 14112699867.0, "step": 8350 }, { "epoch": 1.4885918003565062, "grad_norm": 0.181640625, "learning_rate": 4.916044341133237e-06, "loss": 1.0608, "num_tokens": 14118956214.0, "step": 8351 }, { "epoch": 1.4887700534759358, "grad_norm": 0.1826171875, "learning_rate": 4.914130092188005e-06, "loss": 1.0238, "num_tokens": 14125184922.0, "step": 8352 }, { "epoch": 1.4889483065953655, "grad_norm": 0.181640625, "learning_rate": 4.912216350379598e-06, "loss": 1.0398, "num_tokens": 14131442092.0, "step": 8353 }, { "epoch": 1.4891265597147951, "grad_norm": 0.1728515625, "learning_rate": 4.910303115867492e-06, "loss": 1.0353, "num_tokens": 14137701452.0, "step": 8354 }, { "epoch": 1.4893048128342246, "grad_norm": 0.1708984375, "learning_rate": 4.908390388811111e-06, "loss": 1.0259, "num_tokens": 14143941938.0, "step": 8355 }, { "epoch": 1.4894830659536542, "grad_norm": 0.169921875, "learning_rate": 4.9064781693698504e-06, "loss": 0.9973, "num_tokens": 14150226412.0, "step": 8356 }, { "epoch": 1.4896613190730839, "grad_norm": 0.1767578125, "learning_rate": 4.904566457703053e-06, "loss": 1.0223, "num_tokens": 14156510984.0, "step": 8357 }, { "epoch": 1.4898395721925133, "grad_norm": 0.1787109375, "learning_rate": 4.902655253970022e-06, "loss": 0.9917, "num_tokens": 14162794148.0, "step": 8358 }, { "epoch": 1.490017825311943, "grad_norm": 0.173828125, "learning_rate": 4.900744558330016e-06, "loss": 1.0157, "num_tokens": 14169053711.0, "step": 8359 }, { "epoch": 1.4901960784313726, "grad_norm": 0.1767578125, "learning_rate": 4.8988343709422585e-06, "loss": 1.0138, "num_tokens": 14175310626.0, "step": 8360 }, { "epoch": 1.490374331550802, "grad_norm": 0.17578125, "learning_rate": 4.896924691965922e-06, "loss": 1.0141, "num_tokens": 14181592927.0, "step": 8361 }, { "epoch": 1.4905525846702317, "grad_norm": 0.1806640625, "learning_rate": 4.895015521560138e-06, "loss": 1.0071, "num_tokens": 14187877392.0, "step": 8362 }, { "epoch": 1.4907308377896613, "grad_norm": 0.1728515625, "learning_rate": 4.893106859884006e-06, "loss": 0.9881, "num_tokens": 14194161239.0, "step": 8363 }, { "epoch": 1.490909090909091, "grad_norm": 0.1728515625, "learning_rate": 4.891198707096571e-06, "loss": 1.0437, "num_tokens": 14200417854.0, "step": 8364 }, { "epoch": 1.4910873440285206, "grad_norm": 0.17578125, "learning_rate": 4.889291063356838e-06, "loss": 1.0218, "num_tokens": 14206692629.0, "step": 8365 }, { "epoch": 1.49126559714795, "grad_norm": 0.1826171875, "learning_rate": 4.887383928823775e-06, "loss": 1.0371, "num_tokens": 14212977156.0, "step": 8366 }, { "epoch": 1.4914438502673797, "grad_norm": 0.1689453125, "learning_rate": 4.885477303656301e-06, "loss": 1.012, "num_tokens": 14219219559.0, "step": 8367 }, { "epoch": 1.4916221033868093, "grad_norm": 0.1748046875, "learning_rate": 4.883571188013296e-06, "loss": 1.009, "num_tokens": 14225475985.0, "step": 8368 }, { "epoch": 1.4918003565062388, "grad_norm": 0.1728515625, "learning_rate": 4.881665582053595e-06, "loss": 1.0109, "num_tokens": 14231743603.0, "step": 8369 }, { "epoch": 1.4919786096256684, "grad_norm": 0.1708984375, "learning_rate": 4.8797604859359985e-06, "loss": 1.0342, "num_tokens": 14238007027.0, "step": 8370 }, { "epoch": 1.492156862745098, "grad_norm": 0.1708984375, "learning_rate": 4.877855899819253e-06, "loss": 1.0176, "num_tokens": 14244293671.0, "step": 8371 }, { "epoch": 1.4923351158645275, "grad_norm": 0.177734375, "learning_rate": 4.875951823862072e-06, "loss": 1.0249, "num_tokens": 14250522616.0, "step": 8372 }, { "epoch": 1.4925133689839571, "grad_norm": 0.177734375, "learning_rate": 4.8740482582231176e-06, "loss": 1.0376, "num_tokens": 14256806851.0, "step": 8373 }, { "epoch": 1.4926916221033868, "grad_norm": 0.1796875, "learning_rate": 4.872145203061023e-06, "loss": 1.0063, "num_tokens": 14263005303.0, "step": 8374 }, { "epoch": 1.4928698752228164, "grad_norm": 0.171875, "learning_rate": 4.870242658534361e-06, "loss": 1.0065, "num_tokens": 14269263051.0, "step": 8375 }, { "epoch": 1.493048128342246, "grad_norm": 0.177734375, "learning_rate": 4.868340624801672e-06, "loss": 1.0058, "num_tokens": 14275514207.0, "step": 8376 }, { "epoch": 1.4932263814616755, "grad_norm": 0.1708984375, "learning_rate": 4.866439102021458e-06, "loss": 1.003, "num_tokens": 14281797509.0, "step": 8377 }, { "epoch": 1.4934046345811052, "grad_norm": 0.1787109375, "learning_rate": 4.864538090352172e-06, "loss": 0.9998, "num_tokens": 14288066549.0, "step": 8378 }, { "epoch": 1.4935828877005348, "grad_norm": 0.171875, "learning_rate": 4.862637589952224e-06, "loss": 1.0057, "num_tokens": 14294346016.0, "step": 8379 }, { "epoch": 1.4937611408199643, "grad_norm": 0.171875, "learning_rate": 4.86073760097998e-06, "loss": 1.0384, "num_tokens": 14300599271.0, "step": 8380 }, { "epoch": 1.493939393939394, "grad_norm": 0.1796875, "learning_rate": 4.858838123593775e-06, "loss": 1.0198, "num_tokens": 14306865490.0, "step": 8381 }, { "epoch": 1.4941176470588236, "grad_norm": 0.1748046875, "learning_rate": 4.8569391579518885e-06, "loss": 1.0271, "num_tokens": 14313122014.0, "step": 8382 }, { "epoch": 1.4942959001782532, "grad_norm": 0.177734375, "learning_rate": 4.855040704212556e-06, "loss": 1.0038, "num_tokens": 14319346635.0, "step": 8383 }, { "epoch": 1.4944741532976826, "grad_norm": 0.173828125, "learning_rate": 4.853142762533984e-06, "loss": 1.0126, "num_tokens": 14325627278.0, "step": 8384 }, { "epoch": 1.4946524064171123, "grad_norm": 0.1796875, "learning_rate": 4.851245333074326e-06, "loss": 0.9995, "num_tokens": 14331884847.0, "step": 8385 }, { "epoch": 1.494830659536542, "grad_norm": 0.169921875, "learning_rate": 4.849348415991695e-06, "loss": 1.0411, "num_tokens": 14338162925.0, "step": 8386 }, { "epoch": 1.4950089126559716, "grad_norm": 0.16796875, "learning_rate": 4.847452011444157e-06, "loss": 1.0376, "num_tokens": 14344445495.0, "step": 8387 }, { "epoch": 1.495187165775401, "grad_norm": 0.1845703125, "learning_rate": 4.845556119589748e-06, "loss": 1.0369, "num_tokens": 14350706231.0, "step": 8388 }, { "epoch": 1.4953654188948307, "grad_norm": 0.1748046875, "learning_rate": 4.843660740586452e-06, "loss": 1.0065, "num_tokens": 14356938815.0, "step": 8389 }, { "epoch": 1.4955436720142603, "grad_norm": 0.17578125, "learning_rate": 4.841765874592203e-06, "loss": 0.9881, "num_tokens": 14363213674.0, "step": 8390 }, { "epoch": 1.4957219251336897, "grad_norm": 0.1708984375, "learning_rate": 4.839871521764908e-06, "loss": 1.0061, "num_tokens": 14369498007.0, "step": 8391 }, { "epoch": 1.4959001782531194, "grad_norm": 0.17578125, "learning_rate": 4.837977682262423e-06, "loss": 1.0145, "num_tokens": 14375763796.0, "step": 8392 }, { "epoch": 1.496078431372549, "grad_norm": 0.1708984375, "learning_rate": 4.8360843562425595e-06, "loss": 1.0127, "num_tokens": 14382046697.0, "step": 8393 }, { "epoch": 1.4962566844919787, "grad_norm": 0.177734375, "learning_rate": 4.834191543863088e-06, "loss": 1.0238, "num_tokens": 14388303342.0, "step": 8394 }, { "epoch": 1.4964349376114083, "grad_norm": 0.17578125, "learning_rate": 4.832299245281743e-06, "loss": 1.007, "num_tokens": 14394556336.0, "step": 8395 }, { "epoch": 1.4966131907308378, "grad_norm": 0.1787109375, "learning_rate": 4.830407460656205e-06, "loss": 1.0085, "num_tokens": 14400840712.0, "step": 8396 }, { "epoch": 1.4967914438502674, "grad_norm": 0.173828125, "learning_rate": 4.828516190144119e-06, "loss": 0.9944, "num_tokens": 14407124382.0, "step": 8397 }, { "epoch": 1.496969696969697, "grad_norm": 0.177734375, "learning_rate": 4.826625433903083e-06, "loss": 1.019, "num_tokens": 14413382363.0, "step": 8398 }, { "epoch": 1.4971479500891265, "grad_norm": 0.1708984375, "learning_rate": 4.824735192090656e-06, "loss": 0.987, "num_tokens": 14419667757.0, "step": 8399 }, { "epoch": 1.4973262032085561, "grad_norm": 0.1748046875, "learning_rate": 4.82284546486435e-06, "loss": 1.0357, "num_tokens": 14425925633.0, "step": 8400 }, { "epoch": 1.4975044563279858, "grad_norm": 0.17578125, "learning_rate": 4.820956252381637e-06, "loss": 1.0212, "num_tokens": 14432207667.0, "step": 8401 }, { "epoch": 1.4976827094474152, "grad_norm": 0.17578125, "learning_rate": 4.819067554799947e-06, "loss": 1.0013, "num_tokens": 14438430596.0, "step": 8402 }, { "epoch": 1.4978609625668449, "grad_norm": 0.1689453125, "learning_rate": 4.817179372276667e-06, "loss": 1.0101, "num_tokens": 14444707879.0, "step": 8403 }, { "epoch": 1.4980392156862745, "grad_norm": 0.1708984375, "learning_rate": 4.815291704969136e-06, "loss": 1.0173, "num_tokens": 14450990292.0, "step": 8404 }, { "epoch": 1.4982174688057042, "grad_norm": 0.1826171875, "learning_rate": 4.813404553034656e-06, "loss": 1.0088, "num_tokens": 14457265091.0, "step": 8405 }, { "epoch": 1.4983957219251338, "grad_norm": 0.1796875, "learning_rate": 4.811517916630482e-06, "loss": 0.9996, "num_tokens": 14463549210.0, "step": 8406 }, { "epoch": 1.4985739750445632, "grad_norm": 0.169921875, "learning_rate": 4.80963179591383e-06, "loss": 1.0148, "num_tokens": 14469832252.0, "step": 8407 }, { "epoch": 1.498752228163993, "grad_norm": 0.1787109375, "learning_rate": 4.8077461910418656e-06, "loss": 1.0022, "num_tokens": 14476109537.0, "step": 8408 }, { "epoch": 1.4989304812834225, "grad_norm": 0.1806640625, "learning_rate": 4.805861102171724e-06, "loss": 1.0439, "num_tokens": 14482374298.0, "step": 8409 }, { "epoch": 1.499108734402852, "grad_norm": 0.173828125, "learning_rate": 4.803976529460487e-06, "loss": 1.0335, "num_tokens": 14488658871.0, "step": 8410 }, { "epoch": 1.4992869875222816, "grad_norm": 0.1796875, "learning_rate": 4.8020924730651955e-06, "loss": 1.0099, "num_tokens": 14494943176.0, "step": 8411 }, { "epoch": 1.4994652406417113, "grad_norm": 0.1689453125, "learning_rate": 4.800208933142849e-06, "loss": 1.0435, "num_tokens": 14501203338.0, "step": 8412 }, { "epoch": 1.4996434937611407, "grad_norm": 0.17578125, "learning_rate": 4.798325909850405e-06, "loss": 1.003, "num_tokens": 14507470189.0, "step": 8413 }, { "epoch": 1.4998217468805704, "grad_norm": 0.173828125, "learning_rate": 4.796443403344772e-06, "loss": 1.0286, "num_tokens": 14513755790.0, "step": 8414 }, { "epoch": 1.5, "grad_norm": 0.1728515625, "learning_rate": 4.7945614137828206e-06, "loss": 0.9957, "num_tokens": 14520031394.0, "step": 8415 }, { "epoch": 1.5001782531194294, "grad_norm": 0.1943359375, "learning_rate": 4.792679941321381e-06, "loss": 1.0779, "num_tokens": 14526315409.0, "step": 8416 }, { "epoch": 1.5003565062388593, "grad_norm": 0.1689453125, "learning_rate": 4.790798986117236e-06, "loss": 0.9922, "num_tokens": 14532597314.0, "step": 8417 }, { "epoch": 1.5005347593582887, "grad_norm": 0.1728515625, "learning_rate": 4.788918548327124e-06, "loss": 1.0551, "num_tokens": 14538879736.0, "step": 8418 }, { "epoch": 1.5007130124777184, "grad_norm": 0.1787109375, "learning_rate": 4.78703862810774e-06, "loss": 1.0213, "num_tokens": 14545145729.0, "step": 8419 }, { "epoch": 1.500891265597148, "grad_norm": 0.1748046875, "learning_rate": 4.7851592256157465e-06, "loss": 1.0119, "num_tokens": 14551428608.0, "step": 8420 }, { "epoch": 1.5010695187165775, "grad_norm": 0.173828125, "learning_rate": 4.783280341007747e-06, "loss": 1.0291, "num_tokens": 14557695194.0, "step": 8421 }, { "epoch": 1.501247771836007, "grad_norm": 0.17578125, "learning_rate": 4.7814019744403075e-06, "loss": 1.0501, "num_tokens": 14563977046.0, "step": 8422 }, { "epoch": 1.5014260249554368, "grad_norm": 0.177734375, "learning_rate": 4.77952412606996e-06, "loss": 1.0188, "num_tokens": 14570236441.0, "step": 8423 }, { "epoch": 1.5016042780748662, "grad_norm": 0.1796875, "learning_rate": 4.777646796053183e-06, "loss": 1.0608, "num_tokens": 14576520287.0, "step": 8424 }, { "epoch": 1.501782531194296, "grad_norm": 0.1748046875, "learning_rate": 4.775769984546415e-06, "loss": 1.0188, "num_tokens": 14582773006.0, "step": 8425 }, { "epoch": 1.5019607843137255, "grad_norm": 0.177734375, "learning_rate": 4.773893691706045e-06, "loss": 1.0258, "num_tokens": 14589037615.0, "step": 8426 }, { "epoch": 1.5021390374331551, "grad_norm": 0.177734375, "learning_rate": 4.772017917688436e-06, "loss": 1.0526, "num_tokens": 14595289661.0, "step": 8427 }, { "epoch": 1.5023172905525848, "grad_norm": 0.16796875, "learning_rate": 4.770142662649892e-06, "loss": 1.0029, "num_tokens": 14601572704.0, "step": 8428 }, { "epoch": 1.5024955436720142, "grad_norm": 0.17578125, "learning_rate": 4.768267926746674e-06, "loss": 1.0141, "num_tokens": 14607802138.0, "step": 8429 }, { "epoch": 1.5026737967914439, "grad_norm": 0.1748046875, "learning_rate": 4.766393710135009e-06, "loss": 0.9941, "num_tokens": 14614062292.0, "step": 8430 }, { "epoch": 1.5028520499108735, "grad_norm": 0.1787109375, "learning_rate": 4.764520012971073e-06, "loss": 0.9858, "num_tokens": 14620337588.0, "step": 8431 }, { "epoch": 1.503030303030303, "grad_norm": 0.1748046875, "learning_rate": 4.762646835411005e-06, "loss": 1.0316, "num_tokens": 14626622146.0, "step": 8432 }, { "epoch": 1.5032085561497326, "grad_norm": 0.1748046875, "learning_rate": 4.760774177610892e-06, "loss": 1.0152, "num_tokens": 14632882360.0, "step": 8433 }, { "epoch": 1.5033868092691622, "grad_norm": 0.185546875, "learning_rate": 4.75890203972679e-06, "loss": 1.0551, "num_tokens": 14639164974.0, "step": 8434 }, { "epoch": 1.5035650623885917, "grad_norm": 0.173828125, "learning_rate": 4.757030421914702e-06, "loss": 0.9874, "num_tokens": 14645450822.0, "step": 8435 }, { "epoch": 1.5037433155080215, "grad_norm": 0.18359375, "learning_rate": 4.755159324330584e-06, "loss": 1.0322, "num_tokens": 14651725243.0, "step": 8436 }, { "epoch": 1.503921568627451, "grad_norm": 0.177734375, "learning_rate": 4.753288747130365e-06, "loss": 1.0232, "num_tokens": 14658009235.0, "step": 8437 }, { "epoch": 1.5040998217468806, "grad_norm": 0.171875, "learning_rate": 4.7514186904699125e-06, "loss": 0.9989, "num_tokens": 14664285609.0, "step": 8438 }, { "epoch": 1.5042780748663103, "grad_norm": 0.169921875, "learning_rate": 4.749549154505065e-06, "loss": 1.0135, "num_tokens": 14670569505.0, "step": 8439 }, { "epoch": 1.5044563279857397, "grad_norm": 0.1796875, "learning_rate": 4.747680139391604e-06, "loss": 1.0193, "num_tokens": 14676803599.0, "step": 8440 }, { "epoch": 1.5046345811051693, "grad_norm": 0.1708984375, "learning_rate": 4.7458116452852825e-06, "loss": 1.0217, "num_tokens": 14683026103.0, "step": 8441 }, { "epoch": 1.504812834224599, "grad_norm": 0.171875, "learning_rate": 4.743943672341799e-06, "loss": 1.0335, "num_tokens": 14689278174.0, "step": 8442 }, { "epoch": 1.5049910873440284, "grad_norm": 0.173828125, "learning_rate": 4.742076220716811e-06, "loss": 1.0584, "num_tokens": 14695525810.0, "step": 8443 }, { "epoch": 1.505169340463458, "grad_norm": 0.1826171875, "learning_rate": 4.740209290565937e-06, "loss": 1.0206, "num_tokens": 14701809814.0, "step": 8444 }, { "epoch": 1.5053475935828877, "grad_norm": 0.177734375, "learning_rate": 4.7383428820447466e-06, "loss": 1.0265, "num_tokens": 14708068946.0, "step": 8445 }, { "epoch": 1.5055258467023171, "grad_norm": 0.1689453125, "learning_rate": 4.736476995308767e-06, "loss": 1.0067, "num_tokens": 14714354378.0, "step": 8446 }, { "epoch": 1.505704099821747, "grad_norm": 0.17578125, "learning_rate": 4.734611630513482e-06, "loss": 1.014, "num_tokens": 14720637792.0, "step": 8447 }, { "epoch": 1.5058823529411764, "grad_norm": 0.173828125, "learning_rate": 4.7327467878143375e-06, "loss": 1.0277, "num_tokens": 14726920947.0, "step": 8448 }, { "epoch": 1.506060606060606, "grad_norm": 0.1796875, "learning_rate": 4.730882467366729e-06, "loss": 1.0237, "num_tokens": 14733204585.0, "step": 8449 }, { "epoch": 1.5062388591800357, "grad_norm": 0.1689453125, "learning_rate": 4.729018669326009e-06, "loss": 1.0104, "num_tokens": 14739489165.0, "step": 8450 }, { "epoch": 1.5064171122994652, "grad_norm": 0.1708984375, "learning_rate": 4.727155393847491e-06, "loss": 1.0041, "num_tokens": 14745771402.0, "step": 8451 }, { "epoch": 1.5065953654188948, "grad_norm": 0.1708984375, "learning_rate": 4.725292641086439e-06, "loss": 1.0146, "num_tokens": 14752055486.0, "step": 8452 }, { "epoch": 1.5067736185383245, "grad_norm": 0.169921875, "learning_rate": 4.72343041119808e-06, "loss": 1.059, "num_tokens": 14758340513.0, "step": 8453 }, { "epoch": 1.506951871657754, "grad_norm": 0.173828125, "learning_rate": 4.721568704337589e-06, "loss": 0.9989, "num_tokens": 14764624068.0, "step": 8454 }, { "epoch": 1.5071301247771836, "grad_norm": 0.17578125, "learning_rate": 4.719707520660108e-06, "loss": 1.0266, "num_tokens": 14770848261.0, "step": 8455 }, { "epoch": 1.5073083778966132, "grad_norm": 0.1748046875, "learning_rate": 4.7178468603207274e-06, "loss": 1.0113, "num_tokens": 14777125362.0, "step": 8456 }, { "epoch": 1.5074866310160426, "grad_norm": 0.1748046875, "learning_rate": 4.715986723474497e-06, "loss": 1.0247, "num_tokens": 14783400967.0, "step": 8457 }, { "epoch": 1.5076648841354725, "grad_norm": 0.1787109375, "learning_rate": 4.714127110276421e-06, "loss": 1.0153, "num_tokens": 14789640664.0, "step": 8458 }, { "epoch": 1.507843137254902, "grad_norm": 0.1748046875, "learning_rate": 4.712268020881463e-06, "loss": 0.9771, "num_tokens": 14795924601.0, "step": 8459 }, { "epoch": 1.5080213903743316, "grad_norm": 0.17578125, "learning_rate": 4.710409455444541e-06, "loss": 1.0444, "num_tokens": 14802181601.0, "step": 8460 }, { "epoch": 1.5081996434937612, "grad_norm": 0.171875, "learning_rate": 4.708551414120529e-06, "loss": 1.0013, "num_tokens": 14808427541.0, "step": 8461 }, { "epoch": 1.5083778966131907, "grad_norm": 0.1708984375, "learning_rate": 4.706693897064254e-06, "loss": 1.0057, "num_tokens": 14814710997.0, "step": 8462 }, { "epoch": 1.5085561497326203, "grad_norm": 0.1748046875, "learning_rate": 4.704836904430512e-06, "loss": 1.0085, "num_tokens": 14820964558.0, "step": 8463 }, { "epoch": 1.50873440285205, "grad_norm": 0.1748046875, "learning_rate": 4.702980436374042e-06, "loss": 1.0207, "num_tokens": 14827248576.0, "step": 8464 }, { "epoch": 1.5089126559714794, "grad_norm": 0.1796875, "learning_rate": 4.70112449304954e-06, "loss": 0.9876, "num_tokens": 14833522603.0, "step": 8465 }, { "epoch": 1.509090909090909, "grad_norm": 0.1748046875, "learning_rate": 4.6992690746116724e-06, "loss": 1.0196, "num_tokens": 14839785780.0, "step": 8466 }, { "epoch": 1.5092691622103387, "grad_norm": 0.1787109375, "learning_rate": 4.697414181215042e-06, "loss": 1.0281, "num_tokens": 14846030625.0, "step": 8467 }, { "epoch": 1.5094474153297681, "grad_norm": 0.1708984375, "learning_rate": 4.695559813014222e-06, "loss": 1.013, "num_tokens": 14852315126.0, "step": 8468 }, { "epoch": 1.509625668449198, "grad_norm": 0.173828125, "learning_rate": 4.693705970163732e-06, "loss": 1.0464, "num_tokens": 14858558577.0, "step": 8469 }, { "epoch": 1.5098039215686274, "grad_norm": 0.1689453125, "learning_rate": 4.691852652818061e-06, "loss": 1.0067, "num_tokens": 14864823973.0, "step": 8470 }, { "epoch": 1.509982174688057, "grad_norm": 0.1728515625, "learning_rate": 4.689999861131642e-06, "loss": 1.0277, "num_tokens": 14871105075.0, "step": 8471 }, { "epoch": 1.5101604278074867, "grad_norm": 0.169921875, "learning_rate": 4.688147595258869e-06, "loss": 1.0001, "num_tokens": 14877388971.0, "step": 8472 }, { "epoch": 1.5103386809269161, "grad_norm": 0.173828125, "learning_rate": 4.686295855354089e-06, "loss": 1.0063, "num_tokens": 14883622739.0, "step": 8473 }, { "epoch": 1.5105169340463458, "grad_norm": 0.1767578125, "learning_rate": 4.684444641571616e-06, "loss": 0.991, "num_tokens": 14889846039.0, "step": 8474 }, { "epoch": 1.5106951871657754, "grad_norm": 0.1748046875, "learning_rate": 4.682593954065704e-06, "loss": 1.0278, "num_tokens": 14896122802.0, "step": 8475 }, { "epoch": 1.5108734402852049, "grad_norm": 0.171875, "learning_rate": 4.6807437929905715e-06, "loss": 1.0324, "num_tokens": 14902385505.0, "step": 8476 }, { "epoch": 1.5110516934046347, "grad_norm": 0.17578125, "learning_rate": 4.678894158500398e-06, "loss": 1.007, "num_tokens": 14908669493.0, "step": 8477 }, { "epoch": 1.5112299465240642, "grad_norm": 0.1748046875, "learning_rate": 4.677045050749311e-06, "loss": 1.0116, "num_tokens": 14914953516.0, "step": 8478 }, { "epoch": 1.5114081996434936, "grad_norm": 0.1787109375, "learning_rate": 4.675196469891396e-06, "loss": 1.003, "num_tokens": 14921209952.0, "step": 8479 }, { "epoch": 1.5115864527629235, "grad_norm": 0.1748046875, "learning_rate": 4.673348416080694e-06, "loss": 0.9928, "num_tokens": 14927494207.0, "step": 8480 }, { "epoch": 1.511764705882353, "grad_norm": 0.1728515625, "learning_rate": 4.6715008894712114e-06, "loss": 1.0125, "num_tokens": 14933775606.0, "step": 8481 }, { "epoch": 1.5119429590017825, "grad_norm": 0.171875, "learning_rate": 4.669653890216899e-06, "loss": 1.0317, "num_tokens": 14940056036.0, "step": 8482 }, { "epoch": 1.5121212121212122, "grad_norm": 0.1767578125, "learning_rate": 4.667807418471662e-06, "loss": 1.0146, "num_tokens": 14946322481.0, "step": 8483 }, { "epoch": 1.5122994652406416, "grad_norm": 0.18359375, "learning_rate": 4.665961474389373e-06, "loss": 1.023, "num_tokens": 14952606617.0, "step": 8484 }, { "epoch": 1.5124777183600713, "grad_norm": 0.17578125, "learning_rate": 4.664116058123856e-06, "loss": 1.017, "num_tokens": 14958879946.0, "step": 8485 }, { "epoch": 1.512655971479501, "grad_norm": 0.1796875, "learning_rate": 4.662271169828887e-06, "loss": 1.0519, "num_tokens": 14965162428.0, "step": 8486 }, { "epoch": 1.5128342245989304, "grad_norm": 0.17578125, "learning_rate": 4.660426809658199e-06, "loss": 1.0289, "num_tokens": 14971403599.0, "step": 8487 }, { "epoch": 1.5130124777183602, "grad_norm": 0.177734375, "learning_rate": 4.658582977765488e-06, "loss": 1.0211, "num_tokens": 14977685576.0, "step": 8488 }, { "epoch": 1.5131907308377897, "grad_norm": 0.173828125, "learning_rate": 4.656739674304403e-06, "loss": 1.0293, "num_tokens": 14983942509.0, "step": 8489 }, { "epoch": 1.5133689839572193, "grad_norm": 0.173828125, "learning_rate": 4.654896899428536e-06, "loss": 1.008, "num_tokens": 14990227447.0, "step": 8490 }, { "epoch": 1.513547237076649, "grad_norm": 0.173828125, "learning_rate": 4.653054653291455e-06, "loss": 1.0171, "num_tokens": 14996511102.0, "step": 8491 }, { "epoch": 1.5137254901960784, "grad_norm": 0.1767578125, "learning_rate": 4.651212936046671e-06, "loss": 1.0349, "num_tokens": 15002796308.0, "step": 8492 }, { "epoch": 1.513903743315508, "grad_norm": 0.177734375, "learning_rate": 4.649371747847657e-06, "loss": 1.0227, "num_tokens": 15009078234.0, "step": 8493 }, { "epoch": 1.5140819964349377, "grad_norm": 0.1728515625, "learning_rate": 4.647531088847835e-06, "loss": 1.0291, "num_tokens": 15015334892.0, "step": 8494 }, { "epoch": 1.514260249554367, "grad_norm": 0.1787109375, "learning_rate": 4.645690959200594e-06, "loss": 1.0511, "num_tokens": 15021616329.0, "step": 8495 }, { "epoch": 1.5144385026737968, "grad_norm": 0.1748046875, "learning_rate": 4.6438513590592686e-06, "loss": 1.0154, "num_tokens": 15027877502.0, "step": 8496 }, { "epoch": 1.5146167557932264, "grad_norm": 0.16796875, "learning_rate": 4.642012288577153e-06, "loss": 1.0161, "num_tokens": 15034137785.0, "step": 8497 }, { "epoch": 1.5147950089126558, "grad_norm": 0.1806640625, "learning_rate": 4.6401737479075e-06, "loss": 1.0538, "num_tokens": 15040411934.0, "step": 8498 }, { "epoch": 1.5149732620320857, "grad_norm": 0.1748046875, "learning_rate": 4.638335737203512e-06, "loss": 1.0477, "num_tokens": 15046696014.0, "step": 8499 }, { "epoch": 1.5151515151515151, "grad_norm": 0.1787109375, "learning_rate": 4.6364982566183536e-06, "loss": 1.0224, "num_tokens": 15052980404.0, "step": 8500 }, { "epoch": 1.5153297682709448, "grad_norm": 0.1806640625, "learning_rate": 4.634661306305138e-06, "loss": 1.042, "num_tokens": 15059189870.0, "step": 8501 }, { "epoch": 1.5155080213903744, "grad_norm": 0.1767578125, "learning_rate": 4.632824886416943e-06, "loss": 0.99, "num_tokens": 15065472350.0, "step": 8502 }, { "epoch": 1.5156862745098039, "grad_norm": 0.1767578125, "learning_rate": 4.630988997106799e-06, "loss": 1.009, "num_tokens": 15071706815.0, "step": 8503 }, { "epoch": 1.5158645276292335, "grad_norm": 0.1748046875, "learning_rate": 4.629153638527688e-06, "loss": 1.0306, "num_tokens": 15077991021.0, "step": 8504 }, { "epoch": 1.5160427807486632, "grad_norm": 0.1806640625, "learning_rate": 4.627318810832552e-06, "loss": 1.0261, "num_tokens": 15084275244.0, "step": 8505 }, { "epoch": 1.5162210338680926, "grad_norm": 0.1826171875, "learning_rate": 4.6254845141742876e-06, "loss": 1.0342, "num_tokens": 15090559148.0, "step": 8506 }, { "epoch": 1.5163992869875222, "grad_norm": 0.1748046875, "learning_rate": 4.623650748705746e-06, "loss": 1.0376, "num_tokens": 15096802283.0, "step": 8507 }, { "epoch": 1.516577540106952, "grad_norm": 0.169921875, "learning_rate": 4.621817514579733e-06, "loss": 1.0545, "num_tokens": 15103086478.0, "step": 8508 }, { "epoch": 1.5167557932263813, "grad_norm": 0.16796875, "learning_rate": 4.619984811949019e-06, "loss": 1.0333, "num_tokens": 15109356195.0, "step": 8509 }, { "epoch": 1.5169340463458112, "grad_norm": 0.1728515625, "learning_rate": 4.618152640966319e-06, "loss": 1.0065, "num_tokens": 15115633468.0, "step": 8510 }, { "epoch": 1.5171122994652406, "grad_norm": 0.1728515625, "learning_rate": 4.61632100178431e-06, "loss": 1.0442, "num_tokens": 15121896932.0, "step": 8511 }, { "epoch": 1.5172905525846703, "grad_norm": 0.1767578125, "learning_rate": 4.614489894555621e-06, "loss": 1.0248, "num_tokens": 15128181930.0, "step": 8512 }, { "epoch": 1.5174688057041, "grad_norm": 0.181640625, "learning_rate": 4.612659319432841e-06, "loss": 1.0203, "num_tokens": 15134453266.0, "step": 8513 }, { "epoch": 1.5176470588235293, "grad_norm": 0.18359375, "learning_rate": 4.61082927656851e-06, "loss": 1.017, "num_tokens": 15140707248.0, "step": 8514 }, { "epoch": 1.517825311942959, "grad_norm": 0.181640625, "learning_rate": 4.608999766115123e-06, "loss": 1.0121, "num_tokens": 15146989175.0, "step": 8515 }, { "epoch": 1.5180035650623886, "grad_norm": 0.1748046875, "learning_rate": 4.6071707882251405e-06, "loss": 1.0428, "num_tokens": 15153217416.0, "step": 8516 }, { "epoch": 1.518181818181818, "grad_norm": 0.177734375, "learning_rate": 4.605342343050967e-06, "loss": 1.0234, "num_tokens": 15159500141.0, "step": 8517 }, { "epoch": 1.5183600713012477, "grad_norm": 0.1708984375, "learning_rate": 4.603514430744971e-06, "loss": 1.0238, "num_tokens": 15165763286.0, "step": 8518 }, { "epoch": 1.5185383244206774, "grad_norm": 0.1708984375, "learning_rate": 4.6016870514594655e-06, "loss": 0.9937, "num_tokens": 15172015607.0, "step": 8519 }, { "epoch": 1.5187165775401068, "grad_norm": 0.1767578125, "learning_rate": 4.599860205346737e-06, "loss": 1.0422, "num_tokens": 15178260301.0, "step": 8520 }, { "epoch": 1.5188948306595367, "grad_norm": 0.1767578125, "learning_rate": 4.598033892559008e-06, "loss": 1.0472, "num_tokens": 15184532386.0, "step": 8521 }, { "epoch": 1.519073083778966, "grad_norm": 0.173828125, "learning_rate": 4.596208113248467e-06, "loss": 0.9964, "num_tokens": 15190817554.0, "step": 8522 }, { "epoch": 1.5192513368983958, "grad_norm": 0.171875, "learning_rate": 4.5943828675672616e-06, "loss": 1.0021, "num_tokens": 15197102566.0, "step": 8523 }, { "epoch": 1.5194295900178254, "grad_norm": 0.1748046875, "learning_rate": 4.592558155667485e-06, "loss": 0.9992, "num_tokens": 15203366461.0, "step": 8524 }, { "epoch": 1.5196078431372548, "grad_norm": 0.169921875, "learning_rate": 4.590733977701193e-06, "loss": 1.0097, "num_tokens": 15209627528.0, "step": 8525 }, { "epoch": 1.5197860962566845, "grad_norm": 0.171875, "learning_rate": 4.588910333820391e-06, "loss": 1.0212, "num_tokens": 15215884606.0, "step": 8526 }, { "epoch": 1.5199643493761141, "grad_norm": 0.169921875, "learning_rate": 4.587087224177049e-06, "loss": 1.0144, "num_tokens": 15222120158.0, "step": 8527 }, { "epoch": 1.5201426024955436, "grad_norm": 0.169921875, "learning_rate": 4.5852646489230885e-06, "loss": 1.0162, "num_tokens": 15228382849.0, "step": 8528 }, { "epoch": 1.5203208556149732, "grad_norm": 0.1748046875, "learning_rate": 4.5834426082103755e-06, "loss": 1.0033, "num_tokens": 15234644185.0, "step": 8529 }, { "epoch": 1.5204991087344029, "grad_norm": 0.173828125, "learning_rate": 4.58162110219075e-06, "loss": 0.9922, "num_tokens": 15240927281.0, "step": 8530 }, { "epoch": 1.5206773618538323, "grad_norm": 0.1796875, "learning_rate": 4.579800131015995e-06, "loss": 1.0228, "num_tokens": 15247183752.0, "step": 8531 }, { "epoch": 1.5208556149732622, "grad_norm": 0.1767578125, "learning_rate": 4.577979694837854e-06, "loss": 1.0171, "num_tokens": 15253458851.0, "step": 8532 }, { "epoch": 1.5210338680926916, "grad_norm": 0.173828125, "learning_rate": 4.576159793808019e-06, "loss": 1.0308, "num_tokens": 15259741385.0, "step": 8533 }, { "epoch": 1.5212121212121212, "grad_norm": 0.17578125, "learning_rate": 4.57434042807815e-06, "loss": 0.995, "num_tokens": 15265989868.0, "step": 8534 }, { "epoch": 1.5213903743315509, "grad_norm": 0.1708984375, "learning_rate": 4.572521597799856e-06, "loss": 1.0123, "num_tokens": 15272263159.0, "step": 8535 }, { "epoch": 1.5215686274509803, "grad_norm": 0.177734375, "learning_rate": 4.570703303124689e-06, "loss": 1.0211, "num_tokens": 15278545801.0, "step": 8536 }, { "epoch": 1.52174688057041, "grad_norm": 0.1728515625, "learning_rate": 4.56888554420418e-06, "loss": 1.0252, "num_tokens": 15284830920.0, "step": 8537 }, { "epoch": 1.5219251336898396, "grad_norm": 0.1708984375, "learning_rate": 4.567068321189798e-06, "loss": 1.0255, "num_tokens": 15291113064.0, "step": 8538 }, { "epoch": 1.522103386809269, "grad_norm": 0.1748046875, "learning_rate": 4.565251634232974e-06, "loss": 1.0261, "num_tokens": 15297357582.0, "step": 8539 }, { "epoch": 1.522281639928699, "grad_norm": 0.1728515625, "learning_rate": 4.563435483485088e-06, "loss": 1.0265, "num_tokens": 15303641117.0, "step": 8540 }, { "epoch": 1.5224598930481283, "grad_norm": 0.1787109375, "learning_rate": 4.561619869097488e-06, "loss": 1.0059, "num_tokens": 15309901389.0, "step": 8541 }, { "epoch": 1.5226381461675578, "grad_norm": 0.1748046875, "learning_rate": 4.559804791221465e-06, "loss": 1.0414, "num_tokens": 15316164065.0, "step": 8542 }, { "epoch": 1.5228163992869876, "grad_norm": 0.1728515625, "learning_rate": 4.5579902500082715e-06, "loss": 0.9951, "num_tokens": 15322425940.0, "step": 8543 }, { "epoch": 1.522994652406417, "grad_norm": 0.177734375, "learning_rate": 4.556176245609113e-06, "loss": 1.0162, "num_tokens": 15328710458.0, "step": 8544 }, { "epoch": 1.5231729055258467, "grad_norm": 0.16796875, "learning_rate": 4.554362778175151e-06, "loss": 1.004, "num_tokens": 15334993342.0, "step": 8545 }, { "epoch": 1.5233511586452764, "grad_norm": 0.1728515625, "learning_rate": 4.5525498478575015e-06, "loss": 1.0131, "num_tokens": 15341275945.0, "step": 8546 }, { "epoch": 1.5235294117647058, "grad_norm": 0.171875, "learning_rate": 4.5507374548072334e-06, "loss": 1.0018, "num_tokens": 15347562453.0, "step": 8547 }, { "epoch": 1.5237076648841354, "grad_norm": 0.1748046875, "learning_rate": 4.548925599175381e-06, "loss": 1.013, "num_tokens": 15353833854.0, "step": 8548 }, { "epoch": 1.523885918003565, "grad_norm": 0.1689453125, "learning_rate": 4.547114281112922e-06, "loss": 1.0271, "num_tokens": 15360117284.0, "step": 8549 }, { "epoch": 1.5240641711229945, "grad_norm": 0.1767578125, "learning_rate": 4.545303500770795e-06, "loss": 1.0493, "num_tokens": 15366362850.0, "step": 8550 }, { "epoch": 1.5242424242424244, "grad_norm": 0.173828125, "learning_rate": 4.543493258299892e-06, "loss": 1.0471, "num_tokens": 15372623033.0, "step": 8551 }, { "epoch": 1.5244206773618538, "grad_norm": 0.181640625, "learning_rate": 4.541683553851061e-06, "loss": 0.9995, "num_tokens": 15378883515.0, "step": 8552 }, { "epoch": 1.5245989304812835, "grad_norm": 0.1708984375, "learning_rate": 4.539874387575106e-06, "loss": 0.9932, "num_tokens": 15385146238.0, "step": 8553 }, { "epoch": 1.5247771836007131, "grad_norm": 0.1767578125, "learning_rate": 4.538065759622781e-06, "loss": 1.0124, "num_tokens": 15391411642.0, "step": 8554 }, { "epoch": 1.5249554367201426, "grad_norm": 0.173828125, "learning_rate": 4.5362576701448054e-06, "loss": 1.0058, "num_tokens": 15397654622.0, "step": 8555 }, { "epoch": 1.5251336898395722, "grad_norm": 0.1728515625, "learning_rate": 4.5344501192918464e-06, "loss": 0.9909, "num_tokens": 15403940041.0, "step": 8556 }, { "epoch": 1.5253119429590019, "grad_norm": 0.173828125, "learning_rate": 4.532643107214526e-06, "loss": 1.0007, "num_tokens": 15410194441.0, "step": 8557 }, { "epoch": 1.5254901960784313, "grad_norm": 0.17578125, "learning_rate": 4.530836634063423e-06, "loss": 1.0198, "num_tokens": 15416481130.0, "step": 8558 }, { "epoch": 1.525668449197861, "grad_norm": 0.171875, "learning_rate": 4.529030699989071e-06, "loss": 1.0315, "num_tokens": 15422749463.0, "step": 8559 }, { "epoch": 1.5258467023172906, "grad_norm": 0.173828125, "learning_rate": 4.527225305141959e-06, "loss": 1.0503, "num_tokens": 15429032003.0, "step": 8560 }, { "epoch": 1.52602495543672, "grad_norm": 0.1708984375, "learning_rate": 4.525420449672529e-06, "loss": 0.9878, "num_tokens": 15435314042.0, "step": 8561 }, { "epoch": 1.5262032085561499, "grad_norm": 0.171875, "learning_rate": 4.523616133731184e-06, "loss": 1.0365, "num_tokens": 15441596905.0, "step": 8562 }, { "epoch": 1.5263814616755793, "grad_norm": 0.1748046875, "learning_rate": 4.521812357468278e-06, "loss": 1.0623, "num_tokens": 15447843243.0, "step": 8563 }, { "epoch": 1.526559714795009, "grad_norm": 0.1748046875, "learning_rate": 4.520009121034118e-06, "loss": 1.0342, "num_tokens": 15454125073.0, "step": 8564 }, { "epoch": 1.5267379679144386, "grad_norm": 0.1728515625, "learning_rate": 4.518206424578965e-06, "loss": 0.9818, "num_tokens": 15460390243.0, "step": 8565 }, { "epoch": 1.526916221033868, "grad_norm": 0.1748046875, "learning_rate": 4.516404268253047e-06, "loss": 1.0507, "num_tokens": 15466629822.0, "step": 8566 }, { "epoch": 1.5270944741532977, "grad_norm": 0.169921875, "learning_rate": 4.51460265220653e-06, "loss": 1.0126, "num_tokens": 15472889270.0, "step": 8567 }, { "epoch": 1.5272727272727273, "grad_norm": 0.17578125, "learning_rate": 4.5128015765895426e-06, "loss": 0.9922, "num_tokens": 15479160507.0, "step": 8568 }, { "epoch": 1.5274509803921568, "grad_norm": 0.177734375, "learning_rate": 4.511001041552174e-06, "loss": 1.0134, "num_tokens": 15485370555.0, "step": 8569 }, { "epoch": 1.5276292335115864, "grad_norm": 0.1728515625, "learning_rate": 4.509201047244461e-06, "loss": 1.0261, "num_tokens": 15491600822.0, "step": 8570 }, { "epoch": 1.527807486631016, "grad_norm": 0.17578125, "learning_rate": 4.507401593816398e-06, "loss": 1.0116, "num_tokens": 15497885521.0, "step": 8571 }, { "epoch": 1.5279857397504455, "grad_norm": 0.1669921875, "learning_rate": 4.505602681417931e-06, "loss": 0.9935, "num_tokens": 15504135008.0, "step": 8572 }, { "epoch": 1.5281639928698754, "grad_norm": 0.169921875, "learning_rate": 4.503804310198968e-06, "loss": 0.9823, "num_tokens": 15510418901.0, "step": 8573 }, { "epoch": 1.5283422459893048, "grad_norm": 0.173828125, "learning_rate": 4.502006480309369e-06, "loss": 1.0102, "num_tokens": 15516703158.0, "step": 8574 }, { "epoch": 1.5285204991087344, "grad_norm": 0.173828125, "learning_rate": 4.500209191898938e-06, "loss": 1.0176, "num_tokens": 15522936575.0, "step": 8575 }, { "epoch": 1.528698752228164, "grad_norm": 0.1748046875, "learning_rate": 4.498412445117452e-06, "loss": 1.0317, "num_tokens": 15529221333.0, "step": 8576 }, { "epoch": 1.5288770053475935, "grad_norm": 0.1806640625, "learning_rate": 4.496616240114633e-06, "loss": 1.0452, "num_tokens": 15535502193.0, "step": 8577 }, { "epoch": 1.5290552584670232, "grad_norm": 0.1728515625, "learning_rate": 4.494820577040159e-06, "loss": 1.0287, "num_tokens": 15541786949.0, "step": 8578 }, { "epoch": 1.5292335115864528, "grad_norm": 0.1748046875, "learning_rate": 4.493025456043657e-06, "loss": 1.0353, "num_tokens": 15548070156.0, "step": 8579 }, { "epoch": 1.5294117647058822, "grad_norm": 0.17578125, "learning_rate": 4.491230877274724e-06, "loss": 0.9981, "num_tokens": 15554342760.0, "step": 8580 }, { "epoch": 1.529590017825312, "grad_norm": 0.169921875, "learning_rate": 4.4894368408829e-06, "loss": 1.0003, "num_tokens": 15560611285.0, "step": 8581 }, { "epoch": 1.5297682709447415, "grad_norm": 0.17578125, "learning_rate": 4.4876433470176766e-06, "loss": 1.0382, "num_tokens": 15566856133.0, "step": 8582 }, { "epoch": 1.529946524064171, "grad_norm": 0.173828125, "learning_rate": 4.485850395828513e-06, "loss": 1.0071, "num_tokens": 15573140950.0, "step": 8583 }, { "epoch": 1.5301247771836008, "grad_norm": 0.171875, "learning_rate": 4.484057987464814e-06, "loss": 1.033, "num_tokens": 15579393766.0, "step": 8584 }, { "epoch": 1.5303030303030303, "grad_norm": 0.1806640625, "learning_rate": 4.48226612207594e-06, "loss": 1.0129, "num_tokens": 15585672971.0, "step": 8585 }, { "epoch": 1.53048128342246, "grad_norm": 0.171875, "learning_rate": 4.480474799811206e-06, "loss": 1.0103, "num_tokens": 15591920032.0, "step": 8586 }, { "epoch": 1.5306595365418896, "grad_norm": 0.173828125, "learning_rate": 4.478684020819889e-06, "loss": 1.0517, "num_tokens": 15598179330.0, "step": 8587 }, { "epoch": 1.530837789661319, "grad_norm": 0.173828125, "learning_rate": 4.4768937852512106e-06, "loss": 1.0261, "num_tokens": 15604396654.0, "step": 8588 }, { "epoch": 1.5310160427807487, "grad_norm": 0.1767578125, "learning_rate": 4.475104093254353e-06, "loss": 1.0168, "num_tokens": 15610676082.0, "step": 8589 }, { "epoch": 1.5311942959001783, "grad_norm": 0.1767578125, "learning_rate": 4.473314944978451e-06, "loss": 1.0379, "num_tokens": 15616927164.0, "step": 8590 }, { "epoch": 1.5313725490196077, "grad_norm": 0.169921875, "learning_rate": 4.4715263405725965e-06, "loss": 1.0564, "num_tokens": 15623183088.0, "step": 8591 }, { "epoch": 1.5315508021390374, "grad_norm": 0.1806640625, "learning_rate": 4.469738280185831e-06, "loss": 1.0548, "num_tokens": 15629466234.0, "step": 8592 }, { "epoch": 1.531729055258467, "grad_norm": 0.1748046875, "learning_rate": 4.467950763967154e-06, "loss": 1.0495, "num_tokens": 15635746065.0, "step": 8593 }, { "epoch": 1.5319073083778965, "grad_norm": 0.1728515625, "learning_rate": 4.4661637920655236e-06, "loss": 0.9945, "num_tokens": 15642031882.0, "step": 8594 }, { "epoch": 1.5320855614973263, "grad_norm": 0.181640625, "learning_rate": 4.464377364629846e-06, "loss": 1.0345, "num_tokens": 15648284477.0, "step": 8595 }, { "epoch": 1.5322638146167558, "grad_norm": 0.1748046875, "learning_rate": 4.462591481808985e-06, "loss": 1.0011, "num_tokens": 15654536206.0, "step": 8596 }, { "epoch": 1.5324420677361854, "grad_norm": 0.1796875, "learning_rate": 4.460806143751758e-06, "loss": 1.0266, "num_tokens": 15660743481.0, "step": 8597 }, { "epoch": 1.532620320855615, "grad_norm": 0.177734375, "learning_rate": 4.459021350606938e-06, "loss": 0.9963, "num_tokens": 15667011530.0, "step": 8598 }, { "epoch": 1.5327985739750445, "grad_norm": 0.1728515625, "learning_rate": 4.457237102523253e-06, "loss": 1.0299, "num_tokens": 15673280260.0, "step": 8599 }, { "epoch": 1.5329768270944741, "grad_norm": 0.173828125, "learning_rate": 4.455453399649381e-06, "loss": 0.9993, "num_tokens": 15679563756.0, "step": 8600 }, { "epoch": 1.5331550802139038, "grad_norm": 0.1865234375, "learning_rate": 4.453670242133963e-06, "loss": 1.0103, "num_tokens": 15685791467.0, "step": 8601 }, { "epoch": 1.5333333333333332, "grad_norm": 0.1689453125, "learning_rate": 4.451887630125589e-06, "loss": 1.0339, "num_tokens": 15692058293.0, "step": 8602 }, { "epoch": 1.533511586452763, "grad_norm": 0.1748046875, "learning_rate": 4.450105563772803e-06, "loss": 1.0, "num_tokens": 15698333872.0, "step": 8603 }, { "epoch": 1.5336898395721925, "grad_norm": 0.1748046875, "learning_rate": 4.448324043224104e-06, "loss": 0.989, "num_tokens": 15704586417.0, "step": 8604 }, { "epoch": 1.533868092691622, "grad_norm": 0.17578125, "learning_rate": 4.4465430686279535e-06, "loss": 1.0431, "num_tokens": 15710838169.0, "step": 8605 }, { "epoch": 1.5340463458110518, "grad_norm": 0.173828125, "learning_rate": 4.444762640132751e-06, "loss": 1.0078, "num_tokens": 15717122769.0, "step": 8606 }, { "epoch": 1.5342245989304812, "grad_norm": 0.171875, "learning_rate": 4.442982757886862e-06, "loss": 1.004, "num_tokens": 15723407323.0, "step": 8607 }, { "epoch": 1.534402852049911, "grad_norm": 0.173828125, "learning_rate": 4.4412034220386085e-06, "loss": 1.0425, "num_tokens": 15729663323.0, "step": 8608 }, { "epoch": 1.5345811051693405, "grad_norm": 0.173828125, "learning_rate": 4.439424632736262e-06, "loss": 1.0131, "num_tokens": 15735947443.0, "step": 8609 }, { "epoch": 1.53475935828877, "grad_norm": 0.1767578125, "learning_rate": 4.437646390128047e-06, "loss": 1.0444, "num_tokens": 15742223545.0, "step": 8610 }, { "epoch": 1.5349376114081996, "grad_norm": 0.177734375, "learning_rate": 4.435868694362145e-06, "loss": 1.0171, "num_tokens": 15748505322.0, "step": 8611 }, { "epoch": 1.5351158645276293, "grad_norm": 0.17578125, "learning_rate": 4.434091545586699e-06, "loss": 1.0513, "num_tokens": 15754789584.0, "step": 8612 }, { "epoch": 1.5352941176470587, "grad_norm": 0.171875, "learning_rate": 4.432314943949787e-06, "loss": 1.0294, "num_tokens": 15761054260.0, "step": 8613 }, { "epoch": 1.5354723707664886, "grad_norm": 0.1728515625, "learning_rate": 4.43053888959946e-06, "loss": 1.0136, "num_tokens": 15767306124.0, "step": 8614 }, { "epoch": 1.535650623885918, "grad_norm": 0.1708984375, "learning_rate": 4.428763382683718e-06, "loss": 1.0026, "num_tokens": 15773589903.0, "step": 8615 }, { "epoch": 1.5358288770053476, "grad_norm": 0.17578125, "learning_rate": 4.426988423350514e-06, "loss": 1.02, "num_tokens": 15779850136.0, "step": 8616 }, { "epoch": 1.5360071301247773, "grad_norm": 0.1767578125, "learning_rate": 4.4252140117477544e-06, "loss": 1.0305, "num_tokens": 15786105323.0, "step": 8617 }, { "epoch": 1.5361853832442067, "grad_norm": 0.1708984375, "learning_rate": 4.423440148023298e-06, "loss": 1.0098, "num_tokens": 15792363217.0, "step": 8618 }, { "epoch": 1.5363636363636364, "grad_norm": 0.17578125, "learning_rate": 4.421666832324969e-06, "loss": 1.0238, "num_tokens": 15798647368.0, "step": 8619 }, { "epoch": 1.536541889483066, "grad_norm": 0.1728515625, "learning_rate": 4.419894064800535e-06, "loss": 1.0414, "num_tokens": 15804931948.0, "step": 8620 }, { "epoch": 1.5367201426024955, "grad_norm": 0.17578125, "learning_rate": 4.4181218455977194e-06, "loss": 1.0314, "num_tokens": 15811216277.0, "step": 8621 }, { "epoch": 1.536898395721925, "grad_norm": 0.1748046875, "learning_rate": 4.416350174864199e-06, "loss": 0.9955, "num_tokens": 15817497910.0, "step": 8622 }, { "epoch": 1.5370766488413548, "grad_norm": 0.1826171875, "learning_rate": 4.414579052747613e-06, "loss": 1.0254, "num_tokens": 15823748345.0, "step": 8623 }, { "epoch": 1.5372549019607842, "grad_norm": 0.1748046875, "learning_rate": 4.412808479395547e-06, "loss": 0.9905, "num_tokens": 15830031696.0, "step": 8624 }, { "epoch": 1.537433155080214, "grad_norm": 0.177734375, "learning_rate": 4.411038454955543e-06, "loss": 1.0457, "num_tokens": 15836300690.0, "step": 8625 }, { "epoch": 1.5376114081996435, "grad_norm": 0.1767578125, "learning_rate": 4.4092689795750995e-06, "loss": 1.0413, "num_tokens": 15842552680.0, "step": 8626 }, { "epoch": 1.5377896613190731, "grad_norm": 0.17578125, "learning_rate": 4.407500053401665e-06, "loss": 1.031, "num_tokens": 15848827853.0, "step": 8627 }, { "epoch": 1.5379679144385028, "grad_norm": 0.173828125, "learning_rate": 4.405731676582649e-06, "loss": 1.0227, "num_tokens": 15855111594.0, "step": 8628 }, { "epoch": 1.5381461675579322, "grad_norm": 0.173828125, "learning_rate": 4.4039638492654e-06, "loss": 1.0138, "num_tokens": 15861393536.0, "step": 8629 }, { "epoch": 1.5383244206773619, "grad_norm": 0.1748046875, "learning_rate": 4.402196571597242e-06, "loss": 1.0398, "num_tokens": 15867668954.0, "step": 8630 }, { "epoch": 1.5385026737967915, "grad_norm": 0.169921875, "learning_rate": 4.40042984372544e-06, "loss": 1.0037, "num_tokens": 15873951896.0, "step": 8631 }, { "epoch": 1.538680926916221, "grad_norm": 0.1748046875, "learning_rate": 4.3986636657972136e-06, "loss": 0.9985, "num_tokens": 15880218768.0, "step": 8632 }, { "epoch": 1.5388591800356506, "grad_norm": 0.169921875, "learning_rate": 4.396898037959738e-06, "loss": 1.0177, "num_tokens": 15886496544.0, "step": 8633 }, { "epoch": 1.5390374331550802, "grad_norm": 0.1767578125, "learning_rate": 4.395132960360148e-06, "loss": 1.0178, "num_tokens": 15892752943.0, "step": 8634 }, { "epoch": 1.5392156862745097, "grad_norm": 0.169921875, "learning_rate": 4.393368433145527e-06, "loss": 1.0098, "num_tokens": 15899028458.0, "step": 8635 }, { "epoch": 1.5393939393939395, "grad_norm": 0.173828125, "learning_rate": 4.391604456462908e-06, "loss": 1.044, "num_tokens": 15905312344.0, "step": 8636 }, { "epoch": 1.539572192513369, "grad_norm": 0.173828125, "learning_rate": 4.389841030459289e-06, "loss": 1.0376, "num_tokens": 15911592098.0, "step": 8637 }, { "epoch": 1.5397504456327986, "grad_norm": 0.173828125, "learning_rate": 4.388078155281614e-06, "loss": 1.0388, "num_tokens": 15917860880.0, "step": 8638 }, { "epoch": 1.5399286987522283, "grad_norm": 0.1728515625, "learning_rate": 4.386315831076785e-06, "loss": 1.0026, "num_tokens": 15924114715.0, "step": 8639 }, { "epoch": 1.5401069518716577, "grad_norm": 0.1728515625, "learning_rate": 4.3845540579916545e-06, "loss": 1.023, "num_tokens": 15930374317.0, "step": 8640 }, { "epoch": 1.5402852049910873, "grad_norm": 0.1787109375, "learning_rate": 4.382792836173036e-06, "loss": 1.0127, "num_tokens": 15936658920.0, "step": 8641 }, { "epoch": 1.540463458110517, "grad_norm": 0.1787109375, "learning_rate": 4.38103216576769e-06, "loss": 1.0108, "num_tokens": 15942918941.0, "step": 8642 }, { "epoch": 1.5406417112299464, "grad_norm": 0.1748046875, "learning_rate": 4.379272046922333e-06, "loss": 1.0149, "num_tokens": 15949172665.0, "step": 8643 }, { "epoch": 1.540819964349376, "grad_norm": 0.1806640625, "learning_rate": 4.377512479783637e-06, "loss": 1.0005, "num_tokens": 15955456688.0, "step": 8644 }, { "epoch": 1.5409982174688057, "grad_norm": 0.1748046875, "learning_rate": 4.375753464498227e-06, "loss": 1.0408, "num_tokens": 15961709536.0, "step": 8645 }, { "epoch": 1.5411764705882351, "grad_norm": 0.1796875, "learning_rate": 4.37399500121268e-06, "loss": 1.0071, "num_tokens": 15967995420.0, "step": 8646 }, { "epoch": 1.541354723707665, "grad_norm": 0.1708984375, "learning_rate": 4.372237090073529e-06, "loss": 1.0284, "num_tokens": 15974278453.0, "step": 8647 }, { "epoch": 1.5415329768270944, "grad_norm": 0.1748046875, "learning_rate": 4.370479731227266e-06, "loss": 1.0155, "num_tokens": 15980562101.0, "step": 8648 }, { "epoch": 1.541711229946524, "grad_norm": 0.1826171875, "learning_rate": 4.368722924820329e-06, "loss": 1.0112, "num_tokens": 15986829898.0, "step": 8649 }, { "epoch": 1.5418894830659537, "grad_norm": 0.171875, "learning_rate": 4.366966670999112e-06, "loss": 1.0113, "num_tokens": 15993087436.0, "step": 8650 }, { "epoch": 1.5420677361853832, "grad_norm": 0.1767578125, "learning_rate": 4.365210969909968e-06, "loss": 1.0543, "num_tokens": 15999339896.0, "step": 8651 }, { "epoch": 1.5422459893048128, "grad_norm": 0.1796875, "learning_rate": 4.363455821699194e-06, "loss": 1.0006, "num_tokens": 16005622059.0, "step": 8652 }, { "epoch": 1.5424242424242425, "grad_norm": 0.1767578125, "learning_rate": 4.361701226513051e-06, "loss": 1.0065, "num_tokens": 16011904786.0, "step": 8653 }, { "epoch": 1.542602495543672, "grad_norm": 0.169921875, "learning_rate": 4.359947184497746e-06, "loss": 0.9951, "num_tokens": 16018150127.0, "step": 8654 }, { "epoch": 1.5427807486631016, "grad_norm": 0.177734375, "learning_rate": 4.35819369579945e-06, "loss": 1.0113, "num_tokens": 16024410213.0, "step": 8655 }, { "epoch": 1.5429590017825312, "grad_norm": 0.171875, "learning_rate": 4.3564407605642755e-06, "loss": 1.0595, "num_tokens": 16030678352.0, "step": 8656 }, { "epoch": 1.5431372549019606, "grad_norm": 0.173828125, "learning_rate": 4.354688378938299e-06, "loss": 1.0505, "num_tokens": 16036943829.0, "step": 8657 }, { "epoch": 1.5433155080213905, "grad_norm": 0.171875, "learning_rate": 4.352936551067546e-06, "loss": 1.0184, "num_tokens": 16043229204.0, "step": 8658 }, { "epoch": 1.54349376114082, "grad_norm": 0.173828125, "learning_rate": 4.351185277097995e-06, "loss": 1.0406, "num_tokens": 16049483476.0, "step": 8659 }, { "epoch": 1.5436720142602496, "grad_norm": 0.173828125, "learning_rate": 4.349434557175582e-06, "loss": 1.0533, "num_tokens": 16055767610.0, "step": 8660 }, { "epoch": 1.5438502673796792, "grad_norm": 0.1748046875, "learning_rate": 4.3476843914461896e-06, "loss": 1.0207, "num_tokens": 16062026165.0, "step": 8661 }, { "epoch": 1.5440285204991087, "grad_norm": 0.1806640625, "learning_rate": 4.345934780055667e-06, "loss": 1.0024, "num_tokens": 16068310110.0, "step": 8662 }, { "epoch": 1.5442067736185383, "grad_norm": 0.1728515625, "learning_rate": 4.344185723149806e-06, "loss": 1.0105, "num_tokens": 16074592527.0, "step": 8663 }, { "epoch": 1.544385026737968, "grad_norm": 0.1728515625, "learning_rate": 4.3424372208743574e-06, "loss": 1.0527, "num_tokens": 16080835926.0, "step": 8664 }, { "epoch": 1.5445632798573974, "grad_norm": 0.17578125, "learning_rate": 4.34068927337502e-06, "loss": 1.0294, "num_tokens": 16087115600.0, "step": 8665 }, { "epoch": 1.5447415329768273, "grad_norm": 0.169921875, "learning_rate": 4.338941880797458e-06, "loss": 1.0073, "num_tokens": 16093398737.0, "step": 8666 }, { "epoch": 1.5449197860962567, "grad_norm": 0.177734375, "learning_rate": 4.337195043287277e-06, "loss": 1.0543, "num_tokens": 16099662082.0, "step": 8667 }, { "epoch": 1.5450980392156861, "grad_norm": 0.17578125, "learning_rate": 4.335448760990038e-06, "loss": 0.9972, "num_tokens": 16105944008.0, "step": 8668 }, { "epoch": 1.545276292335116, "grad_norm": 0.1689453125, "learning_rate": 4.333703034051266e-06, "loss": 1.0226, "num_tokens": 16112228997.0, "step": 8669 }, { "epoch": 1.5454545454545454, "grad_norm": 0.177734375, "learning_rate": 4.331957862616432e-06, "loss": 1.028, "num_tokens": 16118514532.0, "step": 8670 }, { "epoch": 1.545632798573975, "grad_norm": 0.16796875, "learning_rate": 4.3302132468309576e-06, "loss": 1.0454, "num_tokens": 16124793498.0, "step": 8671 }, { "epoch": 1.5458110516934047, "grad_norm": 0.169921875, "learning_rate": 4.328469186840222e-06, "loss": 1.0148, "num_tokens": 16131077654.0, "step": 8672 }, { "epoch": 1.5459893048128341, "grad_norm": 0.1728515625, "learning_rate": 4.326725682789561e-06, "loss": 1.0261, "num_tokens": 16137361764.0, "step": 8673 }, { "epoch": 1.5461675579322638, "grad_norm": 0.169921875, "learning_rate": 4.324982734824265e-06, "loss": 1.0194, "num_tokens": 16143626976.0, "step": 8674 }, { "epoch": 1.5463458110516934, "grad_norm": 0.1767578125, "learning_rate": 4.323240343089563e-06, "loss": 1.033, "num_tokens": 16149867352.0, "step": 8675 }, { "epoch": 1.5465240641711229, "grad_norm": 0.173828125, "learning_rate": 4.321498507730658e-06, "loss": 1.017, "num_tokens": 16156127740.0, "step": 8676 }, { "epoch": 1.5467023172905527, "grad_norm": 0.171875, "learning_rate": 4.319757228892694e-06, "loss": 1.0216, "num_tokens": 16162411939.0, "step": 8677 }, { "epoch": 1.5468805704099822, "grad_norm": 0.177734375, "learning_rate": 4.318016506720775e-06, "loss": 1.007, "num_tokens": 16168664354.0, "step": 8678 }, { "epoch": 1.5470588235294118, "grad_norm": 0.1767578125, "learning_rate": 4.316276341359949e-06, "loss": 1.0554, "num_tokens": 16174939500.0, "step": 8679 }, { "epoch": 1.5472370766488415, "grad_norm": 0.17578125, "learning_rate": 4.314536732955233e-06, "loss": 1.0221, "num_tokens": 16181223409.0, "step": 8680 }, { "epoch": 1.547415329768271, "grad_norm": 0.1767578125, "learning_rate": 4.312797681651586e-06, "loss": 1.0354, "num_tokens": 16187508524.0, "step": 8681 }, { "epoch": 1.5475935828877005, "grad_norm": 0.1767578125, "learning_rate": 4.311059187593919e-06, "loss": 1.0478, "num_tokens": 16193793006.0, "step": 8682 }, { "epoch": 1.5477718360071302, "grad_norm": 0.171875, "learning_rate": 4.309321250927106e-06, "loss": 1.0303, "num_tokens": 16200053421.0, "step": 8683 }, { "epoch": 1.5479500891265596, "grad_norm": 0.17578125, "learning_rate": 4.3075838717959676e-06, "loss": 1.0098, "num_tokens": 16206336566.0, "step": 8684 }, { "epoch": 1.5481283422459893, "grad_norm": 0.1767578125, "learning_rate": 4.305847050345281e-06, "loss": 0.9991, "num_tokens": 16212621935.0, "step": 8685 }, { "epoch": 1.548306595365419, "grad_norm": 0.171875, "learning_rate": 4.304110786719773e-06, "loss": 1.0596, "num_tokens": 16218906807.0, "step": 8686 }, { "epoch": 1.5484848484848484, "grad_norm": 0.171875, "learning_rate": 4.302375081064132e-06, "loss": 1.0115, "num_tokens": 16225186651.0, "step": 8687 }, { "epoch": 1.5486631016042782, "grad_norm": 0.1728515625, "learning_rate": 4.300639933522992e-06, "loss": 1.0629, "num_tokens": 16231437699.0, "step": 8688 }, { "epoch": 1.5488413547237077, "grad_norm": 0.173828125, "learning_rate": 4.298905344240942e-06, "loss": 0.9903, "num_tokens": 16237702842.0, "step": 8689 }, { "epoch": 1.5490196078431373, "grad_norm": 0.17578125, "learning_rate": 4.297171313362527e-06, "loss": 1.0059, "num_tokens": 16243972848.0, "step": 8690 }, { "epoch": 1.549197860962567, "grad_norm": 0.17578125, "learning_rate": 4.2954378410322465e-06, "loss": 0.9984, "num_tokens": 16250230613.0, "step": 8691 }, { "epoch": 1.5493761140819964, "grad_norm": 0.173828125, "learning_rate": 4.293704927394546e-06, "loss": 1.0077, "num_tokens": 16256486912.0, "step": 8692 }, { "epoch": 1.549554367201426, "grad_norm": 0.1728515625, "learning_rate": 4.291972572593831e-06, "loss": 1.02, "num_tokens": 16262771320.0, "step": 8693 }, { "epoch": 1.5497326203208557, "grad_norm": 0.173828125, "learning_rate": 4.290240776774462e-06, "loss": 1.0053, "num_tokens": 16269029092.0, "step": 8694 }, { "epoch": 1.549910873440285, "grad_norm": 0.171875, "learning_rate": 4.2885095400807485e-06, "loss": 1.023, "num_tokens": 16275289978.0, "step": 8695 }, { "epoch": 1.5500891265597148, "grad_norm": 0.177734375, "learning_rate": 4.286778862656954e-06, "loss": 1.0068, "num_tokens": 16281543164.0, "step": 8696 }, { "epoch": 1.5502673796791444, "grad_norm": 0.1708984375, "learning_rate": 4.285048744647297e-06, "loss": 1.007, "num_tokens": 16287815944.0, "step": 8697 }, { "epoch": 1.5504456327985738, "grad_norm": 0.169921875, "learning_rate": 4.283319186195948e-06, "loss": 1.023, "num_tokens": 16294099579.0, "step": 8698 }, { "epoch": 1.5506238859180037, "grad_norm": 0.1806640625, "learning_rate": 4.281590187447031e-06, "loss": 1.0295, "num_tokens": 16300356877.0, "step": 8699 }, { "epoch": 1.5508021390374331, "grad_norm": 0.1728515625, "learning_rate": 4.279861748544623e-06, "loss": 1.0281, "num_tokens": 16306640379.0, "step": 8700 }, { "epoch": 1.5509803921568628, "grad_norm": 0.177734375, "learning_rate": 4.278133869632758e-06, "loss": 1.0053, "num_tokens": 16312922854.0, "step": 8701 }, { "epoch": 1.5511586452762924, "grad_norm": 0.17578125, "learning_rate": 4.276406550855421e-06, "loss": 1.0038, "num_tokens": 16319189034.0, "step": 8702 }, { "epoch": 1.5513368983957219, "grad_norm": 0.1689453125, "learning_rate": 4.274679792356547e-06, "loss": 1.013, "num_tokens": 16325473854.0, "step": 8703 }, { "epoch": 1.5515151515151515, "grad_norm": 0.1767578125, "learning_rate": 4.272953594280028e-06, "loss": 0.9947, "num_tokens": 16331708588.0, "step": 8704 }, { "epoch": 1.5516934046345812, "grad_norm": 0.1767578125, "learning_rate": 4.27122795676971e-06, "loss": 1.0036, "num_tokens": 16337991670.0, "step": 8705 }, { "epoch": 1.5518716577540106, "grad_norm": 0.173828125, "learning_rate": 4.269502879969388e-06, "loss": 1.0246, "num_tokens": 16344274789.0, "step": 8706 }, { "epoch": 1.5520499108734402, "grad_norm": 0.1669921875, "learning_rate": 4.267778364022812e-06, "loss": 1.0284, "num_tokens": 16350515505.0, "step": 8707 }, { "epoch": 1.55222816399287, "grad_norm": 0.1748046875, "learning_rate": 4.2660544090736925e-06, "loss": 1.0323, "num_tokens": 16356800004.0, "step": 8708 }, { "epoch": 1.5524064171122993, "grad_norm": 0.1708984375, "learning_rate": 4.2643310152656816e-06, "loss": 0.9962, "num_tokens": 16363082775.0, "step": 8709 }, { "epoch": 1.5525846702317292, "grad_norm": 0.171875, "learning_rate": 4.262608182742391e-06, "loss": 1.0169, "num_tokens": 16369331401.0, "step": 8710 }, { "epoch": 1.5527629233511586, "grad_norm": 0.1787109375, "learning_rate": 4.260885911647385e-06, "loss": 1.0342, "num_tokens": 16375558913.0, "step": 8711 }, { "epoch": 1.5529411764705883, "grad_norm": 0.1748046875, "learning_rate": 4.259164202124186e-06, "loss": 1.0206, "num_tokens": 16381825787.0, "step": 8712 }, { "epoch": 1.553119429590018, "grad_norm": 0.1767578125, "learning_rate": 4.257443054316256e-06, "loss": 1.0306, "num_tokens": 16388090084.0, "step": 8713 }, { "epoch": 1.5532976827094473, "grad_norm": 0.1748046875, "learning_rate": 4.255722468367021e-06, "loss": 1.0347, "num_tokens": 16394373005.0, "step": 8714 }, { "epoch": 1.553475935828877, "grad_norm": 0.1728515625, "learning_rate": 4.25400244441986e-06, "loss": 1.0078, "num_tokens": 16400655730.0, "step": 8715 }, { "epoch": 1.5536541889483066, "grad_norm": 0.1787109375, "learning_rate": 4.252282982618103e-06, "loss": 1.0028, "num_tokens": 16406911798.0, "step": 8716 }, { "epoch": 1.553832442067736, "grad_norm": 0.17578125, "learning_rate": 4.250564083105032e-06, "loss": 1.0642, "num_tokens": 16413173527.0, "step": 8717 }, { "epoch": 1.5540106951871657, "grad_norm": 0.17578125, "learning_rate": 4.248845746023881e-06, "loss": 1.0139, "num_tokens": 16419384719.0, "step": 8718 }, { "epoch": 1.5541889483065954, "grad_norm": 0.173828125, "learning_rate": 4.247127971517844e-06, "loss": 1.0029, "num_tokens": 16425641665.0, "step": 8719 }, { "epoch": 1.5543672014260248, "grad_norm": 0.171875, "learning_rate": 4.245410759730063e-06, "loss": 1.0274, "num_tokens": 16431909372.0, "step": 8720 }, { "epoch": 1.5545454545454547, "grad_norm": 0.1728515625, "learning_rate": 4.243694110803629e-06, "loss": 1.0053, "num_tokens": 16438174241.0, "step": 8721 }, { "epoch": 1.554723707664884, "grad_norm": 0.17578125, "learning_rate": 4.2419780248815945e-06, "loss": 0.9906, "num_tokens": 16444460446.0, "step": 8722 }, { "epoch": 1.5549019607843138, "grad_norm": 0.171875, "learning_rate": 4.2402625021069595e-06, "loss": 1.0248, "num_tokens": 16450721012.0, "step": 8723 }, { "epoch": 1.5550802139037434, "grad_norm": 0.16796875, "learning_rate": 4.238547542622682e-06, "loss": 0.997, "num_tokens": 16457004510.0, "step": 8724 }, { "epoch": 1.5552584670231728, "grad_norm": 0.1728515625, "learning_rate": 4.236833146571663e-06, "loss": 1.0074, "num_tokens": 16463275953.0, "step": 8725 }, { "epoch": 1.5554367201426025, "grad_norm": 0.1689453125, "learning_rate": 4.2351193140967725e-06, "loss": 0.9995, "num_tokens": 16469524307.0, "step": 8726 }, { "epoch": 1.5556149732620321, "grad_norm": 0.17578125, "learning_rate": 4.233406045340823e-06, "loss": 1.0071, "num_tokens": 16475779354.0, "step": 8727 }, { "epoch": 1.5557932263814616, "grad_norm": 0.1728515625, "learning_rate": 4.231693340446572e-06, "loss": 0.9826, "num_tokens": 16482050970.0, "step": 8728 }, { "epoch": 1.5559714795008914, "grad_norm": 0.17578125, "learning_rate": 4.229981199556749e-06, "loss": 1.031, "num_tokens": 16488306898.0, "step": 8729 }, { "epoch": 1.5561497326203209, "grad_norm": 0.173828125, "learning_rate": 4.228269622814025e-06, "loss": 1.0253, "num_tokens": 16494568692.0, "step": 8730 }, { "epoch": 1.5563279857397503, "grad_norm": 0.169921875, "learning_rate": 4.226558610361026e-06, "loss": 1.0111, "num_tokens": 16500807252.0, "step": 8731 }, { "epoch": 1.5565062388591802, "grad_norm": 0.1708984375, "learning_rate": 4.224848162340327e-06, "loss": 1.0117, "num_tokens": 16507077347.0, "step": 8732 }, { "epoch": 1.5566844919786096, "grad_norm": 0.1708984375, "learning_rate": 4.223138278894467e-06, "loss": 1.0047, "num_tokens": 16513357114.0, "step": 8733 }, { "epoch": 1.5568627450980392, "grad_norm": 0.171875, "learning_rate": 4.221428960165928e-06, "loss": 1.0355, "num_tokens": 16519641744.0, "step": 8734 }, { "epoch": 1.5570409982174689, "grad_norm": 0.18359375, "learning_rate": 4.219720206297145e-06, "loss": 1.0257, "num_tokens": 16525925580.0, "step": 8735 }, { "epoch": 1.5572192513368983, "grad_norm": 0.17578125, "learning_rate": 4.218012017430513e-06, "loss": 1.0457, "num_tokens": 16532200135.0, "step": 8736 }, { "epoch": 1.557397504456328, "grad_norm": 0.171875, "learning_rate": 4.216304393708373e-06, "loss": 1.0139, "num_tokens": 16538440073.0, "step": 8737 }, { "epoch": 1.5575757575757576, "grad_norm": 0.173828125, "learning_rate": 4.214597335273022e-06, "loss": 1.0058, "num_tokens": 16544722859.0, "step": 8738 }, { "epoch": 1.557754010695187, "grad_norm": 0.16796875, "learning_rate": 4.212890842266709e-06, "loss": 1.0041, "num_tokens": 16550988201.0, "step": 8739 }, { "epoch": 1.557932263814617, "grad_norm": 0.173828125, "learning_rate": 4.211184914831639e-06, "loss": 0.9755, "num_tokens": 16557256530.0, "step": 8740 }, { "epoch": 1.5581105169340463, "grad_norm": 0.17578125, "learning_rate": 4.209479553109966e-06, "loss": 1.0457, "num_tokens": 16563516586.0, "step": 8741 }, { "epoch": 1.558288770053476, "grad_norm": 0.1728515625, "learning_rate": 4.2077747572437985e-06, "loss": 1.0066, "num_tokens": 16569791942.0, "step": 8742 }, { "epoch": 1.5584670231729056, "grad_norm": 0.1748046875, "learning_rate": 4.2060705273751955e-06, "loss": 1.0193, "num_tokens": 16576075305.0, "step": 8743 }, { "epoch": 1.558645276292335, "grad_norm": 0.1669921875, "learning_rate": 4.204366863646174e-06, "loss": 1.0029, "num_tokens": 16582345754.0, "step": 8744 }, { "epoch": 1.5588235294117647, "grad_norm": 0.171875, "learning_rate": 4.202663766198698e-06, "loss": 1.0237, "num_tokens": 16588609793.0, "step": 8745 }, { "epoch": 1.5590017825311944, "grad_norm": 0.1767578125, "learning_rate": 4.2009612351746855e-06, "loss": 1.0215, "num_tokens": 16594895252.0, "step": 8746 }, { "epoch": 1.5591800356506238, "grad_norm": 0.1748046875, "learning_rate": 4.199259270716014e-06, "loss": 1.0394, "num_tokens": 16601178164.0, "step": 8747 }, { "epoch": 1.5593582887700534, "grad_norm": 0.1708984375, "learning_rate": 4.197557872964505e-06, "loss": 1.0172, "num_tokens": 16607431391.0, "step": 8748 }, { "epoch": 1.559536541889483, "grad_norm": 0.171875, "learning_rate": 4.195857042061937e-06, "loss": 1.0044, "num_tokens": 16613697944.0, "step": 8749 }, { "epoch": 1.5597147950089125, "grad_norm": 0.1728515625, "learning_rate": 4.1941567781500385e-06, "loss": 1.0095, "num_tokens": 16619935413.0, "step": 8750 }, { "epoch": 1.5598930481283424, "grad_norm": 0.1806640625, "learning_rate": 4.192457081370501e-06, "loss": 1.0075, "num_tokens": 16626208256.0, "step": 8751 }, { "epoch": 1.5600713012477718, "grad_norm": 0.1748046875, "learning_rate": 4.1907579518649505e-06, "loss": 1.0, "num_tokens": 16632492731.0, "step": 8752 }, { "epoch": 1.5602495543672015, "grad_norm": 0.169921875, "learning_rate": 4.189059389774979e-06, "loss": 1.0055, "num_tokens": 16638776250.0, "step": 8753 }, { "epoch": 1.5604278074866311, "grad_norm": 0.171875, "learning_rate": 4.18736139524213e-06, "loss": 1.0395, "num_tokens": 16645029312.0, "step": 8754 }, { "epoch": 1.5606060606060606, "grad_norm": 0.169921875, "learning_rate": 4.185663968407897e-06, "loss": 1.0008, "num_tokens": 16651314212.0, "step": 8755 }, { "epoch": 1.5607843137254902, "grad_norm": 0.16796875, "learning_rate": 4.1839671094137266e-06, "loss": 0.9927, "num_tokens": 16657573054.0, "step": 8756 }, { "epoch": 1.5609625668449199, "grad_norm": 0.169921875, "learning_rate": 4.182270818401017e-06, "loss": 1.0, "num_tokens": 16663854811.0, "step": 8757 }, { "epoch": 1.5611408199643493, "grad_norm": 0.166015625, "learning_rate": 4.180575095511126e-06, "loss": 1.0059, "num_tokens": 16670138842.0, "step": 8758 }, { "epoch": 1.561319073083779, "grad_norm": 0.171875, "learning_rate": 4.178879940885353e-06, "loss": 1.0074, "num_tokens": 16676401447.0, "step": 8759 }, { "epoch": 1.5614973262032086, "grad_norm": 0.1669921875, "learning_rate": 4.1771853546649535e-06, "loss": 1.0065, "num_tokens": 16682684085.0, "step": 8760 }, { "epoch": 1.561675579322638, "grad_norm": 0.1708984375, "learning_rate": 4.175491336991144e-06, "loss": 1.0151, "num_tokens": 16688953889.0, "step": 8761 }, { "epoch": 1.5618538324420679, "grad_norm": 0.17578125, "learning_rate": 4.173797888005085e-06, "loss": 1.0072, "num_tokens": 16695175231.0, "step": 8762 }, { "epoch": 1.5620320855614973, "grad_norm": 0.173828125, "learning_rate": 4.1721050078478905e-06, "loss": 1.024, "num_tokens": 16701459011.0, "step": 8763 }, { "epoch": 1.562210338680927, "grad_norm": 0.1787109375, "learning_rate": 4.170412696660628e-06, "loss": 1.0003, "num_tokens": 16707720729.0, "step": 8764 }, { "epoch": 1.5623885918003566, "grad_norm": 0.169921875, "learning_rate": 4.168720954584323e-06, "loss": 1.005, "num_tokens": 16714005959.0, "step": 8765 }, { "epoch": 1.562566844919786, "grad_norm": 0.1689453125, "learning_rate": 4.167029781759948e-06, "loss": 0.9861, "num_tokens": 16720290163.0, "step": 8766 }, { "epoch": 1.5627450980392157, "grad_norm": 0.1796875, "learning_rate": 4.16533917832842e-06, "loss": 1.0266, "num_tokens": 16726573145.0, "step": 8767 }, { "epoch": 1.5629233511586453, "grad_norm": 0.1708984375, "learning_rate": 4.163649144430628e-06, "loss": 1.0142, "num_tokens": 16732850370.0, "step": 8768 }, { "epoch": 1.5631016042780748, "grad_norm": 0.1767578125, "learning_rate": 4.161959680207398e-06, "loss": 1.0199, "num_tokens": 16739134261.0, "step": 8769 }, { "epoch": 1.5632798573975044, "grad_norm": 0.1748046875, "learning_rate": 4.1602707857995136e-06, "loss": 0.9902, "num_tokens": 16745409306.0, "step": 8770 }, { "epoch": 1.563458110516934, "grad_norm": 0.171875, "learning_rate": 4.158582461347709e-06, "loss": 1.0051, "num_tokens": 16751648056.0, "step": 8771 }, { "epoch": 1.5636363636363635, "grad_norm": 0.171875, "learning_rate": 4.156894706992677e-06, "loss": 1.0139, "num_tokens": 16757917075.0, "step": 8772 }, { "epoch": 1.5638146167557934, "grad_norm": 0.171875, "learning_rate": 4.155207522875056e-06, "loss": 0.9897, "num_tokens": 16764187910.0, "step": 8773 }, { "epoch": 1.5639928698752228, "grad_norm": 0.17578125, "learning_rate": 4.15352090913544e-06, "loss": 1.0216, "num_tokens": 16770470646.0, "step": 8774 }, { "epoch": 1.5641711229946524, "grad_norm": 0.177734375, "learning_rate": 4.151834865914374e-06, "loss": 1.0324, "num_tokens": 16776753833.0, "step": 8775 }, { "epoch": 1.564349376114082, "grad_norm": 0.171875, "learning_rate": 4.1501493933523575e-06, "loss": 0.9928, "num_tokens": 16782994126.0, "step": 8776 }, { "epoch": 1.5645276292335115, "grad_norm": 0.1767578125, "learning_rate": 4.148464491589841e-06, "loss": 1.0213, "num_tokens": 16789278454.0, "step": 8777 }, { "epoch": 1.5647058823529412, "grad_norm": 0.1728515625, "learning_rate": 4.1467801607672245e-06, "loss": 1.0449, "num_tokens": 16795538885.0, "step": 8778 }, { "epoch": 1.5648841354723708, "grad_norm": 0.171875, "learning_rate": 4.145096401024868e-06, "loss": 1.0627, "num_tokens": 16801821684.0, "step": 8779 }, { "epoch": 1.5650623885918002, "grad_norm": 0.17578125, "learning_rate": 4.143413212503079e-06, "loss": 1.0446, "num_tokens": 16808105696.0, "step": 8780 }, { "epoch": 1.56524064171123, "grad_norm": 0.1826171875, "learning_rate": 4.141730595342121e-06, "loss": 1.0299, "num_tokens": 16814382628.0, "step": 8781 }, { "epoch": 1.5654188948306595, "grad_norm": 0.1748046875, "learning_rate": 4.140048549682196e-06, "loss": 1.0182, "num_tokens": 16820667607.0, "step": 8782 }, { "epoch": 1.565597147950089, "grad_norm": 0.1748046875, "learning_rate": 4.1383670756634785e-06, "loss": 1.0079, "num_tokens": 16826925907.0, "step": 8783 }, { "epoch": 1.5657754010695188, "grad_norm": 0.1728515625, "learning_rate": 4.136686173426085e-06, "loss": 1.0341, "num_tokens": 16833210881.0, "step": 8784 }, { "epoch": 1.5659536541889483, "grad_norm": 0.1748046875, "learning_rate": 4.135005843110082e-06, "loss": 1.0301, "num_tokens": 16839496981.0, "step": 8785 }, { "epoch": 1.566131907308378, "grad_norm": 0.173828125, "learning_rate": 4.133326084855495e-06, "loss": 1.0468, "num_tokens": 16845765910.0, "step": 8786 }, { "epoch": 1.5663101604278076, "grad_norm": 0.1865234375, "learning_rate": 4.131646898802298e-06, "loss": 1.0095, "num_tokens": 16852023393.0, "step": 8787 }, { "epoch": 1.566488413547237, "grad_norm": 0.171875, "learning_rate": 4.129968285090417e-06, "loss": 1.0407, "num_tokens": 16858290710.0, "step": 8788 }, { "epoch": 1.5666666666666667, "grad_norm": 0.1708984375, "learning_rate": 4.128290243859733e-06, "loss": 0.9854, "num_tokens": 16864552396.0, "step": 8789 }, { "epoch": 1.5668449197860963, "grad_norm": 0.1689453125, "learning_rate": 4.126612775250075e-06, "loss": 1.0142, "num_tokens": 16870824686.0, "step": 8790 }, { "epoch": 1.5670231729055257, "grad_norm": 0.16796875, "learning_rate": 4.124935879401229e-06, "loss": 1.0179, "num_tokens": 16877077457.0, "step": 8791 }, { "epoch": 1.5672014260249556, "grad_norm": 0.1689453125, "learning_rate": 4.12325955645293e-06, "loss": 1.0316, "num_tokens": 16883339414.0, "step": 8792 }, { "epoch": 1.567379679144385, "grad_norm": 0.171875, "learning_rate": 4.121583806544863e-06, "loss": 1.0479, "num_tokens": 16889623246.0, "step": 8793 }, { "epoch": 1.5675579322638145, "grad_norm": 0.17578125, "learning_rate": 4.119908629816676e-06, "loss": 1.0595, "num_tokens": 16895851530.0, "step": 8794 }, { "epoch": 1.5677361853832443, "grad_norm": 0.169921875, "learning_rate": 4.118234026407957e-06, "loss": 1.0236, "num_tokens": 16902124502.0, "step": 8795 }, { "epoch": 1.5679144385026738, "grad_norm": 0.169921875, "learning_rate": 4.1165599964582514e-06, "loss": 1.0064, "num_tokens": 16908406887.0, "step": 8796 }, { "epoch": 1.5680926916221034, "grad_norm": 0.177734375, "learning_rate": 4.114886540107058e-06, "loss": 0.9784, "num_tokens": 16914665906.0, "step": 8797 }, { "epoch": 1.568270944741533, "grad_norm": 0.177734375, "learning_rate": 4.113213657493825e-06, "loss": 1.0139, "num_tokens": 16920920167.0, "step": 8798 }, { "epoch": 1.5684491978609625, "grad_norm": 0.17578125, "learning_rate": 4.111541348757955e-06, "loss": 1.0174, "num_tokens": 16927172274.0, "step": 8799 }, { "epoch": 1.5686274509803921, "grad_norm": 0.169921875, "learning_rate": 4.109869614038798e-06, "loss": 0.9746, "num_tokens": 16933447680.0, "step": 8800 }, { "epoch": 1.5688057040998218, "grad_norm": 0.16796875, "learning_rate": 4.108198453475666e-06, "loss": 1.0155, "num_tokens": 16939731158.0, "step": 8801 }, { "epoch": 1.5689839572192512, "grad_norm": 0.16796875, "learning_rate": 4.106527867207814e-06, "loss": 1.023, "num_tokens": 16946015708.0, "step": 8802 }, { "epoch": 1.569162210338681, "grad_norm": 0.1708984375, "learning_rate": 4.1048578553744525e-06, "loss": 1.0156, "num_tokens": 16952294924.0, "step": 8803 }, { "epoch": 1.5693404634581105, "grad_norm": 0.1767578125, "learning_rate": 4.103188418114745e-06, "loss": 1.0238, "num_tokens": 16958578867.0, "step": 8804 }, { "epoch": 1.56951871657754, "grad_norm": 0.173828125, "learning_rate": 4.101519555567805e-06, "loss": 1.0057, "num_tokens": 16964863440.0, "step": 8805 }, { "epoch": 1.5696969696969698, "grad_norm": 0.177734375, "learning_rate": 4.099851267872699e-06, "loss": 1.0166, "num_tokens": 16971123858.0, "step": 8806 }, { "epoch": 1.5698752228163992, "grad_norm": 0.1689453125, "learning_rate": 4.098183555168443e-06, "loss": 1.0452, "num_tokens": 16977401446.0, "step": 8807 }, { "epoch": 1.570053475935829, "grad_norm": 0.1708984375, "learning_rate": 4.096516417594015e-06, "loss": 1.0078, "num_tokens": 16983684036.0, "step": 8808 }, { "epoch": 1.5702317290552585, "grad_norm": 0.17578125, "learning_rate": 4.094849855288334e-06, "loss": 1.0535, "num_tokens": 16989965353.0, "step": 8809 }, { "epoch": 1.570409982174688, "grad_norm": 0.171875, "learning_rate": 4.093183868390273e-06, "loss": 1.0262, "num_tokens": 16996248741.0, "step": 8810 }, { "epoch": 1.5705882352941176, "grad_norm": 0.1689453125, "learning_rate": 4.09151845703866e-06, "loss": 1.0234, "num_tokens": 17002516702.0, "step": 8811 }, { "epoch": 1.5707664884135473, "grad_norm": 0.17578125, "learning_rate": 4.089853621372279e-06, "loss": 1.0319, "num_tokens": 17008800993.0, "step": 8812 }, { "epoch": 1.5709447415329767, "grad_norm": 0.169921875, "learning_rate": 4.088189361529856e-06, "loss": 1.0151, "num_tokens": 17015082329.0, "step": 8813 }, { "epoch": 1.5711229946524066, "grad_norm": 0.1748046875, "learning_rate": 4.086525677650072e-06, "loss": 1.0163, "num_tokens": 17021363802.0, "step": 8814 }, { "epoch": 1.571301247771836, "grad_norm": 0.1728515625, "learning_rate": 4.084862569871567e-06, "loss": 1.0375, "num_tokens": 17027644916.0, "step": 8815 }, { "epoch": 1.5714795008912656, "grad_norm": 0.1689453125, "learning_rate": 4.083200038332928e-06, "loss": 1.0147, "num_tokens": 17033927703.0, "step": 8816 }, { "epoch": 1.5716577540106953, "grad_norm": 0.17578125, "learning_rate": 4.081538083172691e-06, "loss": 1.03, "num_tokens": 17040212334.0, "step": 8817 }, { "epoch": 1.5718360071301247, "grad_norm": 0.16796875, "learning_rate": 4.079876704529346e-06, "loss": 1.0328, "num_tokens": 17046497035.0, "step": 8818 }, { "epoch": 1.5720142602495544, "grad_norm": 0.1728515625, "learning_rate": 4.0782159025413435e-06, "loss": 1.0151, "num_tokens": 17052779232.0, "step": 8819 }, { "epoch": 1.572192513368984, "grad_norm": 0.173828125, "learning_rate": 4.076555677347075e-06, "loss": 1.0223, "num_tokens": 17059063648.0, "step": 8820 }, { "epoch": 1.5723707664884135, "grad_norm": 0.173828125, "learning_rate": 4.074896029084881e-06, "loss": 1.0294, "num_tokens": 17065332310.0, "step": 8821 }, { "epoch": 1.572549019607843, "grad_norm": 0.1728515625, "learning_rate": 4.073236957893069e-06, "loss": 1.0275, "num_tokens": 17071616583.0, "step": 8822 }, { "epoch": 1.5727272727272728, "grad_norm": 0.17578125, "learning_rate": 4.071578463909884e-06, "loss": 1.0154, "num_tokens": 17077901362.0, "step": 8823 }, { "epoch": 1.5729055258467022, "grad_norm": 0.173828125, "learning_rate": 4.069920547273532e-06, "loss": 1.0343, "num_tokens": 17084185386.0, "step": 8824 }, { "epoch": 1.573083778966132, "grad_norm": 0.173828125, "learning_rate": 4.068263208122164e-06, "loss": 1.0239, "num_tokens": 17090469433.0, "step": 8825 }, { "epoch": 1.5732620320855615, "grad_norm": 0.16796875, "learning_rate": 4.066606446593891e-06, "loss": 1.0397, "num_tokens": 17096753294.0, "step": 8826 }, { "epoch": 1.5734402852049911, "grad_norm": 0.1767578125, "learning_rate": 4.064950262826772e-06, "loss": 1.0207, "num_tokens": 17103030791.0, "step": 8827 }, { "epoch": 1.5736185383244208, "grad_norm": 0.1689453125, "learning_rate": 4.063294656958809e-06, "loss": 1.023, "num_tokens": 17109314156.0, "step": 8828 }, { "epoch": 1.5737967914438502, "grad_norm": 0.169921875, "learning_rate": 4.061639629127972e-06, "loss": 1.0057, "num_tokens": 17115599533.0, "step": 8829 }, { "epoch": 1.5739750445632799, "grad_norm": 0.1787109375, "learning_rate": 4.0599851794721715e-06, "loss": 0.9873, "num_tokens": 17121876057.0, "step": 8830 }, { "epoch": 1.5741532976827095, "grad_norm": 0.1669921875, "learning_rate": 4.058331308129274e-06, "loss": 1.0208, "num_tokens": 17128158788.0, "step": 8831 }, { "epoch": 1.574331550802139, "grad_norm": 0.173828125, "learning_rate": 4.056678015237095e-06, "loss": 1.0016, "num_tokens": 17134442523.0, "step": 8832 }, { "epoch": 1.5745098039215686, "grad_norm": 0.171875, "learning_rate": 4.0550253009334074e-06, "loss": 0.9747, "num_tokens": 17140724532.0, "step": 8833 }, { "epoch": 1.5746880570409982, "grad_norm": 0.1767578125, "learning_rate": 4.053373165355929e-06, "loss": 1.0259, "num_tokens": 17146961282.0, "step": 8834 }, { "epoch": 1.5748663101604277, "grad_norm": 0.1865234375, "learning_rate": 4.051721608642336e-06, "loss": 0.9978, "num_tokens": 17153242509.0, "step": 8835 }, { "epoch": 1.5750445632798575, "grad_norm": 0.169921875, "learning_rate": 4.05007063093025e-06, "loss": 1.0128, "num_tokens": 17159494709.0, "step": 8836 }, { "epoch": 1.575222816399287, "grad_norm": 0.17578125, "learning_rate": 4.048420232357248e-06, "loss": 0.9954, "num_tokens": 17165761869.0, "step": 8837 }, { "epoch": 1.5754010695187166, "grad_norm": 0.1728515625, "learning_rate": 4.046770413060858e-06, "loss": 1.0506, "num_tokens": 17172046688.0, "step": 8838 }, { "epoch": 1.5755793226381463, "grad_norm": 0.1728515625, "learning_rate": 4.045121173178556e-06, "loss": 1.0171, "num_tokens": 17178329927.0, "step": 8839 }, { "epoch": 1.5757575757575757, "grad_norm": 0.17578125, "learning_rate": 4.043472512847782e-06, "loss": 1.0344, "num_tokens": 17184612995.0, "step": 8840 }, { "epoch": 1.5759358288770053, "grad_norm": 0.169921875, "learning_rate": 4.041824432205914e-06, "loss": 1.006, "num_tokens": 17190873406.0, "step": 8841 }, { "epoch": 1.576114081996435, "grad_norm": 0.173828125, "learning_rate": 4.040176931390288e-06, "loss": 1.0216, "num_tokens": 17197156934.0, "step": 8842 }, { "epoch": 1.5762923351158644, "grad_norm": 0.16796875, "learning_rate": 4.03853001053819e-06, "loss": 1.0385, "num_tokens": 17203439556.0, "step": 8843 }, { "epoch": 1.576470588235294, "grad_norm": 0.1708984375, "learning_rate": 4.036883669786858e-06, "loss": 1.0311, "num_tokens": 17209665172.0, "step": 8844 }, { "epoch": 1.5766488413547237, "grad_norm": 0.1728515625, "learning_rate": 4.035237909273484e-06, "loss": 1.0088, "num_tokens": 17215950022.0, "step": 8845 }, { "epoch": 1.5768270944741531, "grad_norm": 0.171875, "learning_rate": 4.033592729135203e-06, "loss": 1.0446, "num_tokens": 17222210411.0, "step": 8846 }, { "epoch": 1.577005347593583, "grad_norm": 0.1728515625, "learning_rate": 4.0319481295091175e-06, "loss": 1.0083, "num_tokens": 17228486714.0, "step": 8847 }, { "epoch": 1.5771836007130124, "grad_norm": 0.1748046875, "learning_rate": 4.030304110532268e-06, "loss": 1.0041, "num_tokens": 17234733343.0, "step": 8848 }, { "epoch": 1.577361853832442, "grad_norm": 0.1748046875, "learning_rate": 4.02866067234165e-06, "loss": 1.0081, "num_tokens": 17241016718.0, "step": 8849 }, { "epoch": 1.5775401069518717, "grad_norm": 0.171875, "learning_rate": 4.027017815074213e-06, "loss": 0.9942, "num_tokens": 17247257800.0, "step": 8850 }, { "epoch": 1.5777183600713012, "grad_norm": 0.171875, "learning_rate": 4.025375538866856e-06, "loss": 1.0039, "num_tokens": 17253541362.0, "step": 8851 }, { "epoch": 1.5778966131907308, "grad_norm": 0.1767578125, "learning_rate": 4.02373384385643e-06, "loss": 1.0375, "num_tokens": 17259776238.0, "step": 8852 }, { "epoch": 1.5780748663101605, "grad_norm": 0.16796875, "learning_rate": 4.022092730179737e-06, "loss": 0.9988, "num_tokens": 17266061632.0, "step": 8853 }, { "epoch": 1.57825311942959, "grad_norm": 0.16796875, "learning_rate": 4.020452197973533e-06, "loss": 1.0234, "num_tokens": 17272347259.0, "step": 8854 }, { "epoch": 1.5784313725490198, "grad_norm": 0.171875, "learning_rate": 4.018812247374525e-06, "loss": 1.0059, "num_tokens": 17278578326.0, "step": 8855 }, { "epoch": 1.5786096256684492, "grad_norm": 0.1767578125, "learning_rate": 4.017172878519369e-06, "loss": 0.9846, "num_tokens": 17284829229.0, "step": 8856 }, { "epoch": 1.5787878787878786, "grad_norm": 0.173828125, "learning_rate": 4.01553409154467e-06, "loss": 1.0037, "num_tokens": 17291107189.0, "step": 8857 }, { "epoch": 1.5789661319073085, "grad_norm": 0.1689453125, "learning_rate": 4.013895886586998e-06, "loss": 0.9911, "num_tokens": 17297367764.0, "step": 8858 }, { "epoch": 1.579144385026738, "grad_norm": 0.1767578125, "learning_rate": 4.012258263782856e-06, "loss": 1.0275, "num_tokens": 17303637638.0, "step": 8859 }, { "epoch": 1.5793226381461676, "grad_norm": 0.16796875, "learning_rate": 4.010621223268709e-06, "loss": 0.9858, "num_tokens": 17309871134.0, "step": 8860 }, { "epoch": 1.5795008912655972, "grad_norm": 0.173828125, "learning_rate": 4.008984765180976e-06, "loss": 1.0357, "num_tokens": 17316136038.0, "step": 8861 }, { "epoch": 1.5796791443850267, "grad_norm": 0.171875, "learning_rate": 4.007348889656022e-06, "loss": 1.0079, "num_tokens": 17322397166.0, "step": 8862 }, { "epoch": 1.5798573975044563, "grad_norm": 0.1708984375, "learning_rate": 4.005713596830163e-06, "loss": 1.0385, "num_tokens": 17328676886.0, "step": 8863 }, { "epoch": 1.580035650623886, "grad_norm": 0.17578125, "learning_rate": 4.0040788868396665e-06, "loss": 1.0124, "num_tokens": 17334961493.0, "step": 8864 }, { "epoch": 1.5802139037433154, "grad_norm": 0.171875, "learning_rate": 4.002444759820758e-06, "loss": 1.0202, "num_tokens": 17341213617.0, "step": 8865 }, { "epoch": 1.5803921568627453, "grad_norm": 0.1650390625, "learning_rate": 4.00081121590961e-06, "loss": 1.0022, "num_tokens": 17347498015.0, "step": 8866 }, { "epoch": 1.5805704099821747, "grad_norm": 0.1748046875, "learning_rate": 3.999178255242339e-06, "loss": 0.9937, "num_tokens": 17353755813.0, "step": 8867 }, { "epoch": 1.5807486631016041, "grad_norm": 0.169921875, "learning_rate": 3.997545877955027e-06, "loss": 1.0061, "num_tokens": 17360016881.0, "step": 8868 }, { "epoch": 1.580926916221034, "grad_norm": 0.1708984375, "learning_rate": 3.995914084183698e-06, "loss": 1.0362, "num_tokens": 17366300205.0, "step": 8869 }, { "epoch": 1.5811051693404634, "grad_norm": 0.169921875, "learning_rate": 3.994282874064329e-06, "loss": 1.0108, "num_tokens": 17372578144.0, "step": 8870 }, { "epoch": 1.581283422459893, "grad_norm": 0.1728515625, "learning_rate": 3.992652247732848e-06, "loss": 1.0595, "num_tokens": 17378834516.0, "step": 8871 }, { "epoch": 1.5814616755793227, "grad_norm": 0.1669921875, "learning_rate": 3.99102220532514e-06, "loss": 1.0065, "num_tokens": 17385089865.0, "step": 8872 }, { "epoch": 1.5816399286987521, "grad_norm": 0.169921875, "learning_rate": 3.989392746977033e-06, "loss": 1.0462, "num_tokens": 17391373977.0, "step": 8873 }, { "epoch": 1.5818181818181818, "grad_norm": 0.1708984375, "learning_rate": 3.987763872824312e-06, "loss": 1.0081, "num_tokens": 17397642921.0, "step": 8874 }, { "epoch": 1.5819964349376114, "grad_norm": 0.171875, "learning_rate": 3.9861355830027095e-06, "loss": 1.0164, "num_tokens": 17403924958.0, "step": 8875 }, { "epoch": 1.5821746880570409, "grad_norm": 0.169921875, "learning_rate": 3.984507877647913e-06, "loss": 1.0065, "num_tokens": 17410208632.0, "step": 8876 }, { "epoch": 1.5823529411764707, "grad_norm": 0.171875, "learning_rate": 3.9828807568955574e-06, "loss": 0.9937, "num_tokens": 17416423675.0, "step": 8877 }, { "epoch": 1.5825311942959002, "grad_norm": 0.173828125, "learning_rate": 3.981254220881231e-06, "loss": 0.9884, "num_tokens": 17422603078.0, "step": 8878 }, { "epoch": 1.5827094474153298, "grad_norm": 0.1650390625, "learning_rate": 3.979628269740478e-06, "loss": 1.0164, "num_tokens": 17428860431.0, "step": 8879 }, { "epoch": 1.5828877005347595, "grad_norm": 0.1708984375, "learning_rate": 3.978002903608784e-06, "loss": 1.0236, "num_tokens": 17435115051.0, "step": 8880 }, { "epoch": 1.583065953654189, "grad_norm": 0.1708984375, "learning_rate": 3.976378122621593e-06, "loss": 0.9996, "num_tokens": 17441367727.0, "step": 8881 }, { "epoch": 1.5832442067736185, "grad_norm": 0.1728515625, "learning_rate": 3.974753926914301e-06, "loss": 0.981, "num_tokens": 17447650821.0, "step": 8882 }, { "epoch": 1.5834224598930482, "grad_norm": 0.173828125, "learning_rate": 3.973130316622248e-06, "loss": 1.022, "num_tokens": 17453932540.0, "step": 8883 }, { "epoch": 1.5836007130124776, "grad_norm": 0.177734375, "learning_rate": 3.971507291880732e-06, "loss": 1.018, "num_tokens": 17460208744.0, "step": 8884 }, { "epoch": 1.5837789661319073, "grad_norm": 0.17578125, "learning_rate": 3.969884852824998e-06, "loss": 1.0046, "num_tokens": 17466468483.0, "step": 8885 }, { "epoch": 1.583957219251337, "grad_norm": 0.171875, "learning_rate": 3.968262999590248e-06, "loss": 1.0283, "num_tokens": 17472750980.0, "step": 8886 }, { "epoch": 1.5841354723707664, "grad_norm": 0.1748046875, "learning_rate": 3.966641732311629e-06, "loss": 1.0118, "num_tokens": 17479021961.0, "step": 8887 }, { "epoch": 1.5843137254901962, "grad_norm": 0.1767578125, "learning_rate": 3.965021051124242e-06, "loss": 1.0054, "num_tokens": 17485279610.0, "step": 8888 }, { "epoch": 1.5844919786096257, "grad_norm": 0.1748046875, "learning_rate": 3.963400956163139e-06, "loss": 1.0102, "num_tokens": 17491534838.0, "step": 8889 }, { "epoch": 1.5846702317290553, "grad_norm": 0.17578125, "learning_rate": 3.961781447563322e-06, "loss": 0.9883, "num_tokens": 17497817395.0, "step": 8890 }, { "epoch": 1.584848484848485, "grad_norm": 0.1689453125, "learning_rate": 3.960162525459747e-06, "loss": 0.9941, "num_tokens": 17504082775.0, "step": 8891 }, { "epoch": 1.5850267379679144, "grad_norm": 0.1806640625, "learning_rate": 3.958544189987312e-06, "loss": 1.0206, "num_tokens": 17510367396.0, "step": 8892 }, { "epoch": 1.585204991087344, "grad_norm": 0.1787109375, "learning_rate": 3.956926441280884e-06, "loss": 1.0034, "num_tokens": 17516652509.0, "step": 8893 }, { "epoch": 1.5853832442067737, "grad_norm": 0.1796875, "learning_rate": 3.955309279475265e-06, "loss": 1.0394, "num_tokens": 17522895234.0, "step": 8894 }, { "epoch": 1.585561497326203, "grad_norm": 0.171875, "learning_rate": 3.953692704705212e-06, "loss": 1.0302, "num_tokens": 17529142450.0, "step": 8895 }, { "epoch": 1.5857397504456328, "grad_norm": 0.1748046875, "learning_rate": 3.952076717105435e-06, "loss": 0.994, "num_tokens": 17535426338.0, "step": 8896 }, { "epoch": 1.5859180035650624, "grad_norm": 0.169921875, "learning_rate": 3.9504613168106e-06, "loss": 1.0167, "num_tokens": 17541685626.0, "step": 8897 }, { "epoch": 1.5860962566844918, "grad_norm": 0.177734375, "learning_rate": 3.948846503955313e-06, "loss": 1.0159, "num_tokens": 17547967285.0, "step": 8898 }, { "epoch": 1.5862745098039217, "grad_norm": 0.1748046875, "learning_rate": 3.947232278674134e-06, "loss": 1.0312, "num_tokens": 17554237200.0, "step": 8899 }, { "epoch": 1.5864527629233511, "grad_norm": 0.173828125, "learning_rate": 3.945618641101583e-06, "loss": 1.002, "num_tokens": 17560521903.0, "step": 8900 }, { "epoch": 1.5866310160427808, "grad_norm": 0.1796875, "learning_rate": 3.944005591372123e-06, "loss": 1.0355, "num_tokens": 17566805879.0, "step": 8901 }, { "epoch": 1.5868092691622104, "grad_norm": 0.169921875, "learning_rate": 3.94239312962017e-06, "loss": 1.0336, "num_tokens": 17573087317.0, "step": 8902 }, { "epoch": 1.5869875222816399, "grad_norm": 0.1728515625, "learning_rate": 3.940781255980087e-06, "loss": 1.0086, "num_tokens": 17579354838.0, "step": 8903 }, { "epoch": 1.5871657754010695, "grad_norm": 0.17578125, "learning_rate": 3.939169970586201e-06, "loss": 1.0368, "num_tokens": 17585638203.0, "step": 8904 }, { "epoch": 1.5873440285204992, "grad_norm": 0.1767578125, "learning_rate": 3.937559273572771e-06, "loss": 1.0093, "num_tokens": 17591920470.0, "step": 8905 }, { "epoch": 1.5875222816399286, "grad_norm": 0.169921875, "learning_rate": 3.935949165074018e-06, "loss": 1.0206, "num_tokens": 17598183948.0, "step": 8906 }, { "epoch": 1.5877005347593582, "grad_norm": 0.1689453125, "learning_rate": 3.934339645224118e-06, "loss": 0.9837, "num_tokens": 17604429151.0, "step": 8907 }, { "epoch": 1.587878787878788, "grad_norm": 0.1767578125, "learning_rate": 3.932730714157189e-06, "loss": 1.0308, "num_tokens": 17610649712.0, "step": 8908 }, { "epoch": 1.5880570409982173, "grad_norm": 0.1689453125, "learning_rate": 3.931122372007303e-06, "loss": 1.0381, "num_tokens": 17616926752.0, "step": 8909 }, { "epoch": 1.5882352941176472, "grad_norm": 0.177734375, "learning_rate": 3.929514618908484e-06, "loss": 1.0279, "num_tokens": 17623184089.0, "step": 8910 }, { "epoch": 1.5884135472370766, "grad_norm": 0.1708984375, "learning_rate": 3.927907454994709e-06, "loss": 1.0509, "num_tokens": 17629466090.0, "step": 8911 }, { "epoch": 1.5885918003565063, "grad_norm": 0.1767578125, "learning_rate": 3.926300880399904e-06, "loss": 0.9909, "num_tokens": 17635717290.0, "step": 8912 }, { "epoch": 1.588770053475936, "grad_norm": 0.1728515625, "learning_rate": 3.924694895257939e-06, "loss": 1.0196, "num_tokens": 17642001288.0, "step": 8913 }, { "epoch": 1.5889483065953653, "grad_norm": 0.1767578125, "learning_rate": 3.923089499702646e-06, "loss": 1.0221, "num_tokens": 17648286045.0, "step": 8914 }, { "epoch": 1.589126559714795, "grad_norm": 0.1796875, "learning_rate": 3.921484693867801e-06, "loss": 0.991, "num_tokens": 17654524270.0, "step": 8915 }, { "epoch": 1.5893048128342246, "grad_norm": 0.1787109375, "learning_rate": 3.919880477887134e-06, "loss": 1.0292, "num_tokens": 17660791041.0, "step": 8916 }, { "epoch": 1.589483065953654, "grad_norm": 0.173828125, "learning_rate": 3.918276851894321e-06, "loss": 1.0286, "num_tokens": 17667048586.0, "step": 8917 }, { "epoch": 1.589661319073084, "grad_norm": 0.1787109375, "learning_rate": 3.916673816023e-06, "loss": 1.0261, "num_tokens": 17673331014.0, "step": 8918 }, { "epoch": 1.5898395721925134, "grad_norm": 0.16796875, "learning_rate": 3.915071370406745e-06, "loss": 1.0572, "num_tokens": 17679615361.0, "step": 8919 }, { "epoch": 1.5900178253119428, "grad_norm": 0.1767578125, "learning_rate": 3.913469515179094e-06, "loss": 1.0112, "num_tokens": 17685871462.0, "step": 8920 }, { "epoch": 1.5901960784313727, "grad_norm": 0.17578125, "learning_rate": 3.911868250473526e-06, "loss": 0.9841, "num_tokens": 17692155304.0, "step": 8921 }, { "epoch": 1.590374331550802, "grad_norm": 0.173828125, "learning_rate": 3.910267576423475e-06, "loss": 1.016, "num_tokens": 17698438708.0, "step": 8922 }, { "epoch": 1.5905525846702318, "grad_norm": 0.1728515625, "learning_rate": 3.9086674931623255e-06, "loss": 1.0187, "num_tokens": 17704721233.0, "step": 8923 }, { "epoch": 1.5907308377896614, "grad_norm": 0.1708984375, "learning_rate": 3.907068000823413e-06, "loss": 1.0149, "num_tokens": 17711005015.0, "step": 8924 }, { "epoch": 1.5909090909090908, "grad_norm": 0.1650390625, "learning_rate": 3.905469099540026e-06, "loss": 1.019, "num_tokens": 17717288776.0, "step": 8925 }, { "epoch": 1.5910873440285205, "grad_norm": 0.177734375, "learning_rate": 3.903870789445399e-06, "loss": 1.0331, "num_tokens": 17723547326.0, "step": 8926 }, { "epoch": 1.5912655971479501, "grad_norm": 0.173828125, "learning_rate": 3.902273070672719e-06, "loss": 1.0257, "num_tokens": 17729830412.0, "step": 8927 }, { "epoch": 1.5914438502673796, "grad_norm": 0.1689453125, "learning_rate": 3.900675943355127e-06, "loss": 0.985, "num_tokens": 17736114789.0, "step": 8928 }, { "epoch": 1.5916221033868094, "grad_norm": 0.1767578125, "learning_rate": 3.899079407625709e-06, "loss": 1.0173, "num_tokens": 17742374566.0, "step": 8929 }, { "epoch": 1.5918003565062389, "grad_norm": 0.1708984375, "learning_rate": 3.897483463617505e-06, "loss": 1.0093, "num_tokens": 17748660306.0, "step": 8930 }, { "epoch": 1.5919786096256683, "grad_norm": 0.1787109375, "learning_rate": 3.895888111463505e-06, "loss": 1.0044, "num_tokens": 17754943589.0, "step": 8931 }, { "epoch": 1.5921568627450982, "grad_norm": 0.16796875, "learning_rate": 3.894293351296652e-06, "loss": 0.9912, "num_tokens": 17761197180.0, "step": 8932 }, { "epoch": 1.5923351158645276, "grad_norm": 0.1787109375, "learning_rate": 3.892699183249838e-06, "loss": 1.0446, "num_tokens": 17767471105.0, "step": 8933 }, { "epoch": 1.5925133689839572, "grad_norm": 0.1748046875, "learning_rate": 3.891105607455904e-06, "loss": 1.0144, "num_tokens": 17773756323.0, "step": 8934 }, { "epoch": 1.5926916221033869, "grad_norm": 0.16796875, "learning_rate": 3.889512624047643e-06, "loss": 1.0364, "num_tokens": 17780015548.0, "step": 8935 }, { "epoch": 1.5928698752228163, "grad_norm": 0.171875, "learning_rate": 3.8879202331577985e-06, "loss": 1.0561, "num_tokens": 17786273124.0, "step": 8936 }, { "epoch": 1.593048128342246, "grad_norm": 0.1669921875, "learning_rate": 3.8863284349190655e-06, "loss": 1.0042, "num_tokens": 17792509088.0, "step": 8937 }, { "epoch": 1.5932263814616756, "grad_norm": 0.1689453125, "learning_rate": 3.8847372294640865e-06, "loss": 1.0223, "num_tokens": 17798786621.0, "step": 8938 }, { "epoch": 1.593404634581105, "grad_norm": 0.1708984375, "learning_rate": 3.8831466169254596e-06, "loss": 1.0125, "num_tokens": 17805069604.0, "step": 8939 }, { "epoch": 1.593582887700535, "grad_norm": 0.171875, "learning_rate": 3.881556597435732e-06, "loss": 1.0055, "num_tokens": 17811353697.0, "step": 8940 }, { "epoch": 1.5937611408199643, "grad_norm": 0.1767578125, "learning_rate": 3.879967171127398e-06, "loss": 1.0192, "num_tokens": 17817635762.0, "step": 8941 }, { "epoch": 1.593939393939394, "grad_norm": 0.17578125, "learning_rate": 3.878378338132902e-06, "loss": 1.0218, "num_tokens": 17823896197.0, "step": 8942 }, { "epoch": 1.5941176470588236, "grad_norm": 0.1748046875, "learning_rate": 3.876790098584651e-06, "loss": 1.0309, "num_tokens": 17830168783.0, "step": 8943 }, { "epoch": 1.594295900178253, "grad_norm": 0.1806640625, "learning_rate": 3.8752024526149855e-06, "loss": 1.0229, "num_tokens": 17836419165.0, "step": 8944 }, { "epoch": 1.5944741532976827, "grad_norm": 0.1748046875, "learning_rate": 3.873615400356205e-06, "loss": 1.0092, "num_tokens": 17842699920.0, "step": 8945 }, { "epoch": 1.5946524064171124, "grad_norm": 0.171875, "learning_rate": 3.872028941940559e-06, "loss": 1.0121, "num_tokens": 17848966688.0, "step": 8946 }, { "epoch": 1.5948306595365418, "grad_norm": 0.1728515625, "learning_rate": 3.870443077500251e-06, "loss": 1.0414, "num_tokens": 17855232324.0, "step": 8947 }, { "epoch": 1.5950089126559714, "grad_norm": 0.181640625, "learning_rate": 3.868857807167429e-06, "loss": 1.0002, "num_tokens": 17861471145.0, "step": 8948 }, { "epoch": 1.595187165775401, "grad_norm": 0.17578125, "learning_rate": 3.867273131074192e-06, "loss": 1.0234, "num_tokens": 17867753191.0, "step": 8949 }, { "epoch": 1.5953654188948305, "grad_norm": 0.1767578125, "learning_rate": 3.8656890493525975e-06, "loss": 1.0247, "num_tokens": 17874021928.0, "step": 8950 }, { "epoch": 1.5955436720142604, "grad_norm": 0.1708984375, "learning_rate": 3.8641055621346414e-06, "loss": 1.034, "num_tokens": 17880305397.0, "step": 8951 }, { "epoch": 1.5957219251336898, "grad_norm": 0.1689453125, "learning_rate": 3.862522669552278e-06, "loss": 1.0098, "num_tokens": 17886588476.0, "step": 8952 }, { "epoch": 1.5959001782531195, "grad_norm": 0.1748046875, "learning_rate": 3.860940371737407e-06, "loss": 1.0158, "num_tokens": 17892869301.0, "step": 8953 }, { "epoch": 1.5960784313725491, "grad_norm": 0.1728515625, "learning_rate": 3.859358668821889e-06, "loss": 1.0114, "num_tokens": 17899130388.0, "step": 8954 }, { "epoch": 1.5962566844919786, "grad_norm": 0.1728515625, "learning_rate": 3.857777560937521e-06, "loss": 1.0154, "num_tokens": 17905359893.0, "step": 8955 }, { "epoch": 1.5964349376114082, "grad_norm": 0.1708984375, "learning_rate": 3.85619704821606e-06, "loss": 1.0371, "num_tokens": 17911613412.0, "step": 8956 }, { "epoch": 1.5966131907308379, "grad_norm": 0.1708984375, "learning_rate": 3.854617130789208e-06, "loss": 1.0177, "num_tokens": 17917894646.0, "step": 8957 }, { "epoch": 1.5967914438502673, "grad_norm": 0.169921875, "learning_rate": 3.853037808788626e-06, "loss": 0.9829, "num_tokens": 17924140856.0, "step": 8958 }, { "epoch": 1.596969696969697, "grad_norm": 0.1728515625, "learning_rate": 3.851459082345912e-06, "loss": 0.9983, "num_tokens": 17930412924.0, "step": 8959 }, { "epoch": 1.5971479500891266, "grad_norm": 0.181640625, "learning_rate": 3.849880951592623e-06, "loss": 1.0346, "num_tokens": 17936632036.0, "step": 8960 }, { "epoch": 1.597326203208556, "grad_norm": 0.169921875, "learning_rate": 3.848303416660269e-06, "loss": 0.9754, "num_tokens": 17942916090.0, "step": 8961 }, { "epoch": 1.5975044563279859, "grad_norm": 0.171875, "learning_rate": 3.846726477680302e-06, "loss": 1.0008, "num_tokens": 17949179691.0, "step": 8962 }, { "epoch": 1.5976827094474153, "grad_norm": 0.1806640625, "learning_rate": 3.845150134784132e-06, "loss": 1.0228, "num_tokens": 17955462973.0, "step": 8963 }, { "epoch": 1.597860962566845, "grad_norm": 0.1728515625, "learning_rate": 3.843574388103112e-06, "loss": 1.0182, "num_tokens": 17961708306.0, "step": 8964 }, { "epoch": 1.5980392156862746, "grad_norm": 0.166015625, "learning_rate": 3.841999237768552e-06, "loss": 0.9856, "num_tokens": 17967966715.0, "step": 8965 }, { "epoch": 1.598217468805704, "grad_norm": 0.177734375, "learning_rate": 3.8404246839117145e-06, "loss": 1.0558, "num_tokens": 17974250876.0, "step": 8966 }, { "epoch": 1.5983957219251337, "grad_norm": 0.173828125, "learning_rate": 3.838850726663796e-06, "loss": 1.0188, "num_tokens": 17980534940.0, "step": 8967 }, { "epoch": 1.5985739750445633, "grad_norm": 0.171875, "learning_rate": 3.837277366155964e-06, "loss": 1.0277, "num_tokens": 17986819735.0, "step": 8968 }, { "epoch": 1.5987522281639928, "grad_norm": 0.1806640625, "learning_rate": 3.835704602519323e-06, "loss": 1.021, "num_tokens": 17993078390.0, "step": 8969 }, { "epoch": 1.5989304812834224, "grad_norm": 0.17578125, "learning_rate": 3.834132435884934e-06, "loss": 1.0509, "num_tokens": 17999362185.0, "step": 8970 }, { "epoch": 1.599108734402852, "grad_norm": 0.173828125, "learning_rate": 3.8325608663838e-06, "loss": 1.0303, "num_tokens": 18005618207.0, "step": 8971 }, { "epoch": 1.5992869875222815, "grad_norm": 0.173828125, "learning_rate": 3.830989894146889e-06, "loss": 1.0128, "num_tokens": 18011850010.0, "step": 8972 }, { "epoch": 1.5994652406417114, "grad_norm": 0.17578125, "learning_rate": 3.829419519305108e-06, "loss": 1.0346, "num_tokens": 18018127257.0, "step": 8973 }, { "epoch": 1.5996434937611408, "grad_norm": 0.171875, "learning_rate": 3.8278497419893115e-06, "loss": 0.9927, "num_tokens": 18024406989.0, "step": 8974 }, { "epoch": 1.5998217468805704, "grad_norm": 0.173828125, "learning_rate": 3.826280562330312e-06, "loss": 0.9827, "num_tokens": 18030665186.0, "step": 8975 }, { "epoch": 1.6, "grad_norm": 0.1728515625, "learning_rate": 3.824711980458874e-06, "loss": 1.0152, "num_tokens": 18036920393.0, "step": 8976 }, { "epoch": 1.6001782531194295, "grad_norm": 0.177734375, "learning_rate": 3.823143996505702e-06, "loss": 1.0091, "num_tokens": 18043204276.0, "step": 8977 }, { "epoch": 1.6003565062388592, "grad_norm": 0.171875, "learning_rate": 3.8215766106014565e-06, "loss": 1.019, "num_tokens": 18049486527.0, "step": 8978 }, { "epoch": 1.6005347593582888, "grad_norm": 0.173828125, "learning_rate": 3.820009822876754e-06, "loss": 1.0239, "num_tokens": 18055766538.0, "step": 8979 }, { "epoch": 1.6007130124777182, "grad_norm": 0.171875, "learning_rate": 3.818443633462149e-06, "loss": 1.0203, "num_tokens": 18062044817.0, "step": 8980 }, { "epoch": 1.6008912655971481, "grad_norm": 0.17578125, "learning_rate": 3.816878042488156e-06, "loss": 1.0232, "num_tokens": 18068330661.0, "step": 8981 }, { "epoch": 1.6010695187165775, "grad_norm": 0.1689453125, "learning_rate": 3.815313050085235e-06, "loss": 1.0184, "num_tokens": 18074613282.0, "step": 8982 }, { "epoch": 1.601247771836007, "grad_norm": 0.173828125, "learning_rate": 3.8137486563837965e-06, "loss": 1.0101, "num_tokens": 18080875451.0, "step": 8983 }, { "epoch": 1.6014260249554368, "grad_norm": 0.166015625, "learning_rate": 3.8121848615142034e-06, "loss": 1.0187, "num_tokens": 18087158142.0, "step": 8984 }, { "epoch": 1.6016042780748663, "grad_norm": 0.171875, "learning_rate": 3.810621665606762e-06, "loss": 1.0128, "num_tokens": 18093440149.0, "step": 8985 }, { "epoch": 1.601782531194296, "grad_norm": 0.1748046875, "learning_rate": 3.8090590687917406e-06, "loss": 1.0376, "num_tokens": 18099722455.0, "step": 8986 }, { "epoch": 1.6019607843137256, "grad_norm": 0.171875, "learning_rate": 3.807497071199349e-06, "loss": 1.0058, "num_tokens": 18106005627.0, "step": 8987 }, { "epoch": 1.602139037433155, "grad_norm": 0.1689453125, "learning_rate": 3.805935672959745e-06, "loss": 1.0387, "num_tokens": 18112287597.0, "step": 8988 }, { "epoch": 1.6023172905525846, "grad_norm": 0.1748046875, "learning_rate": 3.804374874203045e-06, "loss": 1.0137, "num_tokens": 18118569597.0, "step": 8989 }, { "epoch": 1.6024955436720143, "grad_norm": 0.1728515625, "learning_rate": 3.8028146750593078e-06, "loss": 1.0367, "num_tokens": 18124788095.0, "step": 8990 }, { "epoch": 1.6026737967914437, "grad_norm": 0.1689453125, "learning_rate": 3.8012550756585454e-06, "loss": 1.0188, "num_tokens": 18131071736.0, "step": 8991 }, { "epoch": 1.6028520499108736, "grad_norm": 0.1728515625, "learning_rate": 3.799696076130718e-06, "loss": 1.001, "num_tokens": 18137355644.0, "step": 8992 }, { "epoch": 1.603030303030303, "grad_norm": 0.173828125, "learning_rate": 3.798137676605742e-06, "loss": 1.0117, "num_tokens": 18143639089.0, "step": 8993 }, { "epoch": 1.6032085561497325, "grad_norm": 0.1650390625, "learning_rate": 3.7965798772134755e-06, "loss": 1.0065, "num_tokens": 18149905661.0, "step": 8994 }, { "epoch": 1.6033868092691623, "grad_norm": 0.1728515625, "learning_rate": 3.7950226780837317e-06, "loss": 1.0244, "num_tokens": 18156189782.0, "step": 8995 }, { "epoch": 1.6035650623885918, "grad_norm": 0.177734375, "learning_rate": 3.793466079346273e-06, "loss": 1.0286, "num_tokens": 18162453204.0, "step": 8996 }, { "epoch": 1.6037433155080214, "grad_norm": 0.1708984375, "learning_rate": 3.7919100811308087e-06, "loss": 1.0077, "num_tokens": 18168736132.0, "step": 8997 }, { "epoch": 1.603921568627451, "grad_norm": 0.1728515625, "learning_rate": 3.7903546835670024e-06, "loss": 1.0293, "num_tokens": 18174979102.0, "step": 8998 }, { "epoch": 1.6040998217468805, "grad_norm": 0.171875, "learning_rate": 3.7887998867844616e-06, "loss": 1.0159, "num_tokens": 18181264116.0, "step": 8999 }, { "epoch": 1.6042780748663101, "grad_norm": 0.1826171875, "learning_rate": 3.787245690912754e-06, "loss": 1.0109, "num_tokens": 18187543735.0, "step": 9000 }, { "epoch": 1.6044563279857398, "grad_norm": 0.171875, "learning_rate": 3.78569209608139e-06, "loss": 0.9873, "num_tokens": 18193827328.0, "step": 9001 }, { "epoch": 1.6046345811051692, "grad_norm": 0.171875, "learning_rate": 3.784139102419828e-06, "loss": 1.0259, "num_tokens": 18200109788.0, "step": 9002 }, { "epoch": 1.604812834224599, "grad_norm": 0.17578125, "learning_rate": 3.7825867100574774e-06, "loss": 0.9751, "num_tokens": 18206396163.0, "step": 9003 }, { "epoch": 1.6049910873440285, "grad_norm": 0.171875, "learning_rate": 3.7810349191237096e-06, "loss": 1.0034, "num_tokens": 18212681156.0, "step": 9004 }, { "epoch": 1.6051693404634582, "grad_norm": 0.1708984375, "learning_rate": 3.779483729747824e-06, "loss": 1.0154, "num_tokens": 18218964981.0, "step": 9005 }, { "epoch": 1.6053475935828878, "grad_norm": 0.171875, "learning_rate": 3.7779331420590852e-06, "loss": 1.0574, "num_tokens": 18225250467.0, "step": 9006 }, { "epoch": 1.6055258467023172, "grad_norm": 0.1748046875, "learning_rate": 3.776383156186707e-06, "loss": 1.0006, "num_tokens": 18231533097.0, "step": 9007 }, { "epoch": 1.6057040998217469, "grad_norm": 0.1767578125, "learning_rate": 3.774833772259848e-06, "loss": 1.0398, "num_tokens": 18237801529.0, "step": 9008 }, { "epoch": 1.6058823529411765, "grad_norm": 0.16796875, "learning_rate": 3.7732849904076197e-06, "loss": 1.0119, "num_tokens": 18244085736.0, "step": 9009 }, { "epoch": 1.606060606060606, "grad_norm": 0.171875, "learning_rate": 3.7717368107590785e-06, "loss": 1.0215, "num_tokens": 18250343751.0, "step": 9010 }, { "epoch": 1.6062388591800356, "grad_norm": 0.173828125, "learning_rate": 3.7701892334432398e-06, "loss": 1.0195, "num_tokens": 18256627095.0, "step": 9011 }, { "epoch": 1.6064171122994653, "grad_norm": 0.177734375, "learning_rate": 3.7686422585890635e-06, "loss": 1.0136, "num_tokens": 18262905627.0, "step": 9012 }, { "epoch": 1.6065953654188947, "grad_norm": 0.17578125, "learning_rate": 3.7670958863254525e-06, "loss": 1.0298, "num_tokens": 18269169954.0, "step": 9013 }, { "epoch": 1.6067736185383246, "grad_norm": 0.169921875, "learning_rate": 3.7655501167812726e-06, "loss": 1.0232, "num_tokens": 18275427867.0, "step": 9014 }, { "epoch": 1.606951871657754, "grad_norm": 0.17578125, "learning_rate": 3.7640049500853325e-06, "loss": 1.0092, "num_tokens": 18281710845.0, "step": 9015 }, { "epoch": 1.6071301247771836, "grad_norm": 0.1787109375, "learning_rate": 3.762460386366391e-06, "loss": 1.0515, "num_tokens": 18287965821.0, "step": 9016 }, { "epoch": 1.6073083778966133, "grad_norm": 0.1689453125, "learning_rate": 3.760916425753153e-06, "loss": 1.0048, "num_tokens": 18294239485.0, "step": 9017 }, { "epoch": 1.6074866310160427, "grad_norm": 0.1669921875, "learning_rate": 3.759373068374281e-06, "loss": 1.0451, "num_tokens": 18300515252.0, "step": 9018 }, { "epoch": 1.6076648841354724, "grad_norm": 0.1748046875, "learning_rate": 3.7578303143583838e-06, "loss": 1.0201, "num_tokens": 18306730274.0, "step": 9019 }, { "epoch": 1.607843137254902, "grad_norm": 0.1728515625, "learning_rate": 3.756288163834018e-06, "loss": 1.0092, "num_tokens": 18313012486.0, "step": 9020 }, { "epoch": 1.6080213903743314, "grad_norm": 0.173828125, "learning_rate": 3.7547466169296914e-06, "loss": 1.0274, "num_tokens": 18319295653.0, "step": 9021 }, { "epoch": 1.608199643493761, "grad_norm": 0.1787109375, "learning_rate": 3.753205673773862e-06, "loss": 0.9937, "num_tokens": 18325489213.0, "step": 9022 }, { "epoch": 1.6083778966131907, "grad_norm": 0.171875, "learning_rate": 3.751665334494936e-06, "loss": 1.0174, "num_tokens": 18331751565.0, "step": 9023 }, { "epoch": 1.6085561497326202, "grad_norm": 0.173828125, "learning_rate": 3.7501255992212676e-06, "loss": 0.9723, "num_tokens": 18337988650.0, "step": 9024 }, { "epoch": 1.60873440285205, "grad_norm": 0.1728515625, "learning_rate": 3.7485864680811695e-06, "loss": 1.0117, "num_tokens": 18344272185.0, "step": 9025 }, { "epoch": 1.6089126559714795, "grad_norm": 0.169921875, "learning_rate": 3.7470479412028936e-06, "loss": 1.0374, "num_tokens": 18350517849.0, "step": 9026 }, { "epoch": 1.6090909090909091, "grad_norm": 0.1728515625, "learning_rate": 3.7455100187146477e-06, "loss": 1.0303, "num_tokens": 18356801966.0, "step": 9027 }, { "epoch": 1.6092691622103388, "grad_norm": 0.17578125, "learning_rate": 3.7439727007445857e-06, "loss": 1.0343, "num_tokens": 18363085261.0, "step": 9028 }, { "epoch": 1.6094474153297682, "grad_norm": 0.171875, "learning_rate": 3.7424359874208137e-06, "loss": 1.0227, "num_tokens": 18369368970.0, "step": 9029 }, { "epoch": 1.6096256684491979, "grad_norm": 0.17578125, "learning_rate": 3.7408998788713845e-06, "loss": 1.0195, "num_tokens": 18375652833.0, "step": 9030 }, { "epoch": 1.6098039215686275, "grad_norm": 0.1708984375, "learning_rate": 3.7393643752243026e-06, "loss": 1.0194, "num_tokens": 18381897788.0, "step": 9031 }, { "epoch": 1.609982174688057, "grad_norm": 0.1787109375, "learning_rate": 3.737829476607524e-06, "loss": 1.0193, "num_tokens": 18388183563.0, "step": 9032 }, { "epoch": 1.6101604278074866, "grad_norm": 0.17578125, "learning_rate": 3.7362951831489513e-06, "loss": 1.0525, "num_tokens": 18394454201.0, "step": 9033 }, { "epoch": 1.6103386809269162, "grad_norm": 0.171875, "learning_rate": 3.7347614949764377e-06, "loss": 1.0169, "num_tokens": 18400734480.0, "step": 9034 }, { "epoch": 1.6105169340463457, "grad_norm": 0.177734375, "learning_rate": 3.7332284122177843e-06, "loss": 1.0253, "num_tokens": 18407018308.0, "step": 9035 }, { "epoch": 1.6106951871657755, "grad_norm": 0.177734375, "learning_rate": 3.7316959350007454e-06, "loss": 0.9993, "num_tokens": 18413271179.0, "step": 9036 }, { "epoch": 1.610873440285205, "grad_norm": 0.1826171875, "learning_rate": 3.73016406345302e-06, "loss": 1.0446, "num_tokens": 18419510240.0, "step": 9037 }, { "epoch": 1.6110516934046346, "grad_norm": 0.173828125, "learning_rate": 3.728632797702259e-06, "loss": 1.0101, "num_tokens": 18425777283.0, "step": 9038 }, { "epoch": 1.6112299465240643, "grad_norm": 0.1796875, "learning_rate": 3.7271021378760674e-06, "loss": 1.053, "num_tokens": 18432013378.0, "step": 9039 }, { "epoch": 1.6114081996434937, "grad_norm": 0.1767578125, "learning_rate": 3.725572084101992e-06, "loss": 1.0282, "num_tokens": 18438296784.0, "step": 9040 }, { "epoch": 1.6115864527629233, "grad_norm": 0.1689453125, "learning_rate": 3.724042636507533e-06, "loss": 1.0134, "num_tokens": 18444573078.0, "step": 9041 }, { "epoch": 1.611764705882353, "grad_norm": 0.16796875, "learning_rate": 3.7225137952201397e-06, "loss": 1.0007, "num_tokens": 18450830870.0, "step": 9042 }, { "epoch": 1.6119429590017824, "grad_norm": 0.1708984375, "learning_rate": 3.720985560367214e-06, "loss": 1.0309, "num_tokens": 18457113002.0, "step": 9043 }, { "epoch": 1.612121212121212, "grad_norm": 0.1787109375, "learning_rate": 3.7194579320760983e-06, "loss": 1.037, "num_tokens": 18463396725.0, "step": 9044 }, { "epoch": 1.6122994652406417, "grad_norm": 0.1728515625, "learning_rate": 3.71793091047409e-06, "loss": 1.0235, "num_tokens": 18469682065.0, "step": 9045 }, { "epoch": 1.6124777183600711, "grad_norm": 0.1689453125, "learning_rate": 3.7164044956884426e-06, "loss": 1.0262, "num_tokens": 18475944663.0, "step": 9046 }, { "epoch": 1.612655971479501, "grad_norm": 0.173828125, "learning_rate": 3.714878687846348e-06, "loss": 1.0261, "num_tokens": 18482228485.0, "step": 9047 }, { "epoch": 1.6128342245989304, "grad_norm": 0.1767578125, "learning_rate": 3.7133534870749538e-06, "loss": 1.0219, "num_tokens": 18488511564.0, "step": 9048 }, { "epoch": 1.61301247771836, "grad_norm": 0.173828125, "learning_rate": 3.7118288935013523e-06, "loss": 1.045, "num_tokens": 18494794362.0, "step": 9049 }, { "epoch": 1.6131907308377897, "grad_norm": 0.169921875, "learning_rate": 3.710304907252594e-06, "loss": 1.0088, "num_tokens": 18501077142.0, "step": 9050 }, { "epoch": 1.6133689839572192, "grad_norm": 0.17578125, "learning_rate": 3.7087815284556684e-06, "loss": 1.0164, "num_tokens": 18507360444.0, "step": 9051 }, { "epoch": 1.6135472370766488, "grad_norm": 0.1728515625, "learning_rate": 3.707258757237517e-06, "loss": 0.9936, "num_tokens": 18513636839.0, "step": 9052 }, { "epoch": 1.6137254901960785, "grad_norm": 0.1728515625, "learning_rate": 3.7057365937250378e-06, "loss": 1.0186, "num_tokens": 18519921126.0, "step": 9053 }, { "epoch": 1.613903743315508, "grad_norm": 0.1708984375, "learning_rate": 3.7042150380450725e-06, "loss": 1.008, "num_tokens": 18526203465.0, "step": 9054 }, { "epoch": 1.6140819964349378, "grad_norm": 0.1689453125, "learning_rate": 3.702694090324409e-06, "loss": 1.0351, "num_tokens": 18532484509.0, "step": 9055 }, { "epoch": 1.6142602495543672, "grad_norm": 0.171875, "learning_rate": 3.701173750689789e-06, "loss": 1.021, "num_tokens": 18538766611.0, "step": 9056 }, { "epoch": 1.6144385026737966, "grad_norm": 0.17578125, "learning_rate": 3.699654019267905e-06, "loss": 1.0047, "num_tokens": 18545050731.0, "step": 9057 }, { "epoch": 1.6146167557932265, "grad_norm": 0.1708984375, "learning_rate": 3.698134896185399e-06, "loss": 1.0578, "num_tokens": 18551333185.0, "step": 9058 }, { "epoch": 1.614795008912656, "grad_norm": 0.177734375, "learning_rate": 3.6966163815688516e-06, "loss": 1.0433, "num_tokens": 18557597089.0, "step": 9059 }, { "epoch": 1.6149732620320856, "grad_norm": 0.169921875, "learning_rate": 3.695098475544807e-06, "loss": 0.9825, "num_tokens": 18563880757.0, "step": 9060 }, { "epoch": 1.6151515151515152, "grad_norm": 0.1728515625, "learning_rate": 3.6935811782397533e-06, "loss": 1.0185, "num_tokens": 18570133838.0, "step": 9061 }, { "epoch": 1.6153297682709447, "grad_norm": 0.1728515625, "learning_rate": 3.692064489780123e-06, "loss": 1.0124, "num_tokens": 18576392342.0, "step": 9062 }, { "epoch": 1.6155080213903743, "grad_norm": 0.17578125, "learning_rate": 3.6905484102923027e-06, "loss": 1.0059, "num_tokens": 18582677497.0, "step": 9063 }, { "epoch": 1.615686274509804, "grad_norm": 0.1767578125, "learning_rate": 3.6890329399026325e-06, "loss": 1.0514, "num_tokens": 18588911539.0, "step": 9064 }, { "epoch": 1.6158645276292334, "grad_norm": 0.1748046875, "learning_rate": 3.6875180787373926e-06, "loss": 1.0183, "num_tokens": 18595192504.0, "step": 9065 }, { "epoch": 1.6160427807486633, "grad_norm": 0.1767578125, "learning_rate": 3.6860038269228183e-06, "loss": 1.0114, "num_tokens": 18601460332.0, "step": 9066 }, { "epoch": 1.6162210338680927, "grad_norm": 0.1689453125, "learning_rate": 3.6844901845850923e-06, "loss": 1.0297, "num_tokens": 18607674541.0, "step": 9067 }, { "epoch": 1.6163992869875223, "grad_norm": 0.1708984375, "learning_rate": 3.6829771518503444e-06, "loss": 1.0079, "num_tokens": 18613942838.0, "step": 9068 }, { "epoch": 1.616577540106952, "grad_norm": 0.181640625, "learning_rate": 3.6814647288446598e-06, "loss": 1.0258, "num_tokens": 18620199718.0, "step": 9069 }, { "epoch": 1.6167557932263814, "grad_norm": 0.171875, "learning_rate": 3.6799529156940638e-06, "loss": 0.9997, "num_tokens": 18626483666.0, "step": 9070 }, { "epoch": 1.616934046345811, "grad_norm": 0.16796875, "learning_rate": 3.678441712524542e-06, "loss": 1.0184, "num_tokens": 18632755439.0, "step": 9071 }, { "epoch": 1.6171122994652407, "grad_norm": 0.17578125, "learning_rate": 3.6769311194620207e-06, "loss": 1.0041, "num_tokens": 18639025495.0, "step": 9072 }, { "epoch": 1.6172905525846701, "grad_norm": 0.171875, "learning_rate": 3.675421136632378e-06, "loss": 1.0304, "num_tokens": 18645310223.0, "step": 9073 }, { "epoch": 1.6174688057040998, "grad_norm": 0.171875, "learning_rate": 3.6739117641614402e-06, "loss": 0.9887, "num_tokens": 18651559839.0, "step": 9074 }, { "epoch": 1.6176470588235294, "grad_norm": 0.16796875, "learning_rate": 3.672403002174986e-06, "loss": 1.0211, "num_tokens": 18657842919.0, "step": 9075 }, { "epoch": 1.6178253119429589, "grad_norm": 0.1708984375, "learning_rate": 3.6708948507987386e-06, "loss": 1.0093, "num_tokens": 18664104819.0, "step": 9076 }, { "epoch": 1.6180035650623887, "grad_norm": 0.17578125, "learning_rate": 3.66938731015837e-06, "loss": 1.0289, "num_tokens": 18670389891.0, "step": 9077 }, { "epoch": 1.6181818181818182, "grad_norm": 0.1708984375, "learning_rate": 3.6678803803795106e-06, "loss": 1.0198, "num_tokens": 18676673326.0, "step": 9078 }, { "epoch": 1.6183600713012478, "grad_norm": 0.1767578125, "learning_rate": 3.66637406158773e-06, "loss": 1.051, "num_tokens": 18682956677.0, "step": 9079 }, { "epoch": 1.6185383244206775, "grad_norm": 0.17578125, "learning_rate": 3.664868353908549e-06, "loss": 1.0194, "num_tokens": 18689231810.0, "step": 9080 }, { "epoch": 1.618716577540107, "grad_norm": 0.1748046875, "learning_rate": 3.6633632574674393e-06, "loss": 1.0096, "num_tokens": 18695509983.0, "step": 9081 }, { "epoch": 1.6188948306595365, "grad_norm": 0.1787109375, "learning_rate": 3.661858772389822e-06, "loss": 1.0244, "num_tokens": 18701765373.0, "step": 9082 }, { "epoch": 1.6190730837789662, "grad_norm": 0.173828125, "learning_rate": 3.660354898801063e-06, "loss": 1.031, "num_tokens": 18708005422.0, "step": 9083 }, { "epoch": 1.6192513368983956, "grad_norm": 0.169921875, "learning_rate": 3.658851636826482e-06, "loss": 1.0167, "num_tokens": 18714289105.0, "step": 9084 }, { "epoch": 1.6194295900178253, "grad_norm": 0.1689453125, "learning_rate": 3.657348986591349e-06, "loss": 0.9952, "num_tokens": 18720486651.0, "step": 9085 }, { "epoch": 1.619607843137255, "grad_norm": 0.171875, "learning_rate": 3.655846948220877e-06, "loss": 1.0554, "num_tokens": 18726761399.0, "step": 9086 }, { "epoch": 1.6197860962566843, "grad_norm": 0.17578125, "learning_rate": 3.6543455218402323e-06, "loss": 1.0278, "num_tokens": 18732994354.0, "step": 9087 }, { "epoch": 1.6199643493761142, "grad_norm": 0.17578125, "learning_rate": 3.6528447075745264e-06, "loss": 1.0011, "num_tokens": 18739253978.0, "step": 9088 }, { "epoch": 1.6201426024955436, "grad_norm": 0.1669921875, "learning_rate": 3.6513445055488296e-06, "loss": 1.0382, "num_tokens": 18745513113.0, "step": 9089 }, { "epoch": 1.6203208556149733, "grad_norm": 0.173828125, "learning_rate": 3.6498449158881473e-06, "loss": 1.0157, "num_tokens": 18751775148.0, "step": 9090 }, { "epoch": 1.620499108734403, "grad_norm": 0.1796875, "learning_rate": 3.6483459387174393e-06, "loss": 1.0466, "num_tokens": 18758057368.0, "step": 9091 }, { "epoch": 1.6206773618538324, "grad_norm": 0.1708984375, "learning_rate": 3.6468475741616215e-06, "loss": 1.0052, "num_tokens": 18764339636.0, "step": 9092 }, { "epoch": 1.620855614973262, "grad_norm": 0.169921875, "learning_rate": 3.6453498223455506e-06, "loss": 1.0143, "num_tokens": 18770622142.0, "step": 9093 }, { "epoch": 1.6210338680926917, "grad_norm": 0.171875, "learning_rate": 3.643852683394035e-06, "loss": 1.0293, "num_tokens": 18776890080.0, "step": 9094 }, { "epoch": 1.621212121212121, "grad_norm": 0.17578125, "learning_rate": 3.6423561574318277e-06, "loss": 1.0004, "num_tokens": 18783174779.0, "step": 9095 }, { "epoch": 1.6213903743315508, "grad_norm": 0.1689453125, "learning_rate": 3.640860244583643e-06, "loss": 1.0176, "num_tokens": 18789429898.0, "step": 9096 }, { "epoch": 1.6215686274509804, "grad_norm": 0.17578125, "learning_rate": 3.6393649449741287e-06, "loss": 1.0404, "num_tokens": 18795684066.0, "step": 9097 }, { "epoch": 1.6217468805704098, "grad_norm": 0.173828125, "learning_rate": 3.637870258727887e-06, "loss": 1.0176, "num_tokens": 18801952926.0, "step": 9098 }, { "epoch": 1.6219251336898397, "grad_norm": 0.171875, "learning_rate": 3.636376185969477e-06, "loss": 1.0362, "num_tokens": 18808215497.0, "step": 9099 }, { "epoch": 1.6221033868092691, "grad_norm": 0.169921875, "learning_rate": 3.6348827268233956e-06, "loss": 1.0281, "num_tokens": 18814486999.0, "step": 9100 }, { "epoch": 1.6222816399286988, "grad_norm": 0.173828125, "learning_rate": 3.6333898814140943e-06, "loss": 1.0101, "num_tokens": 18820759066.0, "step": 9101 }, { "epoch": 1.6224598930481284, "grad_norm": 0.177734375, "learning_rate": 3.6318976498659707e-06, "loss": 1.0165, "num_tokens": 18827042811.0, "step": 9102 }, { "epoch": 1.6226381461675579, "grad_norm": 0.17578125, "learning_rate": 3.6304060323033753e-06, "loss": 1.0012, "num_tokens": 18833328745.0, "step": 9103 }, { "epoch": 1.6228163992869875, "grad_norm": 0.1748046875, "learning_rate": 3.6289150288506057e-06, "loss": 1.0022, "num_tokens": 18839551109.0, "step": 9104 }, { "epoch": 1.6229946524064172, "grad_norm": 0.1728515625, "learning_rate": 3.627424639631905e-06, "loss": 1.0228, "num_tokens": 18845827541.0, "step": 9105 }, { "epoch": 1.6231729055258466, "grad_norm": 0.1728515625, "learning_rate": 3.6259348647714655e-06, "loss": 1.0135, "num_tokens": 18852075269.0, "step": 9106 }, { "epoch": 1.6233511586452762, "grad_norm": 0.1748046875, "learning_rate": 3.6244457043934355e-06, "loss": 1.028, "num_tokens": 18858352815.0, "step": 9107 }, { "epoch": 1.6235294117647059, "grad_norm": 0.1708984375, "learning_rate": 3.622957158621905e-06, "loss": 1.0718, "num_tokens": 18864619665.0, "step": 9108 }, { "epoch": 1.6237076648841353, "grad_norm": 0.173828125, "learning_rate": 3.6214692275809103e-06, "loss": 1.0285, "num_tokens": 18870904473.0, "step": 9109 }, { "epoch": 1.6238859180035652, "grad_norm": 0.1650390625, "learning_rate": 3.6199819113944495e-06, "loss": 1.0085, "num_tokens": 18877170951.0, "step": 9110 }, { "epoch": 1.6240641711229946, "grad_norm": 0.1767578125, "learning_rate": 3.618495210186456e-06, "loss": 1.0282, "num_tokens": 18883426093.0, "step": 9111 }, { "epoch": 1.6242424242424243, "grad_norm": 0.1728515625, "learning_rate": 3.6170091240808212e-06, "loss": 0.9969, "num_tokens": 18889708719.0, "step": 9112 }, { "epoch": 1.624420677361854, "grad_norm": 0.169921875, "learning_rate": 3.6155236532013716e-06, "loss": 1.035, "num_tokens": 18895971404.0, "step": 9113 }, { "epoch": 1.6245989304812833, "grad_norm": 0.171875, "learning_rate": 3.6140387976719005e-06, "loss": 0.999, "num_tokens": 18902235702.0, "step": 9114 }, { "epoch": 1.624777183600713, "grad_norm": 0.1806640625, "learning_rate": 3.6125545576161387e-06, "loss": 1.0267, "num_tokens": 18908486201.0, "step": 9115 }, { "epoch": 1.6249554367201426, "grad_norm": 0.173828125, "learning_rate": 3.6110709331577675e-06, "loss": 1.0368, "num_tokens": 18914760863.0, "step": 9116 }, { "epoch": 1.625133689839572, "grad_norm": 0.169921875, "learning_rate": 3.6095879244204167e-06, "loss": 1.0152, "num_tokens": 18921034944.0, "step": 9117 }, { "epoch": 1.625311942959002, "grad_norm": 0.1845703125, "learning_rate": 3.608105531527668e-06, "loss": 1.0193, "num_tokens": 18927320324.0, "step": 9118 }, { "epoch": 1.6254901960784314, "grad_norm": 0.17578125, "learning_rate": 3.6066237546030514e-06, "loss": 1.0053, "num_tokens": 18933556521.0, "step": 9119 }, { "epoch": 1.6256684491978608, "grad_norm": 0.1689453125, "learning_rate": 3.605142593770036e-06, "loss": 0.9849, "num_tokens": 18939841555.0, "step": 9120 }, { "epoch": 1.6258467023172907, "grad_norm": 0.1669921875, "learning_rate": 3.6036620491520557e-06, "loss": 1.0197, "num_tokens": 18946126119.0, "step": 9121 }, { "epoch": 1.62602495543672, "grad_norm": 0.17578125, "learning_rate": 3.6021821208724807e-06, "loss": 1.0013, "num_tokens": 18952386865.0, "step": 9122 }, { "epoch": 1.6262032085561497, "grad_norm": 0.1787109375, "learning_rate": 3.6007028090546336e-06, "loss": 1.0263, "num_tokens": 18958670882.0, "step": 9123 }, { "epoch": 1.6263814616755794, "grad_norm": 0.166015625, "learning_rate": 3.599224113821784e-06, "loss": 1.013, "num_tokens": 18964954805.0, "step": 9124 }, { "epoch": 1.6265597147950088, "grad_norm": 0.1728515625, "learning_rate": 3.597746035297156e-06, "loss": 1.0422, "num_tokens": 18971221104.0, "step": 9125 }, { "epoch": 1.6267379679144385, "grad_norm": 0.1728515625, "learning_rate": 3.596268573603917e-06, "loss": 0.9594, "num_tokens": 18977505909.0, "step": 9126 }, { "epoch": 1.6269162210338681, "grad_norm": 0.173828125, "learning_rate": 3.5947917288651835e-06, "loss": 1.0372, "num_tokens": 18983790105.0, "step": 9127 }, { "epoch": 1.6270944741532976, "grad_norm": 0.177734375, "learning_rate": 3.593315501204021e-06, "loss": 1.0194, "num_tokens": 18990007703.0, "step": 9128 }, { "epoch": 1.6272727272727274, "grad_norm": 0.1728515625, "learning_rate": 3.5918398907434448e-06, "loss": 1.0174, "num_tokens": 18996292260.0, "step": 9129 }, { "epoch": 1.6274509803921569, "grad_norm": 0.16796875, "learning_rate": 3.590364897606416e-06, "loss": 1.0045, "num_tokens": 19002574982.0, "step": 9130 }, { "epoch": 1.6276292335115865, "grad_norm": 0.17578125, "learning_rate": 3.5888905219158466e-06, "loss": 1.0012, "num_tokens": 19008858474.0, "step": 9131 }, { "epoch": 1.6278074866310162, "grad_norm": 0.17578125, "learning_rate": 3.5874167637945987e-06, "loss": 1.0286, "num_tokens": 19015141678.0, "step": 9132 }, { "epoch": 1.6279857397504456, "grad_norm": 0.171875, "learning_rate": 3.5859436233654793e-06, "loss": 0.9972, "num_tokens": 19021426335.0, "step": 9133 }, { "epoch": 1.6281639928698752, "grad_norm": 0.1728515625, "learning_rate": 3.584471100751246e-06, "loss": 1.0087, "num_tokens": 19027700094.0, "step": 9134 }, { "epoch": 1.6283422459893049, "grad_norm": 0.17578125, "learning_rate": 3.582999196074604e-06, "loss": 1.0157, "num_tokens": 19033984433.0, "step": 9135 }, { "epoch": 1.6285204991087343, "grad_norm": 0.173828125, "learning_rate": 3.581527909458208e-06, "loss": 1.0045, "num_tokens": 19040245293.0, "step": 9136 }, { "epoch": 1.628698752228164, "grad_norm": 0.1748046875, "learning_rate": 3.5800572410246597e-06, "loss": 1.0568, "num_tokens": 19046527332.0, "step": 9137 }, { "epoch": 1.6288770053475936, "grad_norm": 0.169921875, "learning_rate": 3.5785871908965097e-06, "loss": 1.046, "num_tokens": 19052794361.0, "step": 9138 }, { "epoch": 1.629055258467023, "grad_norm": 0.1708984375, "learning_rate": 3.577117759196259e-06, "loss": 1.0311, "num_tokens": 19059077282.0, "step": 9139 }, { "epoch": 1.629233511586453, "grad_norm": 0.1708984375, "learning_rate": 3.575648946046357e-06, "loss": 1.0233, "num_tokens": 19065361730.0, "step": 9140 }, { "epoch": 1.6294117647058823, "grad_norm": 0.1748046875, "learning_rate": 3.5741807515691974e-06, "loss": 1.0156, "num_tokens": 19071642476.0, "step": 9141 }, { "epoch": 1.629590017825312, "grad_norm": 0.1708984375, "learning_rate": 3.572713175887124e-06, "loss": 1.0423, "num_tokens": 19077926245.0, "step": 9142 }, { "epoch": 1.6297682709447416, "grad_norm": 0.16796875, "learning_rate": 3.571246219122437e-06, "loss": 1.0147, "num_tokens": 19084203447.0, "step": 9143 }, { "epoch": 1.629946524064171, "grad_norm": 0.1689453125, "learning_rate": 3.569779881397372e-06, "loss": 1.0195, "num_tokens": 19090475548.0, "step": 9144 }, { "epoch": 1.6301247771836007, "grad_norm": 0.169921875, "learning_rate": 3.568314162834118e-06, "loss": 1.0068, "num_tokens": 19096755589.0, "step": 9145 }, { "epoch": 1.6303030303030304, "grad_norm": 0.1748046875, "learning_rate": 3.5668490635548194e-06, "loss": 1.0194, "num_tokens": 19102978751.0, "step": 9146 }, { "epoch": 1.6304812834224598, "grad_norm": 0.1689453125, "learning_rate": 3.5653845836815594e-06, "loss": 1.0264, "num_tokens": 19109261876.0, "step": 9147 }, { "epoch": 1.6306595365418894, "grad_norm": 0.171875, "learning_rate": 3.563920723336374e-06, "loss": 1.0173, "num_tokens": 19115544413.0, "step": 9148 }, { "epoch": 1.630837789661319, "grad_norm": 0.17578125, "learning_rate": 3.562457482641246e-06, "loss": 1.005, "num_tokens": 19121797580.0, "step": 9149 }, { "epoch": 1.6310160427807485, "grad_norm": 0.1689453125, "learning_rate": 3.5609948617181134e-06, "loss": 1.0363, "num_tokens": 19128080015.0, "step": 9150 }, { "epoch": 1.6311942959001784, "grad_norm": 0.171875, "learning_rate": 3.55953286068885e-06, "loss": 1.0122, "num_tokens": 19134364788.0, "step": 9151 }, { "epoch": 1.6313725490196078, "grad_norm": 0.1669921875, "learning_rate": 3.558071479675284e-06, "loss": 1.0036, "num_tokens": 19140599599.0, "step": 9152 }, { "epoch": 1.6315508021390375, "grad_norm": 0.171875, "learning_rate": 3.556610718799198e-06, "loss": 1.0357, "num_tokens": 19146883253.0, "step": 9153 }, { "epoch": 1.6317290552584671, "grad_norm": 0.1708984375, "learning_rate": 3.5551505781823157e-06, "loss": 0.9747, "num_tokens": 19153155857.0, "step": 9154 }, { "epoch": 1.6319073083778965, "grad_norm": 0.1748046875, "learning_rate": 3.5536910579463095e-06, "loss": 1.0188, "num_tokens": 19159424318.0, "step": 9155 }, { "epoch": 1.6320855614973262, "grad_norm": 0.1748046875, "learning_rate": 3.5522321582128e-06, "loss": 0.9998, "num_tokens": 19165688939.0, "step": 9156 }, { "epoch": 1.6322638146167558, "grad_norm": 0.16796875, "learning_rate": 3.550773879103362e-06, "loss": 1.0153, "num_tokens": 19171937624.0, "step": 9157 }, { "epoch": 1.6324420677361853, "grad_norm": 0.171875, "learning_rate": 3.5493162207395136e-06, "loss": 0.9998, "num_tokens": 19178192158.0, "step": 9158 }, { "epoch": 1.632620320855615, "grad_norm": 0.17578125, "learning_rate": 3.5478591832427167e-06, "loss": 1.0166, "num_tokens": 19184477873.0, "step": 9159 }, { "epoch": 1.6327985739750446, "grad_norm": 0.17578125, "learning_rate": 3.5464027667343925e-06, "loss": 1.0439, "num_tokens": 19190760834.0, "step": 9160 }, { "epoch": 1.632976827094474, "grad_norm": 0.177734375, "learning_rate": 3.544946971335902e-06, "loss": 1.0482, "num_tokens": 19197000214.0, "step": 9161 }, { "epoch": 1.6331550802139039, "grad_norm": 0.171875, "learning_rate": 3.543491797168556e-06, "loss": 0.9923, "num_tokens": 19203283740.0, "step": 9162 }, { "epoch": 1.6333333333333333, "grad_norm": 0.171875, "learning_rate": 3.5420372443536144e-06, "loss": 0.9994, "num_tokens": 19209567716.0, "step": 9163 }, { "epoch": 1.633511586452763, "grad_norm": 0.1748046875, "learning_rate": 3.540583313012289e-06, "loss": 1.0074, "num_tokens": 19215852812.0, "step": 9164 }, { "epoch": 1.6336898395721926, "grad_norm": 0.18359375, "learning_rate": 3.5391300032657327e-06, "loss": 1.0335, "num_tokens": 19222105465.0, "step": 9165 }, { "epoch": 1.633868092691622, "grad_norm": 0.17578125, "learning_rate": 3.537677315235051e-06, "loss": 1.035, "num_tokens": 19228362705.0, "step": 9166 }, { "epoch": 1.6340463458110517, "grad_norm": 0.1689453125, "learning_rate": 3.5362252490412972e-06, "loss": 1.0202, "num_tokens": 19234634586.0, "step": 9167 }, { "epoch": 1.6342245989304813, "grad_norm": 0.1708984375, "learning_rate": 3.5347738048054714e-06, "loss": 1.0161, "num_tokens": 19240913113.0, "step": 9168 }, { "epoch": 1.6344028520499108, "grad_norm": 0.1708984375, "learning_rate": 3.533322982648525e-06, "loss": 0.9945, "num_tokens": 19247197217.0, "step": 9169 }, { "epoch": 1.6345811051693404, "grad_norm": 0.1796875, "learning_rate": 3.531872782691349e-06, "loss": 1.0328, "num_tokens": 19253447975.0, "step": 9170 }, { "epoch": 1.63475935828877, "grad_norm": 0.1748046875, "learning_rate": 3.530423205054797e-06, "loss": 1.0033, "num_tokens": 19259714336.0, "step": 9171 }, { "epoch": 1.6349376114081995, "grad_norm": 0.1748046875, "learning_rate": 3.528974249859658e-06, "loss": 1.0557, "num_tokens": 19265963195.0, "step": 9172 }, { "epoch": 1.6351158645276294, "grad_norm": 0.169921875, "learning_rate": 3.527525917226676e-06, "loss": 1.0271, "num_tokens": 19272197056.0, "step": 9173 }, { "epoch": 1.6352941176470588, "grad_norm": 0.173828125, "learning_rate": 3.52607820727654e-06, "loss": 1.0069, "num_tokens": 19278482522.0, "step": 9174 }, { "epoch": 1.6354723707664884, "grad_norm": 0.177734375, "learning_rate": 3.5246311201298856e-06, "loss": 1.0222, "num_tokens": 19284742953.0, "step": 9175 }, { "epoch": 1.635650623885918, "grad_norm": 0.1767578125, "learning_rate": 3.523184655907302e-06, "loss": 1.0275, "num_tokens": 19291027974.0, "step": 9176 }, { "epoch": 1.6358288770053475, "grad_norm": 0.17578125, "learning_rate": 3.5217388147293196e-06, "loss": 1.0166, "num_tokens": 19297268611.0, "step": 9177 }, { "epoch": 1.6360071301247772, "grad_norm": 0.166015625, "learning_rate": 3.520293596716426e-06, "loss": 1.0493, "num_tokens": 19303551077.0, "step": 9178 }, { "epoch": 1.6361853832442068, "grad_norm": 0.1826171875, "learning_rate": 3.5188490019890485e-06, "loss": 1.0306, "num_tokens": 19309834206.0, "step": 9179 }, { "epoch": 1.6363636363636362, "grad_norm": 0.177734375, "learning_rate": 3.517405030667567e-06, "loss": 1.0396, "num_tokens": 19316103164.0, "step": 9180 }, { "epoch": 1.6365418894830661, "grad_norm": 0.1796875, "learning_rate": 3.5159616828723055e-06, "loss": 0.996, "num_tokens": 19322387353.0, "step": 9181 }, { "epoch": 1.6367201426024955, "grad_norm": 0.1728515625, "learning_rate": 3.5145189587235397e-06, "loss": 1.0004, "num_tokens": 19328658625.0, "step": 9182 }, { "epoch": 1.636898395721925, "grad_norm": 0.1806640625, "learning_rate": 3.5130768583414928e-06, "loss": 1.0137, "num_tokens": 19334905944.0, "step": 9183 }, { "epoch": 1.6370766488413548, "grad_norm": 0.171875, "learning_rate": 3.5116353818463323e-06, "loss": 1.0124, "num_tokens": 19341165841.0, "step": 9184 }, { "epoch": 1.6372549019607843, "grad_norm": 0.16796875, "learning_rate": 3.510194529358181e-06, "loss": 1.03, "num_tokens": 19347448666.0, "step": 9185 }, { "epoch": 1.637433155080214, "grad_norm": 0.1748046875, "learning_rate": 3.5087543009971026e-06, "loss": 1.0104, "num_tokens": 19353649471.0, "step": 9186 }, { "epoch": 1.6376114081996436, "grad_norm": 0.1708984375, "learning_rate": 3.5073146968831147e-06, "loss": 0.992, "num_tokens": 19359904882.0, "step": 9187 }, { "epoch": 1.637789661319073, "grad_norm": 0.177734375, "learning_rate": 3.505875717136174e-06, "loss": 1.008, "num_tokens": 19366188981.0, "step": 9188 }, { "epoch": 1.6379679144385026, "grad_norm": 0.1748046875, "learning_rate": 3.5044373618761994e-06, "loss": 1.0404, "num_tokens": 19372472299.0, "step": 9189 }, { "epoch": 1.6381461675579323, "grad_norm": 0.1689453125, "learning_rate": 3.502999631223043e-06, "loss": 1.0106, "num_tokens": 19378755872.0, "step": 9190 }, { "epoch": 1.6383244206773617, "grad_norm": 0.169921875, "learning_rate": 3.50156252529651e-06, "loss": 1.0453, "num_tokens": 19385040935.0, "step": 9191 }, { "epoch": 1.6385026737967916, "grad_norm": 0.17578125, "learning_rate": 3.5001260442163604e-06, "loss": 1.0244, "num_tokens": 19391324836.0, "step": 9192 }, { "epoch": 1.638680926916221, "grad_norm": 0.173828125, "learning_rate": 3.4986901881022924e-06, "loss": 1.0371, "num_tokens": 19397581741.0, "step": 9193 }, { "epoch": 1.6388591800356507, "grad_norm": 0.1728515625, "learning_rate": 3.497254957073958e-06, "loss": 0.9986, "num_tokens": 19403833586.0, "step": 9194 }, { "epoch": 1.6390374331550803, "grad_norm": 0.1708984375, "learning_rate": 3.4958203512509533e-06, "loss": 1.0099, "num_tokens": 19410118170.0, "step": 9195 }, { "epoch": 1.6392156862745098, "grad_norm": 0.1767578125, "learning_rate": 3.494386370752829e-06, "loss": 0.9938, "num_tokens": 19416381888.0, "step": 9196 }, { "epoch": 1.6393939393939394, "grad_norm": 0.1708984375, "learning_rate": 3.4929530156990743e-06, "loss": 1.0376, "num_tokens": 19422639806.0, "step": 9197 }, { "epoch": 1.639572192513369, "grad_norm": 0.169921875, "learning_rate": 3.4915202862091312e-06, "loss": 1.0439, "num_tokens": 19428892577.0, "step": 9198 }, { "epoch": 1.6397504456327985, "grad_norm": 0.169921875, "learning_rate": 3.4900881824023906e-06, "loss": 0.9893, "num_tokens": 19435165271.0, "step": 9199 }, { "epoch": 1.6399286987522281, "grad_norm": 0.1669921875, "learning_rate": 3.4886567043981913e-06, "loss": 1.0227, "num_tokens": 19441412942.0, "step": 9200 }, { "epoch": 1.6401069518716578, "grad_norm": 0.1708984375, "learning_rate": 3.4872258523158175e-06, "loss": 1.0169, "num_tokens": 19447695614.0, "step": 9201 }, { "epoch": 1.6402852049910872, "grad_norm": 0.173828125, "learning_rate": 3.4857956262745e-06, "loss": 1.0188, "num_tokens": 19453947707.0, "step": 9202 }, { "epoch": 1.640463458110517, "grad_norm": 0.1669921875, "learning_rate": 3.484366026393424e-06, "loss": 0.9867, "num_tokens": 19460220692.0, "step": 9203 }, { "epoch": 1.6406417112299465, "grad_norm": 0.1708984375, "learning_rate": 3.4829370527917178e-06, "loss": 1.0037, "num_tokens": 19466476509.0, "step": 9204 }, { "epoch": 1.6408199643493762, "grad_norm": 0.1708984375, "learning_rate": 3.481508705588454e-06, "loss": 1.0173, "num_tokens": 19472743507.0, "step": 9205 }, { "epoch": 1.6409982174688058, "grad_norm": 0.1728515625, "learning_rate": 3.4800809849026606e-06, "loss": 1.0008, "num_tokens": 19479027002.0, "step": 9206 }, { "epoch": 1.6411764705882352, "grad_norm": 0.1689453125, "learning_rate": 3.4786538908533095e-06, "loss": 1.0098, "num_tokens": 19485294801.0, "step": 9207 }, { "epoch": 1.6413547237076649, "grad_norm": 0.1728515625, "learning_rate": 3.477227423559321e-06, "loss": 1.0344, "num_tokens": 19491579948.0, "step": 9208 }, { "epoch": 1.6415329768270945, "grad_norm": 0.1728515625, "learning_rate": 3.4758015831395586e-06, "loss": 1.0344, "num_tokens": 19497833702.0, "step": 9209 }, { "epoch": 1.641711229946524, "grad_norm": 0.173828125, "learning_rate": 3.474376369712845e-06, "loss": 1.0248, "num_tokens": 19504099782.0, "step": 9210 }, { "epoch": 1.6418894830659536, "grad_norm": 0.173828125, "learning_rate": 3.4729517833979396e-06, "loss": 1.0155, "num_tokens": 19510349599.0, "step": 9211 }, { "epoch": 1.6420677361853833, "grad_norm": 0.1787109375, "learning_rate": 3.471527824313554e-06, "loss": 0.9941, "num_tokens": 19516606465.0, "step": 9212 }, { "epoch": 1.6422459893048127, "grad_norm": 0.1796875, "learning_rate": 3.470104492578348e-06, "loss": 0.9955, "num_tokens": 19522889800.0, "step": 9213 }, { "epoch": 1.6424242424242426, "grad_norm": 0.171875, "learning_rate": 3.468681788310926e-06, "loss": 1.0162, "num_tokens": 19529172704.0, "step": 9214 }, { "epoch": 1.642602495543672, "grad_norm": 0.1728515625, "learning_rate": 3.4672597116298434e-06, "loss": 1.0197, "num_tokens": 19535457934.0, "step": 9215 }, { "epoch": 1.6427807486631016, "grad_norm": 0.1689453125, "learning_rate": 3.4658382626536003e-06, "loss": 1.0424, "num_tokens": 19541741266.0, "step": 9216 }, { "epoch": 1.6429590017825313, "grad_norm": 0.1787109375, "learning_rate": 3.4644174415006493e-06, "loss": 0.9925, "num_tokens": 19548025104.0, "step": 9217 }, { "epoch": 1.6431372549019607, "grad_norm": 0.1728515625, "learning_rate": 3.462997248289387e-06, "loss": 1.0071, "num_tokens": 19554247282.0, "step": 9218 }, { "epoch": 1.6433155080213904, "grad_norm": 0.1708984375, "learning_rate": 3.4615776831381588e-06, "loss": 1.0217, "num_tokens": 19560529715.0, "step": 9219 }, { "epoch": 1.64349376114082, "grad_norm": 0.173828125, "learning_rate": 3.4601587461652557e-06, "loss": 1.038, "num_tokens": 19566813995.0, "step": 9220 }, { "epoch": 1.6436720142602494, "grad_norm": 0.1767578125, "learning_rate": 3.4587404374889198e-06, "loss": 1.0178, "num_tokens": 19573055665.0, "step": 9221 }, { "epoch": 1.643850267379679, "grad_norm": 0.177734375, "learning_rate": 3.457322757227337e-06, "loss": 0.9958, "num_tokens": 19579340972.0, "step": 9222 }, { "epoch": 1.6440285204991087, "grad_norm": 0.169921875, "learning_rate": 3.4559057054986424e-06, "loss": 1.0271, "num_tokens": 19585595714.0, "step": 9223 }, { "epoch": 1.6442067736185382, "grad_norm": 0.17578125, "learning_rate": 3.4544892824209224e-06, "loss": 1.0106, "num_tokens": 19591878632.0, "step": 9224 }, { "epoch": 1.644385026737968, "grad_norm": 0.169921875, "learning_rate": 3.4530734881122064e-06, "loss": 1.0222, "num_tokens": 19598146133.0, "step": 9225 }, { "epoch": 1.6445632798573975, "grad_norm": 0.1669921875, "learning_rate": 3.451658322690473e-06, "loss": 1.0152, "num_tokens": 19604402996.0, "step": 9226 }, { "epoch": 1.6447415329768271, "grad_norm": 0.1728515625, "learning_rate": 3.4502437862736482e-06, "loss": 1.0441, "num_tokens": 19610658004.0, "step": 9227 }, { "epoch": 1.6449197860962568, "grad_norm": 0.1708984375, "learning_rate": 3.4488298789796047e-06, "loss": 0.9997, "num_tokens": 19616898489.0, "step": 9228 }, { "epoch": 1.6450980392156862, "grad_norm": 0.1728515625, "learning_rate": 3.447416600926165e-06, "loss": 1.0084, "num_tokens": 19623159566.0, "step": 9229 }, { "epoch": 1.6452762923351159, "grad_norm": 0.1689453125, "learning_rate": 3.4460039522310944e-06, "loss": 1.0136, "num_tokens": 19629410449.0, "step": 9230 }, { "epoch": 1.6454545454545455, "grad_norm": 0.1748046875, "learning_rate": 3.4445919330121154e-06, "loss": 1.0345, "num_tokens": 19635670513.0, "step": 9231 }, { "epoch": 1.645632798573975, "grad_norm": 0.1767578125, "learning_rate": 3.443180543386887e-06, "loss": 1.0189, "num_tokens": 19641950666.0, "step": 9232 }, { "epoch": 1.6458110516934046, "grad_norm": 0.171875, "learning_rate": 3.441769783473023e-06, "loss": 1.0136, "num_tokens": 19648207965.0, "step": 9233 }, { "epoch": 1.6459893048128342, "grad_norm": 0.169921875, "learning_rate": 3.440359653388079e-06, "loss": 1.0126, "num_tokens": 19654489360.0, "step": 9234 }, { "epoch": 1.6461675579322637, "grad_norm": 0.17578125, "learning_rate": 3.438950153249568e-06, "loss": 0.9826, "num_tokens": 19660773133.0, "step": 9235 }, { "epoch": 1.6463458110516935, "grad_norm": 0.1689453125, "learning_rate": 3.4375412831749365e-06, "loss": 0.9911, "num_tokens": 19667031958.0, "step": 9236 }, { "epoch": 1.646524064171123, "grad_norm": 0.1689453125, "learning_rate": 3.4361330432815886e-06, "loss": 0.9955, "num_tokens": 19673314850.0, "step": 9237 }, { "epoch": 1.6467023172905526, "grad_norm": 0.17578125, "learning_rate": 3.4347254336868753e-06, "loss": 0.9805, "num_tokens": 19679562295.0, "step": 9238 }, { "epoch": 1.6468805704099823, "grad_norm": 0.177734375, "learning_rate": 3.4333184545080923e-06, "loss": 1.0042, "num_tokens": 19685837394.0, "step": 9239 }, { "epoch": 1.6470588235294117, "grad_norm": 0.1728515625, "learning_rate": 3.4319121058624823e-06, "loss": 1.0464, "num_tokens": 19692120317.0, "step": 9240 }, { "epoch": 1.6472370766488413, "grad_norm": 0.1787109375, "learning_rate": 3.4305063878672347e-06, "loss": 1.0356, "num_tokens": 19698401791.0, "step": 9241 }, { "epoch": 1.647415329768271, "grad_norm": 0.173828125, "learning_rate": 3.429101300639496e-06, "loss": 1.0252, "num_tokens": 19704680932.0, "step": 9242 }, { "epoch": 1.6475935828877004, "grad_norm": 0.1728515625, "learning_rate": 3.4276968442963436e-06, "loss": 0.9949, "num_tokens": 19710938366.0, "step": 9243 }, { "epoch": 1.6477718360071303, "grad_norm": 0.1806640625, "learning_rate": 3.426293018954813e-06, "loss": 1.0607, "num_tokens": 19717178379.0, "step": 9244 }, { "epoch": 1.6479500891265597, "grad_norm": 0.1728515625, "learning_rate": 3.4248898247318885e-06, "loss": 1.0508, "num_tokens": 19723462725.0, "step": 9245 }, { "epoch": 1.6481283422459891, "grad_norm": 0.169921875, "learning_rate": 3.423487261744497e-06, "loss": 0.9902, "num_tokens": 19729746355.0, "step": 9246 }, { "epoch": 1.648306595365419, "grad_norm": 0.1669921875, "learning_rate": 3.4220853301095148e-06, "loss": 1.0278, "num_tokens": 19736004222.0, "step": 9247 }, { "epoch": 1.6484848484848484, "grad_norm": 0.171875, "learning_rate": 3.4206840299437625e-06, "loss": 0.993, "num_tokens": 19742288539.0, "step": 9248 }, { "epoch": 1.648663101604278, "grad_norm": 0.173828125, "learning_rate": 3.419283361364015e-06, "loss": 1.008, "num_tokens": 19748572241.0, "step": 9249 }, { "epoch": 1.6488413547237077, "grad_norm": 0.1708984375, "learning_rate": 3.41788332448699e-06, "loss": 1.0103, "num_tokens": 19754842327.0, "step": 9250 }, { "epoch": 1.6490196078431372, "grad_norm": 0.1708984375, "learning_rate": 3.4164839194293466e-06, "loss": 1.0057, "num_tokens": 19761125373.0, "step": 9251 }, { "epoch": 1.6491978609625668, "grad_norm": 0.1728515625, "learning_rate": 3.415085146307704e-06, "loss": 0.9787, "num_tokens": 19767409353.0, "step": 9252 }, { "epoch": 1.6493761140819965, "grad_norm": 0.1689453125, "learning_rate": 3.4136870052386217e-06, "loss": 1.0094, "num_tokens": 19773679905.0, "step": 9253 }, { "epoch": 1.649554367201426, "grad_norm": 0.1748046875, "learning_rate": 3.4122894963386043e-06, "loss": 1.0136, "num_tokens": 19779938800.0, "step": 9254 }, { "epoch": 1.6497326203208558, "grad_norm": 0.173828125, "learning_rate": 3.4108926197241067e-06, "loss": 1.0532, "num_tokens": 19786221212.0, "step": 9255 }, { "epoch": 1.6499108734402852, "grad_norm": 0.171875, "learning_rate": 3.4094963755115336e-06, "loss": 0.9893, "num_tokens": 19792495521.0, "step": 9256 }, { "epoch": 1.6500891265597148, "grad_norm": 0.1708984375, "learning_rate": 3.408100763817232e-06, "loss": 1.0236, "num_tokens": 19798780101.0, "step": 9257 }, { "epoch": 1.6502673796791445, "grad_norm": 0.1787109375, "learning_rate": 3.4067057847575004e-06, "loss": 1.0145, "num_tokens": 19805032377.0, "step": 9258 }, { "epoch": 1.650445632798574, "grad_norm": 0.1748046875, "learning_rate": 3.405311438448582e-06, "loss": 1.0133, "num_tokens": 19811316630.0, "step": 9259 }, { "epoch": 1.6506238859180036, "grad_norm": 0.1826171875, "learning_rate": 3.403917725006667e-06, "loss": 1.0188, "num_tokens": 19817598765.0, "step": 9260 }, { "epoch": 1.6508021390374332, "grad_norm": 0.1787109375, "learning_rate": 3.4025246445478944e-06, "loss": 1.0024, "num_tokens": 19823865563.0, "step": 9261 }, { "epoch": 1.6509803921568627, "grad_norm": 0.169921875, "learning_rate": 3.4011321971883472e-06, "loss": 1.0356, "num_tokens": 19830150405.0, "step": 9262 }, { "epoch": 1.6511586452762923, "grad_norm": 0.1708984375, "learning_rate": 3.3997403830440647e-06, "loss": 1.04, "num_tokens": 19836406697.0, "step": 9263 }, { "epoch": 1.651336898395722, "grad_norm": 0.171875, "learning_rate": 3.3983492022310216e-06, "loss": 1.0157, "num_tokens": 19842687153.0, "step": 9264 }, { "epoch": 1.6515151515151514, "grad_norm": 0.1806640625, "learning_rate": 3.39695865486515e-06, "loss": 1.031, "num_tokens": 19848970336.0, "step": 9265 }, { "epoch": 1.6516934046345813, "grad_norm": 0.1728515625, "learning_rate": 3.3955687410623172e-06, "loss": 0.9888, "num_tokens": 19855239156.0, "step": 9266 }, { "epoch": 1.6518716577540107, "grad_norm": 0.1728515625, "learning_rate": 3.3941794609383516e-06, "loss": 1.0247, "num_tokens": 19861516652.0, "step": 9267 }, { "epoch": 1.6520499108734403, "grad_norm": 0.1708984375, "learning_rate": 3.3927908146090204e-06, "loss": 1.0133, "num_tokens": 19867801686.0, "step": 9268 }, { "epoch": 1.65222816399287, "grad_norm": 0.1728515625, "learning_rate": 3.3914028021900382e-06, "loss": 1.046, "num_tokens": 19874085494.0, "step": 9269 }, { "epoch": 1.6524064171122994, "grad_norm": 0.16796875, "learning_rate": 3.3900154237970706e-06, "loss": 1.0278, "num_tokens": 19880357607.0, "step": 9270 }, { "epoch": 1.652584670231729, "grad_norm": 0.1728515625, "learning_rate": 3.388628679545727e-06, "loss": 1.0069, "num_tokens": 19886642849.0, "step": 9271 }, { "epoch": 1.6527629233511587, "grad_norm": 0.1689453125, "learning_rate": 3.3872425695515655e-06, "loss": 1.0216, "num_tokens": 19892880631.0, "step": 9272 }, { "epoch": 1.6529411764705881, "grad_norm": 0.17578125, "learning_rate": 3.38585709393009e-06, "loss": 1.0029, "num_tokens": 19899160864.0, "step": 9273 }, { "epoch": 1.6531194295900178, "grad_norm": 0.1708984375, "learning_rate": 3.384472252796754e-06, "loss": 1.0159, "num_tokens": 19905444652.0, "step": 9274 }, { "epoch": 1.6532976827094474, "grad_norm": 0.1728515625, "learning_rate": 3.383088046266955e-06, "loss": 1.0139, "num_tokens": 19911698046.0, "step": 9275 }, { "epoch": 1.6534759358288769, "grad_norm": 0.16796875, "learning_rate": 3.3817044744560414e-06, "loss": 0.9959, "num_tokens": 19917957640.0, "step": 9276 }, { "epoch": 1.6536541889483067, "grad_norm": 0.16796875, "learning_rate": 3.3803215374793014e-06, "loss": 1.0206, "num_tokens": 19924240858.0, "step": 9277 }, { "epoch": 1.6538324420677362, "grad_norm": 0.1806640625, "learning_rate": 3.3789392354519824e-06, "loss": 1.0412, "num_tokens": 19930519544.0, "step": 9278 }, { "epoch": 1.6540106951871658, "grad_norm": 0.1767578125, "learning_rate": 3.3775575684892677e-06, "loss": 1.0399, "num_tokens": 19936798367.0, "step": 9279 }, { "epoch": 1.6541889483065955, "grad_norm": 0.173828125, "learning_rate": 3.3761765367062916e-06, "loss": 1.0167, "num_tokens": 19943051034.0, "step": 9280 }, { "epoch": 1.654367201426025, "grad_norm": 0.1748046875, "learning_rate": 3.37479614021814e-06, "loss": 1.0392, "num_tokens": 19949292063.0, "step": 9281 }, { "epoch": 1.6545454545454545, "grad_norm": 0.173828125, "learning_rate": 3.373416379139838e-06, "loss": 1.0192, "num_tokens": 19955559574.0, "step": 9282 }, { "epoch": 1.6547237076648842, "grad_norm": 0.173828125, "learning_rate": 3.3720372535863615e-06, "loss": 1.0149, "num_tokens": 19961845090.0, "step": 9283 }, { "epoch": 1.6549019607843136, "grad_norm": 0.1669921875, "learning_rate": 3.3706587636726316e-06, "loss": 1.0062, "num_tokens": 19968128758.0, "step": 9284 }, { "epoch": 1.6550802139037433, "grad_norm": 0.166015625, "learning_rate": 3.369280909513523e-06, "loss": 1.0147, "num_tokens": 19974413160.0, "step": 9285 }, { "epoch": 1.655258467023173, "grad_norm": 0.169921875, "learning_rate": 3.3679036912238483e-06, "loss": 1.0194, "num_tokens": 19980697144.0, "step": 9286 }, { "epoch": 1.6554367201426023, "grad_norm": 0.1748046875, "learning_rate": 3.3665271089183747e-06, "loss": 1.0505, "num_tokens": 19986979699.0, "step": 9287 }, { "epoch": 1.6556149732620322, "grad_norm": 0.1708984375, "learning_rate": 3.365151162711809e-06, "loss": 1.0227, "num_tokens": 19993261499.0, "step": 9288 }, { "epoch": 1.6557932263814616, "grad_norm": 0.1728515625, "learning_rate": 3.363775852718816e-06, "loss": 1.0161, "num_tokens": 19999544585.0, "step": 9289 }, { "epoch": 1.6559714795008913, "grad_norm": 0.1767578125, "learning_rate": 3.3624011790539936e-06, "loss": 1.0221, "num_tokens": 20005829866.0, "step": 9290 }, { "epoch": 1.656149732620321, "grad_norm": 0.177734375, "learning_rate": 3.361027141831895e-06, "loss": 1.0494, "num_tokens": 20012059462.0, "step": 9291 }, { "epoch": 1.6563279857397504, "grad_norm": 0.16796875, "learning_rate": 3.359653741167021e-06, "loss": 0.9848, "num_tokens": 20018343440.0, "step": 9292 }, { "epoch": 1.65650623885918, "grad_norm": 0.1728515625, "learning_rate": 3.3582809771738167e-06, "loss": 1.0561, "num_tokens": 20024595219.0, "step": 9293 }, { "epoch": 1.6566844919786097, "grad_norm": 0.1689453125, "learning_rate": 3.3569088499666757e-06, "loss": 1.0183, "num_tokens": 20030877273.0, "step": 9294 }, { "epoch": 1.656862745098039, "grad_norm": 0.1748046875, "learning_rate": 3.3555373596599335e-06, "loss": 1.0227, "num_tokens": 20037131366.0, "step": 9295 }, { "epoch": 1.6570409982174688, "grad_norm": 0.1728515625, "learning_rate": 3.354166506367884e-06, "loss": 1.0082, "num_tokens": 20043413862.0, "step": 9296 }, { "epoch": 1.6572192513368984, "grad_norm": 0.1689453125, "learning_rate": 3.352796290204755e-06, "loss": 1.0308, "num_tokens": 20049662895.0, "step": 9297 }, { "epoch": 1.6573975044563278, "grad_norm": 0.169921875, "learning_rate": 3.3514267112847265e-06, "loss": 1.0215, "num_tokens": 20055946732.0, "step": 9298 }, { "epoch": 1.6575757575757577, "grad_norm": 0.1669921875, "learning_rate": 3.3500577697219295e-06, "loss": 1.0115, "num_tokens": 20062206321.0, "step": 9299 }, { "epoch": 1.6577540106951871, "grad_norm": 0.1669921875, "learning_rate": 3.3486894656304365e-06, "loss": 1.0245, "num_tokens": 20068490736.0, "step": 9300 }, { "epoch": 1.6579322638146168, "grad_norm": 0.1787109375, "learning_rate": 3.347321799124269e-06, "loss": 1.0184, "num_tokens": 20074775401.0, "step": 9301 }, { "epoch": 1.6581105169340464, "grad_norm": 0.17578125, "learning_rate": 3.345954770317391e-06, "loss": 1.0075, "num_tokens": 20081042108.0, "step": 9302 }, { "epoch": 1.6582887700534759, "grad_norm": 0.1728515625, "learning_rate": 3.3445883793237233e-06, "loss": 1.0069, "num_tokens": 20087326092.0, "step": 9303 }, { "epoch": 1.6584670231729055, "grad_norm": 0.173828125, "learning_rate": 3.3432226262571267e-06, "loss": 1.0097, "num_tokens": 20093611359.0, "step": 9304 }, { "epoch": 1.6586452762923352, "grad_norm": 0.173828125, "learning_rate": 3.341857511231405e-06, "loss": 1.0208, "num_tokens": 20099856140.0, "step": 9305 }, { "epoch": 1.6588235294117646, "grad_norm": 0.1748046875, "learning_rate": 3.340493034360317e-06, "loss": 1.0033, "num_tokens": 20106139779.0, "step": 9306 }, { "epoch": 1.6590017825311945, "grad_norm": 0.1689453125, "learning_rate": 3.3391291957575656e-06, "loss": 0.994, "num_tokens": 20112423380.0, "step": 9307 }, { "epoch": 1.6591800356506239, "grad_norm": 0.1748046875, "learning_rate": 3.337765995536798e-06, "loss": 1.047, "num_tokens": 20118691215.0, "step": 9308 }, { "epoch": 1.6593582887700533, "grad_norm": 0.1689453125, "learning_rate": 3.3364034338116088e-06, "loss": 1.0382, "num_tokens": 20124975141.0, "step": 9309 }, { "epoch": 1.6595365418894832, "grad_norm": 0.1689453125, "learning_rate": 3.3350415106955435e-06, "loss": 1.0142, "num_tokens": 20131207677.0, "step": 9310 }, { "epoch": 1.6597147950089126, "grad_norm": 0.1708984375, "learning_rate": 3.3336802263020905e-06, "loss": 1.0387, "num_tokens": 20137490631.0, "step": 9311 }, { "epoch": 1.6598930481283423, "grad_norm": 0.1748046875, "learning_rate": 3.3323195807446853e-06, "loss": 1.011, "num_tokens": 20143759553.0, "step": 9312 }, { "epoch": 1.660071301247772, "grad_norm": 0.171875, "learning_rate": 3.330959574136711e-06, "loss": 1.023, "num_tokens": 20150043578.0, "step": 9313 }, { "epoch": 1.6602495543672013, "grad_norm": 0.1806640625, "learning_rate": 3.329600206591498e-06, "loss": 1.0249, "num_tokens": 20156278401.0, "step": 9314 }, { "epoch": 1.660427807486631, "grad_norm": 0.173828125, "learning_rate": 3.3282414782223217e-06, "loss": 1.0364, "num_tokens": 20162559116.0, "step": 9315 }, { "epoch": 1.6606060606060606, "grad_norm": 0.1767578125, "learning_rate": 3.3268833891424024e-06, "loss": 1.0231, "num_tokens": 20168837180.0, "step": 9316 }, { "epoch": 1.66078431372549, "grad_norm": 0.1748046875, "learning_rate": 3.3255259394649164e-06, "loss": 1.0154, "num_tokens": 20175119773.0, "step": 9317 }, { "epoch": 1.66096256684492, "grad_norm": 0.1689453125, "learning_rate": 3.3241691293029752e-06, "loss": 0.9998, "num_tokens": 20181404627.0, "step": 9318 }, { "epoch": 1.6611408199643494, "grad_norm": 0.17578125, "learning_rate": 3.3228129587696454e-06, "loss": 1.0309, "num_tokens": 20187688402.0, "step": 9319 }, { "epoch": 1.661319073083779, "grad_norm": 0.1728515625, "learning_rate": 3.3214574279779357e-06, "loss": 1.0275, "num_tokens": 20193938440.0, "step": 9320 }, { "epoch": 1.6614973262032087, "grad_norm": 0.1708984375, "learning_rate": 3.320102537040801e-06, "loss": 1.0064, "num_tokens": 20200221713.0, "step": 9321 }, { "epoch": 1.661675579322638, "grad_norm": 0.1728515625, "learning_rate": 3.3187482860711475e-06, "loss": 1.0319, "num_tokens": 20206414425.0, "step": 9322 }, { "epoch": 1.6618538324420677, "grad_norm": 0.166015625, "learning_rate": 3.3173946751818208e-06, "loss": 0.9957, "num_tokens": 20212676062.0, "step": 9323 }, { "epoch": 1.6620320855614974, "grad_norm": 0.1728515625, "learning_rate": 3.316041704485623e-06, "loss": 1.0121, "num_tokens": 20218960981.0, "step": 9324 }, { "epoch": 1.6622103386809268, "grad_norm": 0.169921875, "learning_rate": 3.314689374095294e-06, "loss": 0.9928, "num_tokens": 20225244401.0, "step": 9325 }, { "epoch": 1.6623885918003565, "grad_norm": 0.171875, "learning_rate": 3.3133376841235254e-06, "loss": 1.0131, "num_tokens": 20231481323.0, "step": 9326 }, { "epoch": 1.6625668449197861, "grad_norm": 0.169921875, "learning_rate": 3.311986634682953e-06, "loss": 1.036, "num_tokens": 20237765000.0, "step": 9327 }, { "epoch": 1.6627450980392156, "grad_norm": 0.169921875, "learning_rate": 3.3106362258861613e-06, "loss": 1.028, "num_tokens": 20244002859.0, "step": 9328 }, { "epoch": 1.6629233511586454, "grad_norm": 0.1708984375, "learning_rate": 3.309286457845678e-06, "loss": 0.9833, "num_tokens": 20250274059.0, "step": 9329 }, { "epoch": 1.6631016042780749, "grad_norm": 0.1728515625, "learning_rate": 3.307937330673979e-06, "loss": 1.0128, "num_tokens": 20256500272.0, "step": 9330 }, { "epoch": 1.6632798573975045, "grad_norm": 0.17578125, "learning_rate": 3.3065888444834902e-06, "loss": 1.0204, "num_tokens": 20262785315.0, "step": 9331 }, { "epoch": 1.6634581105169342, "grad_norm": 0.1708984375, "learning_rate": 3.305240999386582e-06, "loss": 1.0272, "num_tokens": 20269071060.0, "step": 9332 }, { "epoch": 1.6636363636363636, "grad_norm": 0.169921875, "learning_rate": 3.3038937954955675e-06, "loss": 1.0025, "num_tokens": 20275340042.0, "step": 9333 }, { "epoch": 1.6638146167557932, "grad_norm": 0.1708984375, "learning_rate": 3.302547232922708e-06, "loss": 1.0095, "num_tokens": 20281610156.0, "step": 9334 }, { "epoch": 1.6639928698752229, "grad_norm": 0.173828125, "learning_rate": 3.30120131178022e-06, "loss": 0.9677, "num_tokens": 20287895163.0, "step": 9335 }, { "epoch": 1.6641711229946523, "grad_norm": 0.169921875, "learning_rate": 3.299856032180253e-06, "loss": 1.028, "num_tokens": 20294175494.0, "step": 9336 }, { "epoch": 1.664349376114082, "grad_norm": 0.1728515625, "learning_rate": 3.298511394234909e-06, "loss": 1.0329, "num_tokens": 20300458402.0, "step": 9337 }, { "epoch": 1.6645276292335116, "grad_norm": 0.1728515625, "learning_rate": 3.2971673980562418e-06, "loss": 0.9911, "num_tokens": 20306716995.0, "step": 9338 }, { "epoch": 1.664705882352941, "grad_norm": 0.1796875, "learning_rate": 3.2958240437562432e-06, "loss": 1.0186, "num_tokens": 20312971760.0, "step": 9339 }, { "epoch": 1.664884135472371, "grad_norm": 0.1689453125, "learning_rate": 3.294481331446857e-06, "loss": 1.0263, "num_tokens": 20319230435.0, "step": 9340 }, { "epoch": 1.6650623885918003, "grad_norm": 0.171875, "learning_rate": 3.2931392612399694e-06, "loss": 1.0117, "num_tokens": 20325508574.0, "step": 9341 }, { "epoch": 1.66524064171123, "grad_norm": 0.1708984375, "learning_rate": 3.291797833247422e-06, "loss": 0.9902, "num_tokens": 20331793288.0, "step": 9342 }, { "epoch": 1.6654188948306596, "grad_norm": 0.1689453125, "learning_rate": 3.290457047580988e-06, "loss": 1.0155, "num_tokens": 20338042529.0, "step": 9343 }, { "epoch": 1.665597147950089, "grad_norm": 0.173828125, "learning_rate": 3.289116904352396e-06, "loss": 1.0313, "num_tokens": 20344325841.0, "step": 9344 }, { "epoch": 1.6657754010695187, "grad_norm": 0.1708984375, "learning_rate": 3.2877774036733255e-06, "loss": 1.0419, "num_tokens": 20350607640.0, "step": 9345 }, { "epoch": 1.6659536541889484, "grad_norm": 0.16796875, "learning_rate": 3.286438545655396e-06, "loss": 1.0041, "num_tokens": 20356892401.0, "step": 9346 }, { "epoch": 1.6661319073083778, "grad_norm": 0.171875, "learning_rate": 3.285100330410174e-06, "loss": 1.0049, "num_tokens": 20363175594.0, "step": 9347 }, { "epoch": 1.6663101604278074, "grad_norm": 0.1748046875, "learning_rate": 3.2837627580491694e-06, "loss": 1.0194, "num_tokens": 20369460166.0, "step": 9348 }, { "epoch": 1.666488413547237, "grad_norm": 0.173828125, "learning_rate": 3.2824258286838497e-06, "loss": 1.0183, "num_tokens": 20375724531.0, "step": 9349 }, { "epoch": 1.6666666666666665, "grad_norm": 0.166015625, "learning_rate": 3.2810895424256186e-06, "loss": 1.0174, "num_tokens": 20382009607.0, "step": 9350 }, { "epoch": 1.6668449197860964, "grad_norm": 0.17578125, "learning_rate": 3.279753899385825e-06, "loss": 1.0132, "num_tokens": 20388291009.0, "step": 9351 }, { "epoch": 1.6670231729055258, "grad_norm": 0.177734375, "learning_rate": 3.2784188996757738e-06, "loss": 1.0273, "num_tokens": 20394550922.0, "step": 9352 }, { "epoch": 1.6672014260249555, "grad_norm": 0.1748046875, "learning_rate": 3.277084543406708e-06, "loss": 0.9951, "num_tokens": 20400835120.0, "step": 9353 }, { "epoch": 1.6673796791443851, "grad_norm": 0.1767578125, "learning_rate": 3.2757508306898213e-06, "loss": 1.0533, "num_tokens": 20407094604.0, "step": 9354 }, { "epoch": 1.6675579322638145, "grad_norm": 0.171875, "learning_rate": 3.274417761636248e-06, "loss": 0.9949, "num_tokens": 20413378499.0, "step": 9355 }, { "epoch": 1.6677361853832442, "grad_norm": 0.1689453125, "learning_rate": 3.2730853363570792e-06, "loss": 1.0327, "num_tokens": 20419661572.0, "step": 9356 }, { "epoch": 1.6679144385026738, "grad_norm": 0.169921875, "learning_rate": 3.2717535549633437e-06, "loss": 1.0351, "num_tokens": 20425943408.0, "step": 9357 }, { "epoch": 1.6680926916221033, "grad_norm": 0.1708984375, "learning_rate": 3.270422417566019e-06, "loss": 0.9856, "num_tokens": 20432217933.0, "step": 9358 }, { "epoch": 1.668270944741533, "grad_norm": 0.1728515625, "learning_rate": 3.2690919242760287e-06, "loss": 1.0027, "num_tokens": 20438476788.0, "step": 9359 }, { "epoch": 1.6684491978609626, "grad_norm": 0.1748046875, "learning_rate": 3.267762075204244e-06, "loss": 0.9838, "num_tokens": 20444750941.0, "step": 9360 }, { "epoch": 1.668627450980392, "grad_norm": 0.1728515625, "learning_rate": 3.26643287046148e-06, "loss": 0.9906, "num_tokens": 20451034233.0, "step": 9361 }, { "epoch": 1.6688057040998219, "grad_norm": 0.1728515625, "learning_rate": 3.265104310158499e-06, "loss": 1.0231, "num_tokens": 20457295965.0, "step": 9362 }, { "epoch": 1.6689839572192513, "grad_norm": 0.171875, "learning_rate": 3.263776394406013e-06, "loss": 0.9964, "num_tokens": 20463565790.0, "step": 9363 }, { "epoch": 1.669162210338681, "grad_norm": 0.1845703125, "learning_rate": 3.262449123314676e-06, "loss": 1.0353, "num_tokens": 20469848054.0, "step": 9364 }, { "epoch": 1.6693404634581106, "grad_norm": 0.1748046875, "learning_rate": 3.2611224969950904e-06, "loss": 1.022, "num_tokens": 20476103806.0, "step": 9365 }, { "epoch": 1.66951871657754, "grad_norm": 0.171875, "learning_rate": 3.259796515557805e-06, "loss": 0.9956, "num_tokens": 20482357181.0, "step": 9366 }, { "epoch": 1.6696969696969697, "grad_norm": 0.1708984375, "learning_rate": 3.2584711791133105e-06, "loss": 1.0195, "num_tokens": 20488638959.0, "step": 9367 }, { "epoch": 1.6698752228163993, "grad_norm": 0.1650390625, "learning_rate": 3.2571464877720513e-06, "loss": 1.0066, "num_tokens": 20494899935.0, "step": 9368 }, { "epoch": 1.6700534759358288, "grad_norm": 0.1806640625, "learning_rate": 3.2558224416444106e-06, "loss": 1.0227, "num_tokens": 20501183678.0, "step": 9369 }, { "epoch": 1.6702317290552586, "grad_norm": 0.16796875, "learning_rate": 3.254499040840725e-06, "loss": 1.0156, "num_tokens": 20507448865.0, "step": 9370 }, { "epoch": 1.670409982174688, "grad_norm": 0.1689453125, "learning_rate": 3.2531762854712733e-06, "loss": 1.0057, "num_tokens": 20513732118.0, "step": 9371 }, { "epoch": 1.6705882352941175, "grad_norm": 0.1748046875, "learning_rate": 3.2518541756462794e-06, "loss": 1.0316, "num_tokens": 20519988540.0, "step": 9372 }, { "epoch": 1.6707664884135474, "grad_norm": 0.173828125, "learning_rate": 3.2505327114759167e-06, "loss": 1.042, "num_tokens": 20526271243.0, "step": 9373 }, { "epoch": 1.6709447415329768, "grad_norm": 0.173828125, "learning_rate": 3.2492118930703016e-06, "loss": 1.0282, "num_tokens": 20532554865.0, "step": 9374 }, { "epoch": 1.6711229946524064, "grad_norm": 0.1806640625, "learning_rate": 3.2478917205394976e-06, "loss": 1.0193, "num_tokens": 20538811468.0, "step": 9375 }, { "epoch": 1.671301247771836, "grad_norm": 0.17578125, "learning_rate": 3.246572193993515e-06, "loss": 1.0811, "num_tokens": 20545062247.0, "step": 9376 }, { "epoch": 1.6714795008912655, "grad_norm": 0.171875, "learning_rate": 3.2452533135423124e-06, "loss": 1.0088, "num_tokens": 20551306464.0, "step": 9377 }, { "epoch": 1.6716577540106952, "grad_norm": 0.1767578125, "learning_rate": 3.2439350792957922e-06, "loss": 1.0046, "num_tokens": 20557589815.0, "step": 9378 }, { "epoch": 1.6718360071301248, "grad_norm": 0.1748046875, "learning_rate": 3.2426174913638008e-06, "loss": 1.0186, "num_tokens": 20563873172.0, "step": 9379 }, { "epoch": 1.6720142602495542, "grad_norm": 0.1708984375, "learning_rate": 3.2413005498561334e-06, "loss": 0.9924, "num_tokens": 20570125808.0, "step": 9380 }, { "epoch": 1.6721925133689841, "grad_norm": 0.169921875, "learning_rate": 3.2399842548825356e-06, "loss": 1.0115, "num_tokens": 20576409652.0, "step": 9381 }, { "epoch": 1.6723707664884135, "grad_norm": 0.1708984375, "learning_rate": 3.238668606552689e-06, "loss": 1.0357, "num_tokens": 20582673606.0, "step": 9382 }, { "epoch": 1.6725490196078432, "grad_norm": 0.1689453125, "learning_rate": 3.237353604976228e-06, "loss": 0.9987, "num_tokens": 20588955839.0, "step": 9383 }, { "epoch": 1.6727272727272728, "grad_norm": 0.17578125, "learning_rate": 3.236039250262733e-06, "loss": 1.0007, "num_tokens": 20595229038.0, "step": 9384 }, { "epoch": 1.6729055258467023, "grad_norm": 0.1728515625, "learning_rate": 3.2347255425217306e-06, "loss": 1.0228, "num_tokens": 20601507221.0, "step": 9385 }, { "epoch": 1.673083778966132, "grad_norm": 0.169921875, "learning_rate": 3.2334124818626905e-06, "loss": 1.0403, "num_tokens": 20607790476.0, "step": 9386 }, { "epoch": 1.6732620320855616, "grad_norm": 0.1708984375, "learning_rate": 3.2321000683950284e-06, "loss": 1.0094, "num_tokens": 20614073468.0, "step": 9387 }, { "epoch": 1.673440285204991, "grad_norm": 0.173828125, "learning_rate": 3.230788302228115e-06, "loss": 1.0035, "num_tokens": 20620328776.0, "step": 9388 }, { "epoch": 1.6736185383244206, "grad_norm": 0.1787109375, "learning_rate": 3.2294771834712536e-06, "loss": 1.0237, "num_tokens": 20626583488.0, "step": 9389 }, { "epoch": 1.6737967914438503, "grad_norm": 0.1728515625, "learning_rate": 3.2281667122336983e-06, "loss": 0.9965, "num_tokens": 20632837406.0, "step": 9390 }, { "epoch": 1.6739750445632797, "grad_norm": 0.1708984375, "learning_rate": 3.2268568886246574e-06, "loss": 1.0337, "num_tokens": 20639119419.0, "step": 9391 }, { "epoch": 1.6741532976827096, "grad_norm": 0.16796875, "learning_rate": 3.2255477127532763e-06, "loss": 1.0123, "num_tokens": 20645404316.0, "step": 9392 }, { "epoch": 1.674331550802139, "grad_norm": 0.171875, "learning_rate": 3.2242391847286485e-06, "loss": 1.0597, "num_tokens": 20651688124.0, "step": 9393 }, { "epoch": 1.6745098039215687, "grad_norm": 0.1767578125, "learning_rate": 3.2229313046598117e-06, "loss": 1.0286, "num_tokens": 20657954459.0, "step": 9394 }, { "epoch": 1.6746880570409983, "grad_norm": 0.1708984375, "learning_rate": 3.221624072655756e-06, "loss": 1.0425, "num_tokens": 20664201885.0, "step": 9395 }, { "epoch": 1.6748663101604278, "grad_norm": 0.173828125, "learning_rate": 3.220317488825413e-06, "loss": 1.0165, "num_tokens": 20670472384.0, "step": 9396 }, { "epoch": 1.6750445632798574, "grad_norm": 0.169921875, "learning_rate": 3.2190115532776556e-06, "loss": 0.9938, "num_tokens": 20676757529.0, "step": 9397 }, { "epoch": 1.675222816399287, "grad_norm": 0.1708984375, "learning_rate": 3.2177062661213123e-06, "loss": 1.0202, "num_tokens": 20683033060.0, "step": 9398 }, { "epoch": 1.6754010695187165, "grad_norm": 0.17578125, "learning_rate": 3.2164016274651523e-06, "loss": 1.0091, "num_tokens": 20689284293.0, "step": 9399 }, { "epoch": 1.6755793226381461, "grad_norm": 0.1728515625, "learning_rate": 3.2150976374178898e-06, "loss": 1.0065, "num_tokens": 20695567743.0, "step": 9400 }, { "epoch": 1.6757575757575758, "grad_norm": 0.1748046875, "learning_rate": 3.213794296088187e-06, "loss": 1.0352, "num_tokens": 20701848973.0, "step": 9401 }, { "epoch": 1.6759358288770052, "grad_norm": 0.169921875, "learning_rate": 3.2124916035846534e-06, "loss": 1.0157, "num_tokens": 20708134962.0, "step": 9402 }, { "epoch": 1.676114081996435, "grad_norm": 0.1689453125, "learning_rate": 3.2111895600158417e-06, "loss": 1.0359, "num_tokens": 20714374328.0, "step": 9403 }, { "epoch": 1.6762923351158645, "grad_norm": 0.1767578125, "learning_rate": 3.209888165490251e-06, "loss": 0.9663, "num_tokens": 20720652125.0, "step": 9404 }, { "epoch": 1.6764705882352942, "grad_norm": 0.1728515625, "learning_rate": 3.208587420116327e-06, "loss": 1.008, "num_tokens": 20726936868.0, "step": 9405 }, { "epoch": 1.6766488413547238, "grad_norm": 0.169921875, "learning_rate": 3.2072873240024606e-06, "loss": 1.023, "num_tokens": 20733184329.0, "step": 9406 }, { "epoch": 1.6768270944741532, "grad_norm": 0.171875, "learning_rate": 3.20598787725699e-06, "loss": 1.004, "num_tokens": 20739444015.0, "step": 9407 }, { "epoch": 1.6770053475935829, "grad_norm": 0.1748046875, "learning_rate": 3.204689079988195e-06, "loss": 1.0046, "num_tokens": 20745706191.0, "step": 9408 }, { "epoch": 1.6771836007130125, "grad_norm": 0.17578125, "learning_rate": 3.2033909323043112e-06, "loss": 1.0082, "num_tokens": 20751974353.0, "step": 9409 }, { "epoch": 1.677361853832442, "grad_norm": 0.1806640625, "learning_rate": 3.2020934343135084e-06, "loss": 1.0184, "num_tokens": 20758229060.0, "step": 9410 }, { "epoch": 1.6775401069518716, "grad_norm": 0.16796875, "learning_rate": 3.20079658612391e-06, "loss": 1.0227, "num_tokens": 20764513565.0, "step": 9411 }, { "epoch": 1.6777183600713013, "grad_norm": 0.17578125, "learning_rate": 3.199500387843581e-06, "loss": 1.0363, "num_tokens": 20770789727.0, "step": 9412 }, { "epoch": 1.6778966131907307, "grad_norm": 0.16796875, "learning_rate": 3.1982048395805335e-06, "loss": 0.9842, "num_tokens": 20777060555.0, "step": 9413 }, { "epoch": 1.6780748663101606, "grad_norm": 0.17578125, "learning_rate": 3.196909941442728e-06, "loss": 1.0424, "num_tokens": 20783345243.0, "step": 9414 }, { "epoch": 1.67825311942959, "grad_norm": 0.1689453125, "learning_rate": 3.1956156935380646e-06, "loss": 1.0314, "num_tokens": 20789629619.0, "step": 9415 }, { "epoch": 1.6784313725490196, "grad_norm": 0.171875, "learning_rate": 3.194322095974398e-06, "loss": 1.0329, "num_tokens": 20795882706.0, "step": 9416 }, { "epoch": 1.6786096256684493, "grad_norm": 0.17578125, "learning_rate": 3.193029148859522e-06, "loss": 1.0142, "num_tokens": 20802106111.0, "step": 9417 }, { "epoch": 1.6787878787878787, "grad_norm": 0.166015625, "learning_rate": 3.1917368523011784e-06, "loss": 1.0063, "num_tokens": 20808388769.0, "step": 9418 }, { "epoch": 1.6789661319073084, "grad_norm": 0.169921875, "learning_rate": 3.190445206407054e-06, "loss": 1.0093, "num_tokens": 20814669818.0, "step": 9419 }, { "epoch": 1.679144385026738, "grad_norm": 0.169921875, "learning_rate": 3.1891542112847817e-06, "loss": 1.0546, "num_tokens": 20820945113.0, "step": 9420 }, { "epoch": 1.6793226381461674, "grad_norm": 0.173828125, "learning_rate": 3.187863867041942e-06, "loss": 1.0379, "num_tokens": 20827210062.0, "step": 9421 }, { "epoch": 1.679500891265597, "grad_norm": 0.1708984375, "learning_rate": 3.1865741737860554e-06, "loss": 0.9912, "num_tokens": 20833475053.0, "step": 9422 }, { "epoch": 1.6796791443850267, "grad_norm": 0.17578125, "learning_rate": 3.185285131624598e-06, "loss": 0.9719, "num_tokens": 20839727554.0, "step": 9423 }, { "epoch": 1.6798573975044562, "grad_norm": 0.1689453125, "learning_rate": 3.1839967406649822e-06, "loss": 1.0302, "num_tokens": 20846000455.0, "step": 9424 }, { "epoch": 1.680035650623886, "grad_norm": 0.1806640625, "learning_rate": 3.1827090010145723e-06, "loss": 1.0071, "num_tokens": 20852257168.0, "step": 9425 }, { "epoch": 1.6802139037433155, "grad_norm": 0.173828125, "learning_rate": 3.181421912780672e-06, "loss": 1.0345, "num_tokens": 20858512885.0, "step": 9426 }, { "epoch": 1.6803921568627451, "grad_norm": 0.1787109375, "learning_rate": 3.180135476070541e-06, "loss": 1.0481, "num_tokens": 20864780872.0, "step": 9427 }, { "epoch": 1.6805704099821748, "grad_norm": 0.173828125, "learning_rate": 3.178849690991373e-06, "loss": 1.0578, "num_tokens": 20871035692.0, "step": 9428 }, { "epoch": 1.6807486631016042, "grad_norm": 0.1708984375, "learning_rate": 3.1775645576503125e-06, "loss": 1.0319, "num_tokens": 20877317053.0, "step": 9429 }, { "epoch": 1.6809269162210339, "grad_norm": 0.1728515625, "learning_rate": 3.176280076154452e-06, "loss": 1.0263, "num_tokens": 20883520347.0, "step": 9430 }, { "epoch": 1.6811051693404635, "grad_norm": 0.1708984375, "learning_rate": 3.1749962466108296e-06, "loss": 1.0197, "num_tokens": 20889773745.0, "step": 9431 }, { "epoch": 1.681283422459893, "grad_norm": 0.173828125, "learning_rate": 3.173713069126424e-06, "loss": 1.0053, "num_tokens": 20896022980.0, "step": 9432 }, { "epoch": 1.6814616755793228, "grad_norm": 0.1689453125, "learning_rate": 3.1724305438081607e-06, "loss": 1.0359, "num_tokens": 20902276671.0, "step": 9433 }, { "epoch": 1.6816399286987522, "grad_norm": 0.166015625, "learning_rate": 3.1711486707629185e-06, "loss": 1.0045, "num_tokens": 20908533291.0, "step": 9434 }, { "epoch": 1.6818181818181817, "grad_norm": 0.1728515625, "learning_rate": 3.1698674500975145e-06, "loss": 1.053, "num_tokens": 20914817429.0, "step": 9435 }, { "epoch": 1.6819964349376115, "grad_norm": 0.1708984375, "learning_rate": 3.168586881918711e-06, "loss": 0.9901, "num_tokens": 20921103080.0, "step": 9436 }, { "epoch": 1.682174688057041, "grad_norm": 0.1708984375, "learning_rate": 3.167306966333216e-06, "loss": 1.0203, "num_tokens": 20927358791.0, "step": 9437 }, { "epoch": 1.6823529411764706, "grad_norm": 0.1689453125, "learning_rate": 3.16602770344769e-06, "loss": 1.0371, "num_tokens": 20933609620.0, "step": 9438 }, { "epoch": 1.6825311942959003, "grad_norm": 0.1728515625, "learning_rate": 3.164749093368733e-06, "loss": 1.0198, "num_tokens": 20939870372.0, "step": 9439 }, { "epoch": 1.6827094474153297, "grad_norm": 0.181640625, "learning_rate": 3.1634711362028887e-06, "loss": 0.9934, "num_tokens": 20946130495.0, "step": 9440 }, { "epoch": 1.6828877005347593, "grad_norm": 0.169921875, "learning_rate": 3.162193832056653e-06, "loss": 1.0369, "num_tokens": 20952412351.0, "step": 9441 }, { "epoch": 1.683065953654189, "grad_norm": 0.1708984375, "learning_rate": 3.1609171810364652e-06, "loss": 1.0206, "num_tokens": 20958697652.0, "step": 9442 }, { "epoch": 1.6832442067736184, "grad_norm": 0.1728515625, "learning_rate": 3.1596411832487056e-06, "loss": 1.0285, "num_tokens": 20964966739.0, "step": 9443 }, { "epoch": 1.6834224598930483, "grad_norm": 0.1708984375, "learning_rate": 3.1583658387997017e-06, "loss": 1.0263, "num_tokens": 20971250679.0, "step": 9444 }, { "epoch": 1.6836007130124777, "grad_norm": 0.1669921875, "learning_rate": 3.1570911477957324e-06, "loss": 0.9944, "num_tokens": 20977533050.0, "step": 9445 }, { "epoch": 1.6837789661319074, "grad_norm": 0.1708984375, "learning_rate": 3.155817110343017e-06, "loss": 0.9971, "num_tokens": 20983816829.0, "step": 9446 }, { "epoch": 1.683957219251337, "grad_norm": 0.1689453125, "learning_rate": 3.1545437265477208e-06, "loss": 1.0002, "num_tokens": 20990102205.0, "step": 9447 }, { "epoch": 1.6841354723707664, "grad_norm": 0.173828125, "learning_rate": 3.1532709965159525e-06, "loss": 1.0101, "num_tokens": 20996384884.0, "step": 9448 }, { "epoch": 1.684313725490196, "grad_norm": 0.171875, "learning_rate": 3.151998920353774e-06, "loss": 1.0096, "num_tokens": 21002644367.0, "step": 9449 }, { "epoch": 1.6844919786096257, "grad_norm": 0.169921875, "learning_rate": 3.1507274981671863e-06, "loss": 0.9874, "num_tokens": 21008916207.0, "step": 9450 }, { "epoch": 1.6846702317290552, "grad_norm": 0.1728515625, "learning_rate": 3.1494567300621333e-06, "loss": 1.0142, "num_tokens": 21015177239.0, "step": 9451 }, { "epoch": 1.6848484848484848, "grad_norm": 0.1708984375, "learning_rate": 3.148186616144513e-06, "loss": 1.0381, "num_tokens": 21021455822.0, "step": 9452 }, { "epoch": 1.6850267379679145, "grad_norm": 0.16796875, "learning_rate": 3.1469171565201615e-06, "loss": 1.0169, "num_tokens": 21027729188.0, "step": 9453 }, { "epoch": 1.685204991087344, "grad_norm": 0.169921875, "learning_rate": 3.1456483512948655e-06, "loss": 0.9906, "num_tokens": 21034006272.0, "step": 9454 }, { "epoch": 1.6853832442067738, "grad_norm": 0.17578125, "learning_rate": 3.1443802005743507e-06, "loss": 1.0245, "num_tokens": 21040250764.0, "step": 9455 }, { "epoch": 1.6855614973262032, "grad_norm": 0.1708984375, "learning_rate": 3.1431127044642977e-06, "loss": 1.018, "num_tokens": 21046534729.0, "step": 9456 }, { "epoch": 1.6857397504456328, "grad_norm": 0.171875, "learning_rate": 3.1418458630703242e-06, "loss": 1.0039, "num_tokens": 21052789969.0, "step": 9457 }, { "epoch": 1.6859180035650625, "grad_norm": 0.1767578125, "learning_rate": 3.1405796764979974e-06, "loss": 1.0496, "num_tokens": 21059019160.0, "step": 9458 }, { "epoch": 1.686096256684492, "grad_norm": 0.1728515625, "learning_rate": 3.1393141448528273e-06, "loss": 0.9841, "num_tokens": 21065304022.0, "step": 9459 }, { "epoch": 1.6862745098039216, "grad_norm": 0.173828125, "learning_rate": 3.1380492682402718e-06, "loss": 1.0103, "num_tokens": 21071568004.0, "step": 9460 }, { "epoch": 1.6864527629233512, "grad_norm": 0.1748046875, "learning_rate": 3.136785046765735e-06, "loss": 0.9968, "num_tokens": 21077838468.0, "step": 9461 }, { "epoch": 1.6866310160427807, "grad_norm": 0.173828125, "learning_rate": 3.1355214805345602e-06, "loss": 1.0375, "num_tokens": 21084098127.0, "step": 9462 }, { "epoch": 1.6868092691622103, "grad_norm": 0.17578125, "learning_rate": 3.134258569652046e-06, "loss": 1.0358, "num_tokens": 21090366397.0, "step": 9463 }, { "epoch": 1.68698752228164, "grad_norm": 0.1708984375, "learning_rate": 3.1329963142234277e-06, "loss": 1.0212, "num_tokens": 21096618707.0, "step": 9464 }, { "epoch": 1.6871657754010694, "grad_norm": 0.1669921875, "learning_rate": 3.1317347143538916e-06, "loss": 1.0222, "num_tokens": 21102903608.0, "step": 9465 }, { "epoch": 1.6873440285204993, "grad_norm": 0.17578125, "learning_rate": 3.1304737701485656e-06, "loss": 1.019, "num_tokens": 21109145478.0, "step": 9466 }, { "epoch": 1.6875222816399287, "grad_norm": 0.1748046875, "learning_rate": 3.1292134817125242e-06, "loss": 1.0251, "num_tokens": 21115430576.0, "step": 9467 }, { "epoch": 1.6877005347593583, "grad_norm": 0.171875, "learning_rate": 3.1279538491507877e-06, "loss": 0.9919, "num_tokens": 21121716117.0, "step": 9468 }, { "epoch": 1.687878787878788, "grad_norm": 0.1767578125, "learning_rate": 3.1266948725683207e-06, "loss": 0.9994, "num_tokens": 21127973718.0, "step": 9469 }, { "epoch": 1.6880570409982174, "grad_norm": 0.1767578125, "learning_rate": 3.125436552070036e-06, "loss": 1.0213, "num_tokens": 21134227325.0, "step": 9470 }, { "epoch": 1.688235294117647, "grad_norm": 0.169921875, "learning_rate": 3.1241788877607883e-06, "loss": 1.0043, "num_tokens": 21140510561.0, "step": 9471 }, { "epoch": 1.6884135472370767, "grad_norm": 0.1748046875, "learning_rate": 3.122921879745381e-06, "loss": 1.0215, "num_tokens": 21146760921.0, "step": 9472 }, { "epoch": 1.6885918003565061, "grad_norm": 0.171875, "learning_rate": 3.1216655281285584e-06, "loss": 1.0083, "num_tokens": 21153018482.0, "step": 9473 }, { "epoch": 1.6887700534759358, "grad_norm": 0.1748046875, "learning_rate": 3.1204098330150135e-06, "loss": 1.0165, "num_tokens": 21159279636.0, "step": 9474 }, { "epoch": 1.6889483065953654, "grad_norm": 0.171875, "learning_rate": 3.1191547945093847e-06, "loss": 1.0093, "num_tokens": 21165565504.0, "step": 9475 }, { "epoch": 1.6891265597147949, "grad_norm": 0.169921875, "learning_rate": 3.11790041271625e-06, "loss": 1.027, "num_tokens": 21171817903.0, "step": 9476 }, { "epoch": 1.6893048128342247, "grad_norm": 0.169921875, "learning_rate": 3.116646687740144e-06, "loss": 1.0035, "num_tokens": 21178100871.0, "step": 9477 }, { "epoch": 1.6894830659536542, "grad_norm": 0.1748046875, "learning_rate": 3.1153936196855365e-06, "loss": 0.9921, "num_tokens": 21184384234.0, "step": 9478 }, { "epoch": 1.6896613190730838, "grad_norm": 0.1748046875, "learning_rate": 3.1141412086568456e-06, "loss": 1.0363, "num_tokens": 21190639925.0, "step": 9479 }, { "epoch": 1.6898395721925135, "grad_norm": 0.169921875, "learning_rate": 3.112889454758434e-06, "loss": 1.0335, "num_tokens": 21196903158.0, "step": 9480 }, { "epoch": 1.690017825311943, "grad_norm": 0.173828125, "learning_rate": 3.111638358094616e-06, "loss": 0.9967, "num_tokens": 21203180936.0, "step": 9481 }, { "epoch": 1.6901960784313725, "grad_norm": 0.1748046875, "learning_rate": 3.1103879187696395e-06, "loss": 1.0498, "num_tokens": 21209463865.0, "step": 9482 }, { "epoch": 1.6903743315508022, "grad_norm": 0.171875, "learning_rate": 3.109138136887705e-06, "loss": 1.0167, "num_tokens": 21215743446.0, "step": 9483 }, { "epoch": 1.6905525846702316, "grad_norm": 0.171875, "learning_rate": 3.1078890125529603e-06, "loss": 1.0193, "num_tokens": 21222026488.0, "step": 9484 }, { "epoch": 1.6907308377896613, "grad_norm": 0.17578125, "learning_rate": 3.1066405458694925e-06, "loss": 1.0047, "num_tokens": 21228310456.0, "step": 9485 }, { "epoch": 1.690909090909091, "grad_norm": 0.1689453125, "learning_rate": 3.1053927369413385e-06, "loss": 1.027, "num_tokens": 21234593292.0, "step": 9486 }, { "epoch": 1.6910873440285203, "grad_norm": 0.173828125, "learning_rate": 3.1041455858724746e-06, "loss": 1.0095, "num_tokens": 21240877570.0, "step": 9487 }, { "epoch": 1.6912655971479502, "grad_norm": 0.169921875, "learning_rate": 3.1028990927668317e-06, "loss": 1.0027, "num_tokens": 21247163178.0, "step": 9488 }, { "epoch": 1.6914438502673796, "grad_norm": 0.177734375, "learning_rate": 3.1016532577282777e-06, "loss": 1.0311, "num_tokens": 21253447696.0, "step": 9489 }, { "epoch": 1.6916221033868093, "grad_norm": 0.1767578125, "learning_rate": 3.1004080808606245e-06, "loss": 1.0144, "num_tokens": 21259717446.0, "step": 9490 }, { "epoch": 1.691800356506239, "grad_norm": 0.1708984375, "learning_rate": 3.0991635622676396e-06, "loss": 1.0172, "num_tokens": 21265981664.0, "step": 9491 }, { "epoch": 1.6919786096256684, "grad_norm": 0.1650390625, "learning_rate": 3.097919702053026e-06, "loss": 1.0366, "num_tokens": 21272262128.0, "step": 9492 }, { "epoch": 1.692156862745098, "grad_norm": 0.166015625, "learning_rate": 3.0966765003204336e-06, "loss": 1.0036, "num_tokens": 21278522484.0, "step": 9493 }, { "epoch": 1.6923351158645277, "grad_norm": 0.1728515625, "learning_rate": 3.095433957173459e-06, "loss": 1.0107, "num_tokens": 21284748706.0, "step": 9494 }, { "epoch": 1.692513368983957, "grad_norm": 0.169921875, "learning_rate": 3.0941920727156454e-06, "loss": 1.0107, "num_tokens": 21291033549.0, "step": 9495 }, { "epoch": 1.692691622103387, "grad_norm": 0.1728515625, "learning_rate": 3.092950847050481e-06, "loss": 1.0427, "num_tokens": 21297292503.0, "step": 9496 }, { "epoch": 1.6928698752228164, "grad_norm": 0.173828125, "learning_rate": 3.0917102802813915e-06, "loss": 1.0161, "num_tokens": 21303578665.0, "step": 9497 }, { "epoch": 1.6930481283422458, "grad_norm": 0.1728515625, "learning_rate": 3.0904703725117574e-06, "loss": 0.9942, "num_tokens": 21309856962.0, "step": 9498 }, { "epoch": 1.6932263814616757, "grad_norm": 0.1689453125, "learning_rate": 3.0892311238449013e-06, "loss": 1.0377, "num_tokens": 21316141814.0, "step": 9499 }, { "epoch": 1.6934046345811051, "grad_norm": 0.1708984375, "learning_rate": 3.0879925343840872e-06, "loss": 0.9893, "num_tokens": 21322396710.0, "step": 9500 }, { "epoch": 1.6935828877005348, "grad_norm": 0.1708984375, "learning_rate": 3.0867546042325282e-06, "loss": 1.0199, "num_tokens": 21328664203.0, "step": 9501 }, { "epoch": 1.6937611408199644, "grad_norm": 0.1708984375, "learning_rate": 3.085517333493382e-06, "loss": 1.0527, "num_tokens": 21334916681.0, "step": 9502 }, { "epoch": 1.6939393939393939, "grad_norm": 0.1689453125, "learning_rate": 3.0842807222697504e-06, "loss": 1.0208, "num_tokens": 21341195141.0, "step": 9503 }, { "epoch": 1.6941176470588235, "grad_norm": 0.177734375, "learning_rate": 3.0830447706646802e-06, "loss": 1.0376, "num_tokens": 21347478768.0, "step": 9504 }, { "epoch": 1.6942959001782532, "grad_norm": 0.16796875, "learning_rate": 3.0818094787811653e-06, "loss": 1.0344, "num_tokens": 21353761532.0, "step": 9505 }, { "epoch": 1.6944741532976826, "grad_norm": 0.1748046875, "learning_rate": 3.0805748467221386e-06, "loss": 1.0277, "num_tokens": 21360009930.0, "step": 9506 }, { "epoch": 1.6946524064171125, "grad_norm": 0.1728515625, "learning_rate": 3.0793408745904863e-06, "loss": 1.0384, "num_tokens": 21366294712.0, "step": 9507 }, { "epoch": 1.6948306595365419, "grad_norm": 0.1689453125, "learning_rate": 3.0781075624890323e-06, "loss": 1.0273, "num_tokens": 21372554714.0, "step": 9508 }, { "epoch": 1.6950089126559715, "grad_norm": 0.1689453125, "learning_rate": 3.076874910520552e-06, "loss": 1.0283, "num_tokens": 21378824777.0, "step": 9509 }, { "epoch": 1.6951871657754012, "grad_norm": 0.1708984375, "learning_rate": 3.07564291878776e-06, "loss": 1.0487, "num_tokens": 21385049402.0, "step": 9510 }, { "epoch": 1.6953654188948306, "grad_norm": 0.1728515625, "learning_rate": 3.0744115873933205e-06, "loss": 1.0681, "num_tokens": 21391334102.0, "step": 9511 }, { "epoch": 1.6955436720142603, "grad_norm": 0.169921875, "learning_rate": 3.0731809164398395e-06, "loss": 1.0299, "num_tokens": 21397563335.0, "step": 9512 }, { "epoch": 1.69572192513369, "grad_norm": 0.171875, "learning_rate": 3.0719509060298685e-06, "loss": 1.0287, "num_tokens": 21403842875.0, "step": 9513 }, { "epoch": 1.6959001782531193, "grad_norm": 0.1748046875, "learning_rate": 3.0707215562659064e-06, "loss": 1.0492, "num_tokens": 21410108683.0, "step": 9514 }, { "epoch": 1.696078431372549, "grad_norm": 0.1748046875, "learning_rate": 3.0694928672503915e-06, "loss": 1.0142, "num_tokens": 21416389929.0, "step": 9515 }, { "epoch": 1.6962566844919786, "grad_norm": 0.17578125, "learning_rate": 3.0682648390857146e-06, "loss": 1.0069, "num_tokens": 21422673643.0, "step": 9516 }, { "epoch": 1.696434937611408, "grad_norm": 0.17578125, "learning_rate": 3.0670374718742067e-06, "loss": 1.0268, "num_tokens": 21428937580.0, "step": 9517 }, { "epoch": 1.696613190730838, "grad_norm": 0.1708984375, "learning_rate": 3.0658107657181437e-06, "loss": 1.0235, "num_tokens": 21435219224.0, "step": 9518 }, { "epoch": 1.6967914438502674, "grad_norm": 0.17578125, "learning_rate": 3.0645847207197475e-06, "loss": 1.0149, "num_tokens": 21441483582.0, "step": 9519 }, { "epoch": 1.696969696969697, "grad_norm": 0.1728515625, "learning_rate": 3.0633593369811844e-06, "loss": 1.0253, "num_tokens": 21447740795.0, "step": 9520 }, { "epoch": 1.6971479500891267, "grad_norm": 0.1708984375, "learning_rate": 3.0621346146045667e-06, "loss": 0.9804, "num_tokens": 21454025229.0, "step": 9521 }, { "epoch": 1.697326203208556, "grad_norm": 0.1728515625, "learning_rate": 3.0609105536919487e-06, "loss": 1.0327, "num_tokens": 21460301367.0, "step": 9522 }, { "epoch": 1.6975044563279857, "grad_norm": 0.177734375, "learning_rate": 3.0596871543453344e-06, "loss": 1.0488, "num_tokens": 21466554989.0, "step": 9523 }, { "epoch": 1.6976827094474154, "grad_norm": 0.171875, "learning_rate": 3.058464416666669e-06, "loss": 1.0132, "num_tokens": 21472821816.0, "step": 9524 }, { "epoch": 1.6978609625668448, "grad_norm": 0.16796875, "learning_rate": 3.0572423407578438e-06, "loss": 1.0162, "num_tokens": 21479087625.0, "step": 9525 }, { "epoch": 1.6980392156862745, "grad_norm": 0.169921875, "learning_rate": 3.0560209267206916e-06, "loss": 1.0147, "num_tokens": 21485373007.0, "step": 9526 }, { "epoch": 1.6982174688057041, "grad_norm": 0.1708984375, "learning_rate": 3.054800174656999e-06, "loss": 0.9989, "num_tokens": 21491656361.0, "step": 9527 }, { "epoch": 1.6983957219251336, "grad_norm": 0.1708984375, "learning_rate": 3.053580084668487e-06, "loss": 1.0202, "num_tokens": 21497940266.0, "step": 9528 }, { "epoch": 1.6985739750445634, "grad_norm": 0.1689453125, "learning_rate": 3.0523606568568253e-06, "loss": 1.0035, "num_tokens": 21504224162.0, "step": 9529 }, { "epoch": 1.6987522281639929, "grad_norm": 0.1689453125, "learning_rate": 3.0511418913236317e-06, "loss": 1.0317, "num_tokens": 21510499283.0, "step": 9530 }, { "epoch": 1.6989304812834225, "grad_norm": 0.1806640625, "learning_rate": 3.0499237881704663e-06, "loss": 1.0098, "num_tokens": 21516782734.0, "step": 9531 }, { "epoch": 1.6991087344028521, "grad_norm": 0.166015625, "learning_rate": 3.0487063474988338e-06, "loss": 1.0544, "num_tokens": 21523064821.0, "step": 9532 }, { "epoch": 1.6992869875222816, "grad_norm": 0.169921875, "learning_rate": 3.0474895694101793e-06, "loss": 1.0106, "num_tokens": 21529349993.0, "step": 9533 }, { "epoch": 1.6994652406417112, "grad_norm": 0.1728515625, "learning_rate": 3.046273454005906e-06, "loss": 1.055, "num_tokens": 21535633616.0, "step": 9534 }, { "epoch": 1.6996434937611409, "grad_norm": 0.173828125, "learning_rate": 3.045058001387345e-06, "loss": 1.0466, "num_tokens": 21541871845.0, "step": 9535 }, { "epoch": 1.6998217468805703, "grad_norm": 0.171875, "learning_rate": 3.0438432116557815e-06, "loss": 1.0155, "num_tokens": 21548137736.0, "step": 9536 }, { "epoch": 1.7, "grad_norm": 0.16796875, "learning_rate": 3.0426290849124475e-06, "loss": 0.9926, "num_tokens": 21554407767.0, "step": 9537 }, { "epoch": 1.7001782531194296, "grad_norm": 0.169921875, "learning_rate": 3.041415621258515e-06, "loss": 1.016, "num_tokens": 21560675854.0, "step": 9538 }, { "epoch": 1.700356506238859, "grad_norm": 0.1669921875, "learning_rate": 3.0402028207951016e-06, "loss": 1.0221, "num_tokens": 21566921942.0, "step": 9539 }, { "epoch": 1.700534759358289, "grad_norm": 0.177734375, "learning_rate": 3.0389906836232695e-06, "loss": 1.0465, "num_tokens": 21573179589.0, "step": 9540 }, { "epoch": 1.7007130124777183, "grad_norm": 0.1728515625, "learning_rate": 3.037779209844028e-06, "loss": 1.0093, "num_tokens": 21579432785.0, "step": 9541 }, { "epoch": 1.700891265597148, "grad_norm": 0.1728515625, "learning_rate": 3.0365683995583316e-06, "loss": 1.0179, "num_tokens": 21585716370.0, "step": 9542 }, { "epoch": 1.7010695187165776, "grad_norm": 0.1728515625, "learning_rate": 3.0353582528670707e-06, "loss": 1.0224, "num_tokens": 21592001132.0, "step": 9543 }, { "epoch": 1.701247771836007, "grad_norm": 0.1728515625, "learning_rate": 3.034148769871092e-06, "loss": 1.0005, "num_tokens": 21598257863.0, "step": 9544 }, { "epoch": 1.7014260249554367, "grad_norm": 0.1708984375, "learning_rate": 3.0329399506711822e-06, "loss": 1.0307, "num_tokens": 21604540657.0, "step": 9545 }, { "epoch": 1.7016042780748664, "grad_norm": 0.1689453125, "learning_rate": 3.0317317953680713e-06, "loss": 0.9963, "num_tokens": 21610801824.0, "step": 9546 }, { "epoch": 1.7017825311942958, "grad_norm": 0.16796875, "learning_rate": 3.030524304062432e-06, "loss": 0.994, "num_tokens": 21617085777.0, "step": 9547 }, { "epoch": 1.7019607843137254, "grad_norm": 0.171875, "learning_rate": 3.0293174768548907e-06, "loss": 1.0418, "num_tokens": 21623368820.0, "step": 9548 }, { "epoch": 1.702139037433155, "grad_norm": 0.1748046875, "learning_rate": 3.0281113138460094e-06, "loss": 1.0021, "num_tokens": 21629653180.0, "step": 9549 }, { "epoch": 1.7023172905525845, "grad_norm": 0.169921875, "learning_rate": 3.026905815136298e-06, "loss": 0.9893, "num_tokens": 21635934814.0, "step": 9550 }, { "epoch": 1.7024955436720144, "grad_norm": 0.16796875, "learning_rate": 3.025700980826212e-06, "loss": 1.0349, "num_tokens": 21642216850.0, "step": 9551 }, { "epoch": 1.7026737967914438, "grad_norm": 0.1787109375, "learning_rate": 3.0244968110161487e-06, "loss": 0.9889, "num_tokens": 21648487352.0, "step": 9552 }, { "epoch": 1.7028520499108735, "grad_norm": 0.16796875, "learning_rate": 3.023293305806453e-06, "loss": 0.9956, "num_tokens": 21654745271.0, "step": 9553 }, { "epoch": 1.7030303030303031, "grad_norm": 0.1708984375, "learning_rate": 3.0220904652974113e-06, "loss": 0.9966, "num_tokens": 21660993622.0, "step": 9554 }, { "epoch": 1.7032085561497325, "grad_norm": 0.1728515625, "learning_rate": 3.0208882895892613e-06, "loss": 1.0064, "num_tokens": 21667269111.0, "step": 9555 }, { "epoch": 1.7033868092691622, "grad_norm": 0.173828125, "learning_rate": 3.0196867787821754e-06, "loss": 1.0402, "num_tokens": 21673553127.0, "step": 9556 }, { "epoch": 1.7035650623885918, "grad_norm": 0.171875, "learning_rate": 3.0184859329762794e-06, "loss": 0.9849, "num_tokens": 21679837681.0, "step": 9557 }, { "epoch": 1.7037433155080213, "grad_norm": 0.1748046875, "learning_rate": 3.0172857522716383e-06, "loss": 1.0172, "num_tokens": 21686123567.0, "step": 9558 }, { "epoch": 1.7039215686274511, "grad_norm": 0.16796875, "learning_rate": 3.0160862367682642e-06, "loss": 1.0028, "num_tokens": 21692406144.0, "step": 9559 }, { "epoch": 1.7040998217468806, "grad_norm": 0.173828125, "learning_rate": 3.0148873865661123e-06, "loss": 1.0022, "num_tokens": 21698686840.0, "step": 9560 }, { "epoch": 1.70427807486631, "grad_norm": 0.17578125, "learning_rate": 3.0136892017650805e-06, "loss": 0.9884, "num_tokens": 21704949662.0, "step": 9561 }, { "epoch": 1.7044563279857399, "grad_norm": 0.169921875, "learning_rate": 3.012491682465019e-06, "loss": 1.0112, "num_tokens": 21711219718.0, "step": 9562 }, { "epoch": 1.7046345811051693, "grad_norm": 0.173828125, "learning_rate": 3.011294828765715e-06, "loss": 0.9891, "num_tokens": 21717475602.0, "step": 9563 }, { "epoch": 1.704812834224599, "grad_norm": 0.169921875, "learning_rate": 3.010098640766902e-06, "loss": 0.9918, "num_tokens": 21723730223.0, "step": 9564 }, { "epoch": 1.7049910873440286, "grad_norm": 0.169921875, "learning_rate": 3.008903118568259e-06, "loss": 1.0175, "num_tokens": 21729993560.0, "step": 9565 }, { "epoch": 1.705169340463458, "grad_norm": 0.173828125, "learning_rate": 3.00770826226941e-06, "loss": 0.9977, "num_tokens": 21736277860.0, "step": 9566 }, { "epoch": 1.7053475935828877, "grad_norm": 0.1748046875, "learning_rate": 3.006514071969921e-06, "loss": 1.0539, "num_tokens": 21742559962.0, "step": 9567 }, { "epoch": 1.7055258467023173, "grad_norm": 0.171875, "learning_rate": 3.005320547769303e-06, "loss": 1.0023, "num_tokens": 21748830772.0, "step": 9568 }, { "epoch": 1.7057040998217468, "grad_norm": 0.1708984375, "learning_rate": 3.0041276897670156e-06, "loss": 0.9972, "num_tokens": 21755098129.0, "step": 9569 }, { "epoch": 1.7058823529411766, "grad_norm": 0.171875, "learning_rate": 3.0029354980624605e-06, "loss": 1.0148, "num_tokens": 21761381673.0, "step": 9570 }, { "epoch": 1.706060606060606, "grad_norm": 0.16796875, "learning_rate": 3.00174397275498e-06, "loss": 1.0194, "num_tokens": 21767641346.0, "step": 9571 }, { "epoch": 1.7062388591800357, "grad_norm": 0.1728515625, "learning_rate": 3.0005531139438655e-06, "loss": 1.0287, "num_tokens": 21773915099.0, "step": 9572 }, { "epoch": 1.7064171122994654, "grad_norm": 0.171875, "learning_rate": 2.9993629217283536e-06, "loss": 0.9983, "num_tokens": 21780171628.0, "step": 9573 }, { "epoch": 1.7065953654188948, "grad_norm": 0.1728515625, "learning_rate": 2.998173396207621e-06, "loss": 1.0231, "num_tokens": 21786455562.0, "step": 9574 }, { "epoch": 1.7067736185383244, "grad_norm": 0.169921875, "learning_rate": 2.9969845374807903e-06, "loss": 1.0422, "num_tokens": 21792737682.0, "step": 9575 }, { "epoch": 1.706951871657754, "grad_norm": 0.1748046875, "learning_rate": 2.9957963456469315e-06, "loss": 1.0509, "num_tokens": 21799020946.0, "step": 9576 }, { "epoch": 1.7071301247771835, "grad_norm": 0.171875, "learning_rate": 2.994608820805056e-06, "loss": 1.0083, "num_tokens": 21805292824.0, "step": 9577 }, { "epoch": 1.7073083778966132, "grad_norm": 0.17578125, "learning_rate": 2.99342196305412e-06, "loss": 1.0441, "num_tokens": 21811577598.0, "step": 9578 }, { "epoch": 1.7074866310160428, "grad_norm": 0.171875, "learning_rate": 2.9922357724930232e-06, "loss": 1.0481, "num_tokens": 21817848818.0, "step": 9579 }, { "epoch": 1.7076648841354722, "grad_norm": 0.169921875, "learning_rate": 2.991050249220616e-06, "loss": 1.0074, "num_tokens": 21824121755.0, "step": 9580 }, { "epoch": 1.707843137254902, "grad_norm": 0.1689453125, "learning_rate": 2.9898653933356865e-06, "loss": 1.0424, "num_tokens": 21830403832.0, "step": 9581 }, { "epoch": 1.7080213903743315, "grad_norm": 0.1748046875, "learning_rate": 2.988681204936964e-06, "loss": 1.0225, "num_tokens": 21836688001.0, "step": 9582 }, { "epoch": 1.7081996434937612, "grad_norm": 0.173828125, "learning_rate": 2.987497684123133e-06, "loss": 1.0014, "num_tokens": 21842949322.0, "step": 9583 }, { "epoch": 1.7083778966131908, "grad_norm": 0.1806640625, "learning_rate": 2.9863148309928136e-06, "loss": 1.0035, "num_tokens": 21849229221.0, "step": 9584 }, { "epoch": 1.7085561497326203, "grad_norm": 0.17578125, "learning_rate": 2.9851326456445757e-06, "loss": 1.0129, "num_tokens": 21855514271.0, "step": 9585 }, { "epoch": 1.70873440285205, "grad_norm": 0.1748046875, "learning_rate": 2.983951128176926e-06, "loss": 1.0154, "num_tokens": 21861798413.0, "step": 9586 }, { "epoch": 1.7089126559714796, "grad_norm": 0.173828125, "learning_rate": 2.9827702786883274e-06, "loss": 1.0331, "num_tokens": 21868083950.0, "step": 9587 }, { "epoch": 1.709090909090909, "grad_norm": 0.17578125, "learning_rate": 2.9815900972771783e-06, "loss": 1.0117, "num_tokens": 21874338450.0, "step": 9588 }, { "epoch": 1.7092691622103386, "grad_norm": 0.1689453125, "learning_rate": 2.9804105840418203e-06, "loss": 1.0017, "num_tokens": 21880622817.0, "step": 9589 }, { "epoch": 1.7094474153297683, "grad_norm": 0.169921875, "learning_rate": 2.9792317390805425e-06, "loss": 1.0048, "num_tokens": 21886907049.0, "step": 9590 }, { "epoch": 1.7096256684491977, "grad_norm": 0.1787109375, "learning_rate": 2.9780535624915817e-06, "loss": 1.0335, "num_tokens": 21893179049.0, "step": 9591 }, { "epoch": 1.7098039215686276, "grad_norm": 0.173828125, "learning_rate": 2.976876054373114e-06, "loss": 1.044, "num_tokens": 21899437418.0, "step": 9592 }, { "epoch": 1.709982174688057, "grad_norm": 0.173828125, "learning_rate": 2.9756992148232598e-06, "loss": 1.013, "num_tokens": 21905672618.0, "step": 9593 }, { "epoch": 1.7101604278074867, "grad_norm": 0.1787109375, "learning_rate": 2.9745230439400896e-06, "loss": 1.0192, "num_tokens": 21911950804.0, "step": 9594 }, { "epoch": 1.7103386809269163, "grad_norm": 0.1689453125, "learning_rate": 2.97334754182161e-06, "loss": 1.0193, "num_tokens": 21918234538.0, "step": 9595 }, { "epoch": 1.7105169340463457, "grad_norm": 0.173828125, "learning_rate": 2.9721727085657805e-06, "loss": 1.0228, "num_tokens": 21924480351.0, "step": 9596 }, { "epoch": 1.7106951871657754, "grad_norm": 0.173828125, "learning_rate": 2.9709985442704926e-06, "loss": 1.0391, "num_tokens": 21930756156.0, "step": 9597 }, { "epoch": 1.710873440285205, "grad_norm": 0.181640625, "learning_rate": 2.9698250490335963e-06, "loss": 1.0395, "num_tokens": 21937018711.0, "step": 9598 }, { "epoch": 1.7110516934046345, "grad_norm": 0.1728515625, "learning_rate": 2.968652222952876e-06, "loss": 1.0519, "num_tokens": 21943299321.0, "step": 9599 }, { "epoch": 1.7112299465240641, "grad_norm": 0.1689453125, "learning_rate": 2.967480066126065e-06, "loss": 1.0081, "num_tokens": 21949552606.0, "step": 9600 }, { "epoch": 1.7114081996434938, "grad_norm": 0.169921875, "learning_rate": 2.966308578650838e-06, "loss": 1.0087, "num_tokens": 21955824936.0, "step": 9601 }, { "epoch": 1.7115864527629232, "grad_norm": 0.181640625, "learning_rate": 2.965137760624816e-06, "loss": 0.9886, "num_tokens": 21962083267.0, "step": 9602 }, { "epoch": 1.711764705882353, "grad_norm": 0.171875, "learning_rate": 2.9639676121455653e-06, "loss": 1.0553, "num_tokens": 21968341675.0, "step": 9603 }, { "epoch": 1.7119429590017825, "grad_norm": 0.173828125, "learning_rate": 2.962798133310592e-06, "loss": 1.0393, "num_tokens": 21974624681.0, "step": 9604 }, { "epoch": 1.7121212121212122, "grad_norm": 0.173828125, "learning_rate": 2.9616293242173506e-06, "loss": 0.9981, "num_tokens": 21980909077.0, "step": 9605 }, { "epoch": 1.7122994652406418, "grad_norm": 0.17578125, "learning_rate": 2.960461184963237e-06, "loss": 1.013, "num_tokens": 21987191936.0, "step": 9606 }, { "epoch": 1.7124777183600712, "grad_norm": 0.1669921875, "learning_rate": 2.9592937156455932e-06, "loss": 0.9774, "num_tokens": 21993432223.0, "step": 9607 }, { "epoch": 1.7126559714795009, "grad_norm": 0.169921875, "learning_rate": 2.9581269163617037e-06, "loss": 1.0021, "num_tokens": 21999706683.0, "step": 9608 }, { "epoch": 1.7128342245989305, "grad_norm": 0.1708984375, "learning_rate": 2.9569607872087995e-06, "loss": 1.015, "num_tokens": 22005990966.0, "step": 9609 }, { "epoch": 1.71301247771836, "grad_norm": 0.17578125, "learning_rate": 2.955795328284055e-06, "loss": 1.0491, "num_tokens": 22012258419.0, "step": 9610 }, { "epoch": 1.7131907308377896, "grad_norm": 0.1748046875, "learning_rate": 2.9546305396845864e-06, "loss": 1.0101, "num_tokens": 22018542520.0, "step": 9611 }, { "epoch": 1.7133689839572193, "grad_norm": 0.16796875, "learning_rate": 2.9534664215074564e-06, "loss": 1.022, "num_tokens": 22024819490.0, "step": 9612 }, { "epoch": 1.7135472370766487, "grad_norm": 0.171875, "learning_rate": 2.9523029738496716e-06, "loss": 0.9996, "num_tokens": 22031079385.0, "step": 9613 }, { "epoch": 1.7137254901960786, "grad_norm": 0.169921875, "learning_rate": 2.951140196808181e-06, "loss": 1.0292, "num_tokens": 22037351590.0, "step": 9614 }, { "epoch": 1.713903743315508, "grad_norm": 0.1689453125, "learning_rate": 2.949978090479879e-06, "loss": 1.0246, "num_tokens": 22043622222.0, "step": 9615 }, { "epoch": 1.7140819964349376, "grad_norm": 0.17578125, "learning_rate": 2.948816654961607e-06, "loss": 1.0349, "num_tokens": 22049836966.0, "step": 9616 }, { "epoch": 1.7142602495543673, "grad_norm": 0.171875, "learning_rate": 2.9476558903501452e-06, "loss": 1.0589, "num_tokens": 22056100830.0, "step": 9617 }, { "epoch": 1.7144385026737967, "grad_norm": 0.1748046875, "learning_rate": 2.9464957967422224e-06, "loss": 1.0293, "num_tokens": 22062384638.0, "step": 9618 }, { "epoch": 1.7146167557932264, "grad_norm": 0.177734375, "learning_rate": 2.9453363742345066e-06, "loss": 1.0325, "num_tokens": 22068666643.0, "step": 9619 }, { "epoch": 1.714795008912656, "grad_norm": 0.171875, "learning_rate": 2.9441776229236154e-06, "loss": 1.0259, "num_tokens": 22074926111.0, "step": 9620 }, { "epoch": 1.7149732620320854, "grad_norm": 0.171875, "learning_rate": 2.9430195429061054e-06, "loss": 1.0361, "num_tokens": 22081150322.0, "step": 9621 }, { "epoch": 1.7151515151515153, "grad_norm": 0.1708984375, "learning_rate": 2.9418621342784797e-06, "loss": 1.0079, "num_tokens": 22087413032.0, "step": 9622 }, { "epoch": 1.7153297682709447, "grad_norm": 0.173828125, "learning_rate": 2.940705397137188e-06, "loss": 1.0246, "num_tokens": 22093697830.0, "step": 9623 }, { "epoch": 1.7155080213903742, "grad_norm": 0.16796875, "learning_rate": 2.9395493315786204e-06, "loss": 1.0298, "num_tokens": 22099982661.0, "step": 9624 }, { "epoch": 1.715686274509804, "grad_norm": 0.1767578125, "learning_rate": 2.9383939376991105e-06, "loss": 1.0462, "num_tokens": 22106266441.0, "step": 9625 }, { "epoch": 1.7158645276292335, "grad_norm": 0.173828125, "learning_rate": 2.9372392155949387e-06, "loss": 1.0401, "num_tokens": 22112487725.0, "step": 9626 }, { "epoch": 1.7160427807486631, "grad_norm": 0.1748046875, "learning_rate": 2.9360851653623306e-06, "loss": 1.0318, "num_tokens": 22118770360.0, "step": 9627 }, { "epoch": 1.7162210338680928, "grad_norm": 0.1787109375, "learning_rate": 2.9349317870974493e-06, "loss": 1.0222, "num_tokens": 22125053960.0, "step": 9628 }, { "epoch": 1.7163992869875222, "grad_norm": 0.1748046875, "learning_rate": 2.933779080896406e-06, "loss": 1.0287, "num_tokens": 22131339999.0, "step": 9629 }, { "epoch": 1.7165775401069518, "grad_norm": 0.171875, "learning_rate": 2.9326270468552603e-06, "loss": 1.0273, "num_tokens": 22137620552.0, "step": 9630 }, { "epoch": 1.7167557932263815, "grad_norm": 0.169921875, "learning_rate": 2.931475685070008e-06, "loss": 0.9999, "num_tokens": 22143858514.0, "step": 9631 }, { "epoch": 1.716934046345811, "grad_norm": 0.1748046875, "learning_rate": 2.930324995636593e-06, "loss": 1.0239, "num_tokens": 22150120974.0, "step": 9632 }, { "epoch": 1.7171122994652408, "grad_norm": 0.1796875, "learning_rate": 2.929174978650902e-06, "loss": 1.0534, "num_tokens": 22156384751.0, "step": 9633 }, { "epoch": 1.7172905525846702, "grad_norm": 0.1708984375, "learning_rate": 2.9280256342087705e-06, "loss": 1.0257, "num_tokens": 22162642709.0, "step": 9634 }, { "epoch": 1.7174688057040999, "grad_norm": 0.173828125, "learning_rate": 2.926876962405968e-06, "loss": 1.0271, "num_tokens": 22168892830.0, "step": 9635 }, { "epoch": 1.7176470588235295, "grad_norm": 0.171875, "learning_rate": 2.9257289633382134e-06, "loss": 1.0175, "num_tokens": 22175160105.0, "step": 9636 }, { "epoch": 1.717825311942959, "grad_norm": 0.1728515625, "learning_rate": 2.9245816371011735e-06, "loss": 1.0392, "num_tokens": 22181444491.0, "step": 9637 }, { "epoch": 1.7180035650623886, "grad_norm": 0.1728515625, "learning_rate": 2.923434983790454e-06, "loss": 1.019, "num_tokens": 22187718414.0, "step": 9638 }, { "epoch": 1.7181818181818183, "grad_norm": 0.1689453125, "learning_rate": 2.9222890035016057e-06, "loss": 1.0483, "num_tokens": 22194001340.0, "step": 9639 }, { "epoch": 1.7183600713012477, "grad_norm": 0.1650390625, "learning_rate": 2.921143696330121e-06, "loss": 1.0371, "num_tokens": 22200285843.0, "step": 9640 }, { "epoch": 1.7185383244206773, "grad_norm": 0.169921875, "learning_rate": 2.9199990623714415e-06, "loss": 1.025, "num_tokens": 22206569132.0, "step": 9641 }, { "epoch": 1.718716577540107, "grad_norm": 0.171875, "learning_rate": 2.9188551017209514e-06, "loss": 1.0285, "num_tokens": 22212835128.0, "step": 9642 }, { "epoch": 1.7188948306595364, "grad_norm": 0.1767578125, "learning_rate": 2.917711814473971e-06, "loss": 1.0513, "num_tokens": 22219117343.0, "step": 9643 }, { "epoch": 1.7190730837789663, "grad_norm": 0.1767578125, "learning_rate": 2.9165692007257775e-06, "loss": 1.012, "num_tokens": 22225401668.0, "step": 9644 }, { "epoch": 1.7192513368983957, "grad_norm": 0.171875, "learning_rate": 2.9154272605715807e-06, "loss": 1.0223, "num_tokens": 22231685884.0, "step": 9645 }, { "epoch": 1.7194295900178254, "grad_norm": 0.173828125, "learning_rate": 2.91428599410654e-06, "loss": 1.0323, "num_tokens": 22237969418.0, "step": 9646 }, { "epoch": 1.719607843137255, "grad_norm": 0.1689453125, "learning_rate": 2.913145401425756e-06, "loss": 0.9881, "num_tokens": 22244248432.0, "step": 9647 }, { "epoch": 1.7197860962566844, "grad_norm": 0.1708984375, "learning_rate": 2.9120054826242773e-06, "loss": 1.0501, "num_tokens": 22250510670.0, "step": 9648 }, { "epoch": 1.719964349376114, "grad_norm": 0.169921875, "learning_rate": 2.910866237797092e-06, "loss": 1.0278, "num_tokens": 22256794461.0, "step": 9649 }, { "epoch": 1.7201426024955437, "grad_norm": 0.1728515625, "learning_rate": 2.9097276670391354e-06, "loss": 1.026, "num_tokens": 22263079587.0, "step": 9650 }, { "epoch": 1.7203208556149732, "grad_norm": 0.16796875, "learning_rate": 2.9085897704452826e-06, "loss": 1.0159, "num_tokens": 22269355038.0, "step": 9651 }, { "epoch": 1.7204991087344028, "grad_norm": 0.173828125, "learning_rate": 2.9074525481103548e-06, "loss": 1.0382, "num_tokens": 22275621005.0, "step": 9652 }, { "epoch": 1.7206773618538325, "grad_norm": 0.1806640625, "learning_rate": 2.9063160001291167e-06, "loss": 1.0691, "num_tokens": 22281877132.0, "step": 9653 }, { "epoch": 1.720855614973262, "grad_norm": 0.1689453125, "learning_rate": 2.905180126596279e-06, "loss": 1.0131, "num_tokens": 22288135076.0, "step": 9654 }, { "epoch": 1.7210338680926918, "grad_norm": 0.1728515625, "learning_rate": 2.9040449276064926e-06, "loss": 1.0066, "num_tokens": 22294419828.0, "step": 9655 }, { "epoch": 1.7212121212121212, "grad_norm": 0.17578125, "learning_rate": 2.9029104032543554e-06, "loss": 1.0158, "num_tokens": 22300677955.0, "step": 9656 }, { "epoch": 1.7213903743315508, "grad_norm": 0.166015625, "learning_rate": 2.901776553634407e-06, "loss": 1.0358, "num_tokens": 22306959960.0, "step": 9657 }, { "epoch": 1.7215686274509805, "grad_norm": 0.173828125, "learning_rate": 2.9006433788411302e-06, "loss": 1.0185, "num_tokens": 22313229352.0, "step": 9658 }, { "epoch": 1.72174688057041, "grad_norm": 0.1748046875, "learning_rate": 2.8995108789689547e-06, "loss": 1.0008, "num_tokens": 22319484268.0, "step": 9659 }, { "epoch": 1.7219251336898396, "grad_norm": 0.17578125, "learning_rate": 2.89837905411225e-06, "loss": 1.026, "num_tokens": 22325763762.0, "step": 9660 }, { "epoch": 1.7221033868092692, "grad_norm": 0.17578125, "learning_rate": 2.89724790436533e-06, "loss": 1.0128, "num_tokens": 22332013719.0, "step": 9661 }, { "epoch": 1.7222816399286986, "grad_norm": 0.1689453125, "learning_rate": 2.8961174298224594e-06, "loss": 1.0094, "num_tokens": 22338250833.0, "step": 9662 }, { "epoch": 1.7224598930481283, "grad_norm": 0.1708984375, "learning_rate": 2.8949876305778345e-06, "loss": 1.0129, "num_tokens": 22344535610.0, "step": 9663 }, { "epoch": 1.722638146167558, "grad_norm": 0.173828125, "learning_rate": 2.8938585067256057e-06, "loss": 0.9801, "num_tokens": 22350819148.0, "step": 9664 }, { "epoch": 1.7228163992869874, "grad_norm": 0.1728515625, "learning_rate": 2.8927300583598626e-06, "loss": 1.0252, "num_tokens": 22357104109.0, "step": 9665 }, { "epoch": 1.7229946524064172, "grad_norm": 0.1728515625, "learning_rate": 2.891602285574638e-06, "loss": 1.0568, "num_tokens": 22363363196.0, "step": 9666 }, { "epoch": 1.7231729055258467, "grad_norm": 0.169921875, "learning_rate": 2.8904751884639092e-06, "loss": 1.0259, "num_tokens": 22369648556.0, "step": 9667 }, { "epoch": 1.7233511586452763, "grad_norm": 0.1728515625, "learning_rate": 2.889348767121596e-06, "loss": 1.0016, "num_tokens": 22375891843.0, "step": 9668 }, { "epoch": 1.723529411764706, "grad_norm": 0.17578125, "learning_rate": 2.888223021641568e-06, "loss": 1.0072, "num_tokens": 22382169939.0, "step": 9669 }, { "epoch": 1.7237076648841354, "grad_norm": 0.1728515625, "learning_rate": 2.88709795211763e-06, "loss": 1.026, "num_tokens": 22388418406.0, "step": 9670 }, { "epoch": 1.723885918003565, "grad_norm": 0.1767578125, "learning_rate": 2.8859735586435362e-06, "loss": 1.0186, "num_tokens": 22394704329.0, "step": 9671 }, { "epoch": 1.7240641711229947, "grad_norm": 0.1708984375, "learning_rate": 2.8848498413129793e-06, "loss": 1.0575, "num_tokens": 22400987820.0, "step": 9672 }, { "epoch": 1.7242424242424241, "grad_norm": 0.1708984375, "learning_rate": 2.8837268002196062e-06, "loss": 1.0156, "num_tokens": 22407201432.0, "step": 9673 }, { "epoch": 1.7244206773618538, "grad_norm": 0.1728515625, "learning_rate": 2.8826044354569915e-06, "loss": 1.0155, "num_tokens": 22413484452.0, "step": 9674 }, { "epoch": 1.7245989304812834, "grad_norm": 0.1708984375, "learning_rate": 2.8814827471186664e-06, "loss": 1.0356, "num_tokens": 22419769180.0, "step": 9675 }, { "epoch": 1.7247771836007129, "grad_norm": 0.1748046875, "learning_rate": 2.8803617352981016e-06, "loss": 1.0087, "num_tokens": 22426051169.0, "step": 9676 }, { "epoch": 1.7249554367201427, "grad_norm": 0.1708984375, "learning_rate": 2.8792414000887103e-06, "loss": 0.9986, "num_tokens": 22432323969.0, "step": 9677 }, { "epoch": 1.7251336898395722, "grad_norm": 0.1708984375, "learning_rate": 2.878121741583851e-06, "loss": 1.0386, "num_tokens": 22438568471.0, "step": 9678 }, { "epoch": 1.7253119429590018, "grad_norm": 0.169921875, "learning_rate": 2.8770027598768224e-06, "loss": 1.0349, "num_tokens": 22444850087.0, "step": 9679 }, { "epoch": 1.7254901960784315, "grad_norm": 0.169921875, "learning_rate": 2.8758844550608745e-06, "loss": 0.9997, "num_tokens": 22451044724.0, "step": 9680 }, { "epoch": 1.7256684491978609, "grad_norm": 0.173828125, "learning_rate": 2.874766827229194e-06, "loss": 1.0146, "num_tokens": 22457300383.0, "step": 9681 }, { "epoch": 1.7258467023172905, "grad_norm": 0.173828125, "learning_rate": 2.873649876474908e-06, "loss": 1.0183, "num_tokens": 22463557347.0, "step": 9682 }, { "epoch": 1.7260249554367202, "grad_norm": 0.1708984375, "learning_rate": 2.8725336028910996e-06, "loss": 1.0135, "num_tokens": 22469798274.0, "step": 9683 }, { "epoch": 1.7262032085561496, "grad_norm": 0.171875, "learning_rate": 2.871418006570784e-06, "loss": 1.0308, "num_tokens": 22476082373.0, "step": 9684 }, { "epoch": 1.7263814616755795, "grad_norm": 0.1708984375, "learning_rate": 2.8703030876069255e-06, "loss": 1.0287, "num_tokens": 22482336626.0, "step": 9685 }, { "epoch": 1.726559714795009, "grad_norm": 0.1669921875, "learning_rate": 2.8691888460924278e-06, "loss": 0.971, "num_tokens": 22488607690.0, "step": 9686 }, { "epoch": 1.7267379679144383, "grad_norm": 0.171875, "learning_rate": 2.8680752821201453e-06, "loss": 1.0081, "num_tokens": 22494891711.0, "step": 9687 }, { "epoch": 1.7269162210338682, "grad_norm": 0.1689453125, "learning_rate": 2.8669623957828705e-06, "loss": 1.0109, "num_tokens": 22501175869.0, "step": 9688 }, { "epoch": 1.7270944741532976, "grad_norm": 0.17578125, "learning_rate": 2.865850187173336e-06, "loss": 1.0326, "num_tokens": 22507459467.0, "step": 9689 }, { "epoch": 1.7272727272727273, "grad_norm": 0.1748046875, "learning_rate": 2.864738656384227e-06, "loss": 1.0417, "num_tokens": 22513742083.0, "step": 9690 }, { "epoch": 1.727450980392157, "grad_norm": 0.1806640625, "learning_rate": 2.863627803508167e-06, "loss": 1.0254, "num_tokens": 22520007453.0, "step": 9691 }, { "epoch": 1.7276292335115864, "grad_norm": 0.16796875, "learning_rate": 2.862517628637723e-06, "loss": 1.0764, "num_tokens": 22526269861.0, "step": 9692 }, { "epoch": 1.727807486631016, "grad_norm": 0.1748046875, "learning_rate": 2.861408131865403e-06, "loss": 1.0233, "num_tokens": 22532553708.0, "step": 9693 }, { "epoch": 1.7279857397504457, "grad_norm": 0.16796875, "learning_rate": 2.860299313283667e-06, "loss": 0.9895, "num_tokens": 22538818035.0, "step": 9694 }, { "epoch": 1.728163992869875, "grad_norm": 0.1708984375, "learning_rate": 2.8591911729849107e-06, "loss": 1.0274, "num_tokens": 22545077303.0, "step": 9695 }, { "epoch": 1.728342245989305, "grad_norm": 0.1689453125, "learning_rate": 2.858083711061477e-06, "loss": 1.0245, "num_tokens": 22551361824.0, "step": 9696 }, { "epoch": 1.7285204991087344, "grad_norm": 0.16796875, "learning_rate": 2.856976927605647e-06, "loss": 1.0335, "num_tokens": 22557643035.0, "step": 9697 }, { "epoch": 1.728698752228164, "grad_norm": 0.171875, "learning_rate": 2.8558708227096534e-06, "loss": 1.0109, "num_tokens": 22563927671.0, "step": 9698 }, { "epoch": 1.7288770053475937, "grad_norm": 0.1669921875, "learning_rate": 2.8547653964656657e-06, "loss": 1.0218, "num_tokens": 22570212736.0, "step": 9699 }, { "epoch": 1.7290552584670231, "grad_norm": 0.1787109375, "learning_rate": 2.853660648965799e-06, "loss": 1.003, "num_tokens": 22576480283.0, "step": 9700 }, { "epoch": 1.7292335115864528, "grad_norm": 0.17578125, "learning_rate": 2.8525565803021143e-06, "loss": 0.9947, "num_tokens": 22582756892.0, "step": 9701 }, { "epoch": 1.7294117647058824, "grad_norm": 0.1689453125, "learning_rate": 2.8514531905666138e-06, "loss": 1.0146, "num_tokens": 22589039664.0, "step": 9702 }, { "epoch": 1.7295900178253119, "grad_norm": 0.1748046875, "learning_rate": 2.8503504798512417e-06, "loss": 1.0047, "num_tokens": 22595322536.0, "step": 9703 }, { "epoch": 1.7297682709447415, "grad_norm": 0.1669921875, "learning_rate": 2.8492484482478877e-06, "loss": 1.0047, "num_tokens": 22601577800.0, "step": 9704 }, { "epoch": 1.7299465240641712, "grad_norm": 0.177734375, "learning_rate": 2.848147095848384e-06, "loss": 1.0111, "num_tokens": 22607862954.0, "step": 9705 }, { "epoch": 1.7301247771836006, "grad_norm": 0.1748046875, "learning_rate": 2.847046422744506e-06, "loss": 0.9805, "num_tokens": 22614136919.0, "step": 9706 }, { "epoch": 1.7303030303030305, "grad_norm": 0.1728515625, "learning_rate": 2.8459464290279736e-06, "loss": 1.0125, "num_tokens": 22620420749.0, "step": 9707 }, { "epoch": 1.7304812834224599, "grad_norm": 0.1748046875, "learning_rate": 2.844847114790451e-06, "loss": 0.9819, "num_tokens": 22626699006.0, "step": 9708 }, { "epoch": 1.7306595365418895, "grad_norm": 0.173828125, "learning_rate": 2.843748480123543e-06, "loss": 1.0222, "num_tokens": 22632982738.0, "step": 9709 }, { "epoch": 1.7308377896613192, "grad_norm": 0.1689453125, "learning_rate": 2.8426505251187984e-06, "loss": 1.0254, "num_tokens": 22639268647.0, "step": 9710 }, { "epoch": 1.7310160427807486, "grad_norm": 0.1708984375, "learning_rate": 2.8415532498677117e-06, "loss": 1.0104, "num_tokens": 22645508677.0, "step": 9711 }, { "epoch": 1.7311942959001783, "grad_norm": 0.1689453125, "learning_rate": 2.8404566544617175e-06, "loss": 1.0381, "num_tokens": 22651770013.0, "step": 9712 }, { "epoch": 1.731372549019608, "grad_norm": 0.17578125, "learning_rate": 2.839360738992196e-06, "loss": 1.0458, "num_tokens": 22658025319.0, "step": 9713 }, { "epoch": 1.7315508021390373, "grad_norm": 0.1708984375, "learning_rate": 2.8382655035504684e-06, "loss": 1.0133, "num_tokens": 22664309979.0, "step": 9714 }, { "epoch": 1.731729055258467, "grad_norm": 0.1767578125, "learning_rate": 2.837170948227804e-06, "loss": 1.0173, "num_tokens": 22670593023.0, "step": 9715 }, { "epoch": 1.7319073083778966, "grad_norm": 0.16796875, "learning_rate": 2.836077073115411e-06, "loss": 1.0481, "num_tokens": 22676862121.0, "step": 9716 }, { "epoch": 1.732085561497326, "grad_norm": 0.1708984375, "learning_rate": 2.8349838783044416e-06, "loss": 1.0227, "num_tokens": 22683125968.0, "step": 9717 }, { "epoch": 1.732263814616756, "grad_norm": 0.1796875, "learning_rate": 2.833891363885991e-06, "loss": 1.0229, "num_tokens": 22689410529.0, "step": 9718 }, { "epoch": 1.7324420677361854, "grad_norm": 0.1708984375, "learning_rate": 2.8327995299511033e-06, "loss": 1.0149, "num_tokens": 22695661803.0, "step": 9719 }, { "epoch": 1.732620320855615, "grad_norm": 0.1748046875, "learning_rate": 2.831708376590757e-06, "loss": 0.9965, "num_tokens": 22701916673.0, "step": 9720 }, { "epoch": 1.7327985739750447, "grad_norm": 0.173828125, "learning_rate": 2.830617903895877e-06, "loss": 1.0007, "num_tokens": 22708170724.0, "step": 9721 }, { "epoch": 1.732976827094474, "grad_norm": 0.173828125, "learning_rate": 2.829528111957336e-06, "loss": 1.0558, "num_tokens": 22714437551.0, "step": 9722 }, { "epoch": 1.7331550802139037, "grad_norm": 0.166015625, "learning_rate": 2.8284390008659467e-06, "loss": 0.9988, "num_tokens": 22720701927.0, "step": 9723 }, { "epoch": 1.7333333333333334, "grad_norm": 0.1728515625, "learning_rate": 2.827350570712462e-06, "loss": 1.0074, "num_tokens": 22726972053.0, "step": 9724 }, { "epoch": 1.7335115864527628, "grad_norm": 0.169921875, "learning_rate": 2.8262628215875813e-06, "loss": 1.0442, "num_tokens": 22733224071.0, "step": 9725 }, { "epoch": 1.7336898395721925, "grad_norm": 0.169921875, "learning_rate": 2.8251757535819503e-06, "loss": 1.0074, "num_tokens": 22739457475.0, "step": 9726 }, { "epoch": 1.7338680926916221, "grad_norm": 0.1748046875, "learning_rate": 2.824089366786154e-06, "loss": 1.0445, "num_tokens": 22745739949.0, "step": 9727 }, { "epoch": 1.7340463458110515, "grad_norm": 0.1728515625, "learning_rate": 2.823003661290717e-06, "loss": 1.0282, "num_tokens": 22752022384.0, "step": 9728 }, { "epoch": 1.7342245989304814, "grad_norm": 0.169921875, "learning_rate": 2.821918637186116e-06, "loss": 0.9992, "num_tokens": 22758305701.0, "step": 9729 }, { "epoch": 1.7344028520499108, "grad_norm": 0.1767578125, "learning_rate": 2.8208342945627643e-06, "loss": 1.0205, "num_tokens": 22764565012.0, "step": 9730 }, { "epoch": 1.7345811051693405, "grad_norm": 0.171875, "learning_rate": 2.819750633511021e-06, "loss": 1.0138, "num_tokens": 22770849450.0, "step": 9731 }, { "epoch": 1.7347593582887701, "grad_norm": 0.1728515625, "learning_rate": 2.818667654121185e-06, "loss": 0.9877, "num_tokens": 22777107274.0, "step": 9732 }, { "epoch": 1.7349376114081996, "grad_norm": 0.1748046875, "learning_rate": 2.817585356483505e-06, "loss": 1.0377, "num_tokens": 22783378143.0, "step": 9733 }, { "epoch": 1.7351158645276292, "grad_norm": 0.1708984375, "learning_rate": 2.8165037406881703e-06, "loss": 1.021, "num_tokens": 22789640553.0, "step": 9734 }, { "epoch": 1.7352941176470589, "grad_norm": 0.16796875, "learning_rate": 2.815422806825306e-06, "loss": 0.9819, "num_tokens": 22795923420.0, "step": 9735 }, { "epoch": 1.7354723707664883, "grad_norm": 0.1728515625, "learning_rate": 2.814342554984991e-06, "loss": 1.0479, "num_tokens": 22802207227.0, "step": 9736 }, { "epoch": 1.735650623885918, "grad_norm": 0.1767578125, "learning_rate": 2.8132629852572427e-06, "loss": 1.0088, "num_tokens": 22808473253.0, "step": 9737 }, { "epoch": 1.7358288770053476, "grad_norm": 0.1689453125, "learning_rate": 2.8121840977320213e-06, "loss": 0.9943, "num_tokens": 22814686322.0, "step": 9738 }, { "epoch": 1.736007130124777, "grad_norm": 0.173828125, "learning_rate": 2.811105892499229e-06, "loss": 1.0233, "num_tokens": 22820958635.0, "step": 9739 }, { "epoch": 1.736185383244207, "grad_norm": 0.1689453125, "learning_rate": 2.810028369648715e-06, "loss": 1.0182, "num_tokens": 22827241337.0, "step": 9740 }, { "epoch": 1.7363636363636363, "grad_norm": 0.171875, "learning_rate": 2.808951529270271e-06, "loss": 1.026, "num_tokens": 22833526394.0, "step": 9741 }, { "epoch": 1.736541889483066, "grad_norm": 0.1767578125, "learning_rate": 2.807875371453627e-06, "loss": 1.0025, "num_tokens": 22839808589.0, "step": 9742 }, { "epoch": 1.7367201426024956, "grad_norm": 0.1728515625, "learning_rate": 2.806799896288461e-06, "loss": 1.0213, "num_tokens": 22846069955.0, "step": 9743 }, { "epoch": 1.736898395721925, "grad_norm": 0.16796875, "learning_rate": 2.8057251038643922e-06, "loss": 1.0064, "num_tokens": 22852351506.0, "step": 9744 }, { "epoch": 1.7370766488413547, "grad_norm": 0.1796875, "learning_rate": 2.804650994270984e-06, "loss": 0.9938, "num_tokens": 22858636412.0, "step": 9745 }, { "epoch": 1.7372549019607844, "grad_norm": 0.1728515625, "learning_rate": 2.8035775675977395e-06, "loss": 1.0063, "num_tokens": 22864919666.0, "step": 9746 }, { "epoch": 1.7374331550802138, "grad_norm": 0.1728515625, "learning_rate": 2.8025048239341123e-06, "loss": 1.0278, "num_tokens": 22871198373.0, "step": 9747 }, { "epoch": 1.7376114081996437, "grad_norm": 0.1767578125, "learning_rate": 2.8014327633694905e-06, "loss": 1.0038, "num_tokens": 22877425969.0, "step": 9748 }, { "epoch": 1.737789661319073, "grad_norm": 0.16796875, "learning_rate": 2.8003613859932113e-06, "loss": 1.0201, "num_tokens": 22883709577.0, "step": 9749 }, { "epoch": 1.7379679144385025, "grad_norm": 0.1728515625, "learning_rate": 2.7992906918945517e-06, "loss": 1.0293, "num_tokens": 22889992015.0, "step": 9750 }, { "epoch": 1.7381461675579324, "grad_norm": 0.1708984375, "learning_rate": 2.798220681162732e-06, "loss": 1.0161, "num_tokens": 22896246587.0, "step": 9751 }, { "epoch": 1.7383244206773618, "grad_norm": 0.1689453125, "learning_rate": 2.7971513538869194e-06, "loss": 1.0126, "num_tokens": 22902529944.0, "step": 9752 }, { "epoch": 1.7385026737967915, "grad_norm": 0.16796875, "learning_rate": 2.796082710156216e-06, "loss": 1.0619, "num_tokens": 22908812321.0, "step": 9753 }, { "epoch": 1.7386809269162211, "grad_norm": 0.16796875, "learning_rate": 2.7950147500596776e-06, "loss": 1.0446, "num_tokens": 22915095511.0, "step": 9754 }, { "epoch": 1.7388591800356505, "grad_norm": 0.169921875, "learning_rate": 2.793947473686296e-06, "loss": 1.0228, "num_tokens": 22921355633.0, "step": 9755 }, { "epoch": 1.7390374331550802, "grad_norm": 0.169921875, "learning_rate": 2.792880881125006e-06, "loss": 1.027, "num_tokens": 22927620129.0, "step": 9756 }, { "epoch": 1.7392156862745098, "grad_norm": 0.173828125, "learning_rate": 2.791814972464688e-06, "loss": 1.0044, "num_tokens": 22933904700.0, "step": 9757 }, { "epoch": 1.7393939393939393, "grad_norm": 0.1728515625, "learning_rate": 2.790749747794165e-06, "loss": 1.0178, "num_tokens": 22940187431.0, "step": 9758 }, { "epoch": 1.7395721925133691, "grad_norm": 0.177734375, "learning_rate": 2.7896852072022003e-06, "loss": 1.0178, "num_tokens": 22946441499.0, "step": 9759 }, { "epoch": 1.7397504456327986, "grad_norm": 0.171875, "learning_rate": 2.788621350777504e-06, "loss": 0.9968, "num_tokens": 22952686825.0, "step": 9760 }, { "epoch": 1.739928698752228, "grad_norm": 0.1796875, "learning_rate": 2.7875581786087265e-06, "loss": 1.0266, "num_tokens": 22958942887.0, "step": 9761 }, { "epoch": 1.7401069518716579, "grad_norm": 0.169921875, "learning_rate": 2.7864956907844636e-06, "loss": 1.0054, "num_tokens": 22965228193.0, "step": 9762 }, { "epoch": 1.7402852049910873, "grad_norm": 0.1708984375, "learning_rate": 2.7854338873932514e-06, "loss": 1.0137, "num_tokens": 22971513317.0, "step": 9763 }, { "epoch": 1.740463458110517, "grad_norm": 0.169921875, "learning_rate": 2.7843727685235696e-06, "loss": 0.9997, "num_tokens": 22977727753.0, "step": 9764 }, { "epoch": 1.7406417112299466, "grad_norm": 0.1689453125, "learning_rate": 2.7833123342638463e-06, "loss": 1.0112, "num_tokens": 22983997628.0, "step": 9765 }, { "epoch": 1.740819964349376, "grad_norm": 0.169921875, "learning_rate": 2.782252584702441e-06, "loss": 1.0322, "num_tokens": 22990261197.0, "step": 9766 }, { "epoch": 1.7409982174688057, "grad_norm": 0.173828125, "learning_rate": 2.7811935199276663e-06, "loss": 1.0219, "num_tokens": 22996543922.0, "step": 9767 }, { "epoch": 1.7411764705882353, "grad_norm": 0.1767578125, "learning_rate": 2.780135140027773e-06, "loss": 1.0142, "num_tokens": 23002802893.0, "step": 9768 }, { "epoch": 1.7413547237076648, "grad_norm": 0.1669921875, "learning_rate": 2.779077445090957e-06, "loss": 0.9958, "num_tokens": 23009086729.0, "step": 9769 }, { "epoch": 1.7415329768270946, "grad_norm": 0.173828125, "learning_rate": 2.7780204352053577e-06, "loss": 0.9948, "num_tokens": 23015348626.0, "step": 9770 }, { "epoch": 1.741711229946524, "grad_norm": 0.1689453125, "learning_rate": 2.7769641104590528e-06, "loss": 1.0329, "num_tokens": 23021623787.0, "step": 9771 }, { "epoch": 1.7418894830659537, "grad_norm": 0.17578125, "learning_rate": 2.775908470940067e-06, "loss": 1.0195, "num_tokens": 23027881755.0, "step": 9772 }, { "epoch": 1.7420677361853834, "grad_norm": 0.1708984375, "learning_rate": 2.77485351673637e-06, "loss": 1.0201, "num_tokens": 23034141168.0, "step": 9773 }, { "epoch": 1.7422459893048128, "grad_norm": 0.1689453125, "learning_rate": 2.7737992479358676e-06, "loss": 1.0364, "num_tokens": 23040425220.0, "step": 9774 }, { "epoch": 1.7424242424242424, "grad_norm": 0.1728515625, "learning_rate": 2.7727456646264116e-06, "loss": 1.0181, "num_tokens": 23046694910.0, "step": 9775 }, { "epoch": 1.742602495543672, "grad_norm": 0.1748046875, "learning_rate": 2.771692766895803e-06, "loss": 1.0249, "num_tokens": 23052937521.0, "step": 9776 }, { "epoch": 1.7427807486631015, "grad_norm": 0.169921875, "learning_rate": 2.770640554831774e-06, "loss": 1.0466, "num_tokens": 23059200561.0, "step": 9777 }, { "epoch": 1.7429590017825312, "grad_norm": 0.171875, "learning_rate": 2.7695890285220083e-06, "loss": 1.0373, "num_tokens": 23065479852.0, "step": 9778 }, { "epoch": 1.7431372549019608, "grad_norm": 0.1787109375, "learning_rate": 2.768538188054129e-06, "loss": 1.0362, "num_tokens": 23071765232.0, "step": 9779 }, { "epoch": 1.7433155080213902, "grad_norm": 0.1708984375, "learning_rate": 2.767488033515705e-06, "loss": 1.0302, "num_tokens": 23078049802.0, "step": 9780 }, { "epoch": 1.74349376114082, "grad_norm": 0.169921875, "learning_rate": 2.766438564994243e-06, "loss": 0.9888, "num_tokens": 23084299025.0, "step": 9781 }, { "epoch": 1.7436720142602495, "grad_norm": 0.171875, "learning_rate": 2.765389782577196e-06, "loss": 1.0181, "num_tokens": 23090566647.0, "step": 9782 }, { "epoch": 1.7438502673796792, "grad_norm": 0.1728515625, "learning_rate": 2.7643416863519606e-06, "loss": 1.0328, "num_tokens": 23096772556.0, "step": 9783 }, { "epoch": 1.7440285204991088, "grad_norm": 0.1689453125, "learning_rate": 2.763294276405875e-06, "loss": 1.0174, "num_tokens": 23103057805.0, "step": 9784 }, { "epoch": 1.7442067736185383, "grad_norm": 0.169921875, "learning_rate": 2.7622475528262194e-06, "loss": 1.035, "num_tokens": 23109342406.0, "step": 9785 }, { "epoch": 1.744385026737968, "grad_norm": 0.171875, "learning_rate": 2.7612015157002146e-06, "loss": 1.0111, "num_tokens": 23115627996.0, "step": 9786 }, { "epoch": 1.7445632798573976, "grad_norm": 0.1669921875, "learning_rate": 2.7601561651150317e-06, "loss": 1.031, "num_tokens": 23121905738.0, "step": 9787 }, { "epoch": 1.744741532976827, "grad_norm": 0.1728515625, "learning_rate": 2.7591115011577807e-06, "loss": 1.0491, "num_tokens": 23128190790.0, "step": 9788 }, { "epoch": 1.7449197860962566, "grad_norm": 0.1689453125, "learning_rate": 2.7580675239155074e-06, "loss": 1.0281, "num_tokens": 23134475513.0, "step": 9789 }, { "epoch": 1.7450980392156863, "grad_norm": 0.1669921875, "learning_rate": 2.7570242334752123e-06, "loss": 1.0135, "num_tokens": 23140738304.0, "step": 9790 }, { "epoch": 1.7452762923351157, "grad_norm": 0.1728515625, "learning_rate": 2.7559816299238306e-06, "loss": 0.9652, "num_tokens": 23146993087.0, "step": 9791 }, { "epoch": 1.7454545454545456, "grad_norm": 0.1728515625, "learning_rate": 2.7549397133482424e-06, "loss": 0.9809, "num_tokens": 23153219390.0, "step": 9792 }, { "epoch": 1.745632798573975, "grad_norm": 0.169921875, "learning_rate": 2.7538984838352716e-06, "loss": 1.036, "num_tokens": 23159503518.0, "step": 9793 }, { "epoch": 1.7458110516934047, "grad_norm": 0.1669921875, "learning_rate": 2.752857941471685e-06, "loss": 1.0178, "num_tokens": 23165731626.0, "step": 9794 }, { "epoch": 1.7459893048128343, "grad_norm": 0.1669921875, "learning_rate": 2.75181808634419e-06, "loss": 1.0259, "num_tokens": 23172015830.0, "step": 9795 }, { "epoch": 1.7461675579322637, "grad_norm": 0.169921875, "learning_rate": 2.7507789185394384e-06, "loss": 1.0364, "num_tokens": 23178278931.0, "step": 9796 }, { "epoch": 1.7463458110516934, "grad_norm": 0.1708984375, "learning_rate": 2.749740438144024e-06, "loss": 0.9979, "num_tokens": 23184561268.0, "step": 9797 }, { "epoch": 1.746524064171123, "grad_norm": 0.1689453125, "learning_rate": 2.748702645244485e-06, "loss": 0.9925, "num_tokens": 23190821376.0, "step": 9798 }, { "epoch": 1.7467023172905525, "grad_norm": 0.1708984375, "learning_rate": 2.7476655399272982e-06, "loss": 1.0202, "num_tokens": 23197104376.0, "step": 9799 }, { "epoch": 1.7468805704099821, "grad_norm": 0.169921875, "learning_rate": 2.746629122278886e-06, "loss": 0.9834, "num_tokens": 23203365861.0, "step": 9800 }, { "epoch": 1.7470588235294118, "grad_norm": 0.169921875, "learning_rate": 2.7455933923856153e-06, "loss": 1.0378, "num_tokens": 23209649421.0, "step": 9801 }, { "epoch": 1.7472370766488412, "grad_norm": 0.181640625, "learning_rate": 2.7445583503337935e-06, "loss": 1.0144, "num_tokens": 23215930371.0, "step": 9802 }, { "epoch": 1.747415329768271, "grad_norm": 0.1708984375, "learning_rate": 2.7435239962096706e-06, "loss": 1.0059, "num_tokens": 23222207929.0, "step": 9803 }, { "epoch": 1.7475935828877005, "grad_norm": 0.1689453125, "learning_rate": 2.7424903300994394e-06, "loss": 1.0185, "num_tokens": 23228444257.0, "step": 9804 }, { "epoch": 1.7477718360071302, "grad_norm": 0.171875, "learning_rate": 2.741457352089234e-06, "loss": 1.0323, "num_tokens": 23234697525.0, "step": 9805 }, { "epoch": 1.7479500891265598, "grad_norm": 0.169921875, "learning_rate": 2.740425062265135e-06, "loss": 1.0238, "num_tokens": 23240977703.0, "step": 9806 }, { "epoch": 1.7481283422459892, "grad_norm": 0.1748046875, "learning_rate": 2.739393460713161e-06, "loss": 1.0349, "num_tokens": 23247260255.0, "step": 9807 }, { "epoch": 1.7483065953654189, "grad_norm": 0.171875, "learning_rate": 2.7383625475192775e-06, "loss": 1.0375, "num_tokens": 23253541348.0, "step": 9808 }, { "epoch": 1.7484848484848485, "grad_norm": 0.1669921875, "learning_rate": 2.7373323227693915e-06, "loss": 1.0065, "num_tokens": 23259799469.0, "step": 9809 }, { "epoch": 1.748663101604278, "grad_norm": 0.1728515625, "learning_rate": 2.73630278654935e-06, "loss": 1.0278, "num_tokens": 23266083943.0, "step": 9810 }, { "epoch": 1.7488413547237078, "grad_norm": 0.173828125, "learning_rate": 2.7352739389449444e-06, "loss": 1.0159, "num_tokens": 23272365802.0, "step": 9811 }, { "epoch": 1.7490196078431373, "grad_norm": 0.17578125, "learning_rate": 2.73424578004191e-06, "loss": 1.0288, "num_tokens": 23278617607.0, "step": 9812 }, { "epoch": 1.7491978609625667, "grad_norm": 0.16796875, "learning_rate": 2.733218309925922e-06, "loss": 1.0035, "num_tokens": 23284888639.0, "step": 9813 }, { "epoch": 1.7493761140819966, "grad_norm": 0.16796875, "learning_rate": 2.7321915286825993e-06, "loss": 1.006, "num_tokens": 23291170081.0, "step": 9814 }, { "epoch": 1.749554367201426, "grad_norm": 0.1728515625, "learning_rate": 2.7311654363975064e-06, "loss": 1.0301, "num_tokens": 23297417106.0, "step": 9815 }, { "epoch": 1.7497326203208556, "grad_norm": 0.1748046875, "learning_rate": 2.7301400331561473e-06, "loss": 0.9951, "num_tokens": 23303657869.0, "step": 9816 }, { "epoch": 1.7499108734402853, "grad_norm": 0.173828125, "learning_rate": 2.7291153190439666e-06, "loss": 1.0342, "num_tokens": 23309910254.0, "step": 9817 }, { "epoch": 1.7500891265597147, "grad_norm": 0.173828125, "learning_rate": 2.728091294146356e-06, "loss": 1.0082, "num_tokens": 23316194834.0, "step": 9818 }, { "epoch": 1.7502673796791444, "grad_norm": 0.1708984375, "learning_rate": 2.727067958548649e-06, "loss": 1.0187, "num_tokens": 23322478050.0, "step": 9819 }, { "epoch": 1.750445632798574, "grad_norm": 0.169921875, "learning_rate": 2.7260453123361174e-06, "loss": 1.0141, "num_tokens": 23328762279.0, "step": 9820 }, { "epoch": 1.7506238859180034, "grad_norm": 0.1708984375, "learning_rate": 2.7250233555939788e-06, "loss": 0.9973, "num_tokens": 23335047710.0, "step": 9821 }, { "epoch": 1.7508021390374333, "grad_norm": 0.1689453125, "learning_rate": 2.724002088407394e-06, "loss": 1.0109, "num_tokens": 23341322548.0, "step": 9822 }, { "epoch": 1.7509803921568627, "grad_norm": 0.1708984375, "learning_rate": 2.7229815108614665e-06, "loss": 1.0214, "num_tokens": 23347604991.0, "step": 9823 }, { "epoch": 1.7511586452762922, "grad_norm": 0.1708984375, "learning_rate": 2.7219616230412405e-06, "loss": 1.0218, "num_tokens": 23353864257.0, "step": 9824 }, { "epoch": 1.751336898395722, "grad_norm": 0.171875, "learning_rate": 2.7209424250317008e-06, "loss": 1.0479, "num_tokens": 23360136018.0, "step": 9825 }, { "epoch": 1.7515151515151515, "grad_norm": 0.1796875, "learning_rate": 2.7199239169177817e-06, "loss": 1.0174, "num_tokens": 23366400798.0, "step": 9826 }, { "epoch": 1.7516934046345811, "grad_norm": 0.173828125, "learning_rate": 2.7189060987843553e-06, "loss": 1.0198, "num_tokens": 23372683645.0, "step": 9827 }, { "epoch": 1.7518716577540108, "grad_norm": 0.1689453125, "learning_rate": 2.717888970716231e-06, "loss": 1.0042, "num_tokens": 23378968100.0, "step": 9828 }, { "epoch": 1.7520499108734402, "grad_norm": 0.173828125, "learning_rate": 2.7168725327981728e-06, "loss": 1.0192, "num_tokens": 23385203422.0, "step": 9829 }, { "epoch": 1.7522281639928698, "grad_norm": 0.1767578125, "learning_rate": 2.7158567851148775e-06, "loss": 1.0002, "num_tokens": 23391486554.0, "step": 9830 }, { "epoch": 1.7524064171122995, "grad_norm": 0.1708984375, "learning_rate": 2.7148417277509897e-06, "loss": 0.9923, "num_tokens": 23397772013.0, "step": 9831 }, { "epoch": 1.752584670231729, "grad_norm": 0.1728515625, "learning_rate": 2.71382736079109e-06, "loss": 0.9985, "num_tokens": 23404041100.0, "step": 9832 }, { "epoch": 1.7527629233511588, "grad_norm": 0.171875, "learning_rate": 2.712813684319711e-06, "loss": 0.9936, "num_tokens": 23410289182.0, "step": 9833 }, { "epoch": 1.7529411764705882, "grad_norm": 0.173828125, "learning_rate": 2.7118006984213208e-06, "loss": 1.0296, "num_tokens": 23416542554.0, "step": 9834 }, { "epoch": 1.7531194295900179, "grad_norm": 0.16796875, "learning_rate": 2.710788403180329e-06, "loss": 0.9993, "num_tokens": 23422793553.0, "step": 9835 }, { "epoch": 1.7532976827094475, "grad_norm": 0.171875, "learning_rate": 2.7097767986810943e-06, "loss": 1.0425, "num_tokens": 23429060433.0, "step": 9836 }, { "epoch": 1.753475935828877, "grad_norm": 0.1728515625, "learning_rate": 2.7087658850079126e-06, "loss": 1.0082, "num_tokens": 23435307003.0, "step": 9837 }, { "epoch": 1.7536541889483066, "grad_norm": 0.1728515625, "learning_rate": 2.7077556622450217e-06, "loss": 1.0307, "num_tokens": 23441591210.0, "step": 9838 }, { "epoch": 1.7538324420677363, "grad_norm": 0.173828125, "learning_rate": 2.706746130476605e-06, "loss": 1.0327, "num_tokens": 23447872468.0, "step": 9839 }, { "epoch": 1.7540106951871657, "grad_norm": 0.171875, "learning_rate": 2.705737289786788e-06, "loss": 1.0508, "num_tokens": 23454155225.0, "step": 9840 }, { "epoch": 1.7541889483065953, "grad_norm": 0.173828125, "learning_rate": 2.704729140259637e-06, "loss": 1.0058, "num_tokens": 23460440950.0, "step": 9841 }, { "epoch": 1.754367201426025, "grad_norm": 0.177734375, "learning_rate": 2.70372168197916e-06, "loss": 1.0434, "num_tokens": 23466711556.0, "step": 9842 }, { "epoch": 1.7545454545454544, "grad_norm": 0.1767578125, "learning_rate": 2.7027149150293115e-06, "loss": 1.0181, "num_tokens": 23472974273.0, "step": 9843 }, { "epoch": 1.7547237076648843, "grad_norm": 0.1748046875, "learning_rate": 2.7017088394939827e-06, "loss": 1.0328, "num_tokens": 23479244292.0, "step": 9844 }, { "epoch": 1.7549019607843137, "grad_norm": 0.1669921875, "learning_rate": 2.700703455457012e-06, "loss": 0.9946, "num_tokens": 23485526887.0, "step": 9845 }, { "epoch": 1.7550802139037434, "grad_norm": 0.171875, "learning_rate": 2.699698763002176e-06, "loss": 1.0211, "num_tokens": 23491811118.0, "step": 9846 }, { "epoch": 1.755258467023173, "grad_norm": 0.1767578125, "learning_rate": 2.698694762213198e-06, "loss": 1.0113, "num_tokens": 23498094540.0, "step": 9847 }, { "epoch": 1.7554367201426024, "grad_norm": 0.169921875, "learning_rate": 2.697691453173742e-06, "loss": 1.0292, "num_tokens": 23504377940.0, "step": 9848 }, { "epoch": 1.755614973262032, "grad_norm": 0.1826171875, "learning_rate": 2.6966888359674126e-06, "loss": 1.0179, "num_tokens": 23510663356.0, "step": 9849 }, { "epoch": 1.7557932263814617, "grad_norm": 0.169921875, "learning_rate": 2.6956869106777585e-06, "loss": 1.031, "num_tokens": 23516946640.0, "step": 9850 }, { "epoch": 1.7559714795008912, "grad_norm": 0.1669921875, "learning_rate": 2.694685677388271e-06, "loss": 1.0306, "num_tokens": 23523216178.0, "step": 9851 }, { "epoch": 1.7561497326203208, "grad_norm": 0.1748046875, "learning_rate": 2.693685136182382e-06, "loss": 0.9838, "num_tokens": 23529470997.0, "step": 9852 }, { "epoch": 1.7563279857397505, "grad_norm": 0.16796875, "learning_rate": 2.692685287143465e-06, "loss": 1.0062, "num_tokens": 23535754399.0, "step": 9853 }, { "epoch": 1.75650623885918, "grad_norm": 0.17578125, "learning_rate": 2.691686130354842e-06, "loss": 1.0167, "num_tokens": 23542037837.0, "step": 9854 }, { "epoch": 1.7566844919786098, "grad_norm": 0.17578125, "learning_rate": 2.6906876658997705e-06, "loss": 1.0345, "num_tokens": 23548316917.0, "step": 9855 }, { "epoch": 1.7568627450980392, "grad_norm": 0.1728515625, "learning_rate": 2.6896898938614545e-06, "loss": 1.0501, "num_tokens": 23554568586.0, "step": 9856 }, { "epoch": 1.7570409982174688, "grad_norm": 0.173828125, "learning_rate": 2.688692814323036e-06, "loss": 0.987, "num_tokens": 23560854732.0, "step": 9857 }, { "epoch": 1.7572192513368985, "grad_norm": 0.1767578125, "learning_rate": 2.6876964273676034e-06, "loss": 1.0043, "num_tokens": 23567112419.0, "step": 9858 }, { "epoch": 1.757397504456328, "grad_norm": 0.17578125, "learning_rate": 2.6867007330781846e-06, "loss": 1.043, "num_tokens": 23573385873.0, "step": 9859 }, { "epoch": 1.7575757575757576, "grad_norm": 0.1708984375, "learning_rate": 2.6857057315377517e-06, "loss": 0.9912, "num_tokens": 23579669544.0, "step": 9860 }, { "epoch": 1.7577540106951872, "grad_norm": 0.1708984375, "learning_rate": 2.6847114228292195e-06, "loss": 1.0614, "num_tokens": 23585886673.0, "step": 9861 }, { "epoch": 1.7579322638146166, "grad_norm": 0.1708984375, "learning_rate": 2.683717807035442e-06, "loss": 1.0443, "num_tokens": 23592153520.0, "step": 9862 }, { "epoch": 1.7581105169340463, "grad_norm": 0.173828125, "learning_rate": 2.6827248842392197e-06, "loss": 1.016, "num_tokens": 23598400822.0, "step": 9863 }, { "epoch": 1.758288770053476, "grad_norm": 0.169921875, "learning_rate": 2.681732654523289e-06, "loss": 1.0019, "num_tokens": 23604685426.0, "step": 9864 }, { "epoch": 1.7584670231729054, "grad_norm": 0.1689453125, "learning_rate": 2.68074111797034e-06, "loss": 0.9832, "num_tokens": 23610968388.0, "step": 9865 }, { "epoch": 1.7586452762923352, "grad_norm": 0.1689453125, "learning_rate": 2.6797502746629905e-06, "loss": 0.9921, "num_tokens": 23617253428.0, "step": 9866 }, { "epoch": 1.7588235294117647, "grad_norm": 0.1689453125, "learning_rate": 2.678760124683809e-06, "loss": 1.005, "num_tokens": 23623537317.0, "step": 9867 }, { "epoch": 1.7590017825311943, "grad_norm": 0.171875, "learning_rate": 2.677770668115307e-06, "loss": 1.0348, "num_tokens": 23629809286.0, "step": 9868 }, { "epoch": 1.759180035650624, "grad_norm": 0.177734375, "learning_rate": 2.676781905039936e-06, "loss": 0.9851, "num_tokens": 23636093686.0, "step": 9869 }, { "epoch": 1.7593582887700534, "grad_norm": 0.16796875, "learning_rate": 2.6757938355400886e-06, "loss": 1.0223, "num_tokens": 23642379118.0, "step": 9870 }, { "epoch": 1.759536541889483, "grad_norm": 0.17578125, "learning_rate": 2.6748064596981e-06, "loss": 1.0285, "num_tokens": 23648634046.0, "step": 9871 }, { "epoch": 1.7597147950089127, "grad_norm": 0.1748046875, "learning_rate": 2.6738197775962517e-06, "loss": 1.0271, "num_tokens": 23654893448.0, "step": 9872 }, { "epoch": 1.7598930481283421, "grad_norm": 0.16796875, "learning_rate": 2.6728337893167633e-06, "loss": 1.028, "num_tokens": 23661151613.0, "step": 9873 }, { "epoch": 1.760071301247772, "grad_norm": 0.1650390625, "learning_rate": 2.6718484949417934e-06, "loss": 1.0399, "num_tokens": 23667432273.0, "step": 9874 }, { "epoch": 1.7602495543672014, "grad_norm": 0.1650390625, "learning_rate": 2.6708638945534516e-06, "loss": 1.0273, "num_tokens": 23673714905.0, "step": 9875 }, { "epoch": 1.7604278074866309, "grad_norm": 0.171875, "learning_rate": 2.6698799882337822e-06, "loss": 1.0258, "num_tokens": 23679998303.0, "step": 9876 }, { "epoch": 1.7606060606060607, "grad_norm": 0.1728515625, "learning_rate": 2.6688967760647753e-06, "loss": 1.036, "num_tokens": 23686251379.0, "step": 9877 }, { "epoch": 1.7607843137254902, "grad_norm": 0.1708984375, "learning_rate": 2.66791425812836e-06, "loss": 1.0085, "num_tokens": 23692534256.0, "step": 9878 }, { "epoch": 1.7609625668449198, "grad_norm": 0.169921875, "learning_rate": 2.6669324345064147e-06, "loss": 1.0088, "num_tokens": 23698804337.0, "step": 9879 }, { "epoch": 1.7611408199643495, "grad_norm": 0.169921875, "learning_rate": 2.6659513052807523e-06, "loss": 1.0133, "num_tokens": 23705081027.0, "step": 9880 }, { "epoch": 1.7613190730837789, "grad_norm": 0.173828125, "learning_rate": 2.6649708705331267e-06, "loss": 1.0, "num_tokens": 23711337081.0, "step": 9881 }, { "epoch": 1.7614973262032085, "grad_norm": 0.1728515625, "learning_rate": 2.663991130345243e-06, "loss": 0.9925, "num_tokens": 23717591724.0, "step": 9882 }, { "epoch": 1.7616755793226382, "grad_norm": 0.1669921875, "learning_rate": 2.663012084798742e-06, "loss": 1.0328, "num_tokens": 23723832670.0, "step": 9883 }, { "epoch": 1.7618538324420676, "grad_norm": 0.169921875, "learning_rate": 2.6620337339752068e-06, "loss": 0.98, "num_tokens": 23730116384.0, "step": 9884 }, { "epoch": 1.7620320855614975, "grad_norm": 0.177734375, "learning_rate": 2.661056077956163e-06, "loss": 0.9881, "num_tokens": 23736400346.0, "step": 9885 }, { "epoch": 1.762210338680927, "grad_norm": 0.1787109375, "learning_rate": 2.6600791168230803e-06, "loss": 1.0268, "num_tokens": 23742627901.0, "step": 9886 }, { "epoch": 1.7623885918003563, "grad_norm": 0.1708984375, "learning_rate": 2.659102850657369e-06, "loss": 1.0223, "num_tokens": 23748848264.0, "step": 9887 }, { "epoch": 1.7625668449197862, "grad_norm": 0.173828125, "learning_rate": 2.6581272795403807e-06, "loss": 1.0341, "num_tokens": 23755127661.0, "step": 9888 }, { "epoch": 1.7627450980392156, "grad_norm": 0.169921875, "learning_rate": 2.6571524035534115e-06, "loss": 1.0146, "num_tokens": 23761412269.0, "step": 9889 }, { "epoch": 1.7629233511586453, "grad_norm": 0.1787109375, "learning_rate": 2.6561782227776957e-06, "loss": 1.039, "num_tokens": 23767664528.0, "step": 9890 }, { "epoch": 1.763101604278075, "grad_norm": 0.169921875, "learning_rate": 2.6552047372944127e-06, "loss": 1.029, "num_tokens": 23773949004.0, "step": 9891 }, { "epoch": 1.7632798573975044, "grad_norm": 0.1728515625, "learning_rate": 2.6542319471846844e-06, "loss": 0.9949, "num_tokens": 23780207641.0, "step": 9892 }, { "epoch": 1.763458110516934, "grad_norm": 0.169921875, "learning_rate": 2.6532598525295728e-06, "loss": 1.0444, "num_tokens": 23786473614.0, "step": 9893 }, { "epoch": 1.7636363636363637, "grad_norm": 0.1728515625, "learning_rate": 2.652288453410083e-06, "loss": 0.9929, "num_tokens": 23792736837.0, "step": 9894 }, { "epoch": 1.763814616755793, "grad_norm": 0.1708984375, "learning_rate": 2.651317749907162e-06, "loss": 1.0448, "num_tokens": 23799018906.0, "step": 9895 }, { "epoch": 1.763992869875223, "grad_norm": 0.171875, "learning_rate": 2.6503477421016986e-06, "loss": 1.0052, "num_tokens": 23805301879.0, "step": 9896 }, { "epoch": 1.7641711229946524, "grad_norm": 0.173828125, "learning_rate": 2.6493784300745235e-06, "loss": 1.0108, "num_tokens": 23811575397.0, "step": 9897 }, { "epoch": 1.764349376114082, "grad_norm": 0.1748046875, "learning_rate": 2.6484098139064102e-06, "loss": 1.0216, "num_tokens": 23817826876.0, "step": 9898 }, { "epoch": 1.7645276292335117, "grad_norm": 0.171875, "learning_rate": 2.6474418936780716e-06, "loss": 1.0093, "num_tokens": 23824104586.0, "step": 9899 }, { "epoch": 1.7647058823529411, "grad_norm": 0.17578125, "learning_rate": 2.646474669470168e-06, "loss": 1.0355, "num_tokens": 23830363134.0, "step": 9900 }, { "epoch": 1.7648841354723708, "grad_norm": 0.169921875, "learning_rate": 2.645508141363297e-06, "loss": 1.0036, "num_tokens": 23836646234.0, "step": 9901 }, { "epoch": 1.7650623885918004, "grad_norm": 0.1708984375, "learning_rate": 2.644542309437999e-06, "loss": 1.0188, "num_tokens": 23842905075.0, "step": 9902 }, { "epoch": 1.7652406417112299, "grad_norm": 0.1689453125, "learning_rate": 2.6435771737747582e-06, "loss": 1.0256, "num_tokens": 23849188573.0, "step": 9903 }, { "epoch": 1.7654188948306595, "grad_norm": 0.1669921875, "learning_rate": 2.6426127344539975e-06, "loss": 1.0152, "num_tokens": 23855450070.0, "step": 9904 }, { "epoch": 1.7655971479500892, "grad_norm": 0.173828125, "learning_rate": 2.6416489915560862e-06, "loss": 1.0184, "num_tokens": 23861705722.0, "step": 9905 }, { "epoch": 1.7657754010695186, "grad_norm": 0.1708984375, "learning_rate": 2.6406859451613303e-06, "loss": 1.0088, "num_tokens": 23867990736.0, "step": 9906 }, { "epoch": 1.7659536541889485, "grad_norm": 0.1650390625, "learning_rate": 2.6397235953499835e-06, "loss": 1.0171, "num_tokens": 23874264527.0, "step": 9907 }, { "epoch": 1.7661319073083779, "grad_norm": 0.1689453125, "learning_rate": 2.6387619422022383e-06, "loss": 0.9955, "num_tokens": 23880548601.0, "step": 9908 }, { "epoch": 1.7663101604278075, "grad_norm": 0.173828125, "learning_rate": 2.637800985798228e-06, "loss": 1.0015, "num_tokens": 23886792098.0, "step": 9909 }, { "epoch": 1.7664884135472372, "grad_norm": 0.1689453125, "learning_rate": 2.6368407262180297e-06, "loss": 1.0334, "num_tokens": 23893063457.0, "step": 9910 }, { "epoch": 1.7666666666666666, "grad_norm": 0.171875, "learning_rate": 2.635881163541665e-06, "loss": 1.0206, "num_tokens": 23899345986.0, "step": 9911 }, { "epoch": 1.7668449197860963, "grad_norm": 0.1796875, "learning_rate": 2.634922297849089e-06, "loss": 1.0131, "num_tokens": 23905630194.0, "step": 9912 }, { "epoch": 1.767023172905526, "grad_norm": 0.1650390625, "learning_rate": 2.633964129220207e-06, "loss": 1.0384, "num_tokens": 23911890448.0, "step": 9913 }, { "epoch": 1.7672014260249553, "grad_norm": 0.1748046875, "learning_rate": 2.633006657734864e-06, "loss": 1.0475, "num_tokens": 23918170635.0, "step": 9914 }, { "epoch": 1.767379679144385, "grad_norm": 0.171875, "learning_rate": 2.6320498834728453e-06, "loss": 0.9973, "num_tokens": 23924430516.0, "step": 9915 }, { "epoch": 1.7675579322638146, "grad_norm": 0.171875, "learning_rate": 2.6310938065138804e-06, "loss": 1.021, "num_tokens": 23930712598.0, "step": 9916 }, { "epoch": 1.767736185383244, "grad_norm": 0.1689453125, "learning_rate": 2.6301384269376374e-06, "loss": 1.0162, "num_tokens": 23936995088.0, "step": 9917 }, { "epoch": 1.767914438502674, "grad_norm": 0.1748046875, "learning_rate": 2.6291837448237294e-06, "loss": 1.0446, "num_tokens": 23943275780.0, "step": 9918 }, { "epoch": 1.7680926916221034, "grad_norm": 0.171875, "learning_rate": 2.6282297602517133e-06, "loss": 1.0316, "num_tokens": 23949558743.0, "step": 9919 }, { "epoch": 1.768270944741533, "grad_norm": 0.16796875, "learning_rate": 2.627276473301079e-06, "loss": 1.0186, "num_tokens": 23955841715.0, "step": 9920 }, { "epoch": 1.7684491978609627, "grad_norm": 0.1728515625, "learning_rate": 2.6263238840512662e-06, "loss": 1.038, "num_tokens": 23962125446.0, "step": 9921 }, { "epoch": 1.768627450980392, "grad_norm": 0.16796875, "learning_rate": 2.625371992581656e-06, "loss": 1.0465, "num_tokens": 23968381187.0, "step": 9922 }, { "epoch": 1.7688057040998217, "grad_norm": 0.1669921875, "learning_rate": 2.6244207989715693e-06, "loss": 0.9963, "num_tokens": 23974662672.0, "step": 9923 }, { "epoch": 1.7689839572192514, "grad_norm": 0.1708984375, "learning_rate": 2.6234703033002667e-06, "loss": 1.0094, "num_tokens": 23980929016.0, "step": 9924 }, { "epoch": 1.7691622103386808, "grad_norm": 0.169921875, "learning_rate": 2.6225205056469583e-06, "loss": 1.031, "num_tokens": 23987211715.0, "step": 9925 }, { "epoch": 1.7693404634581105, "grad_norm": 0.17578125, "learning_rate": 2.621571406090788e-06, "loss": 1.024, "num_tokens": 23993491813.0, "step": 9926 }, { "epoch": 1.7695187165775401, "grad_norm": 0.169921875, "learning_rate": 2.6206230047108427e-06, "loss": 1.002, "num_tokens": 23999737269.0, "step": 9927 }, { "epoch": 1.7696969696969695, "grad_norm": 0.1669921875, "learning_rate": 2.6196753015861533e-06, "loss": 0.992, "num_tokens": 24006006687.0, "step": 9928 }, { "epoch": 1.7698752228163994, "grad_norm": 0.177734375, "learning_rate": 2.6187282967956944e-06, "loss": 1.0259, "num_tokens": 24012267491.0, "step": 9929 }, { "epoch": 1.7700534759358288, "grad_norm": 0.1708984375, "learning_rate": 2.61778199041838e-06, "loss": 1.0117, "num_tokens": 24018550146.0, "step": 9930 }, { "epoch": 1.7702317290552585, "grad_norm": 0.177734375, "learning_rate": 2.6168363825330647e-06, "loss": 1.0292, "num_tokens": 24024771972.0, "step": 9931 }, { "epoch": 1.7704099821746881, "grad_norm": 0.171875, "learning_rate": 2.6158914732185448e-06, "loss": 1.0353, "num_tokens": 24031054475.0, "step": 9932 }, { "epoch": 1.7705882352941176, "grad_norm": 0.1689453125, "learning_rate": 2.6149472625535626e-06, "loss": 0.9732, "num_tokens": 24037312376.0, "step": 9933 }, { "epoch": 1.7707664884135472, "grad_norm": 0.1767578125, "learning_rate": 2.6140037506168002e-06, "loss": 0.9885, "num_tokens": 24043543144.0, "step": 9934 }, { "epoch": 1.7709447415329769, "grad_norm": 0.1728515625, "learning_rate": 2.613060937486876e-06, "loss": 1.0015, "num_tokens": 24049825048.0, "step": 9935 }, { "epoch": 1.7711229946524063, "grad_norm": 0.1669921875, "learning_rate": 2.6121188232423596e-06, "loss": 1.0226, "num_tokens": 24056108768.0, "step": 9936 }, { "epoch": 1.771301247771836, "grad_norm": 0.16796875, "learning_rate": 2.6111774079617546e-06, "loss": 1.0393, "num_tokens": 24062392223.0, "step": 9937 }, { "epoch": 1.7714795008912656, "grad_norm": 0.1669921875, "learning_rate": 2.6102366917235104e-06, "loss": 1.0145, "num_tokens": 24068676633.0, "step": 9938 }, { "epoch": 1.771657754010695, "grad_norm": 0.173828125, "learning_rate": 2.6092966746060162e-06, "loss": 1.0069, "num_tokens": 24074959993.0, "step": 9939 }, { "epoch": 1.771836007130125, "grad_norm": 0.177734375, "learning_rate": 2.608357356687606e-06, "loss": 1.021, "num_tokens": 24081234021.0, "step": 9940 }, { "epoch": 1.7720142602495543, "grad_norm": 0.16796875, "learning_rate": 2.607418738046551e-06, "loss": 1.0049, "num_tokens": 24087496381.0, "step": 9941 }, { "epoch": 1.772192513368984, "grad_norm": 0.1708984375, "learning_rate": 2.6064808187610685e-06, "loss": 0.98, "num_tokens": 24093753946.0, "step": 9942 }, { "epoch": 1.7723707664884136, "grad_norm": 0.17578125, "learning_rate": 2.6055435989093143e-06, "loss": 1.0391, "num_tokens": 24100037862.0, "step": 9943 }, { "epoch": 1.772549019607843, "grad_norm": 0.171875, "learning_rate": 2.604607078569387e-06, "loss": 1.0274, "num_tokens": 24106294802.0, "step": 9944 }, { "epoch": 1.7727272727272727, "grad_norm": 0.166015625, "learning_rate": 2.603671257819327e-06, "loss": 1.0116, "num_tokens": 24112549380.0, "step": 9945 }, { "epoch": 1.7729055258467024, "grad_norm": 0.169921875, "learning_rate": 2.6027361367371155e-06, "loss": 1.0162, "num_tokens": 24118831643.0, "step": 9946 }, { "epoch": 1.7730837789661318, "grad_norm": 0.1748046875, "learning_rate": 2.60180171540068e-06, "loss": 1.0075, "num_tokens": 24125075500.0, "step": 9947 }, { "epoch": 1.7732620320855617, "grad_norm": 0.1689453125, "learning_rate": 2.600867993887882e-06, "loss": 1.0058, "num_tokens": 24131361069.0, "step": 9948 }, { "epoch": 1.773440285204991, "grad_norm": 0.1669921875, "learning_rate": 2.5999349722765322e-06, "loss": 1.0128, "num_tokens": 24137625756.0, "step": 9949 }, { "epoch": 1.7736185383244205, "grad_norm": 0.1728515625, "learning_rate": 2.5990026506443766e-06, "loss": 1.0128, "num_tokens": 24143887326.0, "step": 9950 }, { "epoch": 1.7737967914438504, "grad_norm": 0.1728515625, "learning_rate": 2.5980710290691065e-06, "loss": 0.9833, "num_tokens": 24150107682.0, "step": 9951 }, { "epoch": 1.7739750445632798, "grad_norm": 0.17578125, "learning_rate": 2.5971401076283557e-06, "loss": 1.0298, "num_tokens": 24156390527.0, "step": 9952 }, { "epoch": 1.7741532976827095, "grad_norm": 0.16796875, "learning_rate": 2.5962098863996944e-06, "loss": 1.0192, "num_tokens": 24162674671.0, "step": 9953 }, { "epoch": 1.7743315508021391, "grad_norm": 0.1708984375, "learning_rate": 2.5952803654606417e-06, "loss": 1.0035, "num_tokens": 24168917299.0, "step": 9954 }, { "epoch": 1.7745098039215685, "grad_norm": 0.171875, "learning_rate": 2.594351544888654e-06, "loss": 1.0107, "num_tokens": 24175159174.0, "step": 9955 }, { "epoch": 1.7746880570409982, "grad_norm": 0.173828125, "learning_rate": 2.5934234247611297e-06, "loss": 1.027, "num_tokens": 24181428324.0, "step": 9956 }, { "epoch": 1.7748663101604278, "grad_norm": 0.1689453125, "learning_rate": 2.5924960051554098e-06, "loss": 1.0365, "num_tokens": 24187711286.0, "step": 9957 }, { "epoch": 1.7750445632798573, "grad_norm": 0.169921875, "learning_rate": 2.5915692861487755e-06, "loss": 1.0099, "num_tokens": 24193994548.0, "step": 9958 }, { "epoch": 1.7752228163992871, "grad_norm": 0.1708984375, "learning_rate": 2.590643267818451e-06, "loss": 1.0276, "num_tokens": 24200271569.0, "step": 9959 }, { "epoch": 1.7754010695187166, "grad_norm": 0.1748046875, "learning_rate": 2.5897179502415997e-06, "loss": 0.999, "num_tokens": 24206504803.0, "step": 9960 }, { "epoch": 1.7755793226381462, "grad_norm": 0.1728515625, "learning_rate": 2.5887933334953313e-06, "loss": 1.0209, "num_tokens": 24212790064.0, "step": 9961 }, { "epoch": 1.7757575757575759, "grad_norm": 0.171875, "learning_rate": 2.5878694176566942e-06, "loss": 0.9974, "num_tokens": 24219074011.0, "step": 9962 }, { "epoch": 1.7759358288770053, "grad_norm": 0.1708984375, "learning_rate": 2.586946202802677e-06, "loss": 1.0285, "num_tokens": 24225359154.0, "step": 9963 }, { "epoch": 1.776114081996435, "grad_norm": 0.1728515625, "learning_rate": 2.58602368901021e-06, "loss": 0.9908, "num_tokens": 24231613946.0, "step": 9964 }, { "epoch": 1.7762923351158646, "grad_norm": 0.16796875, "learning_rate": 2.585101876356172e-06, "loss": 1.0114, "num_tokens": 24237895699.0, "step": 9965 }, { "epoch": 1.776470588235294, "grad_norm": 0.1708984375, "learning_rate": 2.584180764917373e-06, "loss": 1.0245, "num_tokens": 24244180830.0, "step": 9966 }, { "epoch": 1.7766488413547237, "grad_norm": 0.1708984375, "learning_rate": 2.583260354770568e-06, "loss": 1.001, "num_tokens": 24250452211.0, "step": 9967 }, { "epoch": 1.7768270944741533, "grad_norm": 0.169921875, "learning_rate": 2.582340645992461e-06, "loss": 1.0122, "num_tokens": 24256734888.0, "step": 9968 }, { "epoch": 1.7770053475935828, "grad_norm": 0.1689453125, "learning_rate": 2.5814216386596874e-06, "loss": 0.9982, "num_tokens": 24263018811.0, "step": 9969 }, { "epoch": 1.7771836007130126, "grad_norm": 0.169921875, "learning_rate": 2.580503332848828e-06, "loss": 1.0736, "num_tokens": 24269290154.0, "step": 9970 }, { "epoch": 1.777361853832442, "grad_norm": 0.1767578125, "learning_rate": 2.579585728636407e-06, "loss": 1.009, "num_tokens": 24275573066.0, "step": 9971 }, { "epoch": 1.7775401069518717, "grad_norm": 0.166015625, "learning_rate": 2.578668826098888e-06, "loss": 0.9813, "num_tokens": 24281859244.0, "step": 9972 }, { "epoch": 1.7777183600713014, "grad_norm": 0.1787109375, "learning_rate": 2.5777526253126785e-06, "loss": 1.04, "num_tokens": 24288143544.0, "step": 9973 }, { "epoch": 1.7778966131907308, "grad_norm": 0.177734375, "learning_rate": 2.576837126354121e-06, "loss": 1.0331, "num_tokens": 24294411117.0, "step": 9974 }, { "epoch": 1.7780748663101604, "grad_norm": 0.173828125, "learning_rate": 2.5759223292995085e-06, "loss": 1.0165, "num_tokens": 24300671015.0, "step": 9975 }, { "epoch": 1.77825311942959, "grad_norm": 0.1767578125, "learning_rate": 2.5750082342250697e-06, "loss": 0.9634, "num_tokens": 24306956008.0, "step": 9976 }, { "epoch": 1.7784313725490195, "grad_norm": 0.1708984375, "learning_rate": 2.5740948412069766e-06, "loss": 1.018, "num_tokens": 24313223570.0, "step": 9977 }, { "epoch": 1.7786096256684492, "grad_norm": 0.1669921875, "learning_rate": 2.573182150321341e-06, "loss": 1.0109, "num_tokens": 24319507370.0, "step": 9978 }, { "epoch": 1.7787878787878788, "grad_norm": 0.16796875, "learning_rate": 2.5722701616442202e-06, "loss": 1.0096, "num_tokens": 24325787643.0, "step": 9979 }, { "epoch": 1.7789661319073082, "grad_norm": 0.171875, "learning_rate": 2.571358875251611e-06, "loss": 1.0117, "num_tokens": 24332071719.0, "step": 9980 }, { "epoch": 1.779144385026738, "grad_norm": 0.1796875, "learning_rate": 2.5704482912194463e-06, "loss": 1.0371, "num_tokens": 24338278759.0, "step": 9981 }, { "epoch": 1.7793226381461675, "grad_norm": 0.16796875, "learning_rate": 2.56953840962361e-06, "loss": 1.0108, "num_tokens": 24344552988.0, "step": 9982 }, { "epoch": 1.7795008912655972, "grad_norm": 0.1708984375, "learning_rate": 2.5686292305399206e-06, "loss": 0.9816, "num_tokens": 24350832743.0, "step": 9983 }, { "epoch": 1.7796791443850268, "grad_norm": 0.1689453125, "learning_rate": 2.5677207540441407e-06, "loss": 1.018, "num_tokens": 24357110612.0, "step": 9984 }, { "epoch": 1.7798573975044563, "grad_norm": 0.1708984375, "learning_rate": 2.566812980211972e-06, "loss": 1.0303, "num_tokens": 24363388710.0, "step": 9985 }, { "epoch": 1.780035650623886, "grad_norm": 0.181640625, "learning_rate": 2.5659059091190624e-06, "loss": 1.034, "num_tokens": 24369650444.0, "step": 9986 }, { "epoch": 1.7802139037433156, "grad_norm": 0.169921875, "learning_rate": 2.5649995408409966e-06, "loss": 1.019, "num_tokens": 24375853526.0, "step": 9987 }, { "epoch": 1.780392156862745, "grad_norm": 0.171875, "learning_rate": 2.564093875453304e-06, "loss": 0.9881, "num_tokens": 24382138190.0, "step": 9988 }, { "epoch": 1.7805704099821746, "grad_norm": 0.16796875, "learning_rate": 2.5631889130314527e-06, "loss": 1.019, "num_tokens": 24388421956.0, "step": 9989 }, { "epoch": 1.7807486631016043, "grad_norm": 0.173828125, "learning_rate": 2.5622846536508526e-06, "loss": 1.0201, "num_tokens": 24394677277.0, "step": 9990 }, { "epoch": 1.7809269162210337, "grad_norm": 0.169921875, "learning_rate": 2.5613810973868574e-06, "loss": 1.0311, "num_tokens": 24400942675.0, "step": 9991 }, { "epoch": 1.7811051693404636, "grad_norm": 0.1728515625, "learning_rate": 2.5604782443147593e-06, "loss": 1.033, "num_tokens": 24407220568.0, "step": 9992 }, { "epoch": 1.781283422459893, "grad_norm": 0.173828125, "learning_rate": 2.5595760945097943e-06, "loss": 1.0093, "num_tokens": 24413504897.0, "step": 9993 }, { "epoch": 1.7814616755793227, "grad_norm": 0.173828125, "learning_rate": 2.5586746480471388e-06, "loss": 1.0135, "num_tokens": 24419786048.0, "step": 9994 }, { "epoch": 1.7816399286987523, "grad_norm": 0.1748046875, "learning_rate": 2.55777390500191e-06, "loss": 1.01, "num_tokens": 24426028357.0, "step": 9995 }, { "epoch": 1.7818181818181817, "grad_norm": 0.1767578125, "learning_rate": 2.556873865449167e-06, "loss": 1.0403, "num_tokens": 24432290689.0, "step": 9996 }, { "epoch": 1.7819964349376114, "grad_norm": 0.1728515625, "learning_rate": 2.5559745294639106e-06, "loss": 1.0242, "num_tokens": 24438558910.0, "step": 9997 }, { "epoch": 1.782174688057041, "grad_norm": 0.1689453125, "learning_rate": 2.555075897121082e-06, "loss": 1.0232, "num_tokens": 24444812947.0, "step": 9998 }, { "epoch": 1.7823529411764705, "grad_norm": 0.171875, "learning_rate": 2.5541779684955644e-06, "loss": 1.0136, "num_tokens": 24451097144.0, "step": 9999 }, { "epoch": 1.7825311942959001, "grad_norm": 0.171875, "learning_rate": 2.5532807436621827e-06, "loss": 1.0036, "num_tokens": 24457330075.0, "step": 10000 }, { "epoch": 1.7827094474153298, "grad_norm": 0.1767578125, "learning_rate": 2.5523842226957042e-06, "loss": 1.0308, "num_tokens": 24463612495.0, "step": 10001 }, { "epoch": 1.7828877005347592, "grad_norm": 0.171875, "learning_rate": 2.5514884056708342e-06, "loss": 1.0221, "num_tokens": 24469895644.0, "step": 10002 }, { "epoch": 1.783065953654189, "grad_norm": 0.1669921875, "learning_rate": 2.550593292662223e-06, "loss": 1.0043, "num_tokens": 24476171311.0, "step": 10003 }, { "epoch": 1.7832442067736185, "grad_norm": 0.1669921875, "learning_rate": 2.549698883744459e-06, "loss": 1.0078, "num_tokens": 24482426284.0, "step": 10004 }, { "epoch": 1.7834224598930482, "grad_norm": 0.1748046875, "learning_rate": 2.5488051789920746e-06, "loss": 1.0011, "num_tokens": 24488708695.0, "step": 10005 }, { "epoch": 1.7836007130124778, "grad_norm": 0.1806640625, "learning_rate": 2.54791217847954e-06, "loss": 1.0192, "num_tokens": 24494982077.0, "step": 10006 }, { "epoch": 1.7837789661319072, "grad_norm": 0.1689453125, "learning_rate": 2.5470198822812732e-06, "loss": 1.0436, "num_tokens": 24501231839.0, "step": 10007 }, { "epoch": 1.7839572192513369, "grad_norm": 0.1787109375, "learning_rate": 2.5461282904716262e-06, "loss": 1.0105, "num_tokens": 24507515021.0, "step": 10008 }, { "epoch": 1.7841354723707665, "grad_norm": 0.1708984375, "learning_rate": 2.545237403124897e-06, "loss": 1.0292, "num_tokens": 24513797988.0, "step": 10009 }, { "epoch": 1.784313725490196, "grad_norm": 0.1748046875, "learning_rate": 2.5443472203153217e-06, "loss": 1.0039, "num_tokens": 24520056858.0, "step": 10010 }, { "epoch": 1.7844919786096258, "grad_norm": 0.171875, "learning_rate": 2.5434577421170838e-06, "loss": 1.0044, "num_tokens": 24526323598.0, "step": 10011 }, { "epoch": 1.7846702317290553, "grad_norm": 0.16796875, "learning_rate": 2.5425689686042997e-06, "loss": 1.0313, "num_tokens": 24532607589.0, "step": 10012 }, { "epoch": 1.7848484848484847, "grad_norm": 0.1728515625, "learning_rate": 2.5416808998510297e-06, "loss": 1.0366, "num_tokens": 24538875125.0, "step": 10013 }, { "epoch": 1.7850267379679146, "grad_norm": 0.1708984375, "learning_rate": 2.540793535931281e-06, "loss": 1.051, "num_tokens": 24545133766.0, "step": 10014 }, { "epoch": 1.785204991087344, "grad_norm": 0.171875, "learning_rate": 2.5399068769189964e-06, "loss": 1.0357, "num_tokens": 24551368593.0, "step": 10015 }, { "epoch": 1.7853832442067736, "grad_norm": 0.1787109375, "learning_rate": 2.5390209228880603e-06, "loss": 1.0653, "num_tokens": 24557651756.0, "step": 10016 }, { "epoch": 1.7855614973262033, "grad_norm": 0.1748046875, "learning_rate": 2.5381356739122985e-06, "loss": 1.0348, "num_tokens": 24563934984.0, "step": 10017 }, { "epoch": 1.7857397504456327, "grad_norm": 0.1748046875, "learning_rate": 2.5372511300654826e-06, "loss": 1.0648, "num_tokens": 24570208763.0, "step": 10018 }, { "epoch": 1.7859180035650624, "grad_norm": 0.1708984375, "learning_rate": 2.5363672914213206e-06, "loss": 1.0382, "num_tokens": 24576445122.0, "step": 10019 }, { "epoch": 1.786096256684492, "grad_norm": 0.1748046875, "learning_rate": 2.5354841580534597e-06, "loss": 1.0407, "num_tokens": 24582726802.0, "step": 10020 }, { "epoch": 1.7862745098039214, "grad_norm": 0.1708984375, "learning_rate": 2.5346017300354953e-06, "loss": 1.0273, "num_tokens": 24589006893.0, "step": 10021 }, { "epoch": 1.7864527629233513, "grad_norm": 0.1650390625, "learning_rate": 2.5337200074409597e-06, "loss": 0.9944, "num_tokens": 24595290842.0, "step": 10022 }, { "epoch": 1.7866310160427807, "grad_norm": 0.1728515625, "learning_rate": 2.532838990343325e-06, "loss": 0.9971, "num_tokens": 24601574391.0, "step": 10023 }, { "epoch": 1.7868092691622104, "grad_norm": 0.1728515625, "learning_rate": 2.531958678816007e-06, "loss": 1.0246, "num_tokens": 24607859580.0, "step": 10024 }, { "epoch": 1.78698752228164, "grad_norm": 0.171875, "learning_rate": 2.531079072932365e-06, "loss": 1.0394, "num_tokens": 24614143840.0, "step": 10025 }, { "epoch": 1.7871657754010695, "grad_norm": 0.169921875, "learning_rate": 2.530200172765696e-06, "loss": 1.0251, "num_tokens": 24620414276.0, "step": 10026 }, { "epoch": 1.7873440285204991, "grad_norm": 0.171875, "learning_rate": 2.5293219783892353e-06, "loss": 0.9863, "num_tokens": 24626698525.0, "step": 10027 }, { "epoch": 1.7875222816399288, "grad_norm": 0.177734375, "learning_rate": 2.5284444898761668e-06, "loss": 0.9984, "num_tokens": 24632982922.0, "step": 10028 }, { "epoch": 1.7877005347593582, "grad_norm": 0.1748046875, "learning_rate": 2.527567707299611e-06, "loss": 0.9984, "num_tokens": 24639267701.0, "step": 10029 }, { "epoch": 1.7878787878787878, "grad_norm": 0.171875, "learning_rate": 2.5266916307326282e-06, "loss": 0.9889, "num_tokens": 24645533335.0, "step": 10030 }, { "epoch": 1.7880570409982175, "grad_norm": 0.1689453125, "learning_rate": 2.5258162602482244e-06, "loss": 0.9984, "num_tokens": 24651816447.0, "step": 10031 }, { "epoch": 1.788235294117647, "grad_norm": 0.1748046875, "learning_rate": 2.5249415959193445e-06, "loss": 1.0308, "num_tokens": 24658101285.0, "step": 10032 }, { "epoch": 1.7884135472370768, "grad_norm": 0.171875, "learning_rate": 2.5240676378188737e-06, "loss": 1.033, "num_tokens": 24664384290.0, "step": 10033 }, { "epoch": 1.7885918003565062, "grad_norm": 0.173828125, "learning_rate": 2.523194386019639e-06, "loss": 1.0554, "num_tokens": 24670665473.0, "step": 10034 }, { "epoch": 1.7887700534759359, "grad_norm": 0.173828125, "learning_rate": 2.5223218405944088e-06, "loss": 1.0241, "num_tokens": 24676940465.0, "step": 10035 }, { "epoch": 1.7889483065953655, "grad_norm": 0.173828125, "learning_rate": 2.5214500016158926e-06, "loss": 1.0098, "num_tokens": 24683224163.0, "step": 10036 }, { "epoch": 1.789126559714795, "grad_norm": 0.171875, "learning_rate": 2.5205788691567413e-06, "loss": 1.0119, "num_tokens": 24689507557.0, "step": 10037 }, { "epoch": 1.7893048128342246, "grad_norm": 0.1787109375, "learning_rate": 2.5197084432895443e-06, "loss": 0.9893, "num_tokens": 24695768944.0, "step": 10038 }, { "epoch": 1.7894830659536543, "grad_norm": 0.173828125, "learning_rate": 2.518838724086838e-06, "loss": 1.0332, "num_tokens": 24702051706.0, "step": 10039 }, { "epoch": 1.7896613190730837, "grad_norm": 0.169921875, "learning_rate": 2.517969711621094e-06, "loss": 0.9943, "num_tokens": 24708332396.0, "step": 10040 }, { "epoch": 1.7898395721925133, "grad_norm": 0.1689453125, "learning_rate": 2.5171014059647277e-06, "loss": 1.0457, "num_tokens": 24714617810.0, "step": 10041 }, { "epoch": 1.790017825311943, "grad_norm": 0.169921875, "learning_rate": 2.516233807190095e-06, "loss": 1.0274, "num_tokens": 24720900918.0, "step": 10042 }, { "epoch": 1.7901960784313724, "grad_norm": 0.1689453125, "learning_rate": 2.5153669153694944e-06, "loss": 1.002, "num_tokens": 24727174719.0, "step": 10043 }, { "epoch": 1.7903743315508023, "grad_norm": 0.1787109375, "learning_rate": 2.5145007305751618e-06, "loss": 1.0005, "num_tokens": 24733461397.0, "step": 10044 }, { "epoch": 1.7905525846702317, "grad_norm": 0.166015625, "learning_rate": 2.5136352528792773e-06, "loss": 1.0009, "num_tokens": 24739733032.0, "step": 10045 }, { "epoch": 1.7907308377896614, "grad_norm": 0.1728515625, "learning_rate": 2.512770482353963e-06, "loss": 1.0315, "num_tokens": 24746015485.0, "step": 10046 }, { "epoch": 1.790909090909091, "grad_norm": 0.1650390625, "learning_rate": 2.51190641907128e-06, "loss": 1.013, "num_tokens": 24752297074.0, "step": 10047 }, { "epoch": 1.7910873440285204, "grad_norm": 0.169921875, "learning_rate": 2.511043063103229e-06, "loss": 0.9964, "num_tokens": 24758543189.0, "step": 10048 }, { "epoch": 1.79126559714795, "grad_norm": 0.173828125, "learning_rate": 2.5101804145217562e-06, "loss": 0.9834, "num_tokens": 24764822528.0, "step": 10049 }, { "epoch": 1.7914438502673797, "grad_norm": 0.171875, "learning_rate": 2.509318473398744e-06, "loss": 0.9842, "num_tokens": 24771095528.0, "step": 10050 }, { "epoch": 1.7916221033868092, "grad_norm": 0.1748046875, "learning_rate": 2.50845723980602e-06, "loss": 1.0499, "num_tokens": 24777378195.0, "step": 10051 }, { "epoch": 1.7918003565062388, "grad_norm": 0.1708984375, "learning_rate": 2.507596713815347e-06, "loss": 0.994, "num_tokens": 24783650503.0, "step": 10052 }, { "epoch": 1.7919786096256685, "grad_norm": 0.1689453125, "learning_rate": 2.506736895498438e-06, "loss": 1.0046, "num_tokens": 24789904381.0, "step": 10053 }, { "epoch": 1.792156862745098, "grad_norm": 0.1708984375, "learning_rate": 2.50587778492694e-06, "loss": 1.008, "num_tokens": 24796158327.0, "step": 10054 }, { "epoch": 1.7923351158645278, "grad_norm": 0.1728515625, "learning_rate": 2.5050193821724423e-06, "loss": 1.0464, "num_tokens": 24802427097.0, "step": 10055 }, { "epoch": 1.7925133689839572, "grad_norm": 0.1708984375, "learning_rate": 2.5041616873064757e-06, "loss": 0.9984, "num_tokens": 24808689908.0, "step": 10056 }, { "epoch": 1.7926916221033868, "grad_norm": 0.169921875, "learning_rate": 2.503304700400514e-06, "loss": 1.0274, "num_tokens": 24814974109.0, "step": 10057 }, { "epoch": 1.7928698752228165, "grad_norm": 0.1650390625, "learning_rate": 2.502448421525967e-06, "loss": 1.0032, "num_tokens": 24821259817.0, "step": 10058 }, { "epoch": 1.793048128342246, "grad_norm": 0.1669921875, "learning_rate": 2.5015928507541897e-06, "loss": 0.979, "num_tokens": 24827515552.0, "step": 10059 }, { "epoch": 1.7932263814616756, "grad_norm": 0.171875, "learning_rate": 2.5007379881564777e-06, "loss": 1.0469, "num_tokens": 24833784168.0, "step": 10060 }, { "epoch": 1.7934046345811052, "grad_norm": 0.1748046875, "learning_rate": 2.4998838338040677e-06, "loss": 1.0376, "num_tokens": 24840050380.0, "step": 10061 }, { "epoch": 1.7935828877005346, "grad_norm": 0.166015625, "learning_rate": 2.4990303877681347e-06, "loss": 1.0507, "num_tokens": 24846333815.0, "step": 10062 }, { "epoch": 1.7937611408199643, "grad_norm": 0.169921875, "learning_rate": 2.4981776501197964e-06, "loss": 1.0058, "num_tokens": 24852618594.0, "step": 10063 }, { "epoch": 1.793939393939394, "grad_norm": 0.1728515625, "learning_rate": 2.4973256209301133e-06, "loss": 1.0103, "num_tokens": 24858883786.0, "step": 10064 }, { "epoch": 1.7941176470588234, "grad_norm": 0.1767578125, "learning_rate": 2.4964743002700863e-06, "loss": 1.0534, "num_tokens": 24865153765.0, "step": 10065 }, { "epoch": 1.7942959001782532, "grad_norm": 0.169921875, "learning_rate": 2.4956236882106514e-06, "loss": 1.0469, "num_tokens": 24871438489.0, "step": 10066 }, { "epoch": 1.7944741532976827, "grad_norm": 0.169921875, "learning_rate": 2.4947737848226943e-06, "loss": 1.0298, "num_tokens": 24877677129.0, "step": 10067 }, { "epoch": 1.7946524064171123, "grad_norm": 0.1669921875, "learning_rate": 2.493924590177037e-06, "loss": 1.0158, "num_tokens": 24883960640.0, "step": 10068 }, { "epoch": 1.794830659536542, "grad_norm": 0.16796875, "learning_rate": 2.493076104344442e-06, "loss": 1.0112, "num_tokens": 24890243938.0, "step": 10069 }, { "epoch": 1.7950089126559714, "grad_norm": 0.171875, "learning_rate": 2.4922283273956127e-06, "loss": 1.0048, "num_tokens": 24896475032.0, "step": 10070 }, { "epoch": 1.795187165775401, "grad_norm": 0.1806640625, "learning_rate": 2.4913812594011983e-06, "loss": 1.0411, "num_tokens": 24902727692.0, "step": 10071 }, { "epoch": 1.7953654188948307, "grad_norm": 0.16796875, "learning_rate": 2.490534900431784e-06, "loss": 1.0227, "num_tokens": 24909009144.0, "step": 10072 }, { "epoch": 1.7955436720142601, "grad_norm": 0.1708984375, "learning_rate": 2.4896892505578926e-06, "loss": 1.0241, "num_tokens": 24915260087.0, "step": 10073 }, { "epoch": 1.79572192513369, "grad_norm": 0.169921875, "learning_rate": 2.4888443098499983e-06, "loss": 1.0307, "num_tokens": 24921543802.0, "step": 10074 }, { "epoch": 1.7959001782531194, "grad_norm": 0.1728515625, "learning_rate": 2.4880000783785066e-06, "loss": 1.0112, "num_tokens": 24927767600.0, "step": 10075 }, { "epoch": 1.7960784313725489, "grad_norm": 0.1748046875, "learning_rate": 2.487156556213769e-06, "loss": 0.9994, "num_tokens": 24934049915.0, "step": 10076 }, { "epoch": 1.7962566844919787, "grad_norm": 0.16796875, "learning_rate": 2.4863137434260747e-06, "loss": 1.0093, "num_tokens": 24940306040.0, "step": 10077 }, { "epoch": 1.7964349376114082, "grad_norm": 0.171875, "learning_rate": 2.4854716400856585e-06, "loss": 0.9934, "num_tokens": 24946560342.0, "step": 10078 }, { "epoch": 1.7966131907308378, "grad_norm": 0.1689453125, "learning_rate": 2.48463024626269e-06, "loss": 1.013, "num_tokens": 24952845591.0, "step": 10079 }, { "epoch": 1.7967914438502675, "grad_norm": 0.1748046875, "learning_rate": 2.483789562027286e-06, "loss": 1.0049, "num_tokens": 24959129121.0, "step": 10080 }, { "epoch": 1.7969696969696969, "grad_norm": 0.1728515625, "learning_rate": 2.482949587449496e-06, "loss": 1.0376, "num_tokens": 24965401787.0, "step": 10081 }, { "epoch": 1.7971479500891265, "grad_norm": 0.169921875, "learning_rate": 2.4821103225993197e-06, "loss": 1.0254, "num_tokens": 24971680104.0, "step": 10082 }, { "epoch": 1.7973262032085562, "grad_norm": 0.171875, "learning_rate": 2.481271767546692e-06, "loss": 0.998, "num_tokens": 24977958127.0, "step": 10083 }, { "epoch": 1.7975044563279856, "grad_norm": 0.1708984375, "learning_rate": 2.4804339223614866e-06, "loss": 1.0212, "num_tokens": 24984188530.0, "step": 10084 }, { "epoch": 1.7976827094474155, "grad_norm": 0.1748046875, "learning_rate": 2.479596787113527e-06, "loss": 1.0306, "num_tokens": 24990435223.0, "step": 10085 }, { "epoch": 1.797860962566845, "grad_norm": 0.1767578125, "learning_rate": 2.478760361872568e-06, "loss": 1.0187, "num_tokens": 24996680579.0, "step": 10086 }, { "epoch": 1.7980392156862746, "grad_norm": 0.1708984375, "learning_rate": 2.4779246467083117e-06, "loss": 1.0237, "num_tokens": 25002965254.0, "step": 10087 }, { "epoch": 1.7982174688057042, "grad_norm": 0.173828125, "learning_rate": 2.4770896416903954e-06, "loss": 0.9998, "num_tokens": 25009237453.0, "step": 10088 }, { "epoch": 1.7983957219251336, "grad_norm": 0.1767578125, "learning_rate": 2.476255346888403e-06, "loss": 1.0121, "num_tokens": 25015503476.0, "step": 10089 }, { "epoch": 1.7985739750445633, "grad_norm": 0.1689453125, "learning_rate": 2.475421762371854e-06, "loss": 1.0558, "num_tokens": 25021784077.0, "step": 10090 }, { "epoch": 1.798752228163993, "grad_norm": 0.173828125, "learning_rate": 2.4745888882102127e-06, "loss": 1.0493, "num_tokens": 25028062707.0, "step": 10091 }, { "epoch": 1.7989304812834224, "grad_norm": 0.1708984375, "learning_rate": 2.473756724472881e-06, "loss": 1.0134, "num_tokens": 25034345992.0, "step": 10092 }, { "epoch": 1.799108734402852, "grad_norm": 0.1689453125, "learning_rate": 2.4729252712292062e-06, "loss": 1.0348, "num_tokens": 25040596627.0, "step": 10093 }, { "epoch": 1.7992869875222817, "grad_norm": 0.17578125, "learning_rate": 2.4720945285484717e-06, "loss": 1.0011, "num_tokens": 25046855782.0, "step": 10094 }, { "epoch": 1.799465240641711, "grad_norm": 0.1728515625, "learning_rate": 2.4712644964999027e-06, "loss": 1.0022, "num_tokens": 25053138441.0, "step": 10095 }, { "epoch": 1.799643493761141, "grad_norm": 0.1669921875, "learning_rate": 2.470435175152669e-06, "loss": 1.0084, "num_tokens": 25059405838.0, "step": 10096 }, { "epoch": 1.7998217468805704, "grad_norm": 0.1728515625, "learning_rate": 2.469606564575875e-06, "loss": 1.0262, "num_tokens": 25065682662.0, "step": 10097 }, { "epoch": 1.8, "grad_norm": 0.1669921875, "learning_rate": 2.4687786648385695e-06, "loss": 1.0099, "num_tokens": 25071967434.0, "step": 10098 }, { "epoch": 1.8001782531194297, "grad_norm": 0.1689453125, "learning_rate": 2.467951476009742e-06, "loss": 1.0239, "num_tokens": 25078252076.0, "step": 10099 }, { "epoch": 1.8003565062388591, "grad_norm": 0.171875, "learning_rate": 2.4671249981583222e-06, "loss": 1.0417, "num_tokens": 25084537214.0, "step": 10100 }, { "epoch": 1.8005347593582888, "grad_norm": 0.169921875, "learning_rate": 2.4662992313531825e-06, "loss": 1.0275, "num_tokens": 25090821234.0, "step": 10101 }, { "epoch": 1.8007130124777184, "grad_norm": 0.169921875, "learning_rate": 2.4654741756631315e-06, "loss": 1.0445, "num_tokens": 25097082571.0, "step": 10102 }, { "epoch": 1.8008912655971479, "grad_norm": 0.171875, "learning_rate": 2.4646498311569223e-06, "loss": 1.0129, "num_tokens": 25103357976.0, "step": 10103 }, { "epoch": 1.8010695187165775, "grad_norm": 0.173828125, "learning_rate": 2.463826197903248e-06, "loss": 1.0004, "num_tokens": 25109625532.0, "step": 10104 }, { "epoch": 1.8012477718360071, "grad_norm": 0.1708984375, "learning_rate": 2.4630032759707418e-06, "loss": 1.0421, "num_tokens": 25115909337.0, "step": 10105 }, { "epoch": 1.8014260249554366, "grad_norm": 0.16796875, "learning_rate": 2.4621810654279775e-06, "loss": 1.0297, "num_tokens": 25122191612.0, "step": 10106 }, { "epoch": 1.8016042780748664, "grad_norm": 0.169921875, "learning_rate": 2.4613595663434714e-06, "loss": 1.0341, "num_tokens": 25128474799.0, "step": 10107 }, { "epoch": 1.8017825311942959, "grad_norm": 0.1669921875, "learning_rate": 2.4605387787856784e-06, "loss": 0.9898, "num_tokens": 25134759001.0, "step": 10108 }, { "epoch": 1.8019607843137255, "grad_norm": 0.16796875, "learning_rate": 2.459718702822995e-06, "loss": 1.0216, "num_tokens": 25141016267.0, "step": 10109 }, { "epoch": 1.8021390374331552, "grad_norm": 0.171875, "learning_rate": 2.4588993385237563e-06, "loss": 1.0155, "num_tokens": 25147274913.0, "step": 10110 }, { "epoch": 1.8023172905525846, "grad_norm": 0.1728515625, "learning_rate": 2.4580806859562455e-06, "loss": 1.0626, "num_tokens": 25153557242.0, "step": 10111 }, { "epoch": 1.8024955436720143, "grad_norm": 0.1728515625, "learning_rate": 2.4572627451886758e-06, "loss": 1.0208, "num_tokens": 25159827906.0, "step": 10112 }, { "epoch": 1.802673796791444, "grad_norm": 0.1689453125, "learning_rate": 2.4564455162892074e-06, "loss": 1.0387, "num_tokens": 25166098596.0, "step": 10113 }, { "epoch": 1.8028520499108733, "grad_norm": 0.1748046875, "learning_rate": 2.4556289993259418e-06, "loss": 1.0634, "num_tokens": 25172380884.0, "step": 10114 }, { "epoch": 1.803030303030303, "grad_norm": 0.1708984375, "learning_rate": 2.454813194366918e-06, "loss": 1.0659, "num_tokens": 25178666332.0, "step": 10115 }, { "epoch": 1.8032085561497326, "grad_norm": 0.16796875, "learning_rate": 2.4539981014801194e-06, "loss": 0.9962, "num_tokens": 25184925221.0, "step": 10116 }, { "epoch": 1.803386809269162, "grad_norm": 0.169921875, "learning_rate": 2.453183720733464e-06, "loss": 1.0092, "num_tokens": 25191207343.0, "step": 10117 }, { "epoch": 1.803565062388592, "grad_norm": 0.17578125, "learning_rate": 2.4523700521948186e-06, "loss": 1.0344, "num_tokens": 25197454207.0, "step": 10118 }, { "epoch": 1.8037433155080214, "grad_norm": 0.1767578125, "learning_rate": 2.451557095931986e-06, "loss": 1.0033, "num_tokens": 25203729773.0, "step": 10119 }, { "epoch": 1.803921568627451, "grad_norm": 0.1748046875, "learning_rate": 2.4507448520127052e-06, "loss": 1.0147, "num_tokens": 25210014176.0, "step": 10120 }, { "epoch": 1.8040998217468807, "grad_norm": 0.166015625, "learning_rate": 2.4499333205046654e-06, "loss": 1.0143, "num_tokens": 25216282913.0, "step": 10121 }, { "epoch": 1.80427807486631, "grad_norm": 0.173828125, "learning_rate": 2.4491225014754897e-06, "loss": 1.0425, "num_tokens": 25222543272.0, "step": 10122 }, { "epoch": 1.8044563279857397, "grad_norm": 0.171875, "learning_rate": 2.448312394992745e-06, "loss": 1.0228, "num_tokens": 25228826046.0, "step": 10123 }, { "epoch": 1.8046345811051694, "grad_norm": 0.1689453125, "learning_rate": 2.447503001123936e-06, "loss": 1.026, "num_tokens": 25235078734.0, "step": 10124 }, { "epoch": 1.8048128342245988, "grad_norm": 0.169921875, "learning_rate": 2.4466943199365106e-06, "loss": 0.9965, "num_tokens": 25241322738.0, "step": 10125 }, { "epoch": 1.8049910873440285, "grad_norm": 0.1669921875, "learning_rate": 2.445886351497859e-06, "loss": 1.0228, "num_tokens": 25247605025.0, "step": 10126 }, { "epoch": 1.8051693404634581, "grad_norm": 0.16796875, "learning_rate": 2.4450790958753033e-06, "loss": 1.0317, "num_tokens": 25253884074.0, "step": 10127 }, { "epoch": 1.8053475935828875, "grad_norm": 0.16796875, "learning_rate": 2.4442725531361177e-06, "loss": 1.0096, "num_tokens": 25260144211.0, "step": 10128 }, { "epoch": 1.8055258467023174, "grad_norm": 0.1689453125, "learning_rate": 2.4434667233475097e-06, "loss": 1.0071, "num_tokens": 25266400537.0, "step": 10129 }, { "epoch": 1.8057040998217468, "grad_norm": 0.17578125, "learning_rate": 2.4426616065766293e-06, "loss": 1.022, "num_tokens": 25272672841.0, "step": 10130 }, { "epoch": 1.8058823529411765, "grad_norm": 0.171875, "learning_rate": 2.441857202890565e-06, "loss": 1.0318, "num_tokens": 25278943298.0, "step": 10131 }, { "epoch": 1.8060606060606061, "grad_norm": 0.1767578125, "learning_rate": 2.4410535123563516e-06, "loss": 1.0076, "num_tokens": 25285212239.0, "step": 10132 }, { "epoch": 1.8062388591800356, "grad_norm": 0.1767578125, "learning_rate": 2.4402505350409595e-06, "loss": 1.0273, "num_tokens": 25291479451.0, "step": 10133 }, { "epoch": 1.8064171122994652, "grad_norm": 0.1708984375, "learning_rate": 2.439448271011299e-06, "loss": 1.0131, "num_tokens": 25297749629.0, "step": 10134 }, { "epoch": 1.8065953654188949, "grad_norm": 0.169921875, "learning_rate": 2.4386467203342264e-06, "loss": 0.961, "num_tokens": 25304012959.0, "step": 10135 }, { "epoch": 1.8067736185383243, "grad_norm": 0.17578125, "learning_rate": 2.437845883076532e-06, "loss": 1.0229, "num_tokens": 25310296783.0, "step": 10136 }, { "epoch": 1.8069518716577542, "grad_norm": 0.1728515625, "learning_rate": 2.437045759304952e-06, "loss": 1.0537, "num_tokens": 25316580707.0, "step": 10137 }, { "epoch": 1.8071301247771836, "grad_norm": 0.169921875, "learning_rate": 2.436246349086157e-06, "loss": 1.0387, "num_tokens": 25322857484.0, "step": 10138 }, { "epoch": 1.807308377896613, "grad_norm": 0.1748046875, "learning_rate": 2.4354476524867675e-06, "loss": 1.0188, "num_tokens": 25329109487.0, "step": 10139 }, { "epoch": 1.807486631016043, "grad_norm": 0.16796875, "learning_rate": 2.434649669573336e-06, "loss": 0.9925, "num_tokens": 25335395328.0, "step": 10140 }, { "epoch": 1.8076648841354723, "grad_norm": 0.1708984375, "learning_rate": 2.4338524004123587e-06, "loss": 1.0182, "num_tokens": 25341677566.0, "step": 10141 }, { "epoch": 1.807843137254902, "grad_norm": 0.169921875, "learning_rate": 2.4330558450702723e-06, "loss": 1.0182, "num_tokens": 25347961213.0, "step": 10142 }, { "epoch": 1.8080213903743316, "grad_norm": 0.1708984375, "learning_rate": 2.432260003613455e-06, "loss": 1.0147, "num_tokens": 25354221610.0, "step": 10143 }, { "epoch": 1.808199643493761, "grad_norm": 0.1708984375, "learning_rate": 2.4314648761082233e-06, "loss": 1.0031, "num_tokens": 25360506271.0, "step": 10144 }, { "epoch": 1.8083778966131907, "grad_norm": 0.1708984375, "learning_rate": 2.4306704626208337e-06, "loss": 1.0049, "num_tokens": 25366764513.0, "step": 10145 }, { "epoch": 1.8085561497326204, "grad_norm": 0.1689453125, "learning_rate": 2.4298767632174887e-06, "loss": 1.0392, "num_tokens": 25373047439.0, "step": 10146 }, { "epoch": 1.8087344028520498, "grad_norm": 0.171875, "learning_rate": 2.429083777964325e-06, "loss": 1.027, "num_tokens": 25379329720.0, "step": 10147 }, { "epoch": 1.8089126559714797, "grad_norm": 0.1796875, "learning_rate": 2.4282915069274223e-06, "loss": 1.0022, "num_tokens": 25385611992.0, "step": 10148 }, { "epoch": 1.809090909090909, "grad_norm": 0.166015625, "learning_rate": 2.427499950172802e-06, "loss": 1.0161, "num_tokens": 25391864873.0, "step": 10149 }, { "epoch": 1.8092691622103387, "grad_norm": 0.169921875, "learning_rate": 2.426709107766423e-06, "loss": 1.0125, "num_tokens": 25398149596.0, "step": 10150 }, { "epoch": 1.8094474153297684, "grad_norm": 0.1767578125, "learning_rate": 2.425918979774188e-06, "loss": 1.0468, "num_tokens": 25404401529.0, "step": 10151 }, { "epoch": 1.8096256684491978, "grad_norm": 0.1728515625, "learning_rate": 2.4251295662619362e-06, "loss": 1.0256, "num_tokens": 25410683522.0, "step": 10152 }, { "epoch": 1.8098039215686275, "grad_norm": 0.1689453125, "learning_rate": 2.424340867295452e-06, "loss": 1.0247, "num_tokens": 25416967594.0, "step": 10153 }, { "epoch": 1.809982174688057, "grad_norm": 0.1728515625, "learning_rate": 2.423552882940457e-06, "loss": 1.0201, "num_tokens": 25423240762.0, "step": 10154 }, { "epoch": 1.8101604278074865, "grad_norm": 0.171875, "learning_rate": 2.4227656132626137e-06, "loss": 1.0212, "num_tokens": 25429495270.0, "step": 10155 }, { "epoch": 1.8103386809269162, "grad_norm": 0.166015625, "learning_rate": 2.421979058327525e-06, "loss": 1.0114, "num_tokens": 25435778493.0, "step": 10156 }, { "epoch": 1.8105169340463458, "grad_norm": 0.1748046875, "learning_rate": 2.4211932182007387e-06, "loss": 1.0324, "num_tokens": 25442040661.0, "step": 10157 }, { "epoch": 1.8106951871657753, "grad_norm": 0.1708984375, "learning_rate": 2.4204080929477336e-06, "loss": 1.0018, "num_tokens": 25448309939.0, "step": 10158 }, { "epoch": 1.8108734402852051, "grad_norm": 0.1708984375, "learning_rate": 2.4196236826339354e-06, "loss": 1.0298, "num_tokens": 25454563847.0, "step": 10159 }, { "epoch": 1.8110516934046346, "grad_norm": 0.16796875, "learning_rate": 2.4188399873247113e-06, "loss": 1.0166, "num_tokens": 25460846637.0, "step": 10160 }, { "epoch": 1.8112299465240642, "grad_norm": 0.169921875, "learning_rate": 2.418057007085367e-06, "loss": 1.0219, "num_tokens": 25467127901.0, "step": 10161 }, { "epoch": 1.8114081996434939, "grad_norm": 0.1689453125, "learning_rate": 2.417274741981147e-06, "loss": 1.0098, "num_tokens": 25473413454.0, "step": 10162 }, { "epoch": 1.8115864527629233, "grad_norm": 0.1708984375, "learning_rate": 2.4164931920772354e-06, "loss": 1.0199, "num_tokens": 25479697773.0, "step": 10163 }, { "epoch": 1.811764705882353, "grad_norm": 0.171875, "learning_rate": 2.415712357438764e-06, "loss": 1.0028, "num_tokens": 25485953345.0, "step": 10164 }, { "epoch": 1.8119429590017826, "grad_norm": 0.171875, "learning_rate": 2.4149322381307974e-06, "loss": 1.031, "num_tokens": 25492236800.0, "step": 10165 }, { "epoch": 1.812121212121212, "grad_norm": 0.1669921875, "learning_rate": 2.4141528342183417e-06, "loss": 1.0433, "num_tokens": 25498508733.0, "step": 10166 }, { "epoch": 1.8122994652406417, "grad_norm": 0.1669921875, "learning_rate": 2.413374145766347e-06, "loss": 1.0264, "num_tokens": 25504786963.0, "step": 10167 }, { "epoch": 1.8124777183600713, "grad_norm": 0.1689453125, "learning_rate": 2.4125961728397e-06, "loss": 1.01, "num_tokens": 25511061920.0, "step": 10168 }, { "epoch": 1.8126559714795007, "grad_norm": 0.16796875, "learning_rate": 2.41181891550323e-06, "loss": 0.9819, "num_tokens": 25517337368.0, "step": 10169 }, { "epoch": 1.8128342245989306, "grad_norm": 0.1708984375, "learning_rate": 2.4110423738217056e-06, "loss": 1.0333, "num_tokens": 25523614676.0, "step": 10170 }, { "epoch": 1.81301247771836, "grad_norm": 0.1689453125, "learning_rate": 2.4102665478598375e-06, "loss": 1.0116, "num_tokens": 25529849968.0, "step": 10171 }, { "epoch": 1.8131907308377897, "grad_norm": 0.1796875, "learning_rate": 2.4094914376822754e-06, "loss": 1.0056, "num_tokens": 25536096919.0, "step": 10172 }, { "epoch": 1.8133689839572193, "grad_norm": 0.1708984375, "learning_rate": 2.408717043353606e-06, "loss": 1.0311, "num_tokens": 25542347043.0, "step": 10173 }, { "epoch": 1.8135472370766488, "grad_norm": 0.1728515625, "learning_rate": 2.4079433649383647e-06, "loss": 1.0199, "num_tokens": 25548598266.0, "step": 10174 }, { "epoch": 1.8137254901960784, "grad_norm": 0.1767578125, "learning_rate": 2.4071704025010196e-06, "loss": 0.9977, "num_tokens": 25554861450.0, "step": 10175 }, { "epoch": 1.813903743315508, "grad_norm": 0.173828125, "learning_rate": 2.4063981561059812e-06, "loss": 1.0107, "num_tokens": 25561119880.0, "step": 10176 }, { "epoch": 1.8140819964349375, "grad_norm": 0.1669921875, "learning_rate": 2.4056266258176013e-06, "loss": 1.0096, "num_tokens": 25567376150.0, "step": 10177 }, { "epoch": 1.8142602495543672, "grad_norm": 0.169921875, "learning_rate": 2.404855811700174e-06, "loss": 1.038, "num_tokens": 25573637476.0, "step": 10178 }, { "epoch": 1.8144385026737968, "grad_norm": 0.1748046875, "learning_rate": 2.4040857138179286e-06, "loss": 0.9974, "num_tokens": 25579922551.0, "step": 10179 }, { "epoch": 1.8146167557932262, "grad_norm": 0.169921875, "learning_rate": 2.403316332235039e-06, "loss": 1.0088, "num_tokens": 25586204584.0, "step": 10180 }, { "epoch": 1.814795008912656, "grad_norm": 0.1728515625, "learning_rate": 2.402547667015617e-06, "loss": 1.0055, "num_tokens": 25592487600.0, "step": 10181 }, { "epoch": 1.8149732620320855, "grad_norm": 0.17578125, "learning_rate": 2.4017797182237174e-06, "loss": 1.0222, "num_tokens": 25598749054.0, "step": 10182 }, { "epoch": 1.8151515151515152, "grad_norm": 0.171875, "learning_rate": 2.401012485923331e-06, "loss": 0.9985, "num_tokens": 25604995124.0, "step": 10183 }, { "epoch": 1.8153297682709448, "grad_norm": 0.1708984375, "learning_rate": 2.400245970178392e-06, "loss": 1.0191, "num_tokens": 25611267933.0, "step": 10184 }, { "epoch": 1.8155080213903743, "grad_norm": 0.1708984375, "learning_rate": 2.3994801710527763e-06, "loss": 0.9867, "num_tokens": 25617543196.0, "step": 10185 }, { "epoch": 1.815686274509804, "grad_norm": 0.1708984375, "learning_rate": 2.3987150886102967e-06, "loss": 0.9913, "num_tokens": 25623825950.0, "step": 10186 }, { "epoch": 1.8158645276292336, "grad_norm": 0.1708984375, "learning_rate": 2.3979507229147074e-06, "loss": 1.0175, "num_tokens": 25630109965.0, "step": 10187 }, { "epoch": 1.816042780748663, "grad_norm": 0.1806640625, "learning_rate": 2.397187074029704e-06, "loss": 1.0157, "num_tokens": 25636394179.0, "step": 10188 }, { "epoch": 1.8162210338680926, "grad_norm": 0.169921875, "learning_rate": 2.396424142018921e-06, "loss": 0.9874, "num_tokens": 25642655171.0, "step": 10189 }, { "epoch": 1.8163992869875223, "grad_norm": 0.171875, "learning_rate": 2.395661926945934e-06, "loss": 0.9876, "num_tokens": 25648938923.0, "step": 10190 }, { "epoch": 1.8165775401069517, "grad_norm": 0.1669921875, "learning_rate": 2.3949004288742566e-06, "loss": 1.0181, "num_tokens": 25655175278.0, "step": 10191 }, { "epoch": 1.8167557932263816, "grad_norm": 0.169921875, "learning_rate": 2.3941396478673486e-06, "loss": 0.9906, "num_tokens": 25661441075.0, "step": 10192 }, { "epoch": 1.816934046345811, "grad_norm": 0.1708984375, "learning_rate": 2.393379583988603e-06, "loss": 1.0215, "num_tokens": 25667700333.0, "step": 10193 }, { "epoch": 1.8171122994652407, "grad_norm": 0.1650390625, "learning_rate": 2.392620237301356e-06, "loss": 1.0316, "num_tokens": 25673983558.0, "step": 10194 }, { "epoch": 1.8172905525846703, "grad_norm": 0.16796875, "learning_rate": 2.3918616078688865e-06, "loss": 0.9978, "num_tokens": 25680238300.0, "step": 10195 }, { "epoch": 1.8174688057040997, "grad_norm": 0.1669921875, "learning_rate": 2.391103695754409e-06, "loss": 1.0169, "num_tokens": 25686520891.0, "step": 10196 }, { "epoch": 1.8176470588235294, "grad_norm": 0.173828125, "learning_rate": 2.390346501021081e-06, "loss": 1.0192, "num_tokens": 25692798521.0, "step": 10197 }, { "epoch": 1.817825311942959, "grad_norm": 0.1728515625, "learning_rate": 2.389590023731999e-06, "loss": 1.0091, "num_tokens": 25699073749.0, "step": 10198 }, { "epoch": 1.8180035650623885, "grad_norm": 0.169921875, "learning_rate": 2.3888342639502026e-06, "loss": 1.0058, "num_tokens": 25705356643.0, "step": 10199 }, { "epoch": 1.8181818181818183, "grad_norm": 0.16796875, "learning_rate": 2.3880792217386673e-06, "loss": 0.9907, "num_tokens": 25711639494.0, "step": 10200 }, { "epoch": 1.8183600713012478, "grad_norm": 0.1728515625, "learning_rate": 2.387324897160313e-06, "loss": 1.0048, "num_tokens": 25717879180.0, "step": 10201 }, { "epoch": 1.8185383244206772, "grad_norm": 0.1689453125, "learning_rate": 2.386571290277994e-06, "loss": 1.0033, "num_tokens": 25724099272.0, "step": 10202 }, { "epoch": 1.818716577540107, "grad_norm": 0.17578125, "learning_rate": 2.385818401154514e-06, "loss": 1.034, "num_tokens": 25730382583.0, "step": 10203 }, { "epoch": 1.8188948306595365, "grad_norm": 0.1728515625, "learning_rate": 2.3850662298526077e-06, "loss": 1.0387, "num_tokens": 25736666230.0, "step": 10204 }, { "epoch": 1.8190730837789661, "grad_norm": 0.1728515625, "learning_rate": 2.3843147764349527e-06, "loss": 1.0077, "num_tokens": 25742950898.0, "step": 10205 }, { "epoch": 1.8192513368983958, "grad_norm": 0.173828125, "learning_rate": 2.3835640409641703e-06, "loss": 1.0324, "num_tokens": 25749214966.0, "step": 10206 }, { "epoch": 1.8194295900178252, "grad_norm": 0.1708984375, "learning_rate": 2.382814023502819e-06, "loss": 1.0232, "num_tokens": 25755451802.0, "step": 10207 }, { "epoch": 1.8196078431372549, "grad_norm": 0.173828125, "learning_rate": 2.3820647241133978e-06, "loss": 1.0347, "num_tokens": 25761735387.0, "step": 10208 }, { "epoch": 1.8197860962566845, "grad_norm": 0.1767578125, "learning_rate": 2.3813161428583432e-06, "loss": 1.0016, "num_tokens": 25767911299.0, "step": 10209 }, { "epoch": 1.819964349376114, "grad_norm": 0.1728515625, "learning_rate": 2.38056827980004e-06, "loss": 1.0355, "num_tokens": 25774193648.0, "step": 10210 }, { "epoch": 1.8201426024955438, "grad_norm": 0.171875, "learning_rate": 2.379821135000805e-06, "loss": 1.0277, "num_tokens": 25780478095.0, "step": 10211 }, { "epoch": 1.8203208556149733, "grad_norm": 0.1708984375, "learning_rate": 2.3790747085228963e-06, "loss": 1.0144, "num_tokens": 25786748259.0, "step": 10212 }, { "epoch": 1.820499108734403, "grad_norm": 0.171875, "learning_rate": 2.378329000428516e-06, "loss": 1.0319, "num_tokens": 25793032609.0, "step": 10213 }, { "epoch": 1.8206773618538326, "grad_norm": 0.1689453125, "learning_rate": 2.3775840107798032e-06, "loss": 1.0074, "num_tokens": 25799314876.0, "step": 10214 }, { "epoch": 1.820855614973262, "grad_norm": 0.169921875, "learning_rate": 2.376839739638839e-06, "loss": 1.0177, "num_tokens": 25805558990.0, "step": 10215 }, { "epoch": 1.8210338680926916, "grad_norm": 0.1708984375, "learning_rate": 2.3760961870676413e-06, "loss": 0.9945, "num_tokens": 25811828665.0, "step": 10216 }, { "epoch": 1.8212121212121213, "grad_norm": 0.1767578125, "learning_rate": 2.3753533531281733e-06, "loss": 1.0064, "num_tokens": 25818107996.0, "step": 10217 }, { "epoch": 1.8213903743315507, "grad_norm": 0.1689453125, "learning_rate": 2.374611237882335e-06, "loss": 0.9685, "num_tokens": 25824352626.0, "step": 10218 }, { "epoch": 1.8215686274509804, "grad_norm": 0.177734375, "learning_rate": 2.3738698413919658e-06, "loss": 1.0242, "num_tokens": 25830635968.0, "step": 10219 }, { "epoch": 1.82174688057041, "grad_norm": 0.1728515625, "learning_rate": 2.3731291637188474e-06, "loss": 1.0345, "num_tokens": 25836919971.0, "step": 10220 }, { "epoch": 1.8219251336898394, "grad_norm": 0.173828125, "learning_rate": 2.3723892049246993e-06, "loss": 0.9998, "num_tokens": 25843203997.0, "step": 10221 }, { "epoch": 1.8221033868092693, "grad_norm": 0.1748046875, "learning_rate": 2.371649965071184e-06, "loss": 1.0463, "num_tokens": 25849466079.0, "step": 10222 }, { "epoch": 1.8222816399286987, "grad_norm": 0.1796875, "learning_rate": 2.370911444219901e-06, "loss": 1.0102, "num_tokens": 25855690913.0, "step": 10223 }, { "epoch": 1.8224598930481284, "grad_norm": 0.171875, "learning_rate": 2.370173642432393e-06, "loss": 1.025, "num_tokens": 25861964756.0, "step": 10224 }, { "epoch": 1.822638146167558, "grad_norm": 0.1728515625, "learning_rate": 2.3694365597701404e-06, "loss": 1.0401, "num_tokens": 25868236870.0, "step": 10225 }, { "epoch": 1.8228163992869875, "grad_norm": 0.173828125, "learning_rate": 2.368700196294565e-06, "loss": 0.9867, "num_tokens": 25874520144.0, "step": 10226 }, { "epoch": 1.8229946524064171, "grad_norm": 0.171875, "learning_rate": 2.367964552067027e-06, "loss": 1.0421, "num_tokens": 25880758905.0, "step": 10227 }, { "epoch": 1.8231729055258468, "grad_norm": 0.1748046875, "learning_rate": 2.367229627148829e-06, "loss": 1.029, "num_tokens": 25887035441.0, "step": 10228 }, { "epoch": 1.8233511586452762, "grad_norm": 0.16796875, "learning_rate": 2.366495421601212e-06, "loss": 1.0025, "num_tokens": 25893294784.0, "step": 10229 }, { "epoch": 1.8235294117647058, "grad_norm": 0.173828125, "learning_rate": 2.3657619354853566e-06, "loss": 1.0321, "num_tokens": 25899545585.0, "step": 10230 }, { "epoch": 1.8237076648841355, "grad_norm": 0.169921875, "learning_rate": 2.365029168862385e-06, "loss": 1.0434, "num_tokens": 25905823011.0, "step": 10231 }, { "epoch": 1.823885918003565, "grad_norm": 0.17578125, "learning_rate": 2.36429712179336e-06, "loss": 1.0267, "num_tokens": 25912106059.0, "step": 10232 }, { "epoch": 1.8240641711229948, "grad_norm": 0.173828125, "learning_rate": 2.3635657943392815e-06, "loss": 0.9833, "num_tokens": 25918388510.0, "step": 10233 }, { "epoch": 1.8242424242424242, "grad_norm": 0.173828125, "learning_rate": 2.362835186561092e-06, "loss": 1.0116, "num_tokens": 25924666034.0, "step": 10234 }, { "epoch": 1.8244206773618539, "grad_norm": 0.1708984375, "learning_rate": 2.3621052985196722e-06, "loss": 1.0336, "num_tokens": 25930943232.0, "step": 10235 }, { "epoch": 1.8245989304812835, "grad_norm": 0.16796875, "learning_rate": 2.3613761302758456e-06, "loss": 1.0177, "num_tokens": 25937225440.0, "step": 10236 }, { "epoch": 1.824777183600713, "grad_norm": 0.177734375, "learning_rate": 2.3606476818903718e-06, "loss": 1.0172, "num_tokens": 25943507776.0, "step": 10237 }, { "epoch": 1.8249554367201426, "grad_norm": 0.1748046875, "learning_rate": 2.359919953423955e-06, "loss": 1.0105, "num_tokens": 25949760460.0, "step": 10238 }, { "epoch": 1.8251336898395722, "grad_norm": 0.171875, "learning_rate": 2.359192944937235e-06, "loss": 1.0167, "num_tokens": 25956022007.0, "step": 10239 }, { "epoch": 1.8253119429590017, "grad_norm": 0.169921875, "learning_rate": 2.358466656490795e-06, "loss": 1.0415, "num_tokens": 25962302910.0, "step": 10240 }, { "epoch": 1.8254901960784313, "grad_norm": 0.173828125, "learning_rate": 2.357741088145155e-06, "loss": 1.0212, "num_tokens": 25968536233.0, "step": 10241 }, { "epoch": 1.825668449197861, "grad_norm": 0.1689453125, "learning_rate": 2.3570162399607803e-06, "loss": 1.0157, "num_tokens": 25974763501.0, "step": 10242 }, { "epoch": 1.8258467023172904, "grad_norm": 0.16796875, "learning_rate": 2.356292111998068e-06, "loss": 1.0071, "num_tokens": 25981046370.0, "step": 10243 }, { "epoch": 1.8260249554367203, "grad_norm": 0.1708984375, "learning_rate": 2.3555687043173624e-06, "loss": 1.0133, "num_tokens": 25987291163.0, "step": 10244 }, { "epoch": 1.8262032085561497, "grad_norm": 0.1787109375, "learning_rate": 2.354846016978944e-06, "loss": 1.018, "num_tokens": 25993575199.0, "step": 10245 }, { "epoch": 1.8263814616755794, "grad_norm": 0.171875, "learning_rate": 2.354124050043037e-06, "loss": 1.0112, "num_tokens": 25999857019.0, "step": 10246 }, { "epoch": 1.826559714795009, "grad_norm": 0.1728515625, "learning_rate": 2.3534028035698006e-06, "loss": 0.9973, "num_tokens": 26006140968.0, "step": 10247 }, { "epoch": 1.8267379679144384, "grad_norm": 0.171875, "learning_rate": 2.352682277619336e-06, "loss": 0.9955, "num_tokens": 26012410445.0, "step": 10248 }, { "epoch": 1.826916221033868, "grad_norm": 0.1748046875, "learning_rate": 2.351962472251688e-06, "loss": 1.0095, "num_tokens": 26018694714.0, "step": 10249 }, { "epoch": 1.8270944741532977, "grad_norm": 0.1669921875, "learning_rate": 2.3512433875268363e-06, "loss": 1.0047, "num_tokens": 26024979140.0, "step": 10250 }, { "epoch": 1.8272727272727272, "grad_norm": 0.169921875, "learning_rate": 2.3505250235047008e-06, "loss": 1.0067, "num_tokens": 26031263744.0, "step": 10251 }, { "epoch": 1.8274509803921568, "grad_norm": 0.17578125, "learning_rate": 2.3498073802451436e-06, "loss": 1.0423, "num_tokens": 26037548627.0, "step": 10252 }, { "epoch": 1.8276292335115865, "grad_norm": 0.173828125, "learning_rate": 2.3490904578079687e-06, "loss": 1.0083, "num_tokens": 26043828827.0, "step": 10253 }, { "epoch": 1.8278074866310159, "grad_norm": 0.173828125, "learning_rate": 2.3483742562529143e-06, "loss": 0.9913, "num_tokens": 26050113792.0, "step": 10254 }, { "epoch": 1.8279857397504458, "grad_norm": 0.1669921875, "learning_rate": 2.3476587756396637e-06, "loss": 1.0139, "num_tokens": 26056386144.0, "step": 10255 }, { "epoch": 1.8281639928698752, "grad_norm": 0.1728515625, "learning_rate": 2.3469440160278355e-06, "loss": 1.0091, "num_tokens": 26062670001.0, "step": 10256 }, { "epoch": 1.8283422459893048, "grad_norm": 0.173828125, "learning_rate": 2.3462299774769946e-06, "loss": 0.9928, "num_tokens": 26068925558.0, "step": 10257 }, { "epoch": 1.8285204991087345, "grad_norm": 0.1728515625, "learning_rate": 2.34551666004664e-06, "loss": 1.0164, "num_tokens": 26075179976.0, "step": 10258 }, { "epoch": 1.828698752228164, "grad_norm": 0.1748046875, "learning_rate": 2.3448040637962093e-06, "loss": 1.0172, "num_tokens": 26081443733.0, "step": 10259 }, { "epoch": 1.8288770053475936, "grad_norm": 0.1767578125, "learning_rate": 2.3440921887850896e-06, "loss": 1.0135, "num_tokens": 26087726911.0, "step": 10260 }, { "epoch": 1.8290552584670232, "grad_norm": 0.173828125, "learning_rate": 2.3433810350725964e-06, "loss": 1.0158, "num_tokens": 26093983794.0, "step": 10261 }, { "epoch": 1.8292335115864526, "grad_norm": 0.1708984375, "learning_rate": 2.3426706027179933e-06, "loss": 1.0193, "num_tokens": 26100267518.0, "step": 10262 }, { "epoch": 1.8294117647058825, "grad_norm": 0.171875, "learning_rate": 2.3419608917804785e-06, "loss": 1.0275, "num_tokens": 26106543019.0, "step": 10263 }, { "epoch": 1.829590017825312, "grad_norm": 0.171875, "learning_rate": 2.3412519023191948e-06, "loss": 0.9925, "num_tokens": 26112826755.0, "step": 10264 }, { "epoch": 1.8297682709447414, "grad_norm": 0.17578125, "learning_rate": 2.3405436343932227e-06, "loss": 1.004, "num_tokens": 26119056146.0, "step": 10265 }, { "epoch": 1.8299465240641712, "grad_norm": 0.171875, "learning_rate": 2.3398360880615785e-06, "loss": 1.0067, "num_tokens": 26125329424.0, "step": 10266 }, { "epoch": 1.8301247771836007, "grad_norm": 0.166015625, "learning_rate": 2.3391292633832254e-06, "loss": 1.0474, "num_tokens": 26131584368.0, "step": 10267 }, { "epoch": 1.8303030303030303, "grad_norm": 0.169921875, "learning_rate": 2.338423160417062e-06, "loss": 0.9958, "num_tokens": 26137842713.0, "step": 10268 }, { "epoch": 1.83048128342246, "grad_norm": 0.1689453125, "learning_rate": 2.3377177792219293e-06, "loss": 0.9769, "num_tokens": 26144046492.0, "step": 10269 }, { "epoch": 1.8306595365418894, "grad_norm": 0.16796875, "learning_rate": 2.337013119856605e-06, "loss": 1.0236, "num_tokens": 26150328688.0, "step": 10270 }, { "epoch": 1.830837789661319, "grad_norm": 0.171875, "learning_rate": 2.3363091823798096e-06, "loss": 0.9923, "num_tokens": 26156561220.0, "step": 10271 }, { "epoch": 1.8310160427807487, "grad_norm": 0.173828125, "learning_rate": 2.3356059668502046e-06, "loss": 0.9725, "num_tokens": 26162844262.0, "step": 10272 }, { "epoch": 1.8311942959001781, "grad_norm": 0.1708984375, "learning_rate": 2.3349034733263835e-06, "loss": 0.9908, "num_tokens": 26169116743.0, "step": 10273 }, { "epoch": 1.831372549019608, "grad_norm": 0.173828125, "learning_rate": 2.33420170186689e-06, "loss": 1.0155, "num_tokens": 26175398390.0, "step": 10274 }, { "epoch": 1.8315508021390374, "grad_norm": 0.1689453125, "learning_rate": 2.3335006525302016e-06, "loss": 1.0083, "num_tokens": 26181680400.0, "step": 10275 }, { "epoch": 1.831729055258467, "grad_norm": 0.1689453125, "learning_rate": 2.3328003253747364e-06, "loss": 1.0295, "num_tokens": 26187964781.0, "step": 10276 }, { "epoch": 1.8319073083778967, "grad_norm": 0.171875, "learning_rate": 2.3321007204588523e-06, "loss": 1.0239, "num_tokens": 26194244338.0, "step": 10277 }, { "epoch": 1.8320855614973262, "grad_norm": 0.17578125, "learning_rate": 2.3314018378408488e-06, "loss": 0.9876, "num_tokens": 26200528769.0, "step": 10278 }, { "epoch": 1.8322638146167558, "grad_norm": 0.177734375, "learning_rate": 2.3307036775789636e-06, "loss": 1.0218, "num_tokens": 26206800262.0, "step": 10279 }, { "epoch": 1.8324420677361855, "grad_norm": 0.16796875, "learning_rate": 2.3300062397313744e-06, "loss": 1.0213, "num_tokens": 26213084472.0, "step": 10280 }, { "epoch": 1.8326203208556149, "grad_norm": 0.1767578125, "learning_rate": 2.329309524356199e-06, "loss": 1.0624, "num_tokens": 26219369282.0, "step": 10281 }, { "epoch": 1.8327985739750445, "grad_norm": 0.169921875, "learning_rate": 2.328613531511495e-06, "loss": 1.0133, "num_tokens": 26225649991.0, "step": 10282 }, { "epoch": 1.8329768270944742, "grad_norm": 0.16796875, "learning_rate": 2.327918261255259e-06, "loss": 1.0129, "num_tokens": 26231911161.0, "step": 10283 }, { "epoch": 1.8331550802139036, "grad_norm": 0.166015625, "learning_rate": 2.3272237136454267e-06, "loss": 1.0347, "num_tokens": 26238189041.0, "step": 10284 }, { "epoch": 1.8333333333333335, "grad_norm": 0.166015625, "learning_rate": 2.326529888739878e-06, "loss": 1.0061, "num_tokens": 26244473317.0, "step": 10285 }, { "epoch": 1.833511586452763, "grad_norm": 0.1689453125, "learning_rate": 2.3258367865964286e-06, "loss": 1.0305, "num_tokens": 26250739687.0, "step": 10286 }, { "epoch": 1.8336898395721926, "grad_norm": 0.1689453125, "learning_rate": 2.3251444072728325e-06, "loss": 0.988, "num_tokens": 26257024341.0, "step": 10287 }, { "epoch": 1.8338680926916222, "grad_norm": 0.1767578125, "learning_rate": 2.3244527508267887e-06, "loss": 1.0152, "num_tokens": 26263307523.0, "step": 10288 }, { "epoch": 1.8340463458110516, "grad_norm": 0.16796875, "learning_rate": 2.323761817315932e-06, "loss": 1.0207, "num_tokens": 26269582788.0, "step": 10289 }, { "epoch": 1.8342245989304813, "grad_norm": 0.173828125, "learning_rate": 2.323071606797838e-06, "loss": 1.0097, "num_tokens": 26275843004.0, "step": 10290 }, { "epoch": 1.834402852049911, "grad_norm": 0.1748046875, "learning_rate": 2.3223821193300206e-06, "loss": 0.9964, "num_tokens": 26282090271.0, "step": 10291 }, { "epoch": 1.8345811051693404, "grad_norm": 0.1728515625, "learning_rate": 2.3216933549699374e-06, "loss": 1.0337, "num_tokens": 26288365188.0, "step": 10292 }, { "epoch": 1.83475935828877, "grad_norm": 0.16796875, "learning_rate": 2.321005313774983e-06, "loss": 1.0334, "num_tokens": 26294648093.0, "step": 10293 }, { "epoch": 1.8349376114081997, "grad_norm": 0.173828125, "learning_rate": 2.3203179958024907e-06, "loss": 0.9816, "num_tokens": 26300930418.0, "step": 10294 }, { "epoch": 1.835115864527629, "grad_norm": 0.173828125, "learning_rate": 2.3196314011097353e-06, "loss": 1.0198, "num_tokens": 26307216440.0, "step": 10295 }, { "epoch": 1.835294117647059, "grad_norm": 0.1748046875, "learning_rate": 2.318945529753931e-06, "loss": 1.0243, "num_tokens": 26313477062.0, "step": 10296 }, { "epoch": 1.8354723707664884, "grad_norm": 0.169921875, "learning_rate": 2.3182603817922317e-06, "loss": 1.01, "num_tokens": 26319762118.0, "step": 10297 }, { "epoch": 1.835650623885918, "grad_norm": 0.173828125, "learning_rate": 2.3175759572817303e-06, "loss": 1.0274, "num_tokens": 26326029310.0, "step": 10298 }, { "epoch": 1.8358288770053477, "grad_norm": 0.171875, "learning_rate": 2.3168922562794612e-06, "loss": 1.0246, "num_tokens": 26332310068.0, "step": 10299 }, { "epoch": 1.8360071301247771, "grad_norm": 0.171875, "learning_rate": 2.3162092788423966e-06, "loss": 1.0065, "num_tokens": 26338594375.0, "step": 10300 }, { "epoch": 1.8361853832442068, "grad_norm": 0.1689453125, "learning_rate": 2.3155270250274496e-06, "loss": 1.0111, "num_tokens": 26344844930.0, "step": 10301 }, { "epoch": 1.8363636363636364, "grad_norm": 0.16796875, "learning_rate": 2.3148454948914716e-06, "loss": 1.019, "num_tokens": 26351094611.0, "step": 10302 }, { "epoch": 1.8365418894830658, "grad_norm": 0.173828125, "learning_rate": 2.3141646884912563e-06, "loss": 1.0032, "num_tokens": 26357360354.0, "step": 10303 }, { "epoch": 1.8367201426024955, "grad_norm": 0.1708984375, "learning_rate": 2.313484605883534e-06, "loss": 1.025, "num_tokens": 26363626827.0, "step": 10304 }, { "epoch": 1.8368983957219251, "grad_norm": 0.171875, "learning_rate": 2.3128052471249744e-06, "loss": 1.0033, "num_tokens": 26369895718.0, "step": 10305 }, { "epoch": 1.8370766488413546, "grad_norm": 0.1669921875, "learning_rate": 2.3121266122721934e-06, "loss": 1.0251, "num_tokens": 26376181270.0, "step": 10306 }, { "epoch": 1.8372549019607844, "grad_norm": 0.169921875, "learning_rate": 2.3114487013817382e-06, "loss": 1.0038, "num_tokens": 26382446881.0, "step": 10307 }, { "epoch": 1.8374331550802139, "grad_norm": 0.171875, "learning_rate": 2.310771514510101e-06, "loss": 1.0119, "num_tokens": 26388730221.0, "step": 10308 }, { "epoch": 1.8376114081996435, "grad_norm": 0.17578125, "learning_rate": 2.3100950517137096e-06, "loss": 1.019, "num_tokens": 26395014039.0, "step": 10309 }, { "epoch": 1.8377896613190732, "grad_norm": 0.16796875, "learning_rate": 2.309419313048937e-06, "loss": 1.0139, "num_tokens": 26401279612.0, "step": 10310 }, { "epoch": 1.8379679144385026, "grad_norm": 0.1708984375, "learning_rate": 2.308744298572091e-06, "loss": 1.0168, "num_tokens": 26407562243.0, "step": 10311 }, { "epoch": 1.8381461675579323, "grad_norm": 0.1689453125, "learning_rate": 2.3080700083394193e-06, "loss": 1.0115, "num_tokens": 26413845592.0, "step": 10312 }, { "epoch": 1.838324420677362, "grad_norm": 0.16796875, "learning_rate": 2.3073964424071126e-06, "loss": 1.0216, "num_tokens": 26420128116.0, "step": 10313 }, { "epoch": 1.8385026737967913, "grad_norm": 0.1767578125, "learning_rate": 2.3067236008312984e-06, "loss": 1.0458, "num_tokens": 26426384616.0, "step": 10314 }, { "epoch": 1.838680926916221, "grad_norm": 0.1748046875, "learning_rate": 2.3060514836680465e-06, "loss": 1.004, "num_tokens": 26432668816.0, "step": 10315 }, { "epoch": 1.8388591800356506, "grad_norm": 0.1728515625, "learning_rate": 2.3053800909733603e-06, "loss": 1.0472, "num_tokens": 26438952334.0, "step": 10316 }, { "epoch": 1.83903743315508, "grad_norm": 0.1728515625, "learning_rate": 2.304709422803192e-06, "loss": 1.018, "num_tokens": 26445213860.0, "step": 10317 }, { "epoch": 1.83921568627451, "grad_norm": 0.1748046875, "learning_rate": 2.304039479213427e-06, "loss": 1.0243, "num_tokens": 26451443776.0, "step": 10318 }, { "epoch": 1.8393939393939394, "grad_norm": 0.1640625, "learning_rate": 2.303370260259889e-06, "loss": 0.9921, "num_tokens": 26457711697.0, "step": 10319 }, { "epoch": 1.839572192513369, "grad_norm": 0.1728515625, "learning_rate": 2.3027017659983485e-06, "loss": 1.0167, "num_tokens": 26463995250.0, "step": 10320 }, { "epoch": 1.8397504456327987, "grad_norm": 0.1669921875, "learning_rate": 2.3020339964845077e-06, "loss": 0.9801, "num_tokens": 26470278096.0, "step": 10321 }, { "epoch": 1.839928698752228, "grad_norm": 0.171875, "learning_rate": 2.3013669517740147e-06, "loss": 1.0326, "num_tokens": 26476562132.0, "step": 10322 }, { "epoch": 1.8401069518716577, "grad_norm": 0.1767578125, "learning_rate": 2.3007006319224525e-06, "loss": 1.0165, "num_tokens": 26482768762.0, "step": 10323 }, { "epoch": 1.8402852049910874, "grad_norm": 0.17578125, "learning_rate": 2.3000350369853463e-06, "loss": 1.0196, "num_tokens": 26488980361.0, "step": 10324 }, { "epoch": 1.8404634581105168, "grad_norm": 0.1796875, "learning_rate": 2.2993701670181602e-06, "loss": 1.0185, "num_tokens": 26495208014.0, "step": 10325 }, { "epoch": 1.8406417112299467, "grad_norm": 0.16796875, "learning_rate": 2.298706022076299e-06, "loss": 0.9606, "num_tokens": 26501491284.0, "step": 10326 }, { "epoch": 1.8408199643493761, "grad_norm": 0.171875, "learning_rate": 2.298042602215105e-06, "loss": 1.0311, "num_tokens": 26507745490.0, "step": 10327 }, { "epoch": 1.8409982174688055, "grad_norm": 0.1669921875, "learning_rate": 2.2973799074898615e-06, "loss": 1.0329, "num_tokens": 26514029384.0, "step": 10328 }, { "epoch": 1.8411764705882354, "grad_norm": 0.16796875, "learning_rate": 2.2967179379557904e-06, "loss": 0.9878, "num_tokens": 26520314279.0, "step": 10329 }, { "epoch": 1.8413547237076648, "grad_norm": 0.169921875, "learning_rate": 2.296056693668053e-06, "loss": 1.0331, "num_tokens": 26526592812.0, "step": 10330 }, { "epoch": 1.8415329768270945, "grad_norm": 0.1708984375, "learning_rate": 2.295396174681753e-06, "loss": 1.0233, "num_tokens": 26532878252.0, "step": 10331 }, { "epoch": 1.8417112299465241, "grad_norm": 0.166015625, "learning_rate": 2.2947363810519316e-06, "loss": 1.0397, "num_tokens": 26539157128.0, "step": 10332 }, { "epoch": 1.8418894830659536, "grad_norm": 0.171875, "learning_rate": 2.294077312833567e-06, "loss": 1.0351, "num_tokens": 26545420237.0, "step": 10333 }, { "epoch": 1.8420677361853832, "grad_norm": 0.1669921875, "learning_rate": 2.2934189700815827e-06, "loss": 1.0099, "num_tokens": 26551668153.0, "step": 10334 }, { "epoch": 1.8422459893048129, "grad_norm": 0.1728515625, "learning_rate": 2.292761352850836e-06, "loss": 0.9953, "num_tokens": 26557900643.0, "step": 10335 }, { "epoch": 1.8424242424242423, "grad_norm": 0.1748046875, "learning_rate": 2.2921044611961267e-06, "loss": 1.0318, "num_tokens": 26564183994.0, "step": 10336 }, { "epoch": 1.8426024955436722, "grad_norm": 0.1748046875, "learning_rate": 2.2914482951721943e-06, "loss": 1.0238, "num_tokens": 26570467277.0, "step": 10337 }, { "epoch": 1.8427807486631016, "grad_norm": 0.1708984375, "learning_rate": 2.2907928548337183e-06, "loss": 1.0381, "num_tokens": 26576719380.0, "step": 10338 }, { "epoch": 1.8429590017825312, "grad_norm": 0.169921875, "learning_rate": 2.2901381402353152e-06, "loss": 1.014, "num_tokens": 26582970642.0, "step": 10339 }, { "epoch": 1.843137254901961, "grad_norm": 0.17578125, "learning_rate": 2.2894841514315426e-06, "loss": 1.0479, "num_tokens": 26589231712.0, "step": 10340 }, { "epoch": 1.8433155080213903, "grad_norm": 0.171875, "learning_rate": 2.288830888476898e-06, "loss": 1.0566, "num_tokens": 26595515840.0, "step": 10341 }, { "epoch": 1.84349376114082, "grad_norm": 0.173828125, "learning_rate": 2.2881783514258177e-06, "loss": 1.0027, "num_tokens": 26601791643.0, "step": 10342 }, { "epoch": 1.8436720142602496, "grad_norm": 0.1669921875, "learning_rate": 2.287526540332678e-06, "loss": 1.0082, "num_tokens": 26608060158.0, "step": 10343 }, { "epoch": 1.843850267379679, "grad_norm": 0.1708984375, "learning_rate": 2.2868754552517938e-06, "loss": 1.011, "num_tokens": 26614299600.0, "step": 10344 }, { "epoch": 1.8440285204991087, "grad_norm": 0.177734375, "learning_rate": 2.2862250962374197e-06, "loss": 0.9763, "num_tokens": 26620582361.0, "step": 10345 }, { "epoch": 1.8442067736185384, "grad_norm": 0.173828125, "learning_rate": 2.285575463343753e-06, "loss": 1.0298, "num_tokens": 26626853211.0, "step": 10346 }, { "epoch": 1.8443850267379678, "grad_norm": 0.169921875, "learning_rate": 2.2849265566249254e-06, "loss": 1.0241, "num_tokens": 26633137678.0, "step": 10347 }, { "epoch": 1.8445632798573977, "grad_norm": 0.1708984375, "learning_rate": 2.2842783761350096e-06, "loss": 0.9943, "num_tokens": 26639371621.0, "step": 10348 }, { "epoch": 1.844741532976827, "grad_norm": 0.1728515625, "learning_rate": 2.2836309219280227e-06, "loss": 1.0046, "num_tokens": 26645653446.0, "step": 10349 }, { "epoch": 1.8449197860962567, "grad_norm": 0.177734375, "learning_rate": 2.2829841940579125e-06, "loss": 1.005, "num_tokens": 26651936513.0, "step": 10350 }, { "epoch": 1.8450980392156864, "grad_norm": 0.166015625, "learning_rate": 2.282338192578573e-06, "loss": 1.0303, "num_tokens": 26658221176.0, "step": 10351 }, { "epoch": 1.8452762923351158, "grad_norm": 0.171875, "learning_rate": 2.2816929175438355e-06, "loss": 1.0214, "num_tokens": 26664506062.0, "step": 10352 }, { "epoch": 1.8454545454545455, "grad_norm": 0.1767578125, "learning_rate": 2.2810483690074707e-06, "loss": 0.98, "num_tokens": 26670778085.0, "step": 10353 }, { "epoch": 1.845632798573975, "grad_norm": 0.17578125, "learning_rate": 2.2804045470231904e-06, "loss": 1.0065, "num_tokens": 26677025579.0, "step": 10354 }, { "epoch": 1.8458110516934045, "grad_norm": 0.1748046875, "learning_rate": 2.279761451644642e-06, "loss": 1.0527, "num_tokens": 26683295137.0, "step": 10355 }, { "epoch": 1.8459893048128342, "grad_norm": 0.16796875, "learning_rate": 2.279119082925417e-06, "loss": 0.9889, "num_tokens": 26689568723.0, "step": 10356 }, { "epoch": 1.8461675579322638, "grad_norm": 0.177734375, "learning_rate": 2.278477440919044e-06, "loss": 0.9981, "num_tokens": 26695838288.0, "step": 10357 }, { "epoch": 1.8463458110516933, "grad_norm": 0.177734375, "learning_rate": 2.2778365256789882e-06, "loss": 0.9797, "num_tokens": 26702077621.0, "step": 10358 }, { "epoch": 1.8465240641711231, "grad_norm": 0.1748046875, "learning_rate": 2.2771963372586607e-06, "loss": 1.0494, "num_tokens": 26708330710.0, "step": 10359 }, { "epoch": 1.8467023172905526, "grad_norm": 0.177734375, "learning_rate": 2.2765568757114068e-06, "loss": 1.0191, "num_tokens": 26714597484.0, "step": 10360 }, { "epoch": 1.8468805704099822, "grad_norm": 0.169921875, "learning_rate": 2.275918141090513e-06, "loss": 1.0309, "num_tokens": 26720851626.0, "step": 10361 }, { "epoch": 1.8470588235294119, "grad_norm": 0.16796875, "learning_rate": 2.2752801334492047e-06, "loss": 1.0227, "num_tokens": 26727117307.0, "step": 10362 }, { "epoch": 1.8472370766488413, "grad_norm": 0.1689453125, "learning_rate": 2.27464285284065e-06, "loss": 1.0341, "num_tokens": 26733401320.0, "step": 10363 }, { "epoch": 1.847415329768271, "grad_norm": 0.1787109375, "learning_rate": 2.27400629931795e-06, "loss": 1.0047, "num_tokens": 26739676494.0, "step": 10364 }, { "epoch": 1.8475935828877006, "grad_norm": 0.169921875, "learning_rate": 2.2733704729341517e-06, "loss": 0.9781, "num_tokens": 26745952705.0, "step": 10365 }, { "epoch": 1.84777183600713, "grad_norm": 0.171875, "learning_rate": 2.2727353737422365e-06, "loss": 1.0023, "num_tokens": 26752236756.0, "step": 10366 }, { "epoch": 1.8479500891265597, "grad_norm": 0.1728515625, "learning_rate": 2.2721010017951293e-06, "loss": 1.0199, "num_tokens": 26758515639.0, "step": 10367 }, { "epoch": 1.8481283422459893, "grad_norm": 0.171875, "learning_rate": 2.2714673571456908e-06, "loss": 0.9932, "num_tokens": 26764772402.0, "step": 10368 }, { "epoch": 1.8483065953654187, "grad_norm": 0.17578125, "learning_rate": 2.2708344398467234e-06, "loss": 1.0275, "num_tokens": 26771032461.0, "step": 10369 }, { "epoch": 1.8484848484848486, "grad_norm": 0.1689453125, "learning_rate": 2.2702022499509684e-06, "loss": 1.0266, "num_tokens": 26777290936.0, "step": 10370 }, { "epoch": 1.848663101604278, "grad_norm": 0.1708984375, "learning_rate": 2.2695707875111063e-06, "loss": 1.0071, "num_tokens": 26783574885.0, "step": 10371 }, { "epoch": 1.8488413547237077, "grad_norm": 0.1689453125, "learning_rate": 2.268940052579756e-06, "loss": 1.0117, "num_tokens": 26789839473.0, "step": 10372 }, { "epoch": 1.8490196078431373, "grad_norm": 0.1748046875, "learning_rate": 2.2683100452094787e-06, "loss": 1.0172, "num_tokens": 26796123658.0, "step": 10373 }, { "epoch": 1.8491978609625668, "grad_norm": 0.1708984375, "learning_rate": 2.2676807654527716e-06, "loss": 1.0184, "num_tokens": 26802381510.0, "step": 10374 }, { "epoch": 1.8493761140819964, "grad_norm": 0.169921875, "learning_rate": 2.2670522133620728e-06, "loss": 1.0028, "num_tokens": 26808665711.0, "step": 10375 }, { "epoch": 1.849554367201426, "grad_norm": 0.173828125, "learning_rate": 2.2664243889897584e-06, "loss": 1.0417, "num_tokens": 26814933995.0, "step": 10376 }, { "epoch": 1.8497326203208555, "grad_norm": 0.1708984375, "learning_rate": 2.2657972923881494e-06, "loss": 0.9845, "num_tokens": 26821160553.0, "step": 10377 }, { "epoch": 1.8499108734402852, "grad_norm": 0.173828125, "learning_rate": 2.265170923609498e-06, "loss": 1.0367, "num_tokens": 26827425042.0, "step": 10378 }, { "epoch": 1.8500891265597148, "grad_norm": 0.1728515625, "learning_rate": 2.2645452827060013e-06, "loss": 0.9976, "num_tokens": 26833709851.0, "step": 10379 }, { "epoch": 1.8502673796791442, "grad_norm": 0.173828125, "learning_rate": 2.2639203697297936e-06, "loss": 0.9816, "num_tokens": 26839993869.0, "step": 10380 }, { "epoch": 1.850445632798574, "grad_norm": 0.17578125, "learning_rate": 2.2632961847329483e-06, "loss": 1.0173, "num_tokens": 26846278133.0, "step": 10381 }, { "epoch": 1.8506238859180035, "grad_norm": 0.177734375, "learning_rate": 2.2626727277674803e-06, "loss": 1.0394, "num_tokens": 26852509420.0, "step": 10382 }, { "epoch": 1.8508021390374332, "grad_norm": 0.18359375, "learning_rate": 2.26204999888534e-06, "loss": 1.0606, "num_tokens": 26858745907.0, "step": 10383 }, { "epoch": 1.8509803921568628, "grad_norm": 0.1708984375, "learning_rate": 2.2614279981384227e-06, "loss": 0.9878, "num_tokens": 26865029574.0, "step": 10384 }, { "epoch": 1.8511586452762923, "grad_norm": 0.1728515625, "learning_rate": 2.260806725578558e-06, "loss": 1.0083, "num_tokens": 26871313541.0, "step": 10385 }, { "epoch": 1.851336898395722, "grad_norm": 0.173828125, "learning_rate": 2.260186181257518e-06, "loss": 1.0267, "num_tokens": 26877573201.0, "step": 10386 }, { "epoch": 1.8515151515151516, "grad_norm": 0.1708984375, "learning_rate": 2.2595663652270105e-06, "loss": 1.0382, "num_tokens": 26883857099.0, "step": 10387 }, { "epoch": 1.851693404634581, "grad_norm": 0.171875, "learning_rate": 2.258947277538688e-06, "loss": 0.9933, "num_tokens": 26890112780.0, "step": 10388 }, { "epoch": 1.8518716577540109, "grad_norm": 0.171875, "learning_rate": 2.258328918244136e-06, "loss": 1.0232, "num_tokens": 26896397742.0, "step": 10389 }, { "epoch": 1.8520499108734403, "grad_norm": 0.1767578125, "learning_rate": 2.2577112873948838e-06, "loss": 1.014, "num_tokens": 26902681357.0, "step": 10390 }, { "epoch": 1.8522281639928697, "grad_norm": 0.1728515625, "learning_rate": 2.2570943850424e-06, "loss": 1.0096, "num_tokens": 26908966116.0, "step": 10391 }, { "epoch": 1.8524064171122996, "grad_norm": 0.177734375, "learning_rate": 2.2564782112380893e-06, "loss": 1.0283, "num_tokens": 26915250928.0, "step": 10392 }, { "epoch": 1.852584670231729, "grad_norm": 0.171875, "learning_rate": 2.2558627660332995e-06, "loss": 1.0473, "num_tokens": 26921525865.0, "step": 10393 }, { "epoch": 1.8527629233511587, "grad_norm": 0.173828125, "learning_rate": 2.255248049479313e-06, "loss": 1.0022, "num_tokens": 26927810920.0, "step": 10394 }, { "epoch": 1.8529411764705883, "grad_norm": 0.169921875, "learning_rate": 2.2546340616273573e-06, "loss": 1.0249, "num_tokens": 26934065717.0, "step": 10395 }, { "epoch": 1.8531194295900177, "grad_norm": 0.169921875, "learning_rate": 2.254020802528594e-06, "loss": 1.0247, "num_tokens": 26940350310.0, "step": 10396 }, { "epoch": 1.8532976827094474, "grad_norm": 0.1669921875, "learning_rate": 2.2534082722341265e-06, "loss": 0.993, "num_tokens": 26946633095.0, "step": 10397 }, { "epoch": 1.853475935828877, "grad_norm": 0.1767578125, "learning_rate": 2.2527964707949984e-06, "loss": 1.0217, "num_tokens": 26952917180.0, "step": 10398 }, { "epoch": 1.8536541889483065, "grad_norm": 0.169921875, "learning_rate": 2.25218539826219e-06, "loss": 1.0048, "num_tokens": 26959200374.0, "step": 10399 }, { "epoch": 1.8538324420677363, "grad_norm": 0.17578125, "learning_rate": 2.251575054686622e-06, "loss": 0.991, "num_tokens": 26965451089.0, "step": 10400 }, { "epoch": 1.8540106951871658, "grad_norm": 0.1708984375, "learning_rate": 2.250965440119155e-06, "loss": 0.9958, "num_tokens": 26971735364.0, "step": 10401 }, { "epoch": 1.8541889483065954, "grad_norm": 0.171875, "learning_rate": 2.2503565546105878e-06, "loss": 1.0409, "num_tokens": 26978016611.0, "step": 10402 }, { "epoch": 1.854367201426025, "grad_norm": 0.1708984375, "learning_rate": 2.2497483982116607e-06, "loss": 1.0071, "num_tokens": 26984299580.0, "step": 10403 }, { "epoch": 1.8545454545454545, "grad_norm": 0.1748046875, "learning_rate": 2.24914097097305e-06, "loss": 0.979, "num_tokens": 26990556843.0, "step": 10404 }, { "epoch": 1.8547237076648841, "grad_norm": 0.1689453125, "learning_rate": 2.24853427294537e-06, "loss": 1.0035, "num_tokens": 26996839791.0, "step": 10405 }, { "epoch": 1.8549019607843138, "grad_norm": 0.177734375, "learning_rate": 2.247928304179183e-06, "loss": 1.0246, "num_tokens": 27003112718.0, "step": 10406 }, { "epoch": 1.8550802139037432, "grad_norm": 0.173828125, "learning_rate": 2.2473230647249796e-06, "loss": 1.0355, "num_tokens": 27009370305.0, "step": 10407 }, { "epoch": 1.8552584670231729, "grad_norm": 0.1728515625, "learning_rate": 2.2467185546331967e-06, "loss": 1.0529, "num_tokens": 27015631673.0, "step": 10408 }, { "epoch": 1.8554367201426025, "grad_norm": 0.169921875, "learning_rate": 2.2461147739542077e-06, "loss": 1.0236, "num_tokens": 27021897479.0, "step": 10409 }, { "epoch": 1.855614973262032, "grad_norm": 0.1708984375, "learning_rate": 2.2455117227383267e-06, "loss": 1.0535, "num_tokens": 27028178736.0, "step": 10410 }, { "epoch": 1.8557932263814618, "grad_norm": 0.16796875, "learning_rate": 2.2449094010358058e-06, "loss": 0.9916, "num_tokens": 27034434488.0, "step": 10411 }, { "epoch": 1.8559714795008913, "grad_norm": 0.16796875, "learning_rate": 2.2443078088968345e-06, "loss": 1.0335, "num_tokens": 27040689057.0, "step": 10412 }, { "epoch": 1.856149732620321, "grad_norm": 0.16796875, "learning_rate": 2.243706946371546e-06, "loss": 1.0079, "num_tokens": 27046974233.0, "step": 10413 }, { "epoch": 1.8563279857397506, "grad_norm": 0.1689453125, "learning_rate": 2.2431068135100093e-06, "loss": 1.0117, "num_tokens": 27053195804.0, "step": 10414 }, { "epoch": 1.85650623885918, "grad_norm": 0.1767578125, "learning_rate": 2.242507410362234e-06, "loss": 1.0025, "num_tokens": 27059477998.0, "step": 10415 }, { "epoch": 1.8566844919786096, "grad_norm": 0.1689453125, "learning_rate": 2.241908736978167e-06, "loss": 0.9866, "num_tokens": 27065750731.0, "step": 10416 }, { "epoch": 1.8568627450980393, "grad_norm": 0.1689453125, "learning_rate": 2.241310793407698e-06, "loss": 1.0281, "num_tokens": 27072034067.0, "step": 10417 }, { "epoch": 1.8570409982174687, "grad_norm": 0.1767578125, "learning_rate": 2.2407135797006544e-06, "loss": 1.0212, "num_tokens": 27078316015.0, "step": 10418 }, { "epoch": 1.8572192513368984, "grad_norm": 0.1708984375, "learning_rate": 2.2401170959067986e-06, "loss": 1.0337, "num_tokens": 27084598875.0, "step": 10419 }, { "epoch": 1.857397504456328, "grad_norm": 0.1689453125, "learning_rate": 2.239521342075839e-06, "loss": 1.0083, "num_tokens": 27090884133.0, "step": 10420 }, { "epoch": 1.8575757575757574, "grad_norm": 0.1796875, "learning_rate": 2.2389263182574182e-06, "loss": 1.0354, "num_tokens": 27097165725.0, "step": 10421 }, { "epoch": 1.8577540106951873, "grad_norm": 0.177734375, "learning_rate": 2.23833202450112e-06, "loss": 1.042, "num_tokens": 27103445971.0, "step": 10422 }, { "epoch": 1.8579322638146167, "grad_norm": 0.173828125, "learning_rate": 2.2377384608564663e-06, "loss": 0.9992, "num_tokens": 27109719537.0, "step": 10423 }, { "epoch": 1.8581105169340464, "grad_norm": 0.1689453125, "learning_rate": 2.237145627372921e-06, "loss": 1.0173, "num_tokens": 27115973572.0, "step": 10424 }, { "epoch": 1.858288770053476, "grad_norm": 0.17578125, "learning_rate": 2.236553524099884e-06, "loss": 1.0168, "num_tokens": 27122256918.0, "step": 10425 }, { "epoch": 1.8584670231729055, "grad_norm": 0.1748046875, "learning_rate": 2.235962151086694e-06, "loss": 1.0272, "num_tokens": 27128541258.0, "step": 10426 }, { "epoch": 1.8586452762923351, "grad_norm": 0.169921875, "learning_rate": 2.235371508382632e-06, "loss": 1.0218, "num_tokens": 27134777315.0, "step": 10427 }, { "epoch": 1.8588235294117648, "grad_norm": 0.1767578125, "learning_rate": 2.2347815960369174e-06, "loss": 1.0146, "num_tokens": 27141037701.0, "step": 10428 }, { "epoch": 1.8590017825311942, "grad_norm": 0.1708984375, "learning_rate": 2.234192414098704e-06, "loss": 1.0139, "num_tokens": 27147318732.0, "step": 10429 }, { "epoch": 1.8591800356506238, "grad_norm": 0.171875, "learning_rate": 2.23360396261709e-06, "loss": 1.0201, "num_tokens": 27153602933.0, "step": 10430 }, { "epoch": 1.8593582887700535, "grad_norm": 0.16796875, "learning_rate": 2.2330162416411137e-06, "loss": 1.0418, "num_tokens": 27159834730.0, "step": 10431 }, { "epoch": 1.859536541889483, "grad_norm": 0.1708984375, "learning_rate": 2.2324292512197474e-06, "loss": 1.0197, "num_tokens": 27166116583.0, "step": 10432 }, { "epoch": 1.8597147950089128, "grad_norm": 0.17578125, "learning_rate": 2.2318429914019054e-06, "loss": 1.0179, "num_tokens": 27172400067.0, "step": 10433 }, { "epoch": 1.8598930481283422, "grad_norm": 0.171875, "learning_rate": 2.2312574622364422e-06, "loss": 1.0092, "num_tokens": 27178683478.0, "step": 10434 }, { "epoch": 1.8600713012477719, "grad_norm": 0.1748046875, "learning_rate": 2.2306726637721486e-06, "loss": 1.0014, "num_tokens": 27184968434.0, "step": 10435 }, { "epoch": 1.8602495543672015, "grad_norm": 0.177734375, "learning_rate": 2.2300885960577564e-06, "loss": 1.0431, "num_tokens": 27191225925.0, "step": 10436 }, { "epoch": 1.860427807486631, "grad_norm": 0.16796875, "learning_rate": 2.229505259141935e-06, "loss": 1.0196, "num_tokens": 27197508047.0, "step": 10437 }, { "epoch": 1.8606060606060606, "grad_norm": 0.16796875, "learning_rate": 2.228922653073296e-06, "loss": 1.0094, "num_tokens": 27203791231.0, "step": 10438 }, { "epoch": 1.8607843137254902, "grad_norm": 0.1767578125, "learning_rate": 2.2283407779003883e-06, "loss": 1.022, "num_tokens": 27210074784.0, "step": 10439 }, { "epoch": 1.8609625668449197, "grad_norm": 0.1748046875, "learning_rate": 2.2277596336716976e-06, "loss": 0.9963, "num_tokens": 27216314739.0, "step": 10440 }, { "epoch": 1.8611408199643493, "grad_norm": 0.1748046875, "learning_rate": 2.227179220435653e-06, "loss": 0.9925, "num_tokens": 27222547286.0, "step": 10441 }, { "epoch": 1.861319073083779, "grad_norm": 0.169921875, "learning_rate": 2.226599538240618e-06, "loss": 1.0147, "num_tokens": 27228828521.0, "step": 10442 }, { "epoch": 1.8614973262032084, "grad_norm": 0.1708984375, "learning_rate": 2.2260205871349e-06, "loss": 0.9906, "num_tokens": 27235094671.0, "step": 10443 }, { "epoch": 1.8616755793226383, "grad_norm": 0.173828125, "learning_rate": 2.225442367166741e-06, "loss": 1.0, "num_tokens": 27241336316.0, "step": 10444 }, { "epoch": 1.8618538324420677, "grad_norm": 0.173828125, "learning_rate": 2.224864878384325e-06, "loss": 1.0356, "num_tokens": 27247590071.0, "step": 10445 }, { "epoch": 1.8620320855614974, "grad_norm": 0.1748046875, "learning_rate": 2.2242881208357756e-06, "loss": 1.0037, "num_tokens": 27253830026.0, "step": 10446 }, { "epoch": 1.862210338680927, "grad_norm": 0.1708984375, "learning_rate": 2.2237120945691532e-06, "loss": 0.9989, "num_tokens": 27260088463.0, "step": 10447 }, { "epoch": 1.8623885918003564, "grad_norm": 0.166015625, "learning_rate": 2.2231367996324563e-06, "loss": 1.0012, "num_tokens": 27266312660.0, "step": 10448 }, { "epoch": 1.862566844919786, "grad_norm": 0.1728515625, "learning_rate": 2.222562236073628e-06, "loss": 1.0524, "num_tokens": 27272563756.0, "step": 10449 }, { "epoch": 1.8627450980392157, "grad_norm": 0.1748046875, "learning_rate": 2.221988403940544e-06, "loss": 1.024, "num_tokens": 27278842557.0, "step": 10450 }, { "epoch": 1.8629233511586452, "grad_norm": 0.17578125, "learning_rate": 2.2214153032810224e-06, "loss": 1.0202, "num_tokens": 27285124677.0, "step": 10451 }, { "epoch": 1.863101604278075, "grad_norm": 0.169921875, "learning_rate": 2.22084293414282e-06, "loss": 1.0228, "num_tokens": 27291352386.0, "step": 10452 }, { "epoch": 1.8632798573975045, "grad_norm": 0.169921875, "learning_rate": 2.220271296573633e-06, "loss": 1.0037, "num_tokens": 27297622423.0, "step": 10453 }, { "epoch": 1.8634581105169339, "grad_norm": 0.1669921875, "learning_rate": 2.2197003906210963e-06, "loss": 1.0084, "num_tokens": 27303906924.0, "step": 10454 }, { "epoch": 1.8636363636363638, "grad_norm": 0.1728515625, "learning_rate": 2.2191302163327806e-06, "loss": 1.0401, "num_tokens": 27310142364.0, "step": 10455 }, { "epoch": 1.8638146167557932, "grad_norm": 0.169921875, "learning_rate": 2.2185607737562036e-06, "loss": 1.0076, "num_tokens": 27316392535.0, "step": 10456 }, { "epoch": 1.8639928698752228, "grad_norm": 0.171875, "learning_rate": 2.2179920629388145e-06, "loss": 1.0237, "num_tokens": 27322675278.0, "step": 10457 }, { "epoch": 1.8641711229946525, "grad_norm": 0.16796875, "learning_rate": 2.2174240839280016e-06, "loss": 1.0217, "num_tokens": 27328934783.0, "step": 10458 }, { "epoch": 1.864349376114082, "grad_norm": 0.169921875, "learning_rate": 2.2168568367710994e-06, "loss": 0.9817, "num_tokens": 27335155141.0, "step": 10459 }, { "epoch": 1.8645276292335116, "grad_norm": 0.1728515625, "learning_rate": 2.2162903215153736e-06, "loss": 1.021, "num_tokens": 27341439632.0, "step": 10460 }, { "epoch": 1.8647058823529412, "grad_norm": 0.166015625, "learning_rate": 2.215724538208033e-06, "loss": 1.0307, "num_tokens": 27347720953.0, "step": 10461 }, { "epoch": 1.8648841354723706, "grad_norm": 0.1708984375, "learning_rate": 2.2151594868962233e-06, "loss": 1.0305, "num_tokens": 27354002628.0, "step": 10462 }, { "epoch": 1.8650623885918005, "grad_norm": 0.1708984375, "learning_rate": 2.2145951676270334e-06, "loss": 1.0412, "num_tokens": 27360284888.0, "step": 10463 }, { "epoch": 1.86524064171123, "grad_norm": 0.17578125, "learning_rate": 2.2140315804474868e-06, "loss": 1.0426, "num_tokens": 27366530889.0, "step": 10464 }, { "epoch": 1.8654188948306596, "grad_norm": 0.16796875, "learning_rate": 2.213468725404544e-06, "loss": 1.0157, "num_tokens": 27372787230.0, "step": 10465 }, { "epoch": 1.8655971479500892, "grad_norm": 0.1650390625, "learning_rate": 2.212906602545112e-06, "loss": 0.9851, "num_tokens": 27379071445.0, "step": 10466 }, { "epoch": 1.8657754010695187, "grad_norm": 0.173828125, "learning_rate": 2.2123452119160303e-06, "loss": 1.0302, "num_tokens": 27385354551.0, "step": 10467 }, { "epoch": 1.8659536541889483, "grad_norm": 0.16796875, "learning_rate": 2.2117845535640815e-06, "loss": 1.0039, "num_tokens": 27391621108.0, "step": 10468 }, { "epoch": 1.866131907308378, "grad_norm": 0.1708984375, "learning_rate": 2.2112246275359836e-06, "loss": 1.0156, "num_tokens": 27397906559.0, "step": 10469 }, { "epoch": 1.8663101604278074, "grad_norm": 0.169921875, "learning_rate": 2.2106654338783966e-06, "loss": 1.0183, "num_tokens": 27404163630.0, "step": 10470 }, { "epoch": 1.866488413547237, "grad_norm": 0.1708984375, "learning_rate": 2.210106972637918e-06, "loss": 1.0255, "num_tokens": 27410421528.0, "step": 10471 }, { "epoch": 1.8666666666666667, "grad_norm": 0.171875, "learning_rate": 2.2095492438610837e-06, "loss": 1.0322, "num_tokens": 27416704082.0, "step": 10472 }, { "epoch": 1.8668449197860961, "grad_norm": 0.16796875, "learning_rate": 2.2089922475943714e-06, "loss": 1.0278, "num_tokens": 27422984305.0, "step": 10473 }, { "epoch": 1.867023172905526, "grad_norm": 0.173828125, "learning_rate": 2.208435983884193e-06, "loss": 1.0111, "num_tokens": 27429249528.0, "step": 10474 }, { "epoch": 1.8672014260249554, "grad_norm": 0.17578125, "learning_rate": 2.207880452776903e-06, "loss": 1.0269, "num_tokens": 27435509970.0, "step": 10475 }, { "epoch": 1.867379679144385, "grad_norm": 0.1728515625, "learning_rate": 2.207325654318795e-06, "loss": 0.9864, "num_tokens": 27441793097.0, "step": 10476 }, { "epoch": 1.8675579322638147, "grad_norm": 0.169921875, "learning_rate": 2.2067715885560996e-06, "loss": 1.0182, "num_tokens": 27448075424.0, "step": 10477 }, { "epoch": 1.8677361853832442, "grad_norm": 0.1728515625, "learning_rate": 2.206218255534987e-06, "loss": 0.9991, "num_tokens": 27454360680.0, "step": 10478 }, { "epoch": 1.8679144385026738, "grad_norm": 0.1728515625, "learning_rate": 2.205665655301568e-06, "loss": 1.0388, "num_tokens": 27460634561.0, "step": 10479 }, { "epoch": 1.8680926916221035, "grad_norm": 0.1728515625, "learning_rate": 2.205113787901889e-06, "loss": 1.0268, "num_tokens": 27466903199.0, "step": 10480 }, { "epoch": 1.8682709447415329, "grad_norm": 0.1728515625, "learning_rate": 2.2045626533819386e-06, "loss": 1.0104, "num_tokens": 27473143879.0, "step": 10481 }, { "epoch": 1.8684491978609625, "grad_norm": 0.1708984375, "learning_rate": 2.204012251787641e-06, "loss": 1.016, "num_tokens": 27479425491.0, "step": 10482 }, { "epoch": 1.8686274509803922, "grad_norm": 0.17578125, "learning_rate": 2.203462583164863e-06, "loss": 1.0082, "num_tokens": 27485709395.0, "step": 10483 }, { "epoch": 1.8688057040998216, "grad_norm": 0.16796875, "learning_rate": 2.202913647559409e-06, "loss": 0.9928, "num_tokens": 27491971931.0, "step": 10484 }, { "epoch": 1.8689839572192515, "grad_norm": 0.171875, "learning_rate": 2.2023654450170213e-06, "loss": 1.0139, "num_tokens": 27498255076.0, "step": 10485 }, { "epoch": 1.869162210338681, "grad_norm": 0.169921875, "learning_rate": 2.201817975583382e-06, "loss": 1.046, "num_tokens": 27504538458.0, "step": 10486 }, { "epoch": 1.8693404634581106, "grad_norm": 0.1728515625, "learning_rate": 2.20127123930411e-06, "loss": 1.0348, "num_tokens": 27510823137.0, "step": 10487 }, { "epoch": 1.8695187165775402, "grad_norm": 0.169921875, "learning_rate": 2.200725236224768e-06, "loss": 1.023, "num_tokens": 27517108664.0, "step": 10488 }, { "epoch": 1.8696969696969696, "grad_norm": 0.169921875, "learning_rate": 2.2001799663908533e-06, "loss": 1.0158, "num_tokens": 27523389998.0, "step": 10489 }, { "epoch": 1.8698752228163993, "grad_norm": 0.1767578125, "learning_rate": 2.1996354298478005e-06, "loss": 1.0116, "num_tokens": 27529619981.0, "step": 10490 }, { "epoch": 1.870053475935829, "grad_norm": 0.1669921875, "learning_rate": 2.1990916266409906e-06, "loss": 0.9886, "num_tokens": 27535901288.0, "step": 10491 }, { "epoch": 1.8702317290552584, "grad_norm": 0.1748046875, "learning_rate": 2.198548556815736e-06, "loss": 1.0223, "num_tokens": 27542184359.0, "step": 10492 }, { "epoch": 1.870409982174688, "grad_norm": 0.169921875, "learning_rate": 2.1980062204172917e-06, "loss": 1.0163, "num_tokens": 27548468238.0, "step": 10493 }, { "epoch": 1.8705882352941177, "grad_norm": 0.169921875, "learning_rate": 2.1974646174908497e-06, "loss": 1.0123, "num_tokens": 27554752666.0, "step": 10494 }, { "epoch": 1.870766488413547, "grad_norm": 0.1728515625, "learning_rate": 2.1969237480815445e-06, "loss": 1.0163, "num_tokens": 27561035244.0, "step": 10495 }, { "epoch": 1.870944741532977, "grad_norm": 0.1728515625, "learning_rate": 2.1963836122344444e-06, "loss": 1.0139, "num_tokens": 27567310684.0, "step": 10496 }, { "epoch": 1.8711229946524064, "grad_norm": 0.173828125, "learning_rate": 2.195844209994558e-06, "loss": 1.0404, "num_tokens": 27573565886.0, "step": 10497 }, { "epoch": 1.871301247771836, "grad_norm": 0.1708984375, "learning_rate": 2.195305541406838e-06, "loss": 1.0011, "num_tokens": 27579851486.0, "step": 10498 }, { "epoch": 1.8714795008912657, "grad_norm": 0.1767578125, "learning_rate": 2.194767606516168e-06, "loss": 1.0268, "num_tokens": 27586126530.0, "step": 10499 }, { "epoch": 1.8716577540106951, "grad_norm": 0.16796875, "learning_rate": 2.1942304053673753e-06, "loss": 1.0055, "num_tokens": 27592412037.0, "step": 10500 }, { "epoch": 1.8718360071301248, "grad_norm": 0.1708984375, "learning_rate": 2.1936939380052254e-06, "loss": 1.0161, "num_tokens": 27598686518.0, "step": 10501 }, { "epoch": 1.8720142602495544, "grad_norm": 0.171875, "learning_rate": 2.1931582044744228e-06, "loss": 0.9931, "num_tokens": 27604955248.0, "step": 10502 }, { "epoch": 1.8721925133689838, "grad_norm": 0.1796875, "learning_rate": 2.19262320481961e-06, "loss": 1.0248, "num_tokens": 27611205356.0, "step": 10503 }, { "epoch": 1.8723707664884135, "grad_norm": 0.1689453125, "learning_rate": 2.192088939085367e-06, "loss": 0.9912, "num_tokens": 27617470799.0, "step": 10504 }, { "epoch": 1.8725490196078431, "grad_norm": 0.1748046875, "learning_rate": 2.191555407316215e-06, "loss": 1.0381, "num_tokens": 27623750177.0, "step": 10505 }, { "epoch": 1.8727272727272726, "grad_norm": 0.16796875, "learning_rate": 2.1910226095566146e-06, "loss": 1.0113, "num_tokens": 27630006467.0, "step": 10506 }, { "epoch": 1.8729055258467024, "grad_norm": 0.171875, "learning_rate": 2.1904905458509627e-06, "loss": 1.015, "num_tokens": 27636237532.0, "step": 10507 }, { "epoch": 1.8730837789661319, "grad_norm": 0.1689453125, "learning_rate": 2.1899592162435957e-06, "loss": 0.9914, "num_tokens": 27642519715.0, "step": 10508 }, { "epoch": 1.8732620320855615, "grad_norm": 0.1689453125, "learning_rate": 2.189428620778792e-06, "loss": 1.0315, "num_tokens": 27648799801.0, "step": 10509 }, { "epoch": 1.8734402852049912, "grad_norm": 0.1748046875, "learning_rate": 2.1888987595007644e-06, "loss": 1.015, "num_tokens": 27655052699.0, "step": 10510 }, { "epoch": 1.8736185383244206, "grad_norm": 0.171875, "learning_rate": 2.1883696324536667e-06, "loss": 1.023, "num_tokens": 27661317520.0, "step": 10511 }, { "epoch": 1.8737967914438503, "grad_norm": 0.1708984375, "learning_rate": 2.187841239681591e-06, "loss": 1.0446, "num_tokens": 27667597645.0, "step": 10512 }, { "epoch": 1.87397504456328, "grad_norm": 0.16796875, "learning_rate": 2.1873135812285683e-06, "loss": 1.0187, "num_tokens": 27673859098.0, "step": 10513 }, { "epoch": 1.8741532976827093, "grad_norm": 0.16796875, "learning_rate": 2.1867866571385692e-06, "loss": 1.0243, "num_tokens": 27680129336.0, "step": 10514 }, { "epoch": 1.8743315508021392, "grad_norm": 0.169921875, "learning_rate": 2.1862604674555e-06, "loss": 1.0178, "num_tokens": 27686355561.0, "step": 10515 }, { "epoch": 1.8745098039215686, "grad_norm": 0.169921875, "learning_rate": 2.1857350122232114e-06, "loss": 1.0389, "num_tokens": 27692616501.0, "step": 10516 }, { "epoch": 1.874688057040998, "grad_norm": 0.1669921875, "learning_rate": 2.1852102914854884e-06, "loss": 1.0103, "num_tokens": 27698901409.0, "step": 10517 }, { "epoch": 1.874866310160428, "grad_norm": 0.1689453125, "learning_rate": 2.1846863052860555e-06, "loss": 1.0105, "num_tokens": 27705134148.0, "step": 10518 }, { "epoch": 1.8750445632798574, "grad_norm": 0.171875, "learning_rate": 2.184163053668578e-06, "loss": 1.0284, "num_tokens": 27711417343.0, "step": 10519 }, { "epoch": 1.875222816399287, "grad_norm": 0.1728515625, "learning_rate": 2.1836405366766563e-06, "loss": 1.0321, "num_tokens": 27717697185.0, "step": 10520 }, { "epoch": 1.8754010695187167, "grad_norm": 0.1689453125, "learning_rate": 2.1831187543538336e-06, "loss": 1.0061, "num_tokens": 27723962923.0, "step": 10521 }, { "epoch": 1.875579322638146, "grad_norm": 0.1689453125, "learning_rate": 2.1825977067435895e-06, "loss": 1.0224, "num_tokens": 27730224257.0, "step": 10522 }, { "epoch": 1.8757575757575757, "grad_norm": 0.17578125, "learning_rate": 2.182077393889344e-06, "loss": 1.0349, "num_tokens": 27736473170.0, "step": 10523 }, { "epoch": 1.8759358288770054, "grad_norm": 0.1708984375, "learning_rate": 2.181557815834454e-06, "loss": 1.0455, "num_tokens": 27742721873.0, "step": 10524 }, { "epoch": 1.8761140819964348, "grad_norm": 0.16796875, "learning_rate": 2.1810389726222163e-06, "loss": 1.0086, "num_tokens": 27749004853.0, "step": 10525 }, { "epoch": 1.8762923351158647, "grad_norm": 0.171875, "learning_rate": 2.1805208642958656e-06, "loss": 1.0248, "num_tokens": 27755288617.0, "step": 10526 }, { "epoch": 1.8764705882352941, "grad_norm": 0.1748046875, "learning_rate": 2.1800034908985766e-06, "loss": 1.0221, "num_tokens": 27761543206.0, "step": 10527 }, { "epoch": 1.8766488413547238, "grad_norm": 0.173828125, "learning_rate": 2.179486852473463e-06, "loss": 1.0009, "num_tokens": 27767819328.0, "step": 10528 }, { "epoch": 1.8768270944741534, "grad_norm": 0.1767578125, "learning_rate": 2.1789709490635734e-06, "loss": 0.9934, "num_tokens": 27774088142.0, "step": 10529 }, { "epoch": 1.8770053475935828, "grad_norm": 0.1708984375, "learning_rate": 2.178455780711901e-06, "loss": 0.982, "num_tokens": 27780372862.0, "step": 10530 }, { "epoch": 1.8771836007130125, "grad_norm": 0.1728515625, "learning_rate": 2.1779413474613743e-06, "loss": 0.9983, "num_tokens": 27786629403.0, "step": 10531 }, { "epoch": 1.8773618538324421, "grad_norm": 0.16796875, "learning_rate": 2.177427649354861e-06, "loss": 1.0004, "num_tokens": 27792914335.0, "step": 10532 }, { "epoch": 1.8775401069518716, "grad_norm": 0.171875, "learning_rate": 2.176914686435166e-06, "loss": 0.9973, "num_tokens": 27799188745.0, "step": 10533 }, { "epoch": 1.8777183600713012, "grad_norm": 0.171875, "learning_rate": 2.176402458745038e-06, "loss": 1.0137, "num_tokens": 27805445102.0, "step": 10534 }, { "epoch": 1.8778966131907309, "grad_norm": 0.1748046875, "learning_rate": 2.17589096632716e-06, "loss": 1.0066, "num_tokens": 27811730110.0, "step": 10535 }, { "epoch": 1.8780748663101603, "grad_norm": 0.1689453125, "learning_rate": 2.1753802092241512e-06, "loss": 1.034, "num_tokens": 27817988032.0, "step": 10536 }, { "epoch": 1.8782531194295902, "grad_norm": 0.1689453125, "learning_rate": 2.1748701874785775e-06, "loss": 1.0012, "num_tokens": 27824273435.0, "step": 10537 }, { "epoch": 1.8784313725490196, "grad_norm": 0.1728515625, "learning_rate": 2.1743609011329366e-06, "loss": 1.0295, "num_tokens": 27830557743.0, "step": 10538 }, { "epoch": 1.8786096256684492, "grad_norm": 0.169921875, "learning_rate": 2.173852350229669e-06, "loss": 1.0114, "num_tokens": 27836812854.0, "step": 10539 }, { "epoch": 1.878787878787879, "grad_norm": 0.1689453125, "learning_rate": 2.1733445348111513e-06, "loss": 0.9966, "num_tokens": 27843092199.0, "step": 10540 }, { "epoch": 1.8789661319073083, "grad_norm": 0.1689453125, "learning_rate": 2.172837454919701e-06, "loss": 1.049, "num_tokens": 27849373491.0, "step": 10541 }, { "epoch": 1.879144385026738, "grad_norm": 0.1708984375, "learning_rate": 2.172331110597572e-06, "loss": 1.053, "num_tokens": 27855625638.0, "step": 10542 }, { "epoch": 1.8793226381461676, "grad_norm": 0.1708984375, "learning_rate": 2.1718255018869568e-06, "loss": 1.0548, "num_tokens": 27861907064.0, "step": 10543 }, { "epoch": 1.879500891265597, "grad_norm": 0.1748046875, "learning_rate": 2.1713206288299916e-06, "loss": 1.0406, "num_tokens": 27868165861.0, "step": 10544 }, { "epoch": 1.8796791443850267, "grad_norm": 0.1748046875, "learning_rate": 2.1708164914687445e-06, "loss": 1.0375, "num_tokens": 27874417140.0, "step": 10545 }, { "epoch": 1.8798573975044564, "grad_norm": 0.171875, "learning_rate": 2.1703130898452276e-06, "loss": 0.9982, "num_tokens": 27880650729.0, "step": 10546 }, { "epoch": 1.8800356506238858, "grad_norm": 0.1669921875, "learning_rate": 2.1698104240013867e-06, "loss": 1.0338, "num_tokens": 27886908000.0, "step": 10547 }, { "epoch": 1.8802139037433157, "grad_norm": 0.169921875, "learning_rate": 2.1693084939791116e-06, "loss": 1.0005, "num_tokens": 27893192109.0, "step": 10548 }, { "epoch": 1.880392156862745, "grad_norm": 0.169921875, "learning_rate": 2.168807299820227e-06, "loss": 1.0218, "num_tokens": 27899477400.0, "step": 10549 }, { "epoch": 1.8805704099821747, "grad_norm": 0.173828125, "learning_rate": 2.1683068415664984e-06, "loss": 0.9648, "num_tokens": 27905762310.0, "step": 10550 }, { "epoch": 1.8807486631016044, "grad_norm": 0.166015625, "learning_rate": 2.1678071192596274e-06, "loss": 1.0287, "num_tokens": 27912021208.0, "step": 10551 }, { "epoch": 1.8809269162210338, "grad_norm": 0.1689453125, "learning_rate": 2.1673081329412576e-06, "loss": 1.0152, "num_tokens": 27918298338.0, "step": 10552 }, { "epoch": 1.8811051693404635, "grad_norm": 0.173828125, "learning_rate": 2.1668098826529686e-06, "loss": 0.9851, "num_tokens": 27924554699.0, "step": 10553 }, { "epoch": 1.881283422459893, "grad_norm": 0.171875, "learning_rate": 2.16631236843628e-06, "loss": 1.04, "num_tokens": 27930836319.0, "step": 10554 }, { "epoch": 1.8814616755793225, "grad_norm": 0.1728515625, "learning_rate": 2.165815590332651e-06, "loss": 1.0292, "num_tokens": 27937075012.0, "step": 10555 }, { "epoch": 1.8816399286987522, "grad_norm": 0.169921875, "learning_rate": 2.165319548383476e-06, "loss": 1.0251, "num_tokens": 27943357303.0, "step": 10556 }, { "epoch": 1.8818181818181818, "grad_norm": 0.1728515625, "learning_rate": 2.164824242630092e-06, "loss": 1.018, "num_tokens": 27949631527.0, "step": 10557 }, { "epoch": 1.8819964349376113, "grad_norm": 0.1689453125, "learning_rate": 2.1643296731137723e-06, "loss": 0.9929, "num_tokens": 27955885323.0, "step": 10558 }, { "epoch": 1.8821746880570411, "grad_norm": 0.1748046875, "learning_rate": 2.1638358398757304e-06, "loss": 1.0173, "num_tokens": 27962168688.0, "step": 10559 }, { "epoch": 1.8823529411764706, "grad_norm": 0.1787109375, "learning_rate": 2.163342742957116e-06, "loss": 1.0188, "num_tokens": 27968453246.0, "step": 10560 }, { "epoch": 1.8825311942959002, "grad_norm": 0.169921875, "learning_rate": 2.1628503823990193e-06, "loss": 1.0312, "num_tokens": 27974682933.0, "step": 10561 }, { "epoch": 1.8827094474153299, "grad_norm": 0.1708984375, "learning_rate": 2.162358758242469e-06, "loss": 1.028, "num_tokens": 27980943539.0, "step": 10562 }, { "epoch": 1.8828877005347593, "grad_norm": 0.1708984375, "learning_rate": 2.1618678705284335e-06, "loss": 1.0179, "num_tokens": 27987205574.0, "step": 10563 }, { "epoch": 1.883065953654189, "grad_norm": 0.171875, "learning_rate": 2.161377719297817e-06, "loss": 1.0559, "num_tokens": 27993480771.0, "step": 10564 }, { "epoch": 1.8832442067736186, "grad_norm": 0.16796875, "learning_rate": 2.1608883045914635e-06, "loss": 0.9889, "num_tokens": 27999756268.0, "step": 10565 }, { "epoch": 1.883422459893048, "grad_norm": 0.1689453125, "learning_rate": 2.160399626450158e-06, "loss": 1.0096, "num_tokens": 28006027683.0, "step": 10566 }, { "epoch": 1.8836007130124777, "grad_norm": 0.169921875, "learning_rate": 2.1599116849146206e-06, "loss": 1.0559, "num_tokens": 28012307254.0, "step": 10567 }, { "epoch": 1.8837789661319073, "grad_norm": 0.1689453125, "learning_rate": 2.159424480025512e-06, "loss": 1.0341, "num_tokens": 28018540549.0, "step": 10568 }, { "epoch": 1.8839572192513367, "grad_norm": 0.173828125, "learning_rate": 2.158938011823431e-06, "loss": 1.0058, "num_tokens": 28024825098.0, "step": 10569 }, { "epoch": 1.8841354723707666, "grad_norm": 0.173828125, "learning_rate": 2.1584522803489156e-06, "loss": 1.0096, "num_tokens": 28031096126.0, "step": 10570 }, { "epoch": 1.884313725490196, "grad_norm": 0.1689453125, "learning_rate": 2.157967285642441e-06, "loss": 1.0321, "num_tokens": 28037355871.0, "step": 10571 }, { "epoch": 1.8844919786096257, "grad_norm": 0.1748046875, "learning_rate": 2.157483027744423e-06, "loss": 1.0041, "num_tokens": 28043610370.0, "step": 10572 }, { "epoch": 1.8846702317290553, "grad_norm": 0.169921875, "learning_rate": 2.1569995066952145e-06, "loss": 1.0064, "num_tokens": 28049841544.0, "step": 10573 }, { "epoch": 1.8848484848484848, "grad_norm": 0.169921875, "learning_rate": 2.156516722535107e-06, "loss": 0.9895, "num_tokens": 28056102166.0, "step": 10574 }, { "epoch": 1.8850267379679144, "grad_norm": 0.171875, "learning_rate": 2.1560346753043308e-06, "loss": 1.0252, "num_tokens": 28062367516.0, "step": 10575 }, { "epoch": 1.885204991087344, "grad_norm": 0.1767578125, "learning_rate": 2.1555533650430563e-06, "loss": 1.0235, "num_tokens": 28068647184.0, "step": 10576 }, { "epoch": 1.8853832442067735, "grad_norm": 0.169921875, "learning_rate": 2.15507279179139e-06, "loss": 1.0047, "num_tokens": 28074928490.0, "step": 10577 }, { "epoch": 1.8855614973262034, "grad_norm": 0.1689453125, "learning_rate": 2.154592955589378e-06, "loss": 1.0075, "num_tokens": 28081156595.0, "step": 10578 }, { "epoch": 1.8857397504456328, "grad_norm": 0.1708984375, "learning_rate": 2.1541138564770055e-06, "loss": 1.0036, "num_tokens": 28087430224.0, "step": 10579 }, { "epoch": 1.8859180035650622, "grad_norm": 0.1689453125, "learning_rate": 2.1536354944941982e-06, "loss": 1.0205, "num_tokens": 28093673365.0, "step": 10580 }, { "epoch": 1.886096256684492, "grad_norm": 0.17578125, "learning_rate": 2.1531578696808145e-06, "loss": 1.0376, "num_tokens": 28099953175.0, "step": 10581 }, { "epoch": 1.8862745098039215, "grad_norm": 0.1767578125, "learning_rate": 2.1526809820766565e-06, "loss": 1.0298, "num_tokens": 28106212059.0, "step": 10582 }, { "epoch": 1.8864527629233512, "grad_norm": 0.16796875, "learning_rate": 2.152204831721463e-06, "loss": 0.9886, "num_tokens": 28112482384.0, "step": 10583 }, { "epoch": 1.8866310160427808, "grad_norm": 0.1669921875, "learning_rate": 2.151729418654912e-06, "loss": 1.023, "num_tokens": 28118746315.0, "step": 10584 }, { "epoch": 1.8868092691622103, "grad_norm": 0.1689453125, "learning_rate": 2.15125474291662e-06, "loss": 1.0233, "num_tokens": 28124995468.0, "step": 10585 }, { "epoch": 1.88698752228164, "grad_norm": 0.1728515625, "learning_rate": 2.1507808045461428e-06, "loss": 1.0302, "num_tokens": 28131279631.0, "step": 10586 }, { "epoch": 1.8871657754010696, "grad_norm": 0.181640625, "learning_rate": 2.150307603582972e-06, "loss": 1.0601, "num_tokens": 28137541062.0, "step": 10587 }, { "epoch": 1.887344028520499, "grad_norm": 0.1708984375, "learning_rate": 2.14983514006654e-06, "loss": 1.0091, "num_tokens": 28143820591.0, "step": 10588 }, { "epoch": 1.8875222816399289, "grad_norm": 0.1767578125, "learning_rate": 2.149363414036217e-06, "loss": 0.9957, "num_tokens": 28150085349.0, "step": 10589 }, { "epoch": 1.8877005347593583, "grad_norm": 0.171875, "learning_rate": 2.1488924255313122e-06, "loss": 1.014, "num_tokens": 28156360355.0, "step": 10590 }, { "epoch": 1.887878787878788, "grad_norm": 0.1728515625, "learning_rate": 2.1484221745910746e-06, "loss": 1.0191, "num_tokens": 28162644585.0, "step": 10591 }, { "epoch": 1.8880570409982176, "grad_norm": 0.1640625, "learning_rate": 2.147952661254689e-06, "loss": 0.978, "num_tokens": 28168928567.0, "step": 10592 }, { "epoch": 1.888235294117647, "grad_norm": 0.17578125, "learning_rate": 2.14748388556128e-06, "loss": 1.0057, "num_tokens": 28175197894.0, "step": 10593 }, { "epoch": 1.8884135472370767, "grad_norm": 0.1669921875, "learning_rate": 2.1470158475499094e-06, "loss": 1.0365, "num_tokens": 28181480938.0, "step": 10594 }, { "epoch": 1.8885918003565063, "grad_norm": 0.1689453125, "learning_rate": 2.146548547259583e-06, "loss": 1.0115, "num_tokens": 28187697672.0, "step": 10595 }, { "epoch": 1.8887700534759357, "grad_norm": 0.173828125, "learning_rate": 2.146081984729238e-06, "loss": 1.052, "num_tokens": 28193974873.0, "step": 10596 }, { "epoch": 1.8889483065953654, "grad_norm": 0.1669921875, "learning_rate": 2.145616159997752e-06, "loss": 0.9871, "num_tokens": 28200257762.0, "step": 10597 }, { "epoch": 1.889126559714795, "grad_norm": 0.169921875, "learning_rate": 2.145151073103946e-06, "loss": 1.0018, "num_tokens": 28206508075.0, "step": 10598 }, { "epoch": 1.8893048128342245, "grad_norm": 0.1689453125, "learning_rate": 2.1446867240865724e-06, "loss": 0.9954, "num_tokens": 28212791179.0, "step": 10599 }, { "epoch": 1.8894830659536543, "grad_norm": 0.169921875, "learning_rate": 2.144223112984327e-06, "loss": 1.0188, "num_tokens": 28219073843.0, "step": 10600 }, { "epoch": 1.8896613190730838, "grad_norm": 0.1708984375, "learning_rate": 2.1437602398358432e-06, "loss": 1.0271, "num_tokens": 28225335411.0, "step": 10601 }, { "epoch": 1.8898395721925134, "grad_norm": 0.1787109375, "learning_rate": 2.143298104679691e-06, "loss": 1.0241, "num_tokens": 28231618220.0, "step": 10602 }, { "epoch": 1.890017825311943, "grad_norm": 0.173828125, "learning_rate": 2.1428367075543817e-06, "loss": 1.0271, "num_tokens": 28237879417.0, "step": 10603 }, { "epoch": 1.8901960784313725, "grad_norm": 0.17578125, "learning_rate": 2.1423760484983607e-06, "loss": 1.0008, "num_tokens": 28244163017.0, "step": 10604 }, { "epoch": 1.8903743315508021, "grad_norm": 0.1669921875, "learning_rate": 2.1419161275500182e-06, "loss": 1.0365, "num_tokens": 28250425194.0, "step": 10605 }, { "epoch": 1.8905525846702318, "grad_norm": 0.173828125, "learning_rate": 2.1414569447476784e-06, "loss": 1.0485, "num_tokens": 28256682600.0, "step": 10606 }, { "epoch": 1.8907308377896612, "grad_norm": 0.1728515625, "learning_rate": 2.140998500129605e-06, "loss": 1.0061, "num_tokens": 28262902263.0, "step": 10607 }, { "epoch": 1.8909090909090909, "grad_norm": 0.1728515625, "learning_rate": 2.1405407937339982e-06, "loss": 0.9932, "num_tokens": 28269185573.0, "step": 10608 }, { "epoch": 1.8910873440285205, "grad_norm": 0.1640625, "learning_rate": 2.1400838255990024e-06, "loss": 1.0302, "num_tokens": 28275469297.0, "step": 10609 }, { "epoch": 1.89126559714795, "grad_norm": 0.173828125, "learning_rate": 2.1396275957626956e-06, "loss": 1.0384, "num_tokens": 28281748949.0, "step": 10610 }, { "epoch": 1.8914438502673798, "grad_norm": 0.169921875, "learning_rate": 2.1391721042630937e-06, "loss": 1.0233, "num_tokens": 28288032576.0, "step": 10611 }, { "epoch": 1.8916221033868093, "grad_norm": 0.1708984375, "learning_rate": 2.1387173511381548e-06, "loss": 1.0063, "num_tokens": 28294295389.0, "step": 10612 }, { "epoch": 1.891800356506239, "grad_norm": 0.171875, "learning_rate": 2.1382633364257733e-06, "loss": 1.0315, "num_tokens": 28300547249.0, "step": 10613 }, { "epoch": 1.8919786096256686, "grad_norm": 0.1767578125, "learning_rate": 2.137810060163783e-06, "loss": 0.9966, "num_tokens": 28306831306.0, "step": 10614 }, { "epoch": 1.892156862745098, "grad_norm": 0.1787109375, "learning_rate": 2.1373575223899527e-06, "loss": 1.0494, "num_tokens": 28313087011.0, "step": 10615 }, { "epoch": 1.8923351158645276, "grad_norm": 0.173828125, "learning_rate": 2.1369057231419953e-06, "loss": 1.0237, "num_tokens": 28319370396.0, "step": 10616 }, { "epoch": 1.8925133689839573, "grad_norm": 0.173828125, "learning_rate": 2.136454662457558e-06, "loss": 1.0191, "num_tokens": 28325609625.0, "step": 10617 }, { "epoch": 1.8926916221033867, "grad_norm": 0.1708984375, "learning_rate": 2.136004340374229e-06, "loss": 1.0315, "num_tokens": 28331866160.0, "step": 10618 }, { "epoch": 1.8928698752228164, "grad_norm": 0.173828125, "learning_rate": 2.1355547569295324e-06, "loss": 1.0101, "num_tokens": 28338134105.0, "step": 10619 }, { "epoch": 1.893048128342246, "grad_norm": 0.171875, "learning_rate": 2.135105912160933e-06, "loss": 1.0257, "num_tokens": 28344417288.0, "step": 10620 }, { "epoch": 1.8932263814616754, "grad_norm": 0.173828125, "learning_rate": 2.134657806105832e-06, "loss": 1.0148, "num_tokens": 28350701869.0, "step": 10621 }, { "epoch": 1.8934046345811053, "grad_norm": 0.1708984375, "learning_rate": 2.1342104388015704e-06, "loss": 1.04, "num_tokens": 28356986252.0, "step": 10622 }, { "epoch": 1.8935828877005347, "grad_norm": 0.1708984375, "learning_rate": 2.1337638102854294e-06, "loss": 1.0168, "num_tokens": 28363219170.0, "step": 10623 }, { "epoch": 1.8937611408199644, "grad_norm": 0.1689453125, "learning_rate": 2.1333179205946245e-06, "loss": 1.0321, "num_tokens": 28369503943.0, "step": 10624 }, { "epoch": 1.893939393939394, "grad_norm": 0.1669921875, "learning_rate": 2.1328727697663126e-06, "loss": 1.0022, "num_tokens": 28375787631.0, "step": 10625 }, { "epoch": 1.8941176470588235, "grad_norm": 0.1806640625, "learning_rate": 2.1324283578375876e-06, "loss": 1.0017, "num_tokens": 28382072293.0, "step": 10626 }, { "epoch": 1.8942959001782531, "grad_norm": 0.169921875, "learning_rate": 2.1319846848454833e-06, "loss": 1.0327, "num_tokens": 28388354750.0, "step": 10627 }, { "epoch": 1.8944741532976828, "grad_norm": 0.1689453125, "learning_rate": 2.1315417508269713e-06, "loss": 1.0118, "num_tokens": 28394633912.0, "step": 10628 }, { "epoch": 1.8946524064171122, "grad_norm": 0.1689453125, "learning_rate": 2.131099555818959e-06, "loss": 0.9994, "num_tokens": 28400910671.0, "step": 10629 }, { "epoch": 1.8948306595365418, "grad_norm": 0.171875, "learning_rate": 2.130658099858298e-06, "loss": 1.0448, "num_tokens": 28407194667.0, "step": 10630 }, { "epoch": 1.8950089126559715, "grad_norm": 0.16796875, "learning_rate": 2.130217382981772e-06, "loss": 1.0338, "num_tokens": 28413471163.0, "step": 10631 }, { "epoch": 1.895187165775401, "grad_norm": 0.173828125, "learning_rate": 2.1297774052261083e-06, "loss": 1.0245, "num_tokens": 28419737598.0, "step": 10632 }, { "epoch": 1.8953654188948308, "grad_norm": 0.1708984375, "learning_rate": 2.129338166627968e-06, "loss": 1.0331, "num_tokens": 28426020510.0, "step": 10633 }, { "epoch": 1.8955436720142602, "grad_norm": 0.1728515625, "learning_rate": 2.1288996672239566e-06, "loss": 1.0074, "num_tokens": 28432267857.0, "step": 10634 }, { "epoch": 1.8957219251336899, "grad_norm": 0.16796875, "learning_rate": 2.128461907050611e-06, "loss": 1.0306, "num_tokens": 28438549855.0, "step": 10635 }, { "epoch": 1.8959001782531195, "grad_norm": 0.1708984375, "learning_rate": 2.12802488614441e-06, "loss": 1.0574, "num_tokens": 28444816625.0, "step": 10636 }, { "epoch": 1.896078431372549, "grad_norm": 0.171875, "learning_rate": 2.127588604541772e-06, "loss": 1.0149, "num_tokens": 28451100892.0, "step": 10637 }, { "epoch": 1.8962566844919786, "grad_norm": 0.169921875, "learning_rate": 2.1271530622790524e-06, "loss": 1.0329, "num_tokens": 28457373707.0, "step": 10638 }, { "epoch": 1.8964349376114082, "grad_norm": 0.173828125, "learning_rate": 2.126718259392544e-06, "loss": 1.0096, "num_tokens": 28463621912.0, "step": 10639 }, { "epoch": 1.8966131907308377, "grad_norm": 0.173828125, "learning_rate": 2.1262841959184787e-06, "loss": 1.0255, "num_tokens": 28469854327.0, "step": 10640 }, { "epoch": 1.8967914438502675, "grad_norm": 0.16796875, "learning_rate": 2.1258508718930303e-06, "loss": 1.044, "num_tokens": 28476085530.0, "step": 10641 }, { "epoch": 1.896969696969697, "grad_norm": 0.1767578125, "learning_rate": 2.125418287352305e-06, "loss": 1.0044, "num_tokens": 28482368032.0, "step": 10642 }, { "epoch": 1.8971479500891264, "grad_norm": 0.17578125, "learning_rate": 2.124986442332349e-06, "loss": 1.0071, "num_tokens": 28488617627.0, "step": 10643 }, { "epoch": 1.8973262032085563, "grad_norm": 0.1796875, "learning_rate": 2.1245553368691517e-06, "loss": 1.0138, "num_tokens": 28494898188.0, "step": 10644 }, { "epoch": 1.8975044563279857, "grad_norm": 0.1650390625, "learning_rate": 2.124124970998634e-06, "loss": 0.9907, "num_tokens": 28501163909.0, "step": 10645 }, { "epoch": 1.8976827094474154, "grad_norm": 0.1689453125, "learning_rate": 2.1236953447566593e-06, "loss": 1.0399, "num_tokens": 28507447977.0, "step": 10646 }, { "epoch": 1.897860962566845, "grad_norm": 0.169921875, "learning_rate": 2.123266458179029e-06, "loss": 1.0086, "num_tokens": 28513731628.0, "step": 10647 }, { "epoch": 1.8980392156862744, "grad_norm": 0.17578125, "learning_rate": 2.1228383113014827e-06, "loss": 1.0159, "num_tokens": 28520013616.0, "step": 10648 }, { "epoch": 1.898217468805704, "grad_norm": 0.173828125, "learning_rate": 2.122410904159697e-06, "loss": 1.0129, "num_tokens": 28526274712.0, "step": 10649 }, { "epoch": 1.8983957219251337, "grad_norm": 0.169921875, "learning_rate": 2.1219842367892875e-06, "loss": 1.0697, "num_tokens": 28532559301.0, "step": 10650 }, { "epoch": 1.8985739750445632, "grad_norm": 0.169921875, "learning_rate": 2.1215583092258097e-06, "loss": 1.0119, "num_tokens": 28538843861.0, "step": 10651 }, { "epoch": 1.898752228163993, "grad_norm": 0.169921875, "learning_rate": 2.1211331215047563e-06, "loss": 0.9998, "num_tokens": 28545125821.0, "step": 10652 }, { "epoch": 1.8989304812834225, "grad_norm": 0.169921875, "learning_rate": 2.1207086736615568e-06, "loss": 1.0481, "num_tokens": 28551409752.0, "step": 10653 }, { "epoch": 1.8991087344028519, "grad_norm": 0.1689453125, "learning_rate": 2.120284965731581e-06, "loss": 1.0485, "num_tokens": 28557665747.0, "step": 10654 }, { "epoch": 1.8992869875222818, "grad_norm": 0.1640625, "learning_rate": 2.1198619977501365e-06, "loss": 1.0075, "num_tokens": 28563922945.0, "step": 10655 }, { "epoch": 1.8994652406417112, "grad_norm": 0.1767578125, "learning_rate": 2.119439769752471e-06, "loss": 0.9846, "num_tokens": 28570205909.0, "step": 10656 }, { "epoch": 1.8996434937611408, "grad_norm": 0.171875, "learning_rate": 2.1190182817737672e-06, "loss": 1.0034, "num_tokens": 28576479619.0, "step": 10657 }, { "epoch": 1.8998217468805705, "grad_norm": 0.1708984375, "learning_rate": 2.118597533849148e-06, "loss": 1.0265, "num_tokens": 28582764061.0, "step": 10658 }, { "epoch": 1.9, "grad_norm": 0.1728515625, "learning_rate": 2.1181775260136752e-06, "loss": 1.0142, "num_tokens": 28589024957.0, "step": 10659 }, { "epoch": 1.9001782531194296, "grad_norm": 0.1708984375, "learning_rate": 2.1177582583023474e-06, "loss": 1.0236, "num_tokens": 28595267646.0, "step": 10660 }, { "epoch": 1.9003565062388592, "grad_norm": 0.169921875, "learning_rate": 2.117339730750101e-06, "loss": 1.019, "num_tokens": 28601539637.0, "step": 10661 }, { "epoch": 1.9005347593582886, "grad_norm": 0.1708984375, "learning_rate": 2.1169219433918155e-06, "loss": 1.0511, "num_tokens": 28607822616.0, "step": 10662 }, { "epoch": 1.9007130124777185, "grad_norm": 0.169921875, "learning_rate": 2.1165048962623015e-06, "loss": 0.9972, "num_tokens": 28614094525.0, "step": 10663 }, { "epoch": 1.900891265597148, "grad_norm": 0.171875, "learning_rate": 2.1160885893963135e-06, "loss": 1.0361, "num_tokens": 28620366614.0, "step": 10664 }, { "epoch": 1.9010695187165776, "grad_norm": 0.1748046875, "learning_rate": 2.115673022828543e-06, "loss": 1.0313, "num_tokens": 28626647663.0, "step": 10665 }, { "epoch": 1.9012477718360072, "grad_norm": 0.171875, "learning_rate": 2.115258196593618e-06, "loss": 1.0332, "num_tokens": 28632910679.0, "step": 10666 }, { "epoch": 1.9014260249554367, "grad_norm": 0.1708984375, "learning_rate": 2.1148441107261065e-06, "loss": 0.9981, "num_tokens": 28639194293.0, "step": 10667 }, { "epoch": 1.9016042780748663, "grad_norm": 0.171875, "learning_rate": 2.1144307652605136e-06, "loss": 1.0163, "num_tokens": 28645478063.0, "step": 10668 }, { "epoch": 1.901782531194296, "grad_norm": 0.1669921875, "learning_rate": 2.1140181602312847e-06, "loss": 0.9986, "num_tokens": 28651761981.0, "step": 10669 }, { "epoch": 1.9019607843137254, "grad_norm": 0.1708984375, "learning_rate": 2.113606295672802e-06, "loss": 1.0177, "num_tokens": 28658045047.0, "step": 10670 }, { "epoch": 1.902139037433155, "grad_norm": 0.1689453125, "learning_rate": 2.113195171619385e-06, "loss": 1.0324, "num_tokens": 28664328189.0, "step": 10671 }, { "epoch": 1.9023172905525847, "grad_norm": 0.1728515625, "learning_rate": 2.112784788105295e-06, "loss": 1.0604, "num_tokens": 28670608578.0, "step": 10672 }, { "epoch": 1.9024955436720141, "grad_norm": 0.181640625, "learning_rate": 2.1123751451647283e-06, "loss": 1.0549, "num_tokens": 28676885200.0, "step": 10673 }, { "epoch": 1.902673796791444, "grad_norm": 0.1728515625, "learning_rate": 2.11196624283182e-06, "loss": 1.0151, "num_tokens": 28683095682.0, "step": 10674 }, { "epoch": 1.9028520499108734, "grad_norm": 0.169921875, "learning_rate": 2.111558081140643e-06, "loss": 1.0089, "num_tokens": 28689370948.0, "step": 10675 }, { "epoch": 1.903030303030303, "grad_norm": 0.171875, "learning_rate": 2.1111506601252126e-06, "loss": 1.0114, "num_tokens": 28695656691.0, "step": 10676 }, { "epoch": 1.9032085561497327, "grad_norm": 0.169921875, "learning_rate": 2.1107439798194764e-06, "loss": 1.0012, "num_tokens": 28701941125.0, "step": 10677 }, { "epoch": 1.9033868092691621, "grad_norm": 0.1689453125, "learning_rate": 2.1103380402573247e-06, "loss": 1.0205, "num_tokens": 28708196376.0, "step": 10678 }, { "epoch": 1.9035650623885918, "grad_norm": 0.1689453125, "learning_rate": 2.109932841472584e-06, "loss": 1.0031, "num_tokens": 28714426126.0, "step": 10679 }, { "epoch": 1.9037433155080214, "grad_norm": 0.169921875, "learning_rate": 2.1095283834990206e-06, "loss": 1.0097, "num_tokens": 28720685854.0, "step": 10680 }, { "epoch": 1.9039215686274509, "grad_norm": 0.1708984375, "learning_rate": 2.109124666370337e-06, "loss": 0.9968, "num_tokens": 28726969006.0, "step": 10681 }, { "epoch": 1.9040998217468805, "grad_norm": 0.1728515625, "learning_rate": 2.108721690120174e-06, "loss": 1.0086, "num_tokens": 28733219640.0, "step": 10682 }, { "epoch": 1.9042780748663102, "grad_norm": 0.1689453125, "learning_rate": 2.1083194547821136e-06, "loss": 1.0034, "num_tokens": 28739493849.0, "step": 10683 }, { "epoch": 1.9044563279857396, "grad_norm": 0.1796875, "learning_rate": 2.1079179603896742e-06, "loss": 1.0242, "num_tokens": 28745768787.0, "step": 10684 }, { "epoch": 1.9046345811051695, "grad_norm": 0.16796875, "learning_rate": 2.107517206976311e-06, "loss": 1.016, "num_tokens": 28752026830.0, "step": 10685 }, { "epoch": 1.904812834224599, "grad_norm": 0.1689453125, "learning_rate": 2.107117194575419e-06, "loss": 1.0051, "num_tokens": 28758310552.0, "step": 10686 }, { "epoch": 1.9049910873440286, "grad_norm": 0.1669921875, "learning_rate": 2.106717923220334e-06, "loss": 1.0021, "num_tokens": 28764550376.0, "step": 10687 }, { "epoch": 1.9051693404634582, "grad_norm": 0.1748046875, "learning_rate": 2.1063193929443237e-06, "loss": 1.0214, "num_tokens": 28770834962.0, "step": 10688 }, { "epoch": 1.9053475935828876, "grad_norm": 0.173828125, "learning_rate": 2.105921603780599e-06, "loss": 1.0168, "num_tokens": 28777119425.0, "step": 10689 }, { "epoch": 1.9055258467023173, "grad_norm": 0.171875, "learning_rate": 2.1055245557623087e-06, "loss": 1.0449, "num_tokens": 28783400689.0, "step": 10690 }, { "epoch": 1.905704099821747, "grad_norm": 0.1708984375, "learning_rate": 2.1051282489225374e-06, "loss": 1.0177, "num_tokens": 28789685800.0, "step": 10691 }, { "epoch": 1.9058823529411764, "grad_norm": 0.17578125, "learning_rate": 2.104732683294311e-06, "loss": 1.0205, "num_tokens": 28795961031.0, "step": 10692 }, { "epoch": 1.906060606060606, "grad_norm": 0.17578125, "learning_rate": 2.1043378589105907e-06, "loss": 0.9943, "num_tokens": 28802218936.0, "step": 10693 }, { "epoch": 1.9062388591800357, "grad_norm": 0.1796875, "learning_rate": 2.103943775804278e-06, "loss": 1.0399, "num_tokens": 28808476154.0, "step": 10694 }, { "epoch": 1.906417112299465, "grad_norm": 0.171875, "learning_rate": 2.1035504340082137e-06, "loss": 1.0078, "num_tokens": 28814711132.0, "step": 10695 }, { "epoch": 1.906595365418895, "grad_norm": 0.1689453125, "learning_rate": 2.10315783355517e-06, "loss": 1.0189, "num_tokens": 28820994884.0, "step": 10696 }, { "epoch": 1.9067736185383244, "grad_norm": 0.1708984375, "learning_rate": 2.1027659744778676e-06, "loss": 1.0286, "num_tokens": 28827272518.0, "step": 10697 }, { "epoch": 1.906951871657754, "grad_norm": 0.1767578125, "learning_rate": 2.1023748568089586e-06, "loss": 1.0042, "num_tokens": 28833556492.0, "step": 10698 }, { "epoch": 1.9071301247771837, "grad_norm": 0.17578125, "learning_rate": 2.101984480581033e-06, "loss": 1.0465, "num_tokens": 28839781892.0, "step": 10699 }, { "epoch": 1.9073083778966131, "grad_norm": 0.169921875, "learning_rate": 2.1015948458266234e-06, "loss": 1.0009, "num_tokens": 28846027542.0, "step": 10700 }, { "epoch": 1.9074866310160428, "grad_norm": 0.17578125, "learning_rate": 2.101205952578196e-06, "loss": 1.0554, "num_tokens": 28852310419.0, "step": 10701 }, { "epoch": 1.9076648841354724, "grad_norm": 0.1689453125, "learning_rate": 2.1008178008681597e-06, "loss": 1.0097, "num_tokens": 28858584130.0, "step": 10702 }, { "epoch": 1.9078431372549018, "grad_norm": 0.1728515625, "learning_rate": 2.100430390728858e-06, "loss": 1.0253, "num_tokens": 28864818200.0, "step": 10703 }, { "epoch": 1.9080213903743317, "grad_norm": 0.1748046875, "learning_rate": 2.1000437221925744e-06, "loss": 1.0316, "num_tokens": 28871097093.0, "step": 10704 }, { "epoch": 1.9081996434937611, "grad_norm": 0.169921875, "learning_rate": 2.099657795291529e-06, "loss": 1.0263, "num_tokens": 28877365145.0, "step": 10705 }, { "epoch": 1.9083778966131906, "grad_norm": 0.1748046875, "learning_rate": 2.099272610057882e-06, "loss": 1.0201, "num_tokens": 28883650761.0, "step": 10706 }, { "epoch": 1.9085561497326204, "grad_norm": 0.1708984375, "learning_rate": 2.0988881665237298e-06, "loss": 1.0312, "num_tokens": 28889879964.0, "step": 10707 }, { "epoch": 1.9087344028520499, "grad_norm": 0.17578125, "learning_rate": 2.09850446472111e-06, "loss": 1.011, "num_tokens": 28896114062.0, "step": 10708 }, { "epoch": 1.9089126559714795, "grad_norm": 0.1689453125, "learning_rate": 2.0981215046819957e-06, "loss": 1.0472, "num_tokens": 28902353507.0, "step": 10709 }, { "epoch": 1.9090909090909092, "grad_norm": 0.173828125, "learning_rate": 2.0977392864382996e-06, "loss": 0.9997, "num_tokens": 28908605513.0, "step": 10710 }, { "epoch": 1.9092691622103386, "grad_norm": 0.17578125, "learning_rate": 2.097357810021871e-06, "loss": 1.0508, "num_tokens": 28914884660.0, "step": 10711 }, { "epoch": 1.9094474153297682, "grad_norm": 0.1708984375, "learning_rate": 2.0969770754644984e-06, "loss": 1.0165, "num_tokens": 28921136352.0, "step": 10712 }, { "epoch": 1.909625668449198, "grad_norm": 0.169921875, "learning_rate": 2.0965970827979086e-06, "loss": 0.9843, "num_tokens": 28927422072.0, "step": 10713 }, { "epoch": 1.9098039215686273, "grad_norm": 0.1708984375, "learning_rate": 2.0962178320537678e-06, "loss": 1.0346, "num_tokens": 28933690887.0, "step": 10714 }, { "epoch": 1.9099821746880572, "grad_norm": 0.1748046875, "learning_rate": 2.0958393232636775e-06, "loss": 1.0041, "num_tokens": 28939975940.0, "step": 10715 }, { "epoch": 1.9101604278074866, "grad_norm": 0.1669921875, "learning_rate": 2.0954615564591795e-06, "loss": 1.0305, "num_tokens": 28946258867.0, "step": 10716 }, { "epoch": 1.910338680926916, "grad_norm": 0.1669921875, "learning_rate": 2.0950845316717534e-06, "loss": 0.9807, "num_tokens": 28952521438.0, "step": 10717 }, { "epoch": 1.910516934046346, "grad_norm": 0.1796875, "learning_rate": 2.094708248932817e-06, "loss": 1.0406, "num_tokens": 28958805535.0, "step": 10718 }, { "epoch": 1.9106951871657754, "grad_norm": 0.17578125, "learning_rate": 2.094332708273725e-06, "loss": 1.0314, "num_tokens": 28965090935.0, "step": 10719 }, { "epoch": 1.910873440285205, "grad_norm": 0.171875, "learning_rate": 2.0939579097257716e-06, "loss": 1.0294, "num_tokens": 28971373406.0, "step": 10720 }, { "epoch": 1.9110516934046347, "grad_norm": 0.173828125, "learning_rate": 2.093583853320189e-06, "loss": 1.0107, "num_tokens": 28977658076.0, "step": 10721 }, { "epoch": 1.911229946524064, "grad_norm": 0.169921875, "learning_rate": 2.0932105390881473e-06, "loss": 1.032, "num_tokens": 28983942837.0, "step": 10722 }, { "epoch": 1.9114081996434937, "grad_norm": 0.1669921875, "learning_rate": 2.092837967060756e-06, "loss": 1.0306, "num_tokens": 28990210079.0, "step": 10723 }, { "epoch": 1.9115864527629234, "grad_norm": 0.177734375, "learning_rate": 2.0924661372690606e-06, "loss": 1.0259, "num_tokens": 28996464634.0, "step": 10724 }, { "epoch": 1.9117647058823528, "grad_norm": 0.171875, "learning_rate": 2.092095049744045e-06, "loss": 1.0152, "num_tokens": 29002717898.0, "step": 10725 }, { "epoch": 1.9119429590017827, "grad_norm": 0.173828125, "learning_rate": 2.091724704516634e-06, "loss": 1.0487, "num_tokens": 29008988785.0, "step": 10726 }, { "epoch": 1.912121212121212, "grad_norm": 0.17578125, "learning_rate": 2.0913551016176866e-06, "loss": 1.0272, "num_tokens": 29015257019.0, "step": 10727 }, { "epoch": 1.9122994652406418, "grad_norm": 0.1689453125, "learning_rate": 2.0909862410780015e-06, "loss": 1.0239, "num_tokens": 29021524374.0, "step": 10728 }, { "epoch": 1.9124777183600714, "grad_norm": 0.166015625, "learning_rate": 2.0906181229283183e-06, "loss": 1.0104, "num_tokens": 29027780502.0, "step": 10729 }, { "epoch": 1.9126559714795008, "grad_norm": 0.173828125, "learning_rate": 2.0902507471993114e-06, "loss": 0.9836, "num_tokens": 29034064689.0, "step": 10730 }, { "epoch": 1.9128342245989305, "grad_norm": 0.171875, "learning_rate": 2.089884113921593e-06, "loss": 1.0062, "num_tokens": 29040335351.0, "step": 10731 }, { "epoch": 1.9130124777183601, "grad_norm": 0.1748046875, "learning_rate": 2.0895182231257156e-06, "loss": 0.9996, "num_tokens": 29046618161.0, "step": 10732 }, { "epoch": 1.9131907308377896, "grad_norm": 0.1689453125, "learning_rate": 2.0891530748421706e-06, "loss": 1.0225, "num_tokens": 29052882356.0, "step": 10733 }, { "epoch": 1.9133689839572192, "grad_norm": 0.1796875, "learning_rate": 2.088788669101384e-06, "loss": 1.0447, "num_tokens": 29059166017.0, "step": 10734 }, { "epoch": 1.9135472370766489, "grad_norm": 0.173828125, "learning_rate": 2.0884250059337214e-06, "loss": 1.0103, "num_tokens": 29065421245.0, "step": 10735 }, { "epoch": 1.9137254901960783, "grad_norm": 0.1728515625, "learning_rate": 2.0880620853694876e-06, "loss": 1.0367, "num_tokens": 29071704607.0, "step": 10736 }, { "epoch": 1.9139037433155082, "grad_norm": 0.1728515625, "learning_rate": 2.0876999074389254e-06, "loss": 1.046, "num_tokens": 29077988797.0, "step": 10737 }, { "epoch": 1.9140819964349376, "grad_norm": 0.171875, "learning_rate": 2.0873384721722146e-06, "loss": 1.054, "num_tokens": 29084260992.0, "step": 10738 }, { "epoch": 1.9142602495543672, "grad_norm": 0.169921875, "learning_rate": 2.086977779599474e-06, "loss": 1.0442, "num_tokens": 29090544682.0, "step": 10739 }, { "epoch": 1.914438502673797, "grad_norm": 0.169921875, "learning_rate": 2.08661782975076e-06, "loss": 1.0136, "num_tokens": 29096829043.0, "step": 10740 }, { "epoch": 1.9146167557932263, "grad_norm": 0.1669921875, "learning_rate": 2.086258622656068e-06, "loss": 1.0372, "num_tokens": 29103113262.0, "step": 10741 }, { "epoch": 1.914795008912656, "grad_norm": 0.1708984375, "learning_rate": 2.085900158345329e-06, "loss": 1.0189, "num_tokens": 29109368592.0, "step": 10742 }, { "epoch": 1.9149732620320856, "grad_norm": 0.1708984375, "learning_rate": 2.0855424368484153e-06, "loss": 1.0395, "num_tokens": 29115652076.0, "step": 10743 }, { "epoch": 1.915151515151515, "grad_norm": 0.16796875, "learning_rate": 2.0851854581951356e-06, "loss": 1.0302, "num_tokens": 29121934573.0, "step": 10744 }, { "epoch": 1.9153297682709447, "grad_norm": 0.169921875, "learning_rate": 2.084829222415237e-06, "loss": 1.0422, "num_tokens": 29128217403.0, "step": 10745 }, { "epoch": 1.9155080213903743, "grad_norm": 0.1708984375, "learning_rate": 2.0844737295384042e-06, "loss": 1.0389, "num_tokens": 29134501664.0, "step": 10746 }, { "epoch": 1.9156862745098038, "grad_norm": 0.1728515625, "learning_rate": 2.084118979594262e-06, "loss": 1.0435, "num_tokens": 29140786419.0, "step": 10747 }, { "epoch": 1.9158645276292336, "grad_norm": 0.166015625, "learning_rate": 2.0837649726123704e-06, "loss": 1.0079, "num_tokens": 29146976591.0, "step": 10748 }, { "epoch": 1.916042780748663, "grad_norm": 0.1748046875, "learning_rate": 2.0834117086222287e-06, "loss": 1.0199, "num_tokens": 29153258098.0, "step": 10749 }, { "epoch": 1.9162210338680927, "grad_norm": 0.169921875, "learning_rate": 2.083059187653276e-06, "loss": 1.0219, "num_tokens": 29159536226.0, "step": 10750 }, { "epoch": 1.9163992869875224, "grad_norm": 0.1689453125, "learning_rate": 2.0827074097348863e-06, "loss": 1.0248, "num_tokens": 29165796289.0, "step": 10751 }, { "epoch": 1.9165775401069518, "grad_norm": 0.1728515625, "learning_rate": 2.082356374896375e-06, "loss": 1.0365, "num_tokens": 29172051422.0, "step": 10752 }, { "epoch": 1.9167557932263815, "grad_norm": 0.1728515625, "learning_rate": 2.082006083166992e-06, "loss": 1.004, "num_tokens": 29178335597.0, "step": 10753 }, { "epoch": 1.916934046345811, "grad_norm": 0.169921875, "learning_rate": 2.0816565345759278e-06, "loss": 1.0155, "num_tokens": 29184616097.0, "step": 10754 }, { "epoch": 1.9171122994652405, "grad_norm": 0.1708984375, "learning_rate": 2.0813077291523116e-06, "loss": 1.0021, "num_tokens": 29190901492.0, "step": 10755 }, { "epoch": 1.9172905525846702, "grad_norm": 0.1708984375, "learning_rate": 2.0809596669252074e-06, "loss": 1.0192, "num_tokens": 29197186265.0, "step": 10756 }, { "epoch": 1.9174688057040998, "grad_norm": 0.1767578125, "learning_rate": 2.0806123479236213e-06, "loss": 1.0268, "num_tokens": 29203422281.0, "step": 10757 }, { "epoch": 1.9176470588235293, "grad_norm": 0.1728515625, "learning_rate": 2.0802657721764943e-06, "loss": 1.0366, "num_tokens": 29209677813.0, "step": 10758 }, { "epoch": 1.9178253119429591, "grad_norm": 0.171875, "learning_rate": 2.079919939712707e-06, "loss": 1.0212, "num_tokens": 29215959888.0, "step": 10759 }, { "epoch": 1.9180035650623886, "grad_norm": 0.1708984375, "learning_rate": 2.0795748505610776e-06, "loss": 0.9984, "num_tokens": 29222234135.0, "step": 10760 }, { "epoch": 1.9181818181818182, "grad_norm": 0.1767578125, "learning_rate": 2.0792305047503624e-06, "loss": 1.0187, "num_tokens": 29228509552.0, "step": 10761 }, { "epoch": 1.9183600713012479, "grad_norm": 0.1787109375, "learning_rate": 2.078886902309256e-06, "loss": 1.0233, "num_tokens": 29234769810.0, "step": 10762 }, { "epoch": 1.9185383244206773, "grad_norm": 0.1728515625, "learning_rate": 2.07854404326639e-06, "loss": 1.0277, "num_tokens": 29241052422.0, "step": 10763 }, { "epoch": 1.918716577540107, "grad_norm": 0.1669921875, "learning_rate": 2.0782019276503364e-06, "loss": 1.0022, "num_tokens": 29247336009.0, "step": 10764 }, { "epoch": 1.9188948306595366, "grad_norm": 0.169921875, "learning_rate": 2.077860555489603e-06, "loss": 1.0214, "num_tokens": 29253619187.0, "step": 10765 }, { "epoch": 1.919073083778966, "grad_norm": 0.1728515625, "learning_rate": 2.0775199268126363e-06, "loss": 0.989, "num_tokens": 29259903753.0, "step": 10766 }, { "epoch": 1.9192513368983959, "grad_norm": 0.171875, "learning_rate": 2.0771800416478207e-06, "loss": 1.0451, "num_tokens": 29266167851.0, "step": 10767 }, { "epoch": 1.9194295900178253, "grad_norm": 0.1728515625, "learning_rate": 2.076840900023479e-06, "loss": 1.0019, "num_tokens": 29272452812.0, "step": 10768 }, { "epoch": 1.9196078431372547, "grad_norm": 0.1708984375, "learning_rate": 2.076502501967873e-06, "loss": 1.0027, "num_tokens": 29278688629.0, "step": 10769 }, { "epoch": 1.9197860962566846, "grad_norm": 0.177734375, "learning_rate": 2.0761648475092004e-06, "loss": 1.0038, "num_tokens": 29284971859.0, "step": 10770 }, { "epoch": 1.919964349376114, "grad_norm": 0.1708984375, "learning_rate": 2.0758279366755984e-06, "loss": 1.0364, "num_tokens": 29291210305.0, "step": 10771 }, { "epoch": 1.9201426024955437, "grad_norm": 0.169921875, "learning_rate": 2.0754917694951415e-06, "loss": 1.0363, "num_tokens": 29297468512.0, "step": 10772 }, { "epoch": 1.9203208556149733, "grad_norm": 0.171875, "learning_rate": 2.075156345995843e-06, "loss": 1.0291, "num_tokens": 29303710172.0, "step": 10773 }, { "epoch": 1.9204991087344028, "grad_norm": 0.171875, "learning_rate": 2.074821666205654e-06, "loss": 1.0302, "num_tokens": 29309967017.0, "step": 10774 }, { "epoch": 1.9206773618538324, "grad_norm": 0.1767578125, "learning_rate": 2.0744877301524625e-06, "loss": 1.0347, "num_tokens": 29316228110.0, "step": 10775 }, { "epoch": 1.920855614973262, "grad_norm": 0.171875, "learning_rate": 2.074154537864096e-06, "loss": 1.0436, "num_tokens": 29322513451.0, "step": 10776 }, { "epoch": 1.9210338680926915, "grad_norm": 0.1748046875, "learning_rate": 2.0738220893683208e-06, "loss": 1.0104, "num_tokens": 29328796716.0, "step": 10777 }, { "epoch": 1.9212121212121214, "grad_norm": 0.171875, "learning_rate": 2.0734903846928365e-06, "loss": 1.0411, "num_tokens": 29335080042.0, "step": 10778 }, { "epoch": 1.9213903743315508, "grad_norm": 0.169921875, "learning_rate": 2.073159423865288e-06, "loss": 1.0326, "num_tokens": 29341364466.0, "step": 10779 }, { "epoch": 1.9215686274509802, "grad_norm": 0.166015625, "learning_rate": 2.0728292069132524e-06, "loss": 1.0311, "num_tokens": 29347631760.0, "step": 10780 }, { "epoch": 1.92174688057041, "grad_norm": 0.171875, "learning_rate": 2.072499733864246e-06, "loss": 1.0, "num_tokens": 29353915835.0, "step": 10781 }, { "epoch": 1.9219251336898395, "grad_norm": 0.17578125, "learning_rate": 2.072171004745725e-06, "loss": 1.0292, "num_tokens": 29360169586.0, "step": 10782 }, { "epoch": 1.9221033868092692, "grad_norm": 0.1728515625, "learning_rate": 2.0718430195850835e-06, "loss": 1.0158, "num_tokens": 29366452767.0, "step": 10783 }, { "epoch": 1.9222816399286988, "grad_norm": 0.1708984375, "learning_rate": 2.07151577840965e-06, "loss": 1.0527, "num_tokens": 29372715667.0, "step": 10784 }, { "epoch": 1.9224598930481283, "grad_norm": 0.173828125, "learning_rate": 2.071189281246696e-06, "loss": 1.0058, "num_tokens": 29379000582.0, "step": 10785 }, { "epoch": 1.922638146167558, "grad_norm": 0.16796875, "learning_rate": 2.0708635281234265e-06, "loss": 1.0417, "num_tokens": 29385267376.0, "step": 10786 }, { "epoch": 1.9228163992869876, "grad_norm": 0.166015625, "learning_rate": 2.0705385190669884e-06, "loss": 1.0084, "num_tokens": 29391545455.0, "step": 10787 }, { "epoch": 1.922994652406417, "grad_norm": 0.1708984375, "learning_rate": 2.070214254104464e-06, "loss": 0.9956, "num_tokens": 29397813715.0, "step": 10788 }, { "epoch": 1.9231729055258469, "grad_norm": 0.1796875, "learning_rate": 2.069890733262873e-06, "loss": 1.047, "num_tokens": 29404056268.0, "step": 10789 }, { "epoch": 1.9233511586452763, "grad_norm": 0.1689453125, "learning_rate": 2.0695679565691768e-06, "loss": 1.026, "num_tokens": 29410318660.0, "step": 10790 }, { "epoch": 1.923529411764706, "grad_norm": 0.17578125, "learning_rate": 2.0692459240502716e-06, "loss": 1.0233, "num_tokens": 29416602049.0, "step": 10791 }, { "epoch": 1.9237076648841356, "grad_norm": 0.173828125, "learning_rate": 2.0689246357329917e-06, "loss": 1.0269, "num_tokens": 29422852206.0, "step": 10792 }, { "epoch": 1.923885918003565, "grad_norm": 0.173828125, "learning_rate": 2.0686040916441106e-06, "loss": 1.0258, "num_tokens": 29429119152.0, "step": 10793 }, { "epoch": 1.9240641711229947, "grad_norm": 0.1708984375, "learning_rate": 2.0682842918103404e-06, "loss": 1.0173, "num_tokens": 29435341221.0, "step": 10794 }, { "epoch": 1.9242424242424243, "grad_norm": 0.1689453125, "learning_rate": 2.067965236258329e-06, "loss": 0.999, "num_tokens": 29441624632.0, "step": 10795 }, { "epoch": 1.9244206773618537, "grad_norm": 0.171875, "learning_rate": 2.0676469250146623e-06, "loss": 1.0351, "num_tokens": 29447908287.0, "step": 10796 }, { "epoch": 1.9245989304812834, "grad_norm": 0.169921875, "learning_rate": 2.067329358105867e-06, "loss": 1.0382, "num_tokens": 29454190738.0, "step": 10797 }, { "epoch": 1.924777183600713, "grad_norm": 0.173828125, "learning_rate": 2.0670125355584057e-06, "loss": 1.0379, "num_tokens": 29460407661.0, "step": 10798 }, { "epoch": 1.9249554367201425, "grad_norm": 0.169921875, "learning_rate": 2.0666964573986783e-06, "loss": 1.0309, "num_tokens": 29466673736.0, "step": 10799 }, { "epoch": 1.9251336898395723, "grad_norm": 0.16796875, "learning_rate": 2.066381123653024e-06, "loss": 1.0205, "num_tokens": 29472949862.0, "step": 10800 }, { "epoch": 1.9253119429590018, "grad_norm": 0.1650390625, "learning_rate": 2.0660665343477214e-06, "loss": 1.0182, "num_tokens": 29479202413.0, "step": 10801 }, { "epoch": 1.9254901960784314, "grad_norm": 0.1669921875, "learning_rate": 2.065752689508983e-06, "loss": 1.0318, "num_tokens": 29485447451.0, "step": 10802 }, { "epoch": 1.925668449197861, "grad_norm": 0.171875, "learning_rate": 2.065439589162963e-06, "loss": 1.0417, "num_tokens": 29491730061.0, "step": 10803 }, { "epoch": 1.9258467023172905, "grad_norm": 0.18359375, "learning_rate": 2.0651272333357515e-06, "loss": 1.062, "num_tokens": 29498014934.0, "step": 10804 }, { "epoch": 1.9260249554367201, "grad_norm": 0.169921875, "learning_rate": 2.0648156220533773e-06, "loss": 0.985, "num_tokens": 29504273875.0, "step": 10805 }, { "epoch": 1.9262032085561498, "grad_norm": 0.16796875, "learning_rate": 2.0645047553418065e-06, "loss": 1.0108, "num_tokens": 29510557055.0, "step": 10806 }, { "epoch": 1.9263814616755792, "grad_norm": 0.1669921875, "learning_rate": 2.0641946332269442e-06, "loss": 1.0117, "num_tokens": 29516813860.0, "step": 10807 }, { "epoch": 1.9265597147950089, "grad_norm": 0.1669921875, "learning_rate": 2.0638852557346323e-06, "loss": 1.0058, "num_tokens": 29523083680.0, "step": 10808 }, { "epoch": 1.9267379679144385, "grad_norm": 0.1689453125, "learning_rate": 2.0635766228906527e-06, "loss": 1.0011, "num_tokens": 29529368317.0, "step": 10809 }, { "epoch": 1.926916221033868, "grad_norm": 0.1650390625, "learning_rate": 2.0632687347207235e-06, "loss": 1.0116, "num_tokens": 29535640149.0, "step": 10810 }, { "epoch": 1.9270944741532978, "grad_norm": 0.1689453125, "learning_rate": 2.0629615912504995e-06, "loss": 1.0104, "num_tokens": 29541924922.0, "step": 10811 }, { "epoch": 1.9272727272727272, "grad_norm": 0.1708984375, "learning_rate": 2.0626551925055777e-06, "loss": 1.0192, "num_tokens": 29548176189.0, "step": 10812 }, { "epoch": 1.927450980392157, "grad_norm": 0.169921875, "learning_rate": 2.0623495385114877e-06, "loss": 1.0078, "num_tokens": 29554459792.0, "step": 10813 }, { "epoch": 1.9276292335115865, "grad_norm": 0.171875, "learning_rate": 2.0620446292937005e-06, "loss": 1.0213, "num_tokens": 29560741438.0, "step": 10814 }, { "epoch": 1.927807486631016, "grad_norm": 0.177734375, "learning_rate": 2.0617404648776247e-06, "loss": 1.0274, "num_tokens": 29567003895.0, "step": 10815 }, { "epoch": 1.9279857397504456, "grad_norm": 0.1669921875, "learning_rate": 2.0614370452886065e-06, "loss": 0.9918, "num_tokens": 29573243796.0, "step": 10816 }, { "epoch": 1.9281639928698753, "grad_norm": 0.173828125, "learning_rate": 2.0611343705519305e-06, "loss": 1.0245, "num_tokens": 29579512808.0, "step": 10817 }, { "epoch": 1.9283422459893047, "grad_norm": 0.16796875, "learning_rate": 2.0608324406928172e-06, "loss": 1.0258, "num_tokens": 29585795241.0, "step": 10818 }, { "epoch": 1.9285204991087344, "grad_norm": 0.16796875, "learning_rate": 2.060531255736427e-06, "loss": 1.0403, "num_tokens": 29592079694.0, "step": 10819 }, { "epoch": 1.928698752228164, "grad_norm": 0.1708984375, "learning_rate": 2.0602308157078572e-06, "loss": 1.0171, "num_tokens": 29598340995.0, "step": 10820 }, { "epoch": 1.9288770053475934, "grad_norm": 0.1669921875, "learning_rate": 2.0599311206321455e-06, "loss": 1.0397, "num_tokens": 29604621811.0, "step": 10821 }, { "epoch": 1.9290552584670233, "grad_norm": 0.169921875, "learning_rate": 2.0596321705342624e-06, "loss": 1.0218, "num_tokens": 29610875031.0, "step": 10822 }, { "epoch": 1.9292335115864527, "grad_norm": 0.169921875, "learning_rate": 2.0593339654391233e-06, "loss": 1.0267, "num_tokens": 29617159083.0, "step": 10823 }, { "epoch": 1.9294117647058824, "grad_norm": 0.169921875, "learning_rate": 2.0590365053715746e-06, "loss": 1.0069, "num_tokens": 29623430776.0, "step": 10824 }, { "epoch": 1.929590017825312, "grad_norm": 0.1728515625, "learning_rate": 2.058739790356404e-06, "loss": 1.0427, "num_tokens": 29629715278.0, "step": 10825 }, { "epoch": 1.9297682709447415, "grad_norm": 0.171875, "learning_rate": 2.058443820418339e-06, "loss": 0.9995, "num_tokens": 29635978973.0, "step": 10826 }, { "epoch": 1.929946524064171, "grad_norm": 0.171875, "learning_rate": 2.0581485955820417e-06, "loss": 1.047, "num_tokens": 29642261713.0, "step": 10827 }, { "epoch": 1.9301247771836008, "grad_norm": 0.1708984375, "learning_rate": 2.0578541158721114e-06, "loss": 1.0533, "num_tokens": 29648546449.0, "step": 10828 }, { "epoch": 1.9303030303030302, "grad_norm": 0.1689453125, "learning_rate": 2.05756038131309e-06, "loss": 1.0107, "num_tokens": 29654829812.0, "step": 10829 }, { "epoch": 1.93048128342246, "grad_norm": 0.169921875, "learning_rate": 2.0572673919294532e-06, "loss": 1.049, "num_tokens": 29661112415.0, "step": 10830 }, { "epoch": 1.9306595365418895, "grad_norm": 0.171875, "learning_rate": 2.0569751477456157e-06, "loss": 1.0248, "num_tokens": 29667395358.0, "step": 10831 }, { "epoch": 1.930837789661319, "grad_norm": 0.173828125, "learning_rate": 2.0566836487859294e-06, "loss": 1.0031, "num_tokens": 29673678736.0, "step": 10832 }, { "epoch": 1.9310160427807488, "grad_norm": 0.1689453125, "learning_rate": 2.056392895074688e-06, "loss": 1.0402, "num_tokens": 29679962679.0, "step": 10833 }, { "epoch": 1.9311942959001782, "grad_norm": 0.169921875, "learning_rate": 2.0561028866361164e-06, "loss": 1.0323, "num_tokens": 29686202539.0, "step": 10834 }, { "epoch": 1.9313725490196079, "grad_norm": 0.173828125, "learning_rate": 2.0558136234943837e-06, "loss": 1.0571, "num_tokens": 29692485204.0, "step": 10835 }, { "epoch": 1.9315508021390375, "grad_norm": 0.1728515625, "learning_rate": 2.0555251056735923e-06, "loss": 1.0058, "num_tokens": 29698729104.0, "step": 10836 }, { "epoch": 1.931729055258467, "grad_norm": 0.169921875, "learning_rate": 2.0552373331977865e-06, "loss": 1.0109, "num_tokens": 29705011782.0, "step": 10837 }, { "epoch": 1.9319073083778966, "grad_norm": 0.1689453125, "learning_rate": 2.0549503060909453e-06, "loss": 1.0132, "num_tokens": 29711279613.0, "step": 10838 }, { "epoch": 1.9320855614973262, "grad_norm": 0.169921875, "learning_rate": 2.0546640243769863e-06, "loss": 1.0395, "num_tokens": 29717565280.0, "step": 10839 }, { "epoch": 1.9322638146167557, "grad_norm": 0.173828125, "learning_rate": 2.0543784880797656e-06, "loss": 1.0294, "num_tokens": 29723816747.0, "step": 10840 }, { "epoch": 1.9324420677361855, "grad_norm": 0.16796875, "learning_rate": 2.0540936972230787e-06, "loss": 0.9857, "num_tokens": 29730083290.0, "step": 10841 }, { "epoch": 1.932620320855615, "grad_norm": 0.1728515625, "learning_rate": 2.053809651830655e-06, "loss": 1.0352, "num_tokens": 29736366046.0, "step": 10842 }, { "epoch": 1.9327985739750444, "grad_norm": 0.171875, "learning_rate": 2.053526351926165e-06, "loss": 1.0288, "num_tokens": 29742649912.0, "step": 10843 }, { "epoch": 1.9329768270944743, "grad_norm": 0.1689453125, "learning_rate": 2.053243797533216e-06, "loss": 0.9997, "num_tokens": 29748931435.0, "step": 10844 }, { "epoch": 1.9331550802139037, "grad_norm": 0.1728515625, "learning_rate": 2.052961988675355e-06, "loss": 1.0334, "num_tokens": 29755212706.0, "step": 10845 }, { "epoch": 1.9333333333333333, "grad_norm": 0.1748046875, "learning_rate": 2.0526809253760615e-06, "loss": 1.0373, "num_tokens": 29761496285.0, "step": 10846 }, { "epoch": 1.933511586452763, "grad_norm": 0.1669921875, "learning_rate": 2.05240060765876e-06, "loss": 1.0027, "num_tokens": 29767762676.0, "step": 10847 }, { "epoch": 1.9336898395721924, "grad_norm": 0.1689453125, "learning_rate": 2.0521210355468076e-06, "loss": 0.9892, "num_tokens": 29774047873.0, "step": 10848 }, { "epoch": 1.933868092691622, "grad_norm": 0.1728515625, "learning_rate": 2.051842209063502e-06, "loss": 1.0597, "num_tokens": 29780284528.0, "step": 10849 }, { "epoch": 1.9340463458110517, "grad_norm": 0.171875, "learning_rate": 2.0515641282320776e-06, "loss": 1.0171, "num_tokens": 29786550734.0, "step": 10850 }, { "epoch": 1.9342245989304812, "grad_norm": 0.169921875, "learning_rate": 2.051286793075706e-06, "loss": 0.9981, "num_tokens": 29792833636.0, "step": 10851 }, { "epoch": 1.934402852049911, "grad_norm": 0.1748046875, "learning_rate": 2.0510102036174995e-06, "loss": 1.0226, "num_tokens": 29799106427.0, "step": 10852 }, { "epoch": 1.9345811051693405, "grad_norm": 0.169921875, "learning_rate": 2.0507343598805046e-06, "loss": 1.0265, "num_tokens": 29805372466.0, "step": 10853 }, { "epoch": 1.93475935828877, "grad_norm": 0.1748046875, "learning_rate": 2.0504592618877088e-06, "loss": 0.9995, "num_tokens": 29811637811.0, "step": 10854 }, { "epoch": 1.9349376114081998, "grad_norm": 0.17578125, "learning_rate": 2.050184909662035e-06, "loss": 1.032, "num_tokens": 29817921412.0, "step": 10855 }, { "epoch": 1.9351158645276292, "grad_norm": 0.1787109375, "learning_rate": 2.0499113032263455e-06, "loss": 1.0115, "num_tokens": 29824183758.0, "step": 10856 }, { "epoch": 1.9352941176470588, "grad_norm": 0.169921875, "learning_rate": 2.04963844260344e-06, "loss": 1.0432, "num_tokens": 29830461358.0, "step": 10857 }, { "epoch": 1.9354723707664885, "grad_norm": 0.166015625, "learning_rate": 2.049366327816055e-06, "loss": 1.0072, "num_tokens": 29836726549.0, "step": 10858 }, { "epoch": 1.935650623885918, "grad_norm": 0.169921875, "learning_rate": 2.049094958886868e-06, "loss": 1.0015, "num_tokens": 29842991477.0, "step": 10859 }, { "epoch": 1.9358288770053476, "grad_norm": 0.1708984375, "learning_rate": 2.0488243358384907e-06, "loss": 1.0295, "num_tokens": 29849233670.0, "step": 10860 }, { "epoch": 1.9360071301247772, "grad_norm": 0.1708984375, "learning_rate": 2.0485544586934743e-06, "loss": 1.0139, "num_tokens": 29855516135.0, "step": 10861 }, { "epoch": 1.9361853832442066, "grad_norm": 0.1708984375, "learning_rate": 2.0482853274743086e-06, "loss": 1.0063, "num_tokens": 29861801602.0, "step": 10862 }, { "epoch": 1.9363636363636365, "grad_norm": 0.1708984375, "learning_rate": 2.048016942203419e-06, "loss": 1.0221, "num_tokens": 29868084449.0, "step": 10863 }, { "epoch": 1.936541889483066, "grad_norm": 0.169921875, "learning_rate": 2.047749302903171e-06, "loss": 1.0341, "num_tokens": 29874367875.0, "step": 10864 }, { "epoch": 1.9367201426024956, "grad_norm": 0.169921875, "learning_rate": 2.0474824095958664e-06, "loss": 1.026, "num_tokens": 29880653808.0, "step": 10865 }, { "epoch": 1.9368983957219252, "grad_norm": 0.1689453125, "learning_rate": 2.0472162623037473e-06, "loss": 1.0321, "num_tokens": 29886912693.0, "step": 10866 }, { "epoch": 1.9370766488413547, "grad_norm": 0.1748046875, "learning_rate": 2.046950861048989e-06, "loss": 1.0103, "num_tokens": 29893196781.0, "step": 10867 }, { "epoch": 1.9372549019607843, "grad_norm": 0.1748046875, "learning_rate": 2.0466862058537093e-06, "loss": 1.0231, "num_tokens": 29899438725.0, "step": 10868 }, { "epoch": 1.937433155080214, "grad_norm": 0.1728515625, "learning_rate": 2.046422296739962e-06, "loss": 1.0469, "num_tokens": 29905671537.0, "step": 10869 }, { "epoch": 1.9376114081996434, "grad_norm": 0.1728515625, "learning_rate": 2.0461591337297387e-06, "loss": 1.0172, "num_tokens": 29911942841.0, "step": 10870 }, { "epoch": 1.937789661319073, "grad_norm": 0.1728515625, "learning_rate": 2.0458967168449663e-06, "loss": 1.003, "num_tokens": 29918227827.0, "step": 10871 }, { "epoch": 1.9379679144385027, "grad_norm": 0.1650390625, "learning_rate": 2.0456350461075162e-06, "loss": 0.9999, "num_tokens": 29924502953.0, "step": 10872 }, { "epoch": 1.9381461675579321, "grad_norm": 0.177734375, "learning_rate": 2.0453741215391907e-06, "loss": 1.0253, "num_tokens": 29930787109.0, "step": 10873 }, { "epoch": 1.938324420677362, "grad_norm": 0.16796875, "learning_rate": 2.0451139431617333e-06, "loss": 1.029, "num_tokens": 29937056182.0, "step": 10874 }, { "epoch": 1.9385026737967914, "grad_norm": 0.1708984375, "learning_rate": 2.0448545109968253e-06, "loss": 1.0048, "num_tokens": 29943340267.0, "step": 10875 }, { "epoch": 1.938680926916221, "grad_norm": 0.1728515625, "learning_rate": 2.044595825066084e-06, "loss": 1.0317, "num_tokens": 29949624270.0, "step": 10876 }, { "epoch": 1.9388591800356507, "grad_norm": 0.169921875, "learning_rate": 2.044337885391067e-06, "loss": 1.0044, "num_tokens": 29955907157.0, "step": 10877 }, { "epoch": 1.9390374331550801, "grad_norm": 0.171875, "learning_rate": 2.044080691993268e-06, "loss": 1.0154, "num_tokens": 29962193019.0, "step": 10878 }, { "epoch": 1.9392156862745098, "grad_norm": 0.166015625, "learning_rate": 2.0438242448941188e-06, "loss": 0.9937, "num_tokens": 29968439811.0, "step": 10879 }, { "epoch": 1.9393939393939394, "grad_norm": 0.171875, "learning_rate": 2.0435685441149907e-06, "loss": 1.0267, "num_tokens": 29974713665.0, "step": 10880 }, { "epoch": 1.9395721925133689, "grad_norm": 0.1650390625, "learning_rate": 2.0433135896771876e-06, "loss": 1.001, "num_tokens": 29980982822.0, "step": 10881 }, { "epoch": 1.9397504456327985, "grad_norm": 0.1767578125, "learning_rate": 2.043059381601959e-06, "loss": 1.0025, "num_tokens": 29987265981.0, "step": 10882 }, { "epoch": 1.9399286987522282, "grad_norm": 0.16796875, "learning_rate": 2.0428059199104856e-06, "loss": 1.0323, "num_tokens": 29993523421.0, "step": 10883 }, { "epoch": 1.9401069518716576, "grad_norm": 0.17578125, "learning_rate": 2.0425532046238895e-06, "loss": 1.0544, "num_tokens": 29999805758.0, "step": 10884 }, { "epoch": 1.9402852049910875, "grad_norm": 0.166015625, "learning_rate": 2.042301235763228e-06, "loss": 1.009, "num_tokens": 30006091152.0, "step": 10885 }, { "epoch": 1.940463458110517, "grad_norm": 0.173828125, "learning_rate": 2.0420500133495003e-06, "loss": 0.9899, "num_tokens": 30012375222.0, "step": 10886 }, { "epoch": 1.9406417112299466, "grad_norm": 0.1669921875, "learning_rate": 2.04179953740364e-06, "loss": 1.0395, "num_tokens": 30018655871.0, "step": 10887 }, { "epoch": 1.9408199643493762, "grad_norm": 0.1728515625, "learning_rate": 2.0415498079465176e-06, "loss": 0.9999, "num_tokens": 30024931686.0, "step": 10888 }, { "epoch": 1.9409982174688056, "grad_norm": 0.1689453125, "learning_rate": 2.041300824998945e-06, "loss": 1.0251, "num_tokens": 30031216477.0, "step": 10889 }, { "epoch": 1.9411764705882353, "grad_norm": 0.166015625, "learning_rate": 2.0410525885816696e-06, "loss": 1.0146, "num_tokens": 30037483351.0, "step": 10890 }, { "epoch": 1.941354723707665, "grad_norm": 0.169921875, "learning_rate": 2.0408050987153757e-06, "loss": 1.0078, "num_tokens": 30043760338.0, "step": 10891 }, { "epoch": 1.9415329768270944, "grad_norm": 0.169921875, "learning_rate": 2.0405583554206876e-06, "loss": 1.0248, "num_tokens": 30050023872.0, "step": 10892 }, { "epoch": 1.941711229946524, "grad_norm": 0.1787109375, "learning_rate": 2.0403123587181674e-06, "loss": 1.0095, "num_tokens": 30056308990.0, "step": 10893 }, { "epoch": 1.9418894830659537, "grad_norm": 0.1669921875, "learning_rate": 2.0400671086283132e-06, "loss": 0.9898, "num_tokens": 30062575775.0, "step": 10894 }, { "epoch": 1.942067736185383, "grad_norm": 0.1689453125, "learning_rate": 2.0398226051715607e-06, "loss": 0.9651, "num_tokens": 30068841911.0, "step": 10895 }, { "epoch": 1.942245989304813, "grad_norm": 0.173828125, "learning_rate": 2.0395788483682855e-06, "loss": 0.9808, "num_tokens": 30075126439.0, "step": 10896 }, { "epoch": 1.9424242424242424, "grad_norm": 0.169921875, "learning_rate": 2.0393358382388013e-06, "loss": 1.0068, "num_tokens": 30081410697.0, "step": 10897 }, { "epoch": 1.942602495543672, "grad_norm": 0.1728515625, "learning_rate": 2.039093574803356e-06, "loss": 1.0036, "num_tokens": 30087696109.0, "step": 10898 }, { "epoch": 1.9427807486631017, "grad_norm": 0.17578125, "learning_rate": 2.0388520580821377e-06, "loss": 1.0367, "num_tokens": 30093939647.0, "step": 10899 }, { "epoch": 1.9429590017825311, "grad_norm": 0.1728515625, "learning_rate": 2.038611288095273e-06, "loss": 1.0539, "num_tokens": 30100186061.0, "step": 10900 }, { "epoch": 1.9431372549019608, "grad_norm": 0.169921875, "learning_rate": 2.0383712648628244e-06, "loss": 0.9708, "num_tokens": 30106470691.0, "step": 10901 }, { "epoch": 1.9433155080213904, "grad_norm": 0.1708984375, "learning_rate": 2.038131988404794e-06, "loss": 1.0255, "num_tokens": 30112755230.0, "step": 10902 }, { "epoch": 1.9434937611408198, "grad_norm": 0.166015625, "learning_rate": 2.03789345874112e-06, "loss": 1.0335, "num_tokens": 30119039785.0, "step": 10903 }, { "epoch": 1.9436720142602497, "grad_norm": 0.16796875, "learning_rate": 2.03765567589168e-06, "loss": 1.0133, "num_tokens": 30125325725.0, "step": 10904 }, { "epoch": 1.9438502673796791, "grad_norm": 0.17578125, "learning_rate": 2.037418639876287e-06, "loss": 1.0459, "num_tokens": 30131609796.0, "step": 10905 }, { "epoch": 1.9440285204991086, "grad_norm": 0.173828125, "learning_rate": 2.037182350714695e-06, "loss": 1.0292, "num_tokens": 30137871916.0, "step": 10906 }, { "epoch": 1.9442067736185384, "grad_norm": 0.1728515625, "learning_rate": 2.0369468084265932e-06, "loss": 1.0291, "num_tokens": 30144113153.0, "step": 10907 }, { "epoch": 1.9443850267379679, "grad_norm": 0.173828125, "learning_rate": 2.0367120130316085e-06, "loss": 1.0163, "num_tokens": 30150398682.0, "step": 10908 }, { "epoch": 1.9445632798573975, "grad_norm": 0.171875, "learning_rate": 2.0364779645493085e-06, "loss": 1.0344, "num_tokens": 30156681282.0, "step": 10909 }, { "epoch": 1.9447415329768272, "grad_norm": 0.171875, "learning_rate": 2.036244662999195e-06, "loss": 1.0169, "num_tokens": 30162966310.0, "step": 10910 }, { "epoch": 1.9449197860962566, "grad_norm": 0.169921875, "learning_rate": 2.0360121084007096e-06, "loss": 1.0285, "num_tokens": 30169251860.0, "step": 10911 }, { "epoch": 1.9450980392156862, "grad_norm": 0.1767578125, "learning_rate": 2.0357803007732303e-06, "loss": 0.9966, "num_tokens": 30175509881.0, "step": 10912 }, { "epoch": 1.945276292335116, "grad_norm": 0.1728515625, "learning_rate": 2.0355492401360747e-06, "loss": 1.0358, "num_tokens": 30181779897.0, "step": 10913 }, { "epoch": 1.9454545454545453, "grad_norm": 0.171875, "learning_rate": 2.035318926508496e-06, "loss": 1.0356, "num_tokens": 30188050007.0, "step": 10914 }, { "epoch": 1.9456327985739752, "grad_norm": 0.171875, "learning_rate": 2.0350893599096887e-06, "loss": 1.0409, "num_tokens": 30194333683.0, "step": 10915 }, { "epoch": 1.9458110516934046, "grad_norm": 0.169921875, "learning_rate": 2.0348605403587796e-06, "loss": 1.0074, "num_tokens": 30200597131.0, "step": 10916 }, { "epoch": 1.9459893048128343, "grad_norm": 0.169921875, "learning_rate": 2.0346324678748378e-06, "loss": 0.9868, "num_tokens": 30206855438.0, "step": 10917 }, { "epoch": 1.946167557932264, "grad_norm": 0.171875, "learning_rate": 2.034405142476869e-06, "loss": 1.0394, "num_tokens": 30213121967.0, "step": 10918 }, { "epoch": 1.9463458110516934, "grad_norm": 0.171875, "learning_rate": 2.0341785641838156e-06, "loss": 0.9868, "num_tokens": 30219397637.0, "step": 10919 }, { "epoch": 1.946524064171123, "grad_norm": 0.17578125, "learning_rate": 2.0339527330145587e-06, "loss": 1.0244, "num_tokens": 30225680462.0, "step": 10920 }, { "epoch": 1.9467023172905527, "grad_norm": 0.1689453125, "learning_rate": 2.0337276489879166e-06, "loss": 1.0178, "num_tokens": 30231910560.0, "step": 10921 }, { "epoch": 1.946880570409982, "grad_norm": 0.16796875, "learning_rate": 2.033503312122646e-06, "loss": 1.018, "num_tokens": 30238182414.0, "step": 10922 }, { "epoch": 1.9470588235294117, "grad_norm": 0.171875, "learning_rate": 2.033279722437441e-06, "loss": 1.0012, "num_tokens": 30244467306.0, "step": 10923 }, { "epoch": 1.9472370766488414, "grad_norm": 0.173828125, "learning_rate": 2.033056879950932e-06, "loss": 1.0377, "num_tokens": 30250745039.0, "step": 10924 }, { "epoch": 1.9474153297682708, "grad_norm": 0.1689453125, "learning_rate": 2.0328347846816894e-06, "loss": 1.0025, "num_tokens": 30257003698.0, "step": 10925 }, { "epoch": 1.9475935828877007, "grad_norm": 0.169921875, "learning_rate": 2.032613436648222e-06, "loss": 0.9981, "num_tokens": 30263286935.0, "step": 10926 }, { "epoch": 1.94777183600713, "grad_norm": 0.171875, "learning_rate": 2.0323928358689725e-06, "loss": 1.0215, "num_tokens": 30269570354.0, "step": 10927 }, { "epoch": 1.9479500891265598, "grad_norm": 0.1708984375, "learning_rate": 2.032172982362324e-06, "loss": 1.0175, "num_tokens": 30275854914.0, "step": 10928 }, { "epoch": 1.9481283422459894, "grad_norm": 0.1748046875, "learning_rate": 2.0319538761465983e-06, "loss": 1.0073, "num_tokens": 30282140252.0, "step": 10929 }, { "epoch": 1.9483065953654188, "grad_norm": 0.1689453125, "learning_rate": 2.031735517240052e-06, "loss": 0.9908, "num_tokens": 30288399836.0, "step": 10930 }, { "epoch": 1.9484848484848485, "grad_norm": 0.17578125, "learning_rate": 2.0315179056608826e-06, "loss": 1.0013, "num_tokens": 30294657902.0, "step": 10931 }, { "epoch": 1.9486631016042781, "grad_norm": 0.1708984375, "learning_rate": 2.031301041427222e-06, "loss": 1.0262, "num_tokens": 30300941035.0, "step": 10932 }, { "epoch": 1.9488413547237076, "grad_norm": 0.173828125, "learning_rate": 2.031084924557143e-06, "loss": 0.9895, "num_tokens": 30307191971.0, "step": 10933 }, { "epoch": 1.9490196078431372, "grad_norm": 0.1748046875, "learning_rate": 2.030869555068654e-06, "loss": 0.9821, "num_tokens": 30313475714.0, "step": 10934 }, { "epoch": 1.9491978609625669, "grad_norm": 0.1708984375, "learning_rate": 2.030654932979702e-06, "loss": 1.0618, "num_tokens": 30319760040.0, "step": 10935 }, { "epoch": 1.9493761140819963, "grad_norm": 0.173828125, "learning_rate": 2.030441058308171e-06, "loss": 1.0109, "num_tokens": 30326045797.0, "step": 10936 }, { "epoch": 1.9495543672014262, "grad_norm": 0.1650390625, "learning_rate": 2.0302279310718824e-06, "loss": 1.0117, "num_tokens": 30332330388.0, "step": 10937 }, { "epoch": 1.9497326203208556, "grad_norm": 0.1708984375, "learning_rate": 2.0300155512885988e-06, "loss": 1.0045, "num_tokens": 30338613685.0, "step": 10938 }, { "epoch": 1.9499108734402852, "grad_norm": 0.1767578125, "learning_rate": 2.0298039189760154e-06, "loss": 1.0183, "num_tokens": 30344849988.0, "step": 10939 }, { "epoch": 1.950089126559715, "grad_norm": 0.169921875, "learning_rate": 2.029593034151769e-06, "loss": 1.0179, "num_tokens": 30351131793.0, "step": 10940 }, { "epoch": 1.9502673796791443, "grad_norm": 0.1728515625, "learning_rate": 2.029382896833433e-06, "loss": 1.0341, "num_tokens": 30357416884.0, "step": 10941 }, { "epoch": 1.950445632798574, "grad_norm": 0.1748046875, "learning_rate": 2.029173507038516e-06, "loss": 1.0418, "num_tokens": 30363699518.0, "step": 10942 }, { "epoch": 1.9506238859180036, "grad_norm": 0.169921875, "learning_rate": 2.0289648647844685e-06, "loss": 1.0166, "num_tokens": 30369959491.0, "step": 10943 }, { "epoch": 1.950802139037433, "grad_norm": 0.1708984375, "learning_rate": 2.0287569700886762e-06, "loss": 1.0308, "num_tokens": 30376239375.0, "step": 10944 }, { "epoch": 1.9509803921568627, "grad_norm": 0.1708984375, "learning_rate": 2.0285498229684617e-06, "loss": 1.0147, "num_tokens": 30382502794.0, "step": 10945 }, { "epoch": 1.9511586452762923, "grad_norm": 0.1640625, "learning_rate": 2.028343423441089e-06, "loss": 1.0062, "num_tokens": 30388781427.0, "step": 10946 }, { "epoch": 1.9513368983957218, "grad_norm": 0.1728515625, "learning_rate": 2.0281377715237552e-06, "loss": 1.0223, "num_tokens": 30395027525.0, "step": 10947 }, { "epoch": 1.9515151515151516, "grad_norm": 0.1708984375, "learning_rate": 2.027932867233599e-06, "loss": 1.0091, "num_tokens": 30401308974.0, "step": 10948 }, { "epoch": 1.951693404634581, "grad_norm": 0.1767578125, "learning_rate": 2.0277287105876954e-06, "loss": 1.0198, "num_tokens": 30407594092.0, "step": 10949 }, { "epoch": 1.9518716577540107, "grad_norm": 0.1650390625, "learning_rate": 2.0275253016030543e-06, "loss": 0.9842, "num_tokens": 30413879218.0, "step": 10950 }, { "epoch": 1.9520499108734404, "grad_norm": 0.173828125, "learning_rate": 2.027322640296628e-06, "loss": 1.0159, "num_tokens": 30420141142.0, "step": 10951 }, { "epoch": 1.9522281639928698, "grad_norm": 0.1708984375, "learning_rate": 2.027120726685303e-06, "loss": 1.013, "num_tokens": 30426419361.0, "step": 10952 }, { "epoch": 1.9524064171122995, "grad_norm": 0.169921875, "learning_rate": 2.026919560785906e-06, "loss": 1.0574, "num_tokens": 30432685955.0, "step": 10953 }, { "epoch": 1.952584670231729, "grad_norm": 0.1708984375, "learning_rate": 2.0267191426151994e-06, "loss": 1.0083, "num_tokens": 30438965339.0, "step": 10954 }, { "epoch": 1.9527629233511585, "grad_norm": 0.1708984375, "learning_rate": 2.0265194721898844e-06, "loss": 1.0193, "num_tokens": 30445212194.0, "step": 10955 }, { "epoch": 1.9529411764705882, "grad_norm": 0.171875, "learning_rate": 2.0263205495266e-06, "loss": 1.0435, "num_tokens": 30451474704.0, "step": 10956 }, { "epoch": 1.9531194295900178, "grad_norm": 0.171875, "learning_rate": 2.026122374641922e-06, "loss": 1.0167, "num_tokens": 30457759232.0, "step": 10957 }, { "epoch": 1.9532976827094473, "grad_norm": 0.169921875, "learning_rate": 2.025924947552364e-06, "loss": 0.9789, "num_tokens": 30464020021.0, "step": 10958 }, { "epoch": 1.9534759358288771, "grad_norm": 0.177734375, "learning_rate": 2.025728268274378e-06, "loss": 1.0241, "num_tokens": 30470287769.0, "step": 10959 }, { "epoch": 1.9536541889483066, "grad_norm": 0.1650390625, "learning_rate": 2.0255323368243535e-06, "loss": 1.0009, "num_tokens": 30476571806.0, "step": 10960 }, { "epoch": 1.9538324420677362, "grad_norm": 0.16796875, "learning_rate": 2.025337153218618e-06, "loss": 1.0205, "num_tokens": 30482850545.0, "step": 10961 }, { "epoch": 1.9540106951871659, "grad_norm": 0.1728515625, "learning_rate": 2.0251427174734344e-06, "loss": 1.0067, "num_tokens": 30489115759.0, "step": 10962 }, { "epoch": 1.9541889483065953, "grad_norm": 0.16796875, "learning_rate": 2.0249490296050066e-06, "loss": 0.9782, "num_tokens": 30495341846.0, "step": 10963 }, { "epoch": 1.954367201426025, "grad_norm": 0.1650390625, "learning_rate": 2.024756089629474e-06, "loss": 1.0258, "num_tokens": 30501609096.0, "step": 10964 }, { "epoch": 1.9545454545454546, "grad_norm": 0.171875, "learning_rate": 2.0245638975629147e-06, "loss": 0.9836, "num_tokens": 30507878939.0, "step": 10965 }, { "epoch": 1.954723707664884, "grad_norm": 0.1689453125, "learning_rate": 2.0243724534213442e-06, "loss": 1.0299, "num_tokens": 30514162653.0, "step": 10966 }, { "epoch": 1.9549019607843139, "grad_norm": 0.1708984375, "learning_rate": 2.024181757220715e-06, "loss": 1.0246, "num_tokens": 30520448295.0, "step": 10967 }, { "epoch": 1.9550802139037433, "grad_norm": 0.171875, "learning_rate": 2.023991808976919e-06, "loss": 1.0226, "num_tokens": 30526689292.0, "step": 10968 }, { "epoch": 1.9552584670231727, "grad_norm": 0.171875, "learning_rate": 2.0238026087057827e-06, "loss": 1.0117, "num_tokens": 30532926558.0, "step": 10969 }, { "epoch": 1.9554367201426026, "grad_norm": 0.1669921875, "learning_rate": 2.023614156423075e-06, "loss": 1.0372, "num_tokens": 30539204008.0, "step": 10970 }, { "epoch": 1.955614973262032, "grad_norm": 0.173828125, "learning_rate": 2.0234264521444964e-06, "loss": 1.0233, "num_tokens": 30545485190.0, "step": 10971 }, { "epoch": 1.9557932263814617, "grad_norm": 0.1689453125, "learning_rate": 2.0232394958856904e-06, "loss": 1.0317, "num_tokens": 30551769319.0, "step": 10972 }, { "epoch": 1.9559714795008913, "grad_norm": 0.1689453125, "learning_rate": 2.023053287662236e-06, "loss": 1.0344, "num_tokens": 30558041688.0, "step": 10973 }, { "epoch": 1.9561497326203208, "grad_norm": 0.171875, "learning_rate": 2.0228678274896494e-06, "loss": 1.0254, "num_tokens": 30564316768.0, "step": 10974 }, { "epoch": 1.9563279857397504, "grad_norm": 0.171875, "learning_rate": 2.0226831153833853e-06, "loss": 0.99, "num_tokens": 30570571299.0, "step": 10975 }, { "epoch": 1.95650623885918, "grad_norm": 0.171875, "learning_rate": 2.0224991513588355e-06, "loss": 0.9946, "num_tokens": 30576830126.0, "step": 10976 }, { "epoch": 1.9566844919786095, "grad_norm": 0.16796875, "learning_rate": 2.02231593543133e-06, "loss": 1.0519, "num_tokens": 30583114239.0, "step": 10977 }, { "epoch": 1.9568627450980394, "grad_norm": 0.1689453125, "learning_rate": 2.0221334676161365e-06, "loss": 1.036, "num_tokens": 30589398633.0, "step": 10978 }, { "epoch": 1.9570409982174688, "grad_norm": 0.166015625, "learning_rate": 2.02195174792846e-06, "loss": 1.0273, "num_tokens": 30595655292.0, "step": 10979 }, { "epoch": 1.9572192513368984, "grad_norm": 0.1748046875, "learning_rate": 2.0217707763834435e-06, "loss": 0.9791, "num_tokens": 30601937639.0, "step": 10980 }, { "epoch": 1.957397504456328, "grad_norm": 0.1689453125, "learning_rate": 2.0215905529961657e-06, "loss": 1.003, "num_tokens": 30608221597.0, "step": 10981 }, { "epoch": 1.9575757575757575, "grad_norm": 0.16796875, "learning_rate": 2.0214110777816463e-06, "loss": 1.008, "num_tokens": 30614497864.0, "step": 10982 }, { "epoch": 1.9577540106951872, "grad_norm": 0.171875, "learning_rate": 2.021232350754841e-06, "loss": 1.0455, "num_tokens": 30620783133.0, "step": 10983 }, { "epoch": 1.9579322638146168, "grad_norm": 0.169921875, "learning_rate": 2.021054371930643e-06, "loss": 1.0465, "num_tokens": 30627040339.0, "step": 10984 }, { "epoch": 1.9581105169340463, "grad_norm": 0.17578125, "learning_rate": 2.020877141323882e-06, "loss": 1.0527, "num_tokens": 30633304258.0, "step": 10985 }, { "epoch": 1.958288770053476, "grad_norm": 0.1728515625, "learning_rate": 2.020700658949329e-06, "loss": 1.0115, "num_tokens": 30639574745.0, "step": 10986 }, { "epoch": 1.9584670231729056, "grad_norm": 0.173828125, "learning_rate": 2.0205249248216895e-06, "loss": 1.0465, "num_tokens": 30645856939.0, "step": 10987 }, { "epoch": 1.958645276292335, "grad_norm": 0.1728515625, "learning_rate": 2.0203499389556053e-06, "loss": 1.0035, "num_tokens": 30652136567.0, "step": 10988 }, { "epoch": 1.9588235294117649, "grad_norm": 0.1708984375, "learning_rate": 2.020175701365661e-06, "loss": 1.025, "num_tokens": 30658392087.0, "step": 10989 }, { "epoch": 1.9590017825311943, "grad_norm": 0.171875, "learning_rate": 2.0200022120663745e-06, "loss": 1.0172, "num_tokens": 30664630619.0, "step": 10990 }, { "epoch": 1.959180035650624, "grad_norm": 0.171875, "learning_rate": 2.0198294710722015e-06, "loss": 1.0314, "num_tokens": 30670909199.0, "step": 10991 }, { "epoch": 1.9593582887700536, "grad_norm": 0.169921875, "learning_rate": 2.0196574783975393e-06, "loss": 0.9939, "num_tokens": 30677190404.0, "step": 10992 }, { "epoch": 1.959536541889483, "grad_norm": 0.1748046875, "learning_rate": 2.019486234056718e-06, "loss": 1.0276, "num_tokens": 30683428137.0, "step": 10993 }, { "epoch": 1.9597147950089127, "grad_norm": 0.1708984375, "learning_rate": 2.019315738064009e-06, "loss": 1.0439, "num_tokens": 30689695479.0, "step": 10994 }, { "epoch": 1.9598930481283423, "grad_norm": 0.1689453125, "learning_rate": 2.0191459904336182e-06, "loss": 1.0172, "num_tokens": 30695973286.0, "step": 10995 }, { "epoch": 1.9600713012477717, "grad_norm": 0.171875, "learning_rate": 2.018976991179691e-06, "loss": 0.9832, "num_tokens": 30702255531.0, "step": 10996 }, { "epoch": 1.9602495543672014, "grad_norm": 0.177734375, "learning_rate": 2.0188087403163105e-06, "loss": 1.0231, "num_tokens": 30708540201.0, "step": 10997 }, { "epoch": 1.960427807486631, "grad_norm": 0.166015625, "learning_rate": 2.018641237857497e-06, "loss": 1.0395, "num_tokens": 30714791295.0, "step": 10998 }, { "epoch": 1.9606060606060605, "grad_norm": 0.166015625, "learning_rate": 2.0184744838172086e-06, "loss": 1.0143, "num_tokens": 30721075816.0, "step": 10999 }, { "epoch": 1.9607843137254903, "grad_norm": 0.1708984375, "learning_rate": 2.018308478209341e-06, "loss": 1.0278, "num_tokens": 30727335934.0, "step": 11000 }, { "epoch": 1.9609625668449198, "grad_norm": 0.1669921875, "learning_rate": 2.018143221047728e-06, "loss": 1.0101, "num_tokens": 30733618052.0, "step": 11001 }, { "epoch": 1.9611408199643494, "grad_norm": 0.17578125, "learning_rate": 2.01797871234614e-06, "loss": 1.0185, "num_tokens": 30739873541.0, "step": 11002 }, { "epoch": 1.961319073083779, "grad_norm": 0.1689453125, "learning_rate": 2.0178149521182843e-06, "loss": 0.9891, "num_tokens": 30746119736.0, "step": 11003 }, { "epoch": 1.9614973262032085, "grad_norm": 0.1669921875, "learning_rate": 2.0176519403778084e-06, "loss": 1.0161, "num_tokens": 30752403828.0, "step": 11004 }, { "epoch": 1.9616755793226381, "grad_norm": 0.1689453125, "learning_rate": 2.0174896771382966e-06, "loss": 0.9849, "num_tokens": 30758659006.0, "step": 11005 }, { "epoch": 1.9618538324420678, "grad_norm": 0.1689453125, "learning_rate": 2.017328162413269e-06, "loss": 1.0135, "num_tokens": 30764915997.0, "step": 11006 }, { "epoch": 1.9620320855614972, "grad_norm": 0.17578125, "learning_rate": 2.0171673962161863e-06, "loss": 1.0259, "num_tokens": 30771192219.0, "step": 11007 }, { "epoch": 1.9622103386809269, "grad_norm": 0.173828125, "learning_rate": 2.0170073785604434e-06, "loss": 1.0393, "num_tokens": 30777458321.0, "step": 11008 }, { "epoch": 1.9623885918003565, "grad_norm": 0.166015625, "learning_rate": 2.0168481094593754e-06, "loss": 1.0272, "num_tokens": 30783740837.0, "step": 11009 }, { "epoch": 1.962566844919786, "grad_norm": 0.16796875, "learning_rate": 2.0166895889262547e-06, "loss": 1.0176, "num_tokens": 30789990156.0, "step": 11010 }, { "epoch": 1.9627450980392158, "grad_norm": 0.1728515625, "learning_rate": 2.0165318169742906e-06, "loss": 0.9925, "num_tokens": 30796267880.0, "step": 11011 }, { "epoch": 1.9629233511586452, "grad_norm": 0.16796875, "learning_rate": 2.016374793616629e-06, "loss": 1.0201, "num_tokens": 30802523234.0, "step": 11012 }, { "epoch": 1.963101604278075, "grad_norm": 0.166015625, "learning_rate": 2.016218518866357e-06, "loss": 1.0109, "num_tokens": 30808791387.0, "step": 11013 }, { "epoch": 1.9632798573975045, "grad_norm": 0.1748046875, "learning_rate": 2.016062992736494e-06, "loss": 1.0315, "num_tokens": 30815016041.0, "step": 11014 }, { "epoch": 1.963458110516934, "grad_norm": 0.173828125, "learning_rate": 2.0159082152400037e-06, "loss": 1.0125, "num_tokens": 30821205155.0, "step": 11015 }, { "epoch": 1.9636363636363636, "grad_norm": 0.169921875, "learning_rate": 2.0157541863897816e-06, "loss": 1.0316, "num_tokens": 30827488031.0, "step": 11016 }, { "epoch": 1.9638146167557933, "grad_norm": 0.1708984375, "learning_rate": 2.0156009061986618e-06, "loss": 0.9923, "num_tokens": 30833761953.0, "step": 11017 }, { "epoch": 1.9639928698752227, "grad_norm": 0.171875, "learning_rate": 2.0154483746794197e-06, "loss": 1.0148, "num_tokens": 30840047796.0, "step": 11018 }, { "epoch": 1.9641711229946524, "grad_norm": 0.1748046875, "learning_rate": 2.0152965918447646e-06, "loss": 0.9951, "num_tokens": 30846309659.0, "step": 11019 }, { "epoch": 1.964349376114082, "grad_norm": 0.1708984375, "learning_rate": 2.015145557707344e-06, "loss": 1.0431, "num_tokens": 30852567391.0, "step": 11020 }, { "epoch": 1.9645276292335114, "grad_norm": 0.171875, "learning_rate": 2.0149952722797445e-06, "loss": 1.0422, "num_tokens": 30858825696.0, "step": 11021 }, { "epoch": 1.9647058823529413, "grad_norm": 0.1689453125, "learning_rate": 2.014845735574489e-06, "loss": 1.0131, "num_tokens": 30865099578.0, "step": 11022 }, { "epoch": 1.9648841354723707, "grad_norm": 0.17578125, "learning_rate": 2.0146969476040387e-06, "loss": 0.9994, "num_tokens": 30871382085.0, "step": 11023 }, { "epoch": 1.9650623885918004, "grad_norm": 0.1650390625, "learning_rate": 2.0145489083807925e-06, "loss": 0.9877, "num_tokens": 30877666608.0, "step": 11024 }, { "epoch": 1.96524064171123, "grad_norm": 0.173828125, "learning_rate": 2.014401617917085e-06, "loss": 1.0459, "num_tokens": 30883934093.0, "step": 11025 }, { "epoch": 1.9654188948306595, "grad_norm": 0.1728515625, "learning_rate": 2.0142550762251918e-06, "loss": 1.0297, "num_tokens": 30890184718.0, "step": 11026 }, { "epoch": 1.965597147950089, "grad_norm": 0.173828125, "learning_rate": 2.0141092833173238e-06, "loss": 1.0751, "num_tokens": 30896467986.0, "step": 11027 }, { "epoch": 1.9657754010695188, "grad_norm": 0.171875, "learning_rate": 2.013964239205629e-06, "loss": 1.0323, "num_tokens": 30902718193.0, "step": 11028 }, { "epoch": 1.9659536541889482, "grad_norm": 0.1669921875, "learning_rate": 2.0138199439021944e-06, "loss": 0.9787, "num_tokens": 30909001941.0, "step": 11029 }, { "epoch": 1.966131907308378, "grad_norm": 0.166015625, "learning_rate": 2.0136763974190443e-06, "loss": 1.0035, "num_tokens": 30915286350.0, "step": 11030 }, { "epoch": 1.9663101604278075, "grad_norm": 0.171875, "learning_rate": 2.0135335997681414e-06, "loss": 1.0166, "num_tokens": 30921517951.0, "step": 11031 }, { "epoch": 1.966488413547237, "grad_norm": 0.1708984375, "learning_rate": 2.0133915509613834e-06, "loss": 1.0259, "num_tokens": 30927800086.0, "step": 11032 }, { "epoch": 1.9666666666666668, "grad_norm": 0.1689453125, "learning_rate": 2.013250251010608e-06, "loss": 0.9959, "num_tokens": 30934068329.0, "step": 11033 }, { "epoch": 1.9668449197860962, "grad_norm": 0.173828125, "learning_rate": 2.013109699927589e-06, "loss": 1.0112, "num_tokens": 30940351286.0, "step": 11034 }, { "epoch": 1.9670231729055259, "grad_norm": 0.1728515625, "learning_rate": 2.0129698977240404e-06, "loss": 0.9838, "num_tokens": 30946634781.0, "step": 11035 }, { "epoch": 1.9672014260249555, "grad_norm": 0.1728515625, "learning_rate": 2.012830844411611e-06, "loss": 0.9969, "num_tokens": 30952918742.0, "step": 11036 }, { "epoch": 1.967379679144385, "grad_norm": 0.1708984375, "learning_rate": 2.0126925400018875e-06, "loss": 1.0267, "num_tokens": 30959164329.0, "step": 11037 }, { "epoch": 1.9675579322638146, "grad_norm": 0.169921875, "learning_rate": 2.012554984506395e-06, "loss": 1.0186, "num_tokens": 30965446451.0, "step": 11038 }, { "epoch": 1.9677361853832442, "grad_norm": 0.169921875, "learning_rate": 2.0124181779365963e-06, "loss": 1.0071, "num_tokens": 30971731643.0, "step": 11039 }, { "epoch": 1.9679144385026737, "grad_norm": 0.1708984375, "learning_rate": 2.0122821203038923e-06, "loss": 1.0365, "num_tokens": 30977971658.0, "step": 11040 }, { "epoch": 1.9680926916221035, "grad_norm": 0.173828125, "learning_rate": 2.01214681161962e-06, "loss": 1.0239, "num_tokens": 30984254277.0, "step": 11041 }, { "epoch": 1.968270944741533, "grad_norm": 0.16796875, "learning_rate": 2.0120122518950543e-06, "loss": 1.0268, "num_tokens": 30990530848.0, "step": 11042 }, { "epoch": 1.9684491978609626, "grad_norm": 0.1669921875, "learning_rate": 2.011878441141409e-06, "loss": 1.0041, "num_tokens": 30996788640.0, "step": 11043 }, { "epoch": 1.9686274509803923, "grad_norm": 0.1669921875, "learning_rate": 2.011745379369833e-06, "loss": 1.0176, "num_tokens": 31003048275.0, "step": 11044 }, { "epoch": 1.9688057040998217, "grad_norm": 0.1708984375, "learning_rate": 2.0116130665914163e-06, "loss": 1.0094, "num_tokens": 31009332364.0, "step": 11045 }, { "epoch": 1.9689839572192513, "grad_norm": 0.16796875, "learning_rate": 2.0114815028171832e-06, "loss": 1.027, "num_tokens": 31015605303.0, "step": 11046 }, { "epoch": 1.969162210338681, "grad_norm": 0.171875, "learning_rate": 2.0113506880580973e-06, "loss": 1.0123, "num_tokens": 31021840931.0, "step": 11047 }, { "epoch": 1.9693404634581104, "grad_norm": 0.1689453125, "learning_rate": 2.0112206223250603e-06, "loss": 1.0169, "num_tokens": 31028126611.0, "step": 11048 }, { "epoch": 1.96951871657754, "grad_norm": 0.1748046875, "learning_rate": 2.011091305628909e-06, "loss": 1.0211, "num_tokens": 31034410219.0, "step": 11049 }, { "epoch": 1.9696969696969697, "grad_norm": 0.173828125, "learning_rate": 2.0109627379804204e-06, "loss": 1.0213, "num_tokens": 31040664593.0, "step": 11050 }, { "epoch": 1.9698752228163992, "grad_norm": 0.1708984375, "learning_rate": 2.0108349193903086e-06, "loss": 1.0132, "num_tokens": 31046941420.0, "step": 11051 }, { "epoch": 1.970053475935829, "grad_norm": 0.17578125, "learning_rate": 2.010707849869223e-06, "loss": 1.034, "num_tokens": 31053224775.0, "step": 11052 }, { "epoch": 1.9702317290552585, "grad_norm": 0.1728515625, "learning_rate": 2.010581529427753e-06, "loss": 1.0235, "num_tokens": 31059497987.0, "step": 11053 }, { "epoch": 1.970409982174688, "grad_norm": 0.1689453125, "learning_rate": 2.010455958076427e-06, "loss": 1.0322, "num_tokens": 31065782282.0, "step": 11054 }, { "epoch": 1.9705882352941178, "grad_norm": 0.173828125, "learning_rate": 2.0103311358257055e-06, "loss": 1.0263, "num_tokens": 31072064774.0, "step": 11055 }, { "epoch": 1.9707664884135472, "grad_norm": 0.171875, "learning_rate": 2.010207062685993e-06, "loss": 1.0157, "num_tokens": 31078346673.0, "step": 11056 }, { "epoch": 1.9709447415329768, "grad_norm": 0.171875, "learning_rate": 2.010083738667627e-06, "loss": 1.0244, "num_tokens": 31084604459.0, "step": 11057 }, { "epoch": 1.9711229946524065, "grad_norm": 0.171875, "learning_rate": 2.009961163780883e-06, "loss": 1.0656, "num_tokens": 31090860437.0, "step": 11058 }, { "epoch": 1.971301247771836, "grad_norm": 0.1728515625, "learning_rate": 2.0098393380359773e-06, "loss": 1.0178, "num_tokens": 31097143895.0, "step": 11059 }, { "epoch": 1.9714795008912656, "grad_norm": 0.1748046875, "learning_rate": 2.0097182614430606e-06, "loss": 1.0469, "num_tokens": 31103421853.0, "step": 11060 }, { "epoch": 1.9716577540106952, "grad_norm": 0.173828125, "learning_rate": 2.0095979340122237e-06, "loss": 1.0143, "num_tokens": 31109673369.0, "step": 11061 }, { "epoch": 1.9718360071301246, "grad_norm": 0.1728515625, "learning_rate": 2.0094783557534904e-06, "loss": 1.0051, "num_tokens": 31115939850.0, "step": 11062 }, { "epoch": 1.9720142602495545, "grad_norm": 0.171875, "learning_rate": 2.0093595266768277e-06, "loss": 0.9843, "num_tokens": 31122205875.0, "step": 11063 }, { "epoch": 1.972192513368984, "grad_norm": 0.1689453125, "learning_rate": 2.009241446792138e-06, "loss": 1.0073, "num_tokens": 31128468172.0, "step": 11064 }, { "epoch": 1.9723707664884136, "grad_norm": 0.171875, "learning_rate": 2.0091241161092594e-06, "loss": 1.0299, "num_tokens": 31134725608.0, "step": 11065 }, { "epoch": 1.9725490196078432, "grad_norm": 0.169921875, "learning_rate": 2.0090075346379697e-06, "loss": 1.0342, "num_tokens": 31141002000.0, "step": 11066 }, { "epoch": 1.9727272727272727, "grad_norm": 0.1689453125, "learning_rate": 2.008891702387983e-06, "loss": 1.0067, "num_tokens": 31147284598.0, "step": 11067 }, { "epoch": 1.9729055258467023, "grad_norm": 0.1689453125, "learning_rate": 2.008776619368953e-06, "loss": 1.0335, "num_tokens": 31153530773.0, "step": 11068 }, { "epoch": 1.973083778966132, "grad_norm": 0.1708984375, "learning_rate": 2.008662285590468e-06, "loss": 1.0121, "num_tokens": 31159800178.0, "step": 11069 }, { "epoch": 1.9732620320855614, "grad_norm": 0.16796875, "learning_rate": 2.0085487010620573e-06, "loss": 1.0179, "num_tokens": 31166081832.0, "step": 11070 }, { "epoch": 1.973440285204991, "grad_norm": 0.173828125, "learning_rate": 2.0084358657931838e-06, "loss": 1.0523, "num_tokens": 31172345458.0, "step": 11071 }, { "epoch": 1.9736185383244207, "grad_norm": 0.173828125, "learning_rate": 2.008323779793252e-06, "loss": 1.0152, "num_tokens": 31178630075.0, "step": 11072 }, { "epoch": 1.9737967914438501, "grad_norm": 0.1728515625, "learning_rate": 2.008212443071601e-06, "loss": 0.9825, "num_tokens": 31184905964.0, "step": 11073 }, { "epoch": 1.97397504456328, "grad_norm": 0.1728515625, "learning_rate": 2.0081018556375083e-06, "loss": 1.0468, "num_tokens": 31191158629.0, "step": 11074 }, { "epoch": 1.9741532976827094, "grad_norm": 0.1708984375, "learning_rate": 2.0079920175001897e-06, "loss": 0.9837, "num_tokens": 31197430730.0, "step": 11075 }, { "epoch": 1.974331550802139, "grad_norm": 0.1748046875, "learning_rate": 2.0078829286687984e-06, "loss": 1.0239, "num_tokens": 31203716108.0, "step": 11076 }, { "epoch": 1.9745098039215687, "grad_norm": 0.169921875, "learning_rate": 2.007774589152424e-06, "loss": 1.0138, "num_tokens": 31209976482.0, "step": 11077 }, { "epoch": 1.9746880570409981, "grad_norm": 0.177734375, "learning_rate": 2.007666998960095e-06, "loss": 1.0554, "num_tokens": 31216245851.0, "step": 11078 }, { "epoch": 1.9748663101604278, "grad_norm": 0.1708984375, "learning_rate": 2.007560158100777e-06, "loss": 1.0304, "num_tokens": 31222530065.0, "step": 11079 }, { "epoch": 1.9750445632798574, "grad_norm": 0.1728515625, "learning_rate": 2.007454066583372e-06, "loss": 1.0246, "num_tokens": 31228811773.0, "step": 11080 }, { "epoch": 1.9752228163992869, "grad_norm": 0.16796875, "learning_rate": 2.0073487244167224e-06, "loss": 1.0208, "num_tokens": 31235094231.0, "step": 11081 }, { "epoch": 1.9754010695187165, "grad_norm": 0.1728515625, "learning_rate": 2.0072441316096055e-06, "loss": 1.0361, "num_tokens": 31241376760.0, "step": 11082 }, { "epoch": 1.9755793226381462, "grad_norm": 0.169921875, "learning_rate": 2.0071402881707354e-06, "loss": 1.0041, "num_tokens": 31247632487.0, "step": 11083 }, { "epoch": 1.9757575757575756, "grad_norm": 0.169921875, "learning_rate": 2.007037194108768e-06, "loss": 1.0247, "num_tokens": 31253915495.0, "step": 11084 }, { "epoch": 1.9759358288770055, "grad_norm": 0.17578125, "learning_rate": 2.0069348494322933e-06, "loss": 1.0424, "num_tokens": 31260140382.0, "step": 11085 }, { "epoch": 1.976114081996435, "grad_norm": 0.17578125, "learning_rate": 2.0068332541498386e-06, "loss": 1.012, "num_tokens": 31266424095.0, "step": 11086 }, { "epoch": 1.9762923351158646, "grad_norm": 0.1728515625, "learning_rate": 2.006732408269872e-06, "loss": 1.0553, "num_tokens": 31272695888.0, "step": 11087 }, { "epoch": 1.9764705882352942, "grad_norm": 0.166015625, "learning_rate": 2.0066323118007947e-06, "loss": 1.0191, "num_tokens": 31278979567.0, "step": 11088 }, { "epoch": 1.9766488413547236, "grad_norm": 0.171875, "learning_rate": 2.00653296475095e-06, "loss": 1.0017, "num_tokens": 31285222226.0, "step": 11089 }, { "epoch": 1.9768270944741533, "grad_norm": 0.1728515625, "learning_rate": 2.006434367128614e-06, "loss": 1.0288, "num_tokens": 31291485689.0, "step": 11090 }, { "epoch": 1.977005347593583, "grad_norm": 0.1669921875, "learning_rate": 2.006336518942005e-06, "loss": 0.9976, "num_tokens": 31297758251.0, "step": 11091 }, { "epoch": 1.9771836007130124, "grad_norm": 0.171875, "learning_rate": 2.0062394201992757e-06, "loss": 1.0185, "num_tokens": 31304040735.0, "step": 11092 }, { "epoch": 1.9773618538324422, "grad_norm": 0.1669921875, "learning_rate": 2.006143070908517e-06, "loss": 1.0173, "num_tokens": 31310279314.0, "step": 11093 }, { "epoch": 1.9775401069518717, "grad_norm": 0.1728515625, "learning_rate": 2.0060474710777596e-06, "loss": 1.0087, "num_tokens": 31316563677.0, "step": 11094 }, { "epoch": 1.977718360071301, "grad_norm": 0.1689453125, "learning_rate": 2.0059526207149674e-06, "loss": 0.9769, "num_tokens": 31322847661.0, "step": 11095 }, { "epoch": 1.977896613190731, "grad_norm": 0.17578125, "learning_rate": 2.0058585198280456e-06, "loss": 1.0088, "num_tokens": 31329130788.0, "step": 11096 }, { "epoch": 1.9780748663101604, "grad_norm": 0.1748046875, "learning_rate": 2.005765168424835e-06, "loss": 1.0059, "num_tokens": 31335415017.0, "step": 11097 }, { "epoch": 1.97825311942959, "grad_norm": 0.17578125, "learning_rate": 2.0056725665131155e-06, "loss": 0.9822, "num_tokens": 31341699605.0, "step": 11098 }, { "epoch": 1.9784313725490197, "grad_norm": 0.171875, "learning_rate": 2.0055807141006034e-06, "loss": 1.011, "num_tokens": 31347981375.0, "step": 11099 }, { "epoch": 1.9786096256684491, "grad_norm": 0.1748046875, "learning_rate": 2.005489611194952e-06, "loss": 1.0315, "num_tokens": 31354245665.0, "step": 11100 }, { "epoch": 1.9787878787878788, "grad_norm": 0.16796875, "learning_rate": 2.0053992578037543e-06, "loss": 1.0258, "num_tokens": 31360494470.0, "step": 11101 }, { "epoch": 1.9789661319073084, "grad_norm": 0.166015625, "learning_rate": 2.005309653934539e-06, "loss": 0.9962, "num_tokens": 31366778519.0, "step": 11102 }, { "epoch": 1.9791443850267378, "grad_norm": 0.173828125, "learning_rate": 2.005220799594772e-06, "loss": 1.0243, "num_tokens": 31373062979.0, "step": 11103 }, { "epoch": 1.9793226381461677, "grad_norm": 0.1728515625, "learning_rate": 2.0051326947918575e-06, "loss": 1.0303, "num_tokens": 31379327903.0, "step": 11104 }, { "epoch": 1.9795008912655971, "grad_norm": 0.1767578125, "learning_rate": 2.005045339533138e-06, "loss": 0.9825, "num_tokens": 31385580590.0, "step": 11105 }, { "epoch": 1.9796791443850268, "grad_norm": 0.169921875, "learning_rate": 2.0049587338258923e-06, "loss": 1.0126, "num_tokens": 31391825650.0, "step": 11106 }, { "epoch": 1.9798573975044564, "grad_norm": 0.171875, "learning_rate": 2.0048728776773386e-06, "loss": 1.0429, "num_tokens": 31398109903.0, "step": 11107 }, { "epoch": 1.9800356506238859, "grad_norm": 0.173828125, "learning_rate": 2.0047877710946293e-06, "loss": 1.0365, "num_tokens": 31404376068.0, "step": 11108 }, { "epoch": 1.9802139037433155, "grad_norm": 0.1728515625, "learning_rate": 2.004703414084859e-06, "loss": 1.0079, "num_tokens": 31410655239.0, "step": 11109 }, { "epoch": 1.9803921568627452, "grad_norm": 0.1787109375, "learning_rate": 2.0046198066550545e-06, "loss": 1.0301, "num_tokens": 31416938332.0, "step": 11110 }, { "epoch": 1.9805704099821746, "grad_norm": 0.166015625, "learning_rate": 2.0045369488121837e-06, "loss": 1.0009, "num_tokens": 31423223226.0, "step": 11111 }, { "epoch": 1.9807486631016042, "grad_norm": 0.171875, "learning_rate": 2.0044548405631517e-06, "loss": 1.0376, "num_tokens": 31429469941.0, "step": 11112 }, { "epoch": 1.980926916221034, "grad_norm": 0.1689453125, "learning_rate": 2.0043734819147995e-06, "loss": 1.0149, "num_tokens": 31435754903.0, "step": 11113 }, { "epoch": 1.9811051693404633, "grad_norm": 0.1748046875, "learning_rate": 2.004292872873908e-06, "loss": 1.031, "num_tokens": 31442040456.0, "step": 11114 }, { "epoch": 1.9812834224598932, "grad_norm": 0.171875, "learning_rate": 2.0042130134471947e-06, "loss": 1.0529, "num_tokens": 31448269305.0, "step": 11115 }, { "epoch": 1.9814616755793226, "grad_norm": 0.171875, "learning_rate": 2.0041339036413126e-06, "loss": 1.0007, "num_tokens": 31454552202.0, "step": 11116 }, { "epoch": 1.9816399286987523, "grad_norm": 0.173828125, "learning_rate": 2.0040555434628546e-06, "loss": 1.0136, "num_tokens": 31460816817.0, "step": 11117 }, { "epoch": 1.981818181818182, "grad_norm": 0.17578125, "learning_rate": 2.0039779329183516e-06, "loss": 1.0327, "num_tokens": 31467059948.0, "step": 11118 }, { "epoch": 1.9819964349376114, "grad_norm": 0.171875, "learning_rate": 2.003901072014269e-06, "loss": 1.0227, "num_tokens": 31473342306.0, "step": 11119 }, { "epoch": 1.982174688057041, "grad_norm": 0.177734375, "learning_rate": 2.0038249607570134e-06, "loss": 1.0347, "num_tokens": 31479625411.0, "step": 11120 }, { "epoch": 1.9823529411764707, "grad_norm": 0.1728515625, "learning_rate": 2.0037495991529255e-06, "loss": 0.9929, "num_tokens": 31485881951.0, "step": 11121 }, { "epoch": 1.9825311942959, "grad_norm": 0.1689453125, "learning_rate": 2.0036749872082867e-06, "loss": 1.0116, "num_tokens": 31492147317.0, "step": 11122 }, { "epoch": 1.9827094474153297, "grad_norm": 0.169921875, "learning_rate": 2.0036011249293137e-06, "loss": 1.0258, "num_tokens": 31498431549.0, "step": 11123 }, { "epoch": 1.9828877005347594, "grad_norm": 0.1728515625, "learning_rate": 2.0035280123221613e-06, "loss": 1.031, "num_tokens": 31504717107.0, "step": 11124 }, { "epoch": 1.9830659536541888, "grad_norm": 0.1708984375, "learning_rate": 2.0034556493929224e-06, "loss": 1.0565, "num_tokens": 31511000444.0, "step": 11125 }, { "epoch": 1.9832442067736187, "grad_norm": 0.1708984375, "learning_rate": 2.0033840361476275e-06, "loss": 1.0291, "num_tokens": 31517284430.0, "step": 11126 }, { "epoch": 1.983422459893048, "grad_norm": 0.1708984375, "learning_rate": 2.0033131725922428e-06, "loss": 0.9989, "num_tokens": 31523569170.0, "step": 11127 }, { "epoch": 1.9836007130124778, "grad_norm": 0.1728515625, "learning_rate": 2.0032430587326736e-06, "loss": 0.9955, "num_tokens": 31529852530.0, "step": 11128 }, { "epoch": 1.9837789661319074, "grad_norm": 0.1689453125, "learning_rate": 2.003173694574764e-06, "loss": 1.0362, "num_tokens": 31536115550.0, "step": 11129 }, { "epoch": 1.9839572192513368, "grad_norm": 0.171875, "learning_rate": 2.003105080124293e-06, "loss": 1.0198, "num_tokens": 31542398818.0, "step": 11130 }, { "epoch": 1.9841354723707665, "grad_norm": 0.16796875, "learning_rate": 2.003037215386978e-06, "loss": 0.9874, "num_tokens": 31548681183.0, "step": 11131 }, { "epoch": 1.9843137254901961, "grad_norm": 0.181640625, "learning_rate": 2.0029701003684742e-06, "loss": 1.02, "num_tokens": 31554938471.0, "step": 11132 }, { "epoch": 1.9844919786096256, "grad_norm": 0.1728515625, "learning_rate": 2.002903735074375e-06, "loss": 1.0488, "num_tokens": 31561222312.0, "step": 11133 }, { "epoch": 1.9846702317290552, "grad_norm": 0.1728515625, "learning_rate": 2.0028381195102107e-06, "loss": 1.0233, "num_tokens": 31567505150.0, "step": 11134 }, { "epoch": 1.9848484848484849, "grad_norm": 0.17578125, "learning_rate": 2.0027732536814486e-06, "loss": 1.0039, "num_tokens": 31573788460.0, "step": 11135 }, { "epoch": 1.9850267379679143, "grad_norm": 0.169921875, "learning_rate": 2.002709137593494e-06, "loss": 1.04, "num_tokens": 31580038321.0, "step": 11136 }, { "epoch": 1.9852049910873442, "grad_norm": 0.1748046875, "learning_rate": 2.0026457712516893e-06, "loss": 1.0214, "num_tokens": 31586310399.0, "step": 11137 }, { "epoch": 1.9853832442067736, "grad_norm": 0.169921875, "learning_rate": 2.0025831546613152e-06, "loss": 0.9924, "num_tokens": 31592595826.0, "step": 11138 }, { "epoch": 1.9855614973262032, "grad_norm": 0.1650390625, "learning_rate": 2.0025212878275903e-06, "loss": 1.0334, "num_tokens": 31598860149.0, "step": 11139 }, { "epoch": 1.985739750445633, "grad_norm": 0.171875, "learning_rate": 2.002460170755669e-06, "loss": 1.0023, "num_tokens": 31605140817.0, "step": 11140 }, { "epoch": 1.9859180035650623, "grad_norm": 0.1689453125, "learning_rate": 2.002399803450645e-06, "loss": 0.9816, "num_tokens": 31611424563.0, "step": 11141 }, { "epoch": 1.986096256684492, "grad_norm": 0.1748046875, "learning_rate": 2.002340185917548e-06, "loss": 1.0172, "num_tokens": 31617673782.0, "step": 11142 }, { "epoch": 1.9862745098039216, "grad_norm": 0.171875, "learning_rate": 2.0022813181613455e-06, "loss": 1.0109, "num_tokens": 31623953384.0, "step": 11143 }, { "epoch": 1.986452762923351, "grad_norm": 0.16796875, "learning_rate": 2.0022232001869444e-06, "loss": 1.0209, "num_tokens": 31630222624.0, "step": 11144 }, { "epoch": 1.9866310160427807, "grad_norm": 0.1689453125, "learning_rate": 2.002165831999187e-06, "loss": 1.0174, "num_tokens": 31636505921.0, "step": 11145 }, { "epoch": 1.9868092691622103, "grad_norm": 0.169921875, "learning_rate": 2.002109213602853e-06, "loss": 1.0066, "num_tokens": 31642790234.0, "step": 11146 }, { "epoch": 1.9869875222816398, "grad_norm": 0.169921875, "learning_rate": 2.0020533450026617e-06, "loss": 1.0203, "num_tokens": 31649048636.0, "step": 11147 }, { "epoch": 1.9871657754010696, "grad_norm": 0.1689453125, "learning_rate": 2.0019982262032678e-06, "loss": 1.0376, "num_tokens": 31655306374.0, "step": 11148 }, { "epoch": 1.987344028520499, "grad_norm": 0.1728515625, "learning_rate": 2.001943857209265e-06, "loss": 1.0227, "num_tokens": 31661590742.0, "step": 11149 }, { "epoch": 1.9875222816399287, "grad_norm": 0.1787109375, "learning_rate": 2.001890238025183e-06, "loss": 1.0199, "num_tokens": 31667855389.0, "step": 11150 }, { "epoch": 1.9877005347593584, "grad_norm": 0.169921875, "learning_rate": 2.001837368655491e-06, "loss": 1.0259, "num_tokens": 31674133114.0, "step": 11151 }, { "epoch": 1.9878787878787878, "grad_norm": 0.1728515625, "learning_rate": 2.001785249104593e-06, "loss": 1.0102, "num_tokens": 31680401272.0, "step": 11152 }, { "epoch": 1.9880570409982175, "grad_norm": 0.1708984375, "learning_rate": 2.0017338793768344e-06, "loss": 1.0117, "num_tokens": 31686681800.0, "step": 11153 }, { "epoch": 1.988235294117647, "grad_norm": 0.171875, "learning_rate": 2.001683259476494e-06, "loss": 1.0252, "num_tokens": 31692961968.0, "step": 11154 }, { "epoch": 1.9884135472370765, "grad_norm": 0.166015625, "learning_rate": 2.0016333894077904e-06, "loss": 1.034, "num_tokens": 31699241283.0, "step": 11155 }, { "epoch": 1.9885918003565064, "grad_norm": 0.1689453125, "learning_rate": 2.00158426917488e-06, "loss": 1.0465, "num_tokens": 31705524964.0, "step": 11156 }, { "epoch": 1.9887700534759358, "grad_norm": 0.16796875, "learning_rate": 2.0015358987818553e-06, "loss": 1.0313, "num_tokens": 31711808798.0, "step": 11157 }, { "epoch": 1.9889483065953653, "grad_norm": 0.1689453125, "learning_rate": 2.001488278232747e-06, "loss": 1.0401, "num_tokens": 31718091897.0, "step": 11158 }, { "epoch": 1.9891265597147951, "grad_norm": 0.1708984375, "learning_rate": 2.001441407531524e-06, "loss": 1.013, "num_tokens": 31724373852.0, "step": 11159 }, { "epoch": 1.9893048128342246, "grad_norm": 0.171875, "learning_rate": 2.0013952866820916e-06, "loss": 1.0229, "num_tokens": 31730629727.0, "step": 11160 }, { "epoch": 1.9894830659536542, "grad_norm": 0.16796875, "learning_rate": 2.0013499156882927e-06, "loss": 1.0242, "num_tokens": 31736913106.0, "step": 11161 }, { "epoch": 1.9896613190730839, "grad_norm": 0.16796875, "learning_rate": 2.001305294553908e-06, "loss": 1.0251, "num_tokens": 31743134632.0, "step": 11162 }, { "epoch": 1.9898395721925133, "grad_norm": 0.16796875, "learning_rate": 2.0012614232826563e-06, "loss": 1.0038, "num_tokens": 31749418381.0, "step": 11163 }, { "epoch": 1.990017825311943, "grad_norm": 0.17578125, "learning_rate": 2.0012183018781942e-06, "loss": 1.0158, "num_tokens": 31755699209.0, "step": 11164 }, { "epoch": 1.9901960784313726, "grad_norm": 0.16796875, "learning_rate": 2.0011759303441136e-06, "loss": 1.0293, "num_tokens": 31761982926.0, "step": 11165 }, { "epoch": 1.990374331550802, "grad_norm": 0.169921875, "learning_rate": 2.0011343086839454e-06, "loss": 1.0024, "num_tokens": 31768239895.0, "step": 11166 }, { "epoch": 1.9905525846702319, "grad_norm": 0.169921875, "learning_rate": 2.0010934369011593e-06, "loss": 1.0285, "num_tokens": 31774502326.0, "step": 11167 }, { "epoch": 1.9907308377896613, "grad_norm": 0.1689453125, "learning_rate": 2.0010533149991593e-06, "loss": 1.0222, "num_tokens": 31780784327.0, "step": 11168 }, { "epoch": 1.990909090909091, "grad_norm": 0.1728515625, "learning_rate": 2.00101394298129e-06, "loss": 1.0335, "num_tokens": 31787063936.0, "step": 11169 }, { "epoch": 1.9910873440285206, "grad_norm": 0.16796875, "learning_rate": 2.0009753208508313e-06, "loss": 1.0445, "num_tokens": 31793345998.0, "step": 11170 }, { "epoch": 1.99126559714795, "grad_norm": 0.171875, "learning_rate": 2.0009374486110035e-06, "loss": 1.0512, "num_tokens": 31799627308.0, "step": 11171 }, { "epoch": 1.9914438502673797, "grad_norm": 0.173828125, "learning_rate": 2.000900326264961e-06, "loss": 1.0401, "num_tokens": 31805887456.0, "step": 11172 }, { "epoch": 1.9916221033868093, "grad_norm": 0.1689453125, "learning_rate": 2.0008639538157975e-06, "loss": 1.0315, "num_tokens": 31812160265.0, "step": 11173 }, { "epoch": 1.9918003565062388, "grad_norm": 0.169921875, "learning_rate": 2.0008283312665434e-06, "loss": 1.0007, "num_tokens": 31818435726.0, "step": 11174 }, { "epoch": 1.9919786096256684, "grad_norm": 0.173828125, "learning_rate": 2.0007934586201676e-06, "loss": 1.0176, "num_tokens": 31824719683.0, "step": 11175 }, { "epoch": 1.992156862745098, "grad_norm": 0.1708984375, "learning_rate": 2.0007593358795767e-06, "loss": 1.0044, "num_tokens": 31831003423.0, "step": 11176 }, { "epoch": 1.9923351158645275, "grad_norm": 0.1728515625, "learning_rate": 2.0007259630476134e-06, "loss": 0.9934, "num_tokens": 31837287044.0, "step": 11177 }, { "epoch": 1.9925133689839574, "grad_norm": 0.16796875, "learning_rate": 2.0006933401270585e-06, "loss": 1.0399, "num_tokens": 31843571524.0, "step": 11178 }, { "epoch": 1.9926916221033868, "grad_norm": 0.1640625, "learning_rate": 2.0006614671206305e-06, "loss": 1.0207, "num_tokens": 31849835499.0, "step": 11179 }, { "epoch": 1.9928698752228164, "grad_norm": 0.17578125, "learning_rate": 2.000630344030987e-06, "loss": 1.0035, "num_tokens": 31856093345.0, "step": 11180 }, { "epoch": 1.993048128342246, "grad_norm": 0.171875, "learning_rate": 2.0005999708607194e-06, "loss": 1.0244, "num_tokens": 31862376963.0, "step": 11181 }, { "epoch": 1.9932263814616755, "grad_norm": 0.171875, "learning_rate": 2.0005703476123587e-06, "loss": 1.0425, "num_tokens": 31868657795.0, "step": 11182 }, { "epoch": 1.9934046345811052, "grad_norm": 0.171875, "learning_rate": 2.0005414742883754e-06, "loss": 1.0217, "num_tokens": 31874940411.0, "step": 11183 }, { "epoch": 1.9935828877005348, "grad_norm": 0.1689453125, "learning_rate": 2.000513350891173e-06, "loss": 1.03, "num_tokens": 31881224395.0, "step": 11184 }, { "epoch": 1.9937611408199643, "grad_norm": 0.1708984375, "learning_rate": 2.0004859774230968e-06, "loss": 1.0097, "num_tokens": 31887449841.0, "step": 11185 }, { "epoch": 1.993939393939394, "grad_norm": 0.1728515625, "learning_rate": 2.0004593538864276e-06, "loss": 1.0302, "num_tokens": 31893720241.0, "step": 11186 }, { "epoch": 1.9941176470588236, "grad_norm": 0.16796875, "learning_rate": 2.000433480283385e-06, "loss": 1.0156, "num_tokens": 31900002784.0, "step": 11187 }, { "epoch": 1.994295900178253, "grad_norm": 0.16796875, "learning_rate": 2.0004083566161218e-06, "loss": 1.028, "num_tokens": 31906285653.0, "step": 11188 }, { "epoch": 1.9944741532976829, "grad_norm": 0.1728515625, "learning_rate": 2.0003839828867343e-06, "loss": 1.0039, "num_tokens": 31912569529.0, "step": 11189 }, { "epoch": 1.9946524064171123, "grad_norm": 0.171875, "learning_rate": 2.0003603590972528e-06, "loss": 0.9935, "num_tokens": 31918814239.0, "step": 11190 }, { "epoch": 1.994830659536542, "grad_norm": 0.16796875, "learning_rate": 2.0003374852496452e-06, "loss": 1.0206, "num_tokens": 31925098629.0, "step": 11191 }, { "epoch": 1.9950089126559716, "grad_norm": 0.169921875, "learning_rate": 2.000315361345819e-06, "loss": 1.0251, "num_tokens": 31931360925.0, "step": 11192 }, { "epoch": 1.995187165775401, "grad_norm": 0.1728515625, "learning_rate": 2.0002939873876163e-06, "loss": 1.0062, "num_tokens": 31937635847.0, "step": 11193 }, { "epoch": 1.9953654188948307, "grad_norm": 0.173828125, "learning_rate": 2.00027336337682e-06, "loss": 1.0382, "num_tokens": 31943909622.0, "step": 11194 }, { "epoch": 1.9955436720142603, "grad_norm": 0.1708984375, "learning_rate": 2.000253489315147e-06, "loss": 1.0537, "num_tokens": 31950173958.0, "step": 11195 }, { "epoch": 1.9957219251336897, "grad_norm": 0.1708984375, "learning_rate": 2.000234365204254e-06, "loss": 1.0147, "num_tokens": 31956452725.0, "step": 11196 }, { "epoch": 1.9959001782531194, "grad_norm": 0.1708984375, "learning_rate": 2.0002159910457343e-06, "loss": 1.0113, "num_tokens": 31962709470.0, "step": 11197 }, { "epoch": 1.996078431372549, "grad_norm": 0.16796875, "learning_rate": 2.0001983668411204e-06, "loss": 1.0214, "num_tokens": 31968957214.0, "step": 11198 }, { "epoch": 1.9962566844919785, "grad_norm": 0.1728515625, "learning_rate": 2.0001814925918796e-06, "loss": 1.0339, "num_tokens": 31975210341.0, "step": 11199 }, { "epoch": 1.9964349376114083, "grad_norm": 0.1689453125, "learning_rate": 2.0001653682994173e-06, "loss": 1.0104, "num_tokens": 31981487480.0, "step": 11200 }, { "epoch": 1.9966131907308378, "grad_norm": 0.1728515625, "learning_rate": 2.00014999396508e-06, "loss": 1.0114, "num_tokens": 31987737524.0, "step": 11201 }, { "epoch": 1.9967914438502674, "grad_norm": 0.1728515625, "learning_rate": 2.0001353695901456e-06, "loss": 1.0157, "num_tokens": 31994021360.0, "step": 11202 }, { "epoch": 1.996969696969697, "grad_norm": 0.169921875, "learning_rate": 2.000121495175834e-06, "loss": 1.0255, "num_tokens": 32000296201.0, "step": 11203 }, { "epoch": 1.9971479500891265, "grad_norm": 0.1689453125, "learning_rate": 2.0001083707233024e-06, "loss": 1.0051, "num_tokens": 32006579742.0, "step": 11204 }, { "epoch": 1.9973262032085561, "grad_norm": 0.1708984375, "learning_rate": 2.0000959962336435e-06, "loss": 0.9884, "num_tokens": 32012864108.0, "step": 11205 }, { "epoch": 1.9975044563279858, "grad_norm": 0.169921875, "learning_rate": 2.0000843717078886e-06, "loss": 1.0302, "num_tokens": 32019147887.0, "step": 11206 }, { "epoch": 1.9976827094474152, "grad_norm": 0.1728515625, "learning_rate": 2.0000734971470056e-06, "loss": 1.0404, "num_tokens": 32025430888.0, "step": 11207 }, { "epoch": 1.9978609625668449, "grad_norm": 0.1689453125, "learning_rate": 2.0000633725519023e-06, "loss": 0.9848, "num_tokens": 32031688997.0, "step": 11208 }, { "epoch": 1.9980392156862745, "grad_norm": 0.171875, "learning_rate": 2.000053997923421e-06, "loss": 1.0129, "num_tokens": 32037971832.0, "step": 11209 }, { "epoch": 1.998217468805704, "grad_norm": 0.1748046875, "learning_rate": 2.0000453732623435e-06, "loss": 1.0094, "num_tokens": 32044231627.0, "step": 11210 }, { "epoch": 1.9983957219251338, "grad_norm": 0.1748046875, "learning_rate": 2.000037498569389e-06, "loss": 1.0157, "num_tokens": 32050514641.0, "step": 11211 }, { "epoch": 1.9985739750445632, "grad_norm": 0.1767578125, "learning_rate": 2.000030373845212e-06, "loss": 1.0383, "num_tokens": 32056799042.0, "step": 11212 }, { "epoch": 1.998752228163993, "grad_norm": 0.166015625, "learning_rate": 2.0000239990904083e-06, "loss": 1.0162, "num_tokens": 32063079451.0, "step": 11213 }, { "epoch": 1.9989304812834225, "grad_norm": 0.1708984375, "learning_rate": 2.0000183743055085e-06, "loss": 1.0185, "num_tokens": 32069333760.0, "step": 11214 }, { "epoch": 1.999108734402852, "grad_norm": 0.171875, "learning_rate": 2.000013499490979e-06, "loss": 1.0541, "num_tokens": 32075618381.0, "step": 11215 }, { "epoch": 1.9992869875222816, "grad_norm": 0.173828125, "learning_rate": 2.00000937464723e-06, "loss": 1.0165, "num_tokens": 32081876965.0, "step": 11216 }, { "epoch": 1.9994652406417113, "grad_norm": 0.1767578125, "learning_rate": 2.0000059997746025e-06, "loss": 1.007, "num_tokens": 32088079433.0, "step": 11217 }, { "epoch": 1.9996434937611407, "grad_norm": 0.16796875, "learning_rate": 2.000003374873378e-06, "loss": 1.0447, "num_tokens": 32094345884.0, "step": 11218 }, { "epoch": 1.9998217468805706, "grad_norm": 0.1689453125, "learning_rate": 2.0000014999437755e-06, "loss": 1.0583, "num_tokens": 32100617359.0, "step": 11219 }, { "epoch": 2.0, "grad_norm": 0.171875, "learning_rate": 2.0000003749859516e-06, "loss": 1.008, "num_tokens": 32106899777.0, "step": 11220 }, { "epoch": 2.0, "step": 11220, "total_flos": 4.107393905038092e+20, "train_loss": 0.4648131006222995, "train_runtime": 276842.9058, "train_samples_per_second": 7.781, "train_steps_per_second": 0.041 } ], "logging_steps": 1, "max_steps": 11220, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 57, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.107393905038092e+20, "train_batch_size": 3, "trial_name": null, "trial_params": null }