{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999178959467966, "eval_steps": 500, "global_step": 9134, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00010947207093790197, "grad_norm": 1.1375685723564313, "learning_rate": 4.99999985212739e-05, "loss": 0.6712, "num_input_tokens_seen": 200704, "step": 1 }, { "epoch": 0.00021894414187580393, "grad_norm": 1.1605247657233773, "learning_rate": 4.9999994085095755e-05, "loss": 0.6784, "num_input_tokens_seen": 386624, "step": 2 }, { "epoch": 0.0003284162128137059, "grad_norm": 1.2436587180373795, "learning_rate": 4.9999986691466115e-05, "loss": 0.802, "num_input_tokens_seen": 592032, "step": 3 }, { "epoch": 0.00043788828375160787, "grad_norm": 1.5537743114923308, "learning_rate": 4.999997634038584e-05, "loss": 0.6541, "num_input_tokens_seen": 764736, "step": 4 }, { "epoch": 0.0005473603546895098, "grad_norm": 1.415405731398495, "learning_rate": 4.9999963031856145e-05, "loss": 0.5063, "num_input_tokens_seen": 930944, "step": 5 }, { "epoch": 0.0006568324256274118, "grad_norm": 1.3973241881739824, "learning_rate": 4.999994676587863e-05, "loss": 0.7316, "num_input_tokens_seen": 1134560, "step": 6 }, { "epoch": 0.0007663044965653138, "grad_norm": 1.1401563230213645, "learning_rate": 4.9999927542455196e-05, "loss": 0.5013, "num_input_tokens_seen": 1325856, "step": 7 }, { "epoch": 0.0008757765675032157, "grad_norm": 1.2263192099542266, "learning_rate": 4.9999905361588115e-05, "loss": 0.4773, "num_input_tokens_seen": 1524096, "step": 8 }, { "epoch": 0.0009852486384411177, "grad_norm": 1.3538059732378338, "learning_rate": 4.999988022328004e-05, "loss": 0.6527, "num_input_tokens_seen": 1688064, "step": 9 }, { "epoch": 0.0010947207093790197, "grad_norm": 1.3169036287835785, "learning_rate": 4.999985212753391e-05, "loss": 0.6857, "num_input_tokens_seen": 1861216, "step": 10 }, { "epoch": 0.0012041927803169217, "grad_norm": 1.3775049280815796, "learning_rate": 4.999982107435308e-05, "loss": 0.729, "num_input_tokens_seen": 2038176, "step": 11 }, { "epoch": 0.0013136648512548236, "grad_norm": 1.3916497044852547, "learning_rate": 4.99997870637412e-05, "loss": 0.7044, "num_input_tokens_seen": 2213344, "step": 12 }, { "epoch": 0.0014231369221927256, "grad_norm": 1.0808066037788786, "learning_rate": 4.99997500957023e-05, "loss": 0.6114, "num_input_tokens_seen": 2387392, "step": 13 }, { "epoch": 0.0015326089931306276, "grad_norm": 1.446204740035348, "learning_rate": 4.999971017024076e-05, "loss": 0.9107, "num_input_tokens_seen": 2558528, "step": 14 }, { "epoch": 0.0016420810640685294, "grad_norm": 1.4002516077649108, "learning_rate": 4.99996672873613e-05, "loss": 0.7237, "num_input_tokens_seen": 2744224, "step": 15 }, { "epoch": 0.0017515531350064315, "grad_norm": 1.291097710520378, "learning_rate": 4.999962144706898e-05, "loss": 0.6368, "num_input_tokens_seen": 2939552, "step": 16 }, { "epoch": 0.0018610252059443335, "grad_norm": 1.132461914533669, "learning_rate": 4.999957264936925e-05, "loss": 0.7459, "num_input_tokens_seen": 3119200, "step": 17 }, { "epoch": 0.0019704972768822353, "grad_norm": 1.1220580947781853, "learning_rate": 4.999952089426785e-05, "loss": 0.5144, "num_input_tokens_seen": 3288096, "step": 18 }, { "epoch": 0.0020799693478201374, "grad_norm": 1.255043868479814, "learning_rate": 4.9999466181770934e-05, "loss": 0.7758, "num_input_tokens_seen": 3496864, "step": 19 }, { "epoch": 0.0021894414187580394, "grad_norm": 1.1836915935701577, "learning_rate": 4.999940851188495e-05, "loss": 0.9462, "num_input_tokens_seen": 3710560, "step": 20 }, { "epoch": 0.0022989134896959414, "grad_norm": 1.374230125948353, "learning_rate": 4.999934788461673e-05, "loss": 0.8865, "num_input_tokens_seen": 3894016, "step": 21 }, { "epoch": 0.0024083855606338435, "grad_norm": 1.1307017162100463, "learning_rate": 4.9999284299973456e-05, "loss": 0.5197, "num_input_tokens_seen": 4062688, "step": 22 }, { "epoch": 0.002517857631571745, "grad_norm": 1.2849048960035863, "learning_rate": 4.999921775796263e-05, "loss": 0.6756, "num_input_tokens_seen": 4241216, "step": 23 }, { "epoch": 0.002627329702509647, "grad_norm": 1.2966354332686816, "learning_rate": 4.999914825859214e-05, "loss": 0.6375, "num_input_tokens_seen": 4415712, "step": 24 }, { "epoch": 0.002736801773447549, "grad_norm": 1.294241886339698, "learning_rate": 4.999907580187019e-05, "loss": 0.8339, "num_input_tokens_seen": 4624256, "step": 25 }, { "epoch": 0.002846273844385451, "grad_norm": 1.3497145704242808, "learning_rate": 4.9999000387805375e-05, "loss": 0.6684, "num_input_tokens_seen": 4798304, "step": 26 }, { "epoch": 0.002955745915323353, "grad_norm": 1.2495057559420786, "learning_rate": 4.99989220164066e-05, "loss": 0.6474, "num_input_tokens_seen": 4979744, "step": 27 }, { "epoch": 0.0030652179862612552, "grad_norm": 1.6041937821643815, "learning_rate": 4.9998840687683135e-05, "loss": 0.7383, "num_input_tokens_seen": 5139008, "step": 28 }, { "epoch": 0.0031746900571991573, "grad_norm": 1.4431920348169647, "learning_rate": 4.999875640164461e-05, "loss": 0.8955, "num_input_tokens_seen": 5321344, "step": 29 }, { "epoch": 0.003284162128137059, "grad_norm": 1.3190368719984544, "learning_rate": 4.9998669158301e-05, "loss": 0.737, "num_input_tokens_seen": 5515552, "step": 30 }, { "epoch": 0.003393634199074961, "grad_norm": 1.3916466635372422, "learning_rate": 4.999857895766261e-05, "loss": 0.8718, "num_input_tokens_seen": 5720064, "step": 31 }, { "epoch": 0.003503106270012863, "grad_norm": 1.2276217509410152, "learning_rate": 4.999848579974012e-05, "loss": 0.851, "num_input_tokens_seen": 5921440, "step": 32 }, { "epoch": 0.003612578340950765, "grad_norm": 1.2148459087978207, "learning_rate": 4.9998389684544546e-05, "loss": 0.7081, "num_input_tokens_seen": 6113184, "step": 33 }, { "epoch": 0.003722050411888667, "grad_norm": 1.136058917371372, "learning_rate": 4.999829061208726e-05, "loss": 0.5156, "num_input_tokens_seen": 6283648, "step": 34 }, { "epoch": 0.003831522482826569, "grad_norm": 1.1218150407074938, "learning_rate": 4.999818858237999e-05, "loss": 0.6898, "num_input_tokens_seen": 6468896, "step": 35 }, { "epoch": 0.003940994553764471, "grad_norm": 1.1919061831319286, "learning_rate": 4.99980835954348e-05, "loss": 0.6718, "num_input_tokens_seen": 6661312, "step": 36 }, { "epoch": 0.004050466624702373, "grad_norm": 1.2358926063225693, "learning_rate": 4.999797565126411e-05, "loss": 0.6001, "num_input_tokens_seen": 6839840, "step": 37 }, { "epoch": 0.004159938695640275, "grad_norm": 1.0979278337078424, "learning_rate": 4.999786474988067e-05, "loss": 0.5177, "num_input_tokens_seen": 7020832, "step": 38 }, { "epoch": 0.004269410766578176, "grad_norm": 1.1982213448326078, "learning_rate": 4.9997750891297636e-05, "loss": 0.634, "num_input_tokens_seen": 7218624, "step": 39 }, { "epoch": 0.004378882837516079, "grad_norm": 1.271601752094647, "learning_rate": 4.9997634075528454e-05, "loss": 0.6785, "num_input_tokens_seen": 7394912, "step": 40 }, { "epoch": 0.00448835490845398, "grad_norm": 1.329866994718059, "learning_rate": 4.999751430258695e-05, "loss": 0.6915, "num_input_tokens_seen": 7573888, "step": 41 }, { "epoch": 0.004597826979391883, "grad_norm": 1.2701181786287075, "learning_rate": 4.999739157248729e-05, "loss": 0.6931, "num_input_tokens_seen": 7770112, "step": 42 }, { "epoch": 0.0047072990503297845, "grad_norm": 1.2917071869349803, "learning_rate": 4.9997265885243993e-05, "loss": 0.7323, "num_input_tokens_seen": 7986944, "step": 43 }, { "epoch": 0.004816771121267687, "grad_norm": 1.2481899985330958, "learning_rate": 4.999713724087193e-05, "loss": 0.8218, "num_input_tokens_seen": 8189664, "step": 44 }, { "epoch": 0.0049262431922055885, "grad_norm": 1.3795275706722696, "learning_rate": 4.999700563938632e-05, "loss": 0.9619, "num_input_tokens_seen": 8383424, "step": 45 }, { "epoch": 0.00503571526314349, "grad_norm": 1.2361077809854482, "learning_rate": 4.999687108080272e-05, "loss": 0.6285, "num_input_tokens_seen": 8576960, "step": 46 }, { "epoch": 0.005145187334081393, "grad_norm": 1.2048509776299716, "learning_rate": 4.999673356513707e-05, "loss": 0.6056, "num_input_tokens_seen": 8737792, "step": 47 }, { "epoch": 0.005254659405019294, "grad_norm": 1.1653587753179582, "learning_rate": 4.999659309240561e-05, "loss": 0.6865, "num_input_tokens_seen": 8913856, "step": 48 }, { "epoch": 0.005364131475957197, "grad_norm": 1.1864551665833467, "learning_rate": 4.9996449662624986e-05, "loss": 0.6112, "num_input_tokens_seen": 9081408, "step": 49 }, { "epoch": 0.005473603546895098, "grad_norm": 1.2517284364269574, "learning_rate": 4.999630327581214e-05, "loss": 0.6682, "num_input_tokens_seen": 9257472, "step": 50 }, { "epoch": 0.005583075617833001, "grad_norm": 1.3397732374137514, "learning_rate": 4.9996153931984415e-05, "loss": 0.8365, "num_input_tokens_seen": 9419648, "step": 51 }, { "epoch": 0.005692547688770902, "grad_norm": 1.3276760913416354, "learning_rate": 4.999600163115945e-05, "loss": 0.7045, "num_input_tokens_seen": 9596384, "step": 52 }, { "epoch": 0.005802019759708804, "grad_norm": 1.2838270740345719, "learning_rate": 4.999584637335529e-05, "loss": 0.7067, "num_input_tokens_seen": 9769984, "step": 53 }, { "epoch": 0.005911491830646706, "grad_norm": 1.3014800339822237, "learning_rate": 4.9995688158590284e-05, "loss": 0.7084, "num_input_tokens_seen": 9985248, "step": 54 }, { "epoch": 0.006020963901584608, "grad_norm": 1.3596091671655097, "learning_rate": 4.9995526986883146e-05, "loss": 0.7537, "num_input_tokens_seen": 10184832, "step": 55 }, { "epoch": 0.0061304359725225105, "grad_norm": 1.350615903090275, "learning_rate": 4.999536285825295e-05, "loss": 0.8001, "num_input_tokens_seen": 10334240, "step": 56 }, { "epoch": 0.006239908043460412, "grad_norm": 1.2279643897724124, "learning_rate": 4.999519577271912e-05, "loss": 0.5893, "num_input_tokens_seen": 10484544, "step": 57 }, { "epoch": 0.0063493801143983146, "grad_norm": 1.3462862431740028, "learning_rate": 4.9995025730301406e-05, "loss": 0.859, "num_input_tokens_seen": 10655008, "step": 58 }, { "epoch": 0.006458852185336216, "grad_norm": 1.2974966466636175, "learning_rate": 4.999485273101993e-05, "loss": 0.8568, "num_input_tokens_seen": 10842496, "step": 59 }, { "epoch": 0.006568324256274118, "grad_norm": 1.233930048844086, "learning_rate": 4.9994676774895154e-05, "loss": 0.5817, "num_input_tokens_seen": 10991456, "step": 60 }, { "epoch": 0.00667779632721202, "grad_norm": 1.2547017838934604, "learning_rate": 4.99944978619479e-05, "loss": 0.7665, "num_input_tokens_seen": 11186784, "step": 61 }, { "epoch": 0.006787268398149922, "grad_norm": 1.0973100218002094, "learning_rate": 4.9994315992199335e-05, "loss": 0.6607, "num_input_tokens_seen": 11373824, "step": 62 }, { "epoch": 0.006896740469087824, "grad_norm": 1.1520029889604417, "learning_rate": 4.9994131165670965e-05, "loss": 0.6294, "num_input_tokens_seen": 11558176, "step": 63 }, { "epoch": 0.007006212540025726, "grad_norm": 1.330066249531122, "learning_rate": 4.999394338238466e-05, "loss": 0.766, "num_input_tokens_seen": 11744096, "step": 64 }, { "epoch": 0.0071156846109636275, "grad_norm": 1.35972283834916, "learning_rate": 4.999375264236263e-05, "loss": 0.9307, "num_input_tokens_seen": 11934048, "step": 65 }, { "epoch": 0.00722515668190153, "grad_norm": 1.2439166815169054, "learning_rate": 4.999355894562745e-05, "loss": 0.7371, "num_input_tokens_seen": 12089728, "step": 66 }, { "epoch": 0.007334628752839432, "grad_norm": 1.3260443152765562, "learning_rate": 4.9993362292202024e-05, "loss": 0.7666, "num_input_tokens_seen": 12249888, "step": 67 }, { "epoch": 0.007444100823777334, "grad_norm": 1.1537749115640215, "learning_rate": 4.999316268210962e-05, "loss": 0.6124, "num_input_tokens_seen": 12437152, "step": 68 }, { "epoch": 0.007553572894715236, "grad_norm": 1.308760948317531, "learning_rate": 4.999296011537384e-05, "loss": 0.6846, "num_input_tokens_seen": 12618592, "step": 69 }, { "epoch": 0.007663044965653138, "grad_norm": 1.2632118106930115, "learning_rate": 4.999275459201866e-05, "loss": 0.9314, "num_input_tokens_seen": 12802944, "step": 70 }, { "epoch": 0.00777251703659104, "grad_norm": 1.2801486142877243, "learning_rate": 4.9992546112068394e-05, "loss": 0.7839, "num_input_tokens_seen": 12979456, "step": 71 }, { "epoch": 0.007881989107528941, "grad_norm": 1.175245549537397, "learning_rate": 4.9992334675547704e-05, "loss": 0.696, "num_input_tokens_seen": 13140064, "step": 72 }, { "epoch": 0.007991461178466844, "grad_norm": 1.195755521167767, "learning_rate": 4.999212028248159e-05, "loss": 0.598, "num_input_tokens_seen": 13289920, "step": 73 }, { "epoch": 0.008100933249404746, "grad_norm": 1.2464561772422158, "learning_rate": 4.999190293289543e-05, "loss": 0.6061, "num_input_tokens_seen": 13497792, "step": 74 }, { "epoch": 0.008210405320342647, "grad_norm": 1.310941497469276, "learning_rate": 4.999168262681492e-05, "loss": 0.5776, "num_input_tokens_seen": 13658176, "step": 75 }, { "epoch": 0.00831987739128055, "grad_norm": 1.3134431629117824, "learning_rate": 4.999145936426614e-05, "loss": 0.6711, "num_input_tokens_seen": 13830432, "step": 76 }, { "epoch": 0.008429349462218452, "grad_norm": 1.3566804246806279, "learning_rate": 4.9991233145275495e-05, "loss": 0.6841, "num_input_tokens_seen": 14008064, "step": 77 }, { "epoch": 0.008538821533156353, "grad_norm": 1.1617955914181757, "learning_rate": 4.999100396986974e-05, "loss": 0.5426, "num_input_tokens_seen": 14177408, "step": 78 }, { "epoch": 0.008648293604094255, "grad_norm": 1.4081358238782862, "learning_rate": 4.999077183807599e-05, "loss": 0.7276, "num_input_tokens_seen": 14332416, "step": 79 }, { "epoch": 0.008757765675032158, "grad_norm": 1.2956730431695114, "learning_rate": 4.9990536749921704e-05, "loss": 0.7968, "num_input_tokens_seen": 14506912, "step": 80 }, { "epoch": 0.00886723774597006, "grad_norm": 1.3432844819414627, "learning_rate": 4.999029870543469e-05, "loss": 0.7246, "num_input_tokens_seen": 14691488, "step": 81 }, { "epoch": 0.00897670981690796, "grad_norm": 1.215107830819847, "learning_rate": 4.999005770464312e-05, "loss": 0.6807, "num_input_tokens_seen": 14874272, "step": 82 }, { "epoch": 0.009086181887845863, "grad_norm": 1.4219052657375089, "learning_rate": 4.99898137475755e-05, "loss": 0.7125, "num_input_tokens_seen": 15070944, "step": 83 }, { "epoch": 0.009195653958783766, "grad_norm": 1.2447303799448415, "learning_rate": 4.998956683426068e-05, "loss": 0.8963, "num_input_tokens_seen": 15264032, "step": 84 }, { "epoch": 0.009305126029721666, "grad_norm": 1.236325376968584, "learning_rate": 4.9989316964727873e-05, "loss": 0.6492, "num_input_tokens_seen": 15433824, "step": 85 }, { "epoch": 0.009414598100659569, "grad_norm": 1.2932306583358555, "learning_rate": 4.9989064139006645e-05, "loss": 0.7585, "num_input_tokens_seen": 15591296, "step": 86 }, { "epoch": 0.009524070171597471, "grad_norm": 1.291843489367351, "learning_rate": 4.99888083571269e-05, "loss": 0.7022, "num_input_tokens_seen": 15773408, "step": 87 }, { "epoch": 0.009633542242535374, "grad_norm": 1.2330059233381645, "learning_rate": 4.99885496191189e-05, "loss": 0.6068, "num_input_tokens_seen": 15975904, "step": 88 }, { "epoch": 0.009743014313473275, "grad_norm": 1.380008030398127, "learning_rate": 4.998828792501324e-05, "loss": 0.7437, "num_input_tokens_seen": 16134272, "step": 89 }, { "epoch": 0.009852486384411177, "grad_norm": 1.342459924337691, "learning_rate": 4.998802327484089e-05, "loss": 0.8501, "num_input_tokens_seen": 16287936, "step": 90 }, { "epoch": 0.00996195845534908, "grad_norm": 1.3102632263948444, "learning_rate": 4.9987755668633165e-05, "loss": 0.7088, "num_input_tokens_seen": 16470048, "step": 91 }, { "epoch": 0.01007143052628698, "grad_norm": 1.6900652708055774, "learning_rate": 4.998748510642171e-05, "loss": 1.2246, "num_input_tokens_seen": 16663584, "step": 92 }, { "epoch": 0.010180902597224883, "grad_norm": 1.1778728346781973, "learning_rate": 4.998721158823853e-05, "loss": 0.7137, "num_input_tokens_seen": 16859808, "step": 93 }, { "epoch": 0.010290374668162785, "grad_norm": 1.1839983157423, "learning_rate": 4.998693511411599e-05, "loss": 0.8742, "num_input_tokens_seen": 17044832, "step": 94 }, { "epoch": 0.010399846739100688, "grad_norm": 1.3026541209057638, "learning_rate": 4.998665568408679e-05, "loss": 0.8652, "num_input_tokens_seen": 17235456, "step": 95 }, { "epoch": 0.010509318810038588, "grad_norm": 1.2607408637313926, "learning_rate": 4.998637329818399e-05, "loss": 0.804, "num_input_tokens_seen": 17433696, "step": 96 }, { "epoch": 0.010618790880976491, "grad_norm": 1.172348740506192, "learning_rate": 4.9986087956441e-05, "loss": 0.6367, "num_input_tokens_seen": 17581760, "step": 97 }, { "epoch": 0.010728262951914393, "grad_norm": 1.1897495946368453, "learning_rate": 4.9985799658891563e-05, "loss": 0.8407, "num_input_tokens_seen": 17766112, "step": 98 }, { "epoch": 0.010837735022852294, "grad_norm": 1.2996089108612177, "learning_rate": 4.998550840556979e-05, "loss": 0.9003, "num_input_tokens_seen": 17971296, "step": 99 }, { "epoch": 0.010947207093790197, "grad_norm": 1.0973625468942212, "learning_rate": 4.998521419651014e-05, "loss": 0.6371, "num_input_tokens_seen": 18153632, "step": 100 }, { "epoch": 0.011056679164728099, "grad_norm": 1.3438372029242445, "learning_rate": 4.998491703174742e-05, "loss": 0.8215, "num_input_tokens_seen": 18327680, "step": 101 }, { "epoch": 0.011166151235666001, "grad_norm": 1.3508748857687496, "learning_rate": 4.998461691131677e-05, "loss": 0.6554, "num_input_tokens_seen": 18492096, "step": 102 }, { "epoch": 0.011275623306603902, "grad_norm": 1.304374520229074, "learning_rate": 4.9984313835253705e-05, "loss": 0.6907, "num_input_tokens_seen": 18668160, "step": 103 }, { "epoch": 0.011385095377541805, "grad_norm": 1.3466960637657748, "learning_rate": 4.998400780359408e-05, "loss": 0.8705, "num_input_tokens_seen": 18863712, "step": 104 }, { "epoch": 0.011494567448479707, "grad_norm": 1.3093877860016294, "learning_rate": 4.998369881637408e-05, "loss": 0.8243, "num_input_tokens_seen": 19085920, "step": 105 }, { "epoch": 0.011604039519417608, "grad_norm": 1.2837554811176786, "learning_rate": 4.9983386873630285e-05, "loss": 0.7918, "num_input_tokens_seen": 19289312, "step": 106 }, { "epoch": 0.01171351159035551, "grad_norm": 1.2113823350513657, "learning_rate": 4.998307197539958e-05, "loss": 0.8296, "num_input_tokens_seen": 19510400, "step": 107 }, { "epoch": 0.011822983661293413, "grad_norm": 1.2047157175442955, "learning_rate": 4.998275412171921e-05, "loss": 0.6138, "num_input_tokens_seen": 19667648, "step": 108 }, { "epoch": 0.011932455732231315, "grad_norm": 1.253281149327372, "learning_rate": 4.99824333126268e-05, "loss": 0.6533, "num_input_tokens_seen": 19813248, "step": 109 }, { "epoch": 0.012041927803169216, "grad_norm": 1.356347186917736, "learning_rate": 4.9982109548160274e-05, "loss": 0.6766, "num_input_tokens_seen": 20019552, "step": 110 }, { "epoch": 0.012151399874107119, "grad_norm": 1.3927892495470626, "learning_rate": 4.998178282835795e-05, "loss": 0.8005, "num_input_tokens_seen": 20190688, "step": 111 }, { "epoch": 0.012260871945045021, "grad_norm": 1.1484580524707517, "learning_rate": 4.998145315325848e-05, "loss": 0.7124, "num_input_tokens_seen": 20392736, "step": 112 }, { "epoch": 0.012370344015982922, "grad_norm": 1.111208998156478, "learning_rate": 4.998112052290086e-05, "loss": 0.7671, "num_input_tokens_seen": 20599488, "step": 113 }, { "epoch": 0.012479816086920824, "grad_norm": 1.2434348315689434, "learning_rate": 4.9980784937324434e-05, "loss": 0.7535, "num_input_tokens_seen": 20764128, "step": 114 }, { "epoch": 0.012589288157858727, "grad_norm": 1.3001190424480877, "learning_rate": 4.9980446396568906e-05, "loss": 0.6377, "num_input_tokens_seen": 20911072, "step": 115 }, { "epoch": 0.012698760228796629, "grad_norm": 1.3033923270556487, "learning_rate": 4.998010490067432e-05, "loss": 0.7537, "num_input_tokens_seen": 21116480, "step": 116 }, { "epoch": 0.01280823229973453, "grad_norm": 1.285282951996608, "learning_rate": 4.997976044968108e-05, "loss": 0.8034, "num_input_tokens_seen": 21263872, "step": 117 }, { "epoch": 0.012917704370672432, "grad_norm": 1.2700183110156096, "learning_rate": 4.997941304362993e-05, "loss": 0.7566, "num_input_tokens_seen": 21443744, "step": 118 }, { "epoch": 0.013027176441610335, "grad_norm": 1.1933837488560382, "learning_rate": 4.997906268256197e-05, "loss": 0.5467, "num_input_tokens_seen": 21616896, "step": 119 }, { "epoch": 0.013136648512548236, "grad_norm": 1.3830137375778893, "learning_rate": 4.997870936651865e-05, "loss": 0.8963, "num_input_tokens_seen": 21807520, "step": 120 }, { "epoch": 0.013246120583486138, "grad_norm": 1.257332562168919, "learning_rate": 4.9978353095541766e-05, "loss": 0.7902, "num_input_tokens_seen": 22001728, "step": 121 }, { "epoch": 0.01335559265442404, "grad_norm": 1.3204808325367638, "learning_rate": 4.997799386967345e-05, "loss": 0.79, "num_input_tokens_seen": 22203776, "step": 122 }, { "epoch": 0.013465064725361941, "grad_norm": 1.3640715049280374, "learning_rate": 4.9977631688956215e-05, "loss": 0.7779, "num_input_tokens_seen": 22382752, "step": 123 }, { "epoch": 0.013574536796299844, "grad_norm": 1.4345919177023527, "learning_rate": 4.99772665534329e-05, "loss": 0.7406, "num_input_tokens_seen": 22556576, "step": 124 }, { "epoch": 0.013684008867237746, "grad_norm": 1.1456887703070118, "learning_rate": 4.9976898463146706e-05, "loss": 0.7324, "num_input_tokens_seen": 22740256, "step": 125 }, { "epoch": 0.013793480938175649, "grad_norm": 1.2628167161160788, "learning_rate": 4.997652741814116e-05, "loss": 0.8445, "num_input_tokens_seen": 22933120, "step": 126 }, { "epoch": 0.01390295300911355, "grad_norm": 1.1689941090124547, "learning_rate": 4.9976153418460184e-05, "loss": 0.5797, "num_input_tokens_seen": 23090144, "step": 127 }, { "epoch": 0.014012425080051452, "grad_norm": 1.361829974413629, "learning_rate": 4.997577646414799e-05, "loss": 0.8427, "num_input_tokens_seen": 23281888, "step": 128 }, { "epoch": 0.014121897150989354, "grad_norm": 1.1775641120578497, "learning_rate": 4.997539655524919e-05, "loss": 0.8864, "num_input_tokens_seen": 23460640, "step": 129 }, { "epoch": 0.014231369221927255, "grad_norm": 1.2854167361184734, "learning_rate": 4.997501369180872e-05, "loss": 0.8312, "num_input_tokens_seen": 23639840, "step": 130 }, { "epoch": 0.014340841292865157, "grad_norm": 1.267659706347049, "learning_rate": 4.997462787387188e-05, "loss": 0.7574, "num_input_tokens_seen": 23806048, "step": 131 }, { "epoch": 0.01445031336380306, "grad_norm": 1.180767651124116, "learning_rate": 4.997423910148431e-05, "loss": 0.6549, "num_input_tokens_seen": 23972928, "step": 132 }, { "epoch": 0.014559785434740962, "grad_norm": 1.1825930453544513, "learning_rate": 4.9973847374691985e-05, "loss": 0.6969, "num_input_tokens_seen": 24161984, "step": 133 }, { "epoch": 0.014669257505678863, "grad_norm": 1.1945018637456202, "learning_rate": 4.997345269354127e-05, "loss": 0.6144, "num_input_tokens_seen": 24306016, "step": 134 }, { "epoch": 0.014778729576616766, "grad_norm": 1.3292269903304006, "learning_rate": 4.9973055058078835e-05, "loss": 0.7967, "num_input_tokens_seen": 24494400, "step": 135 }, { "epoch": 0.014888201647554668, "grad_norm": 1.3011517154685959, "learning_rate": 4.997265446835172e-05, "loss": 0.8719, "num_input_tokens_seen": 24669344, "step": 136 }, { "epoch": 0.014997673718492569, "grad_norm": 1.274512042375184, "learning_rate": 4.997225092440733e-05, "loss": 0.7693, "num_input_tokens_seen": 24866688, "step": 137 }, { "epoch": 0.015107145789430471, "grad_norm": 1.2153761699748957, "learning_rate": 4.9971844426293395e-05, "loss": 0.658, "num_input_tokens_seen": 25045440, "step": 138 }, { "epoch": 0.015216617860368374, "grad_norm": 1.5907334596120806, "learning_rate": 4.9971434974058e-05, "loss": 0.8443, "num_input_tokens_seen": 25220384, "step": 139 }, { "epoch": 0.015326089931306276, "grad_norm": 1.3242865350979092, "learning_rate": 4.997102256774959e-05, "loss": 0.6314, "num_input_tokens_seen": 25393760, "step": 140 }, { "epoch": 0.015435562002244177, "grad_norm": 1.3546339647039225, "learning_rate": 4.997060720741694e-05, "loss": 0.7165, "num_input_tokens_seen": 25558400, "step": 141 }, { "epoch": 0.01554503407318208, "grad_norm": 1.2851046288045158, "learning_rate": 4.9970188893109194e-05, "loss": 0.7813, "num_input_tokens_seen": 25745888, "step": 142 }, { "epoch": 0.015654506144119982, "grad_norm": 1.3505280345053423, "learning_rate": 4.996976762487584e-05, "loss": 0.7194, "num_input_tokens_seen": 25876928, "step": 143 }, { "epoch": 0.015763978215057883, "grad_norm": 1.3367935917746943, "learning_rate": 4.996934340276671e-05, "loss": 0.7349, "num_input_tokens_seen": 26026560, "step": 144 }, { "epoch": 0.015873450285995787, "grad_norm": 1.0933933766574906, "learning_rate": 4.996891622683199e-05, "loss": 0.8035, "num_input_tokens_seen": 26232192, "step": 145 }, { "epoch": 0.015982922356933688, "grad_norm": 1.1930001525143368, "learning_rate": 4.99684860971222e-05, "loss": 0.8288, "num_input_tokens_seen": 26404448, "step": 146 }, { "epoch": 0.01609239442787159, "grad_norm": 1.2073113570144962, "learning_rate": 4.996805301368825e-05, "loss": 0.7917, "num_input_tokens_seen": 26596640, "step": 147 }, { "epoch": 0.016201866498809493, "grad_norm": 1.259253393317462, "learning_rate": 4.9967616976581354e-05, "loss": 0.9584, "num_input_tokens_seen": 26787264, "step": 148 }, { "epoch": 0.016311338569747393, "grad_norm": 0.9447077558631755, "learning_rate": 4.99671779858531e-05, "loss": 0.4423, "num_input_tokens_seen": 26954816, "step": 149 }, { "epoch": 0.016420810640685294, "grad_norm": 1.1899499716781126, "learning_rate": 4.996673604155542e-05, "loss": 0.6417, "num_input_tokens_seen": 27141856, "step": 150 }, { "epoch": 0.016530282711623198, "grad_norm": 1.20810560294741, "learning_rate": 4.9966291143740595e-05, "loss": 0.8161, "num_input_tokens_seen": 27349056, "step": 151 }, { "epoch": 0.0166397547825611, "grad_norm": 1.1554031879635294, "learning_rate": 4.996584329246126e-05, "loss": 0.6427, "num_input_tokens_seen": 27561408, "step": 152 }, { "epoch": 0.016749226853499, "grad_norm": 1.2001107343603206, "learning_rate": 4.996539248777038e-05, "loss": 0.8038, "num_input_tokens_seen": 27762784, "step": 153 }, { "epoch": 0.016858698924436904, "grad_norm": 1.2942535378767162, "learning_rate": 4.99649387297213e-05, "loss": 0.74, "num_input_tokens_seen": 27954304, "step": 154 }, { "epoch": 0.016968170995374805, "grad_norm": 1.32273128542281, "learning_rate": 4.996448201836769e-05, "loss": 0.8268, "num_input_tokens_seen": 28126112, "step": 155 }, { "epoch": 0.017077643066312705, "grad_norm": 1.2041055226009494, "learning_rate": 4.9964022353763586e-05, "loss": 0.682, "num_input_tokens_seen": 28325248, "step": 156 }, { "epoch": 0.01718711513725061, "grad_norm": 1.3357101157686495, "learning_rate": 4.996355973596336e-05, "loss": 0.6126, "num_input_tokens_seen": 28487424, "step": 157 }, { "epoch": 0.01729658720818851, "grad_norm": 1.1628431552043688, "learning_rate": 4.996309416502174e-05, "loss": 0.8077, "num_input_tokens_seen": 28670880, "step": 158 }, { "epoch": 0.017406059279126414, "grad_norm": 1.331740003499244, "learning_rate": 4.99626256409938e-05, "loss": 0.6373, "num_input_tokens_seen": 28836864, "step": 159 }, { "epoch": 0.017515531350064315, "grad_norm": 1.2132496571158131, "learning_rate": 4.996215416393496e-05, "loss": 0.6675, "num_input_tokens_seen": 29027040, "step": 160 }, { "epoch": 0.017625003421002216, "grad_norm": 1.153445941953066, "learning_rate": 4.996167973390101e-05, "loss": 0.6027, "num_input_tokens_seen": 29220800, "step": 161 }, { "epoch": 0.01773447549194012, "grad_norm": 1.2664845220626775, "learning_rate": 4.996120235094807e-05, "loss": 0.7717, "num_input_tokens_seen": 29402240, "step": 162 }, { "epoch": 0.01784394756287802, "grad_norm": 1.5897797892683405, "learning_rate": 4.99607220151326e-05, "loss": 0.753, "num_input_tokens_seen": 29604960, "step": 163 }, { "epoch": 0.01795341963381592, "grad_norm": 1.1701446473948087, "learning_rate": 4.9960238726511435e-05, "loss": 0.6594, "num_input_tokens_seen": 29785504, "step": 164 }, { "epoch": 0.018062891704753826, "grad_norm": 1.2202959770226978, "learning_rate": 4.995975248514175e-05, "loss": 0.6007, "num_input_tokens_seen": 29945664, "step": 165 }, { "epoch": 0.018172363775691727, "grad_norm": 1.2282411787297483, "learning_rate": 4.995926329108106e-05, "loss": 0.5952, "num_input_tokens_seen": 30127776, "step": 166 }, { "epoch": 0.018281835846629627, "grad_norm": 1.2640172848907905, "learning_rate": 4.995877114438723e-05, "loss": 0.6946, "num_input_tokens_seen": 30298240, "step": 167 }, { "epoch": 0.01839130791756753, "grad_norm": 1.2008509261417466, "learning_rate": 4.995827604511849e-05, "loss": 0.641, "num_input_tokens_seen": 30466688, "step": 168 }, { "epoch": 0.018500779988505432, "grad_norm": 1.3243884762721467, "learning_rate": 4.995777799333341e-05, "loss": 0.8853, "num_input_tokens_seen": 30675456, "step": 169 }, { "epoch": 0.018610252059443333, "grad_norm": 1.4545201812791781, "learning_rate": 4.99572769890909e-05, "loss": 0.9814, "num_input_tokens_seen": 30839872, "step": 170 }, { "epoch": 0.018719724130381237, "grad_norm": 1.2242757291016226, "learning_rate": 4.9956773032450234e-05, "loss": 0.7272, "num_input_tokens_seen": 31008320, "step": 171 }, { "epoch": 0.018829196201319138, "grad_norm": 1.3488267763221111, "learning_rate": 4.995626612347103e-05, "loss": 0.6521, "num_input_tokens_seen": 31167136, "step": 172 }, { "epoch": 0.018938668272257042, "grad_norm": 1.3268669003829234, "learning_rate": 4.995575626221325e-05, "loss": 0.5594, "num_input_tokens_seen": 31368288, "step": 173 }, { "epoch": 0.019048140343194943, "grad_norm": 1.3298842951272898, "learning_rate": 4.995524344873721e-05, "loss": 0.6103, "num_input_tokens_seen": 31545248, "step": 174 }, { "epoch": 0.019157612414132844, "grad_norm": 1.3688183458239211, "learning_rate": 4.9954727683103576e-05, "loss": 0.6036, "num_input_tokens_seen": 31689504, "step": 175 }, { "epoch": 0.019267084485070748, "grad_norm": 1.2367860449231904, "learning_rate": 4.995420896537336e-05, "loss": 0.6305, "num_input_tokens_seen": 31844064, "step": 176 }, { "epoch": 0.01937655655600865, "grad_norm": 1.1431578640726583, "learning_rate": 4.995368729560793e-05, "loss": 0.8005, "num_input_tokens_seen": 32049472, "step": 177 }, { "epoch": 0.01948602862694655, "grad_norm": 1.3481964797663968, "learning_rate": 4.9953162673869005e-05, "loss": 0.835, "num_input_tokens_seen": 32213216, "step": 178 }, { "epoch": 0.019595500697884453, "grad_norm": 1.320128523970803, "learning_rate": 4.9952635100218623e-05, "loss": 0.7817, "num_input_tokens_seen": 32383008, "step": 179 }, { "epoch": 0.019704972768822354, "grad_norm": 1.1874281710935837, "learning_rate": 4.995210457471922e-05, "loss": 0.7382, "num_input_tokens_seen": 32601632, "step": 180 }, { "epoch": 0.019814444839760255, "grad_norm": 1.1670096040966296, "learning_rate": 4.995157109743354e-05, "loss": 0.6084, "num_input_tokens_seen": 32779712, "step": 181 }, { "epoch": 0.01992391691069816, "grad_norm": 1.3274182402582513, "learning_rate": 4.99510346684247e-05, "loss": 0.887, "num_input_tokens_seen": 32956672, "step": 182 }, { "epoch": 0.02003338898163606, "grad_norm": 1.182883776755829, "learning_rate": 4.995049528775616e-05, "loss": 0.867, "num_input_tokens_seen": 33140128, "step": 183 }, { "epoch": 0.02014286105257396, "grad_norm": 1.3510078218093886, "learning_rate": 4.994995295549173e-05, "loss": 0.6597, "num_input_tokens_seen": 33290208, "step": 184 }, { "epoch": 0.020252333123511865, "grad_norm": 1.3725559881912301, "learning_rate": 4.9949407671695554e-05, "loss": 1.0128, "num_input_tokens_seen": 33511520, "step": 185 }, { "epoch": 0.020361805194449765, "grad_norm": 1.2080280422530558, "learning_rate": 4.994885943643215e-05, "loss": 0.7103, "num_input_tokens_seen": 33703264, "step": 186 }, { "epoch": 0.02047127726538767, "grad_norm": 1.3011150676382275, "learning_rate": 4.994830824976636e-05, "loss": 0.8895, "num_input_tokens_seen": 33855136, "step": 187 }, { "epoch": 0.02058074933632557, "grad_norm": 1.266647161576189, "learning_rate": 4.99477541117634e-05, "loss": 0.7295, "num_input_tokens_seen": 34066816, "step": 188 }, { "epoch": 0.02069022140726347, "grad_norm": 1.0882706312331067, "learning_rate": 4.994719702248883e-05, "loss": 0.5532, "num_input_tokens_seen": 34269088, "step": 189 }, { "epoch": 0.020799693478201375, "grad_norm": 1.6104727399307883, "learning_rate": 4.9946636982008534e-05, "loss": 0.7301, "num_input_tokens_seen": 34457696, "step": 190 }, { "epoch": 0.020909165549139276, "grad_norm": 1.3437319466021311, "learning_rate": 4.994607399038877e-05, "loss": 0.8629, "num_input_tokens_seen": 34625920, "step": 191 }, { "epoch": 0.021018637620077177, "grad_norm": 1.2116179492571733, "learning_rate": 4.9945508047696154e-05, "loss": 0.63, "num_input_tokens_seen": 34823040, "step": 192 }, { "epoch": 0.02112810969101508, "grad_norm": 1.115837081283721, "learning_rate": 4.9944939153997614e-05, "loss": 0.6346, "num_input_tokens_seen": 35002464, "step": 193 }, { "epoch": 0.021237581761952982, "grad_norm": 1.229045517118139, "learning_rate": 4.994436730936046e-05, "loss": 0.7864, "num_input_tokens_seen": 35199584, "step": 194 }, { "epoch": 0.021347053832890883, "grad_norm": 1.1079801467569736, "learning_rate": 4.994379251385235e-05, "loss": 0.6279, "num_input_tokens_seen": 35414176, "step": 195 }, { "epoch": 0.021456525903828787, "grad_norm": 1.2821148535360536, "learning_rate": 4.9943214767541255e-05, "loss": 0.7367, "num_input_tokens_seen": 35598976, "step": 196 }, { "epoch": 0.021565997974766687, "grad_norm": 1.2999059349654116, "learning_rate": 4.994263407049554e-05, "loss": 0.7586, "num_input_tokens_seen": 35749504, "step": 197 }, { "epoch": 0.021675470045704588, "grad_norm": 1.3176660593763079, "learning_rate": 4.9942050422783906e-05, "loss": 0.797, "num_input_tokens_seen": 35929600, "step": 198 }, { "epoch": 0.021784942116642492, "grad_norm": 1.2688547063006441, "learning_rate": 4.994146382447538e-05, "loss": 0.8002, "num_input_tokens_seen": 36103200, "step": 199 }, { "epoch": 0.021894414187580393, "grad_norm": 1.3863673234967293, "learning_rate": 4.994087427563936e-05, "loss": 0.9701, "num_input_tokens_seen": 36276128, "step": 200 }, { "epoch": 0.022003886258518294, "grad_norm": 1.0883695962267241, "learning_rate": 4.9940281776345596e-05, "loss": 0.7224, "num_input_tokens_seen": 36457344, "step": 201 }, { "epoch": 0.022113358329456198, "grad_norm": 1.2334500964197828, "learning_rate": 4.993968632666417e-05, "loss": 0.6799, "num_input_tokens_seen": 36627360, "step": 202 }, { "epoch": 0.0222228304003941, "grad_norm": 1.2432949000308366, "learning_rate": 4.993908792666554e-05, "loss": 0.7229, "num_input_tokens_seen": 36761536, "step": 203 }, { "epoch": 0.022332302471332003, "grad_norm": 1.2874550709708967, "learning_rate": 4.9938486576420474e-05, "loss": 0.8847, "num_input_tokens_seen": 36932896, "step": 204 }, { "epoch": 0.022441774542269904, "grad_norm": 1.2862197047188586, "learning_rate": 4.993788227600013e-05, "loss": 0.5768, "num_input_tokens_seen": 37059904, "step": 205 }, { "epoch": 0.022551246613207804, "grad_norm": 1.2603784297891532, "learning_rate": 4.993727502547598e-05, "loss": 0.6446, "num_input_tokens_seen": 37243360, "step": 206 }, { "epoch": 0.02266071868414571, "grad_norm": 1.147274804135399, "learning_rate": 4.9936664824919865e-05, "loss": 0.5917, "num_input_tokens_seen": 37434880, "step": 207 }, { "epoch": 0.02277019075508361, "grad_norm": 1.2530918031324108, "learning_rate": 4.993605167440397e-05, "loss": 0.7557, "num_input_tokens_seen": 37638272, "step": 208 }, { "epoch": 0.02287966282602151, "grad_norm": 1.2693449852088732, "learning_rate": 4.9935435574000834e-05, "loss": 0.6493, "num_input_tokens_seen": 37796192, "step": 209 }, { "epoch": 0.022989134896959414, "grad_norm": 1.3096754471395613, "learning_rate": 4.993481652378334e-05, "loss": 0.6706, "num_input_tokens_seen": 37942688, "step": 210 }, { "epoch": 0.023098606967897315, "grad_norm": 1.143613323529403, "learning_rate": 4.9934194523824715e-05, "loss": 0.7749, "num_input_tokens_seen": 38133088, "step": 211 }, { "epoch": 0.023208079038835216, "grad_norm": 1.1703050906019363, "learning_rate": 4.993356957419855e-05, "loss": 0.7671, "num_input_tokens_seen": 38315648, "step": 212 }, { "epoch": 0.02331755110977312, "grad_norm": 1.26672070956805, "learning_rate": 4.993294167497876e-05, "loss": 0.7319, "num_input_tokens_seen": 38489248, "step": 213 }, { "epoch": 0.02342702318071102, "grad_norm": 1.1334037553188188, "learning_rate": 4.993231082623965e-05, "loss": 0.6718, "num_input_tokens_seen": 38708992, "step": 214 }, { "epoch": 0.02353649525164892, "grad_norm": 1.4361153443694465, "learning_rate": 4.993167702805581e-05, "loss": 0.7682, "num_input_tokens_seen": 38875200, "step": 215 }, { "epoch": 0.023645967322586826, "grad_norm": 1.1923143771791125, "learning_rate": 4.9931040280502255e-05, "loss": 0.6635, "num_input_tokens_seen": 39044992, "step": 216 }, { "epoch": 0.023755439393524726, "grad_norm": 1.2571120118252266, "learning_rate": 4.993040058365429e-05, "loss": 0.652, "num_input_tokens_seen": 39208512, "step": 217 }, { "epoch": 0.02386491146446263, "grad_norm": 1.318959010767799, "learning_rate": 4.992975793758759e-05, "loss": 0.8437, "num_input_tokens_seen": 39402720, "step": 218 }, { "epoch": 0.02397438353540053, "grad_norm": 1.1742430094981298, "learning_rate": 4.9929112342378194e-05, "loss": 0.9123, "num_input_tokens_seen": 39589088, "step": 219 }, { "epoch": 0.024083855606338432, "grad_norm": 1.1641526667345434, "learning_rate": 4.9928463798102456e-05, "loss": 0.8507, "num_input_tokens_seen": 39780832, "step": 220 }, { "epoch": 0.024193327677276336, "grad_norm": 1.2187799303341047, "learning_rate": 4.992781230483711e-05, "loss": 0.7906, "num_input_tokens_seen": 39963840, "step": 221 }, { "epoch": 0.024302799748214237, "grad_norm": 1.1645005189297455, "learning_rate": 4.9927157862659215e-05, "loss": 0.7648, "num_input_tokens_seen": 40166784, "step": 222 }, { "epoch": 0.024412271819152138, "grad_norm": 1.2273985327836345, "learning_rate": 4.992650047164621e-05, "loss": 0.8414, "num_input_tokens_seen": 40350912, "step": 223 }, { "epoch": 0.024521743890090042, "grad_norm": 1.1950098290298563, "learning_rate": 4.9925840131875845e-05, "loss": 0.6663, "num_input_tokens_seen": 40543104, "step": 224 }, { "epoch": 0.024631215961027943, "grad_norm": 1.1948555465600792, "learning_rate": 4.9925176843426236e-05, "loss": 0.708, "num_input_tokens_seen": 40715808, "step": 225 }, { "epoch": 0.024740688031965843, "grad_norm": 1.235569725826325, "learning_rate": 4.9924510606375864e-05, "loss": 0.7908, "num_input_tokens_seen": 40924800, "step": 226 }, { "epoch": 0.024850160102903748, "grad_norm": 1.1981373735643595, "learning_rate": 4.992384142080353e-05, "loss": 0.6441, "num_input_tokens_seen": 41110272, "step": 227 }, { "epoch": 0.02495963217384165, "grad_norm": 1.2093527976666871, "learning_rate": 4.99231692867884e-05, "loss": 0.722, "num_input_tokens_seen": 41304480, "step": 228 }, { "epoch": 0.02506910424477955, "grad_norm": 1.2989812086712251, "learning_rate": 4.9922494204409994e-05, "loss": 0.8136, "num_input_tokens_seen": 41474720, "step": 229 }, { "epoch": 0.025178576315717453, "grad_norm": 1.3890859252190275, "learning_rate": 4.9921816173748166e-05, "loss": 0.7715, "num_input_tokens_seen": 41645408, "step": 230 }, { "epoch": 0.025288048386655354, "grad_norm": 1.2863573763174536, "learning_rate": 4.9921135194883126e-05, "loss": 0.7798, "num_input_tokens_seen": 41828416, "step": 231 }, { "epoch": 0.025397520457593258, "grad_norm": 1.4082788878230268, "learning_rate": 4.992045126789543e-05, "loss": 0.7704, "num_input_tokens_seen": 41993056, "step": 232 }, { "epoch": 0.02550699252853116, "grad_norm": 1.3258896360225183, "learning_rate": 4.9919764392865994e-05, "loss": 0.761, "num_input_tokens_seen": 42166208, "step": 233 }, { "epoch": 0.02561646459946906, "grad_norm": 1.237284310164803, "learning_rate": 4.9919074569876066e-05, "loss": 0.8806, "num_input_tokens_seen": 42380576, "step": 234 }, { "epoch": 0.025725936670406964, "grad_norm": 1.2264816354713337, "learning_rate": 4.991838179900726e-05, "loss": 0.646, "num_input_tokens_seen": 42570528, "step": 235 }, { "epoch": 0.025835408741344865, "grad_norm": 1.2899247493530506, "learning_rate": 4.991768608034152e-05, "loss": 0.8508, "num_input_tokens_seen": 42769216, "step": 236 }, { "epoch": 0.025944880812282765, "grad_norm": 1.1602821153381027, "learning_rate": 4.991698741396115e-05, "loss": 0.7227, "num_input_tokens_seen": 42934304, "step": 237 }, { "epoch": 0.02605435288322067, "grad_norm": 1.2286032446831927, "learning_rate": 4.991628579994879e-05, "loss": 0.5724, "num_input_tokens_seen": 43116640, "step": 238 }, { "epoch": 0.02616382495415857, "grad_norm": 1.2278681809287362, "learning_rate": 4.9915581238387464e-05, "loss": 0.7538, "num_input_tokens_seen": 43296960, "step": 239 }, { "epoch": 0.02627329702509647, "grad_norm": 1.3100046323738022, "learning_rate": 4.991487372936051e-05, "loss": 0.7266, "num_input_tokens_seen": 43471456, "step": 240 }, { "epoch": 0.026382769096034375, "grad_norm": 1.4795537451096423, "learning_rate": 4.991416327295162e-05, "loss": 0.7425, "num_input_tokens_seen": 43628928, "step": 241 }, { "epoch": 0.026492241166972276, "grad_norm": 1.273879553708021, "learning_rate": 4.9913449869244844e-05, "loss": 0.7924, "num_input_tokens_seen": 43827616, "step": 242 }, { "epoch": 0.026601713237910177, "grad_norm": 1.2814357901988167, "learning_rate": 4.991273351832457e-05, "loss": 0.6683, "num_input_tokens_seen": 43983520, "step": 243 }, { "epoch": 0.02671118530884808, "grad_norm": 1.261838641537991, "learning_rate": 4.991201422027556e-05, "loss": 0.6728, "num_input_tokens_seen": 44170560, "step": 244 }, { "epoch": 0.02682065737978598, "grad_norm": 1.2888158388830369, "learning_rate": 4.991129197518287e-05, "loss": 0.6634, "num_input_tokens_seen": 44338112, "step": 245 }, { "epoch": 0.026930129450723882, "grad_norm": 1.3455974830829565, "learning_rate": 4.991056678313197e-05, "loss": 0.8105, "num_input_tokens_seen": 44499168, "step": 246 }, { "epoch": 0.027039601521661787, "grad_norm": 1.3327221015782305, "learning_rate": 4.990983864420865e-05, "loss": 0.8705, "num_input_tokens_seen": 44714432, "step": 247 }, { "epoch": 0.027149073592599687, "grad_norm": 1.1626558445480044, "learning_rate": 4.990910755849903e-05, "loss": 0.7122, "num_input_tokens_seen": 44886240, "step": 248 }, { "epoch": 0.02725854566353759, "grad_norm": 1.3426811047800222, "learning_rate": 4.99083735260896e-05, "loss": 0.7587, "num_input_tokens_seen": 45024672, "step": 249 }, { "epoch": 0.027368017734475492, "grad_norm": 1.454467153609092, "learning_rate": 4.990763654706721e-05, "loss": 0.7149, "num_input_tokens_seen": 45193792, "step": 250 }, { "epoch": 0.027477489805413393, "grad_norm": 1.1596072146914669, "learning_rate": 4.990689662151903e-05, "loss": 0.6512, "num_input_tokens_seen": 45360896, "step": 251 }, { "epoch": 0.027586961876351297, "grad_norm": 1.3565740119183756, "learning_rate": 4.990615374953258e-05, "loss": 0.6943, "num_input_tokens_seen": 45533600, "step": 252 }, { "epoch": 0.027696433947289198, "grad_norm": 1.1950208520067578, "learning_rate": 4.990540793119577e-05, "loss": 0.7342, "num_input_tokens_seen": 45733632, "step": 253 }, { "epoch": 0.0278059060182271, "grad_norm": 1.2785601875331627, "learning_rate": 4.99046591665968e-05, "loss": 0.7208, "num_input_tokens_seen": 45931648, "step": 254 }, { "epoch": 0.027915378089165003, "grad_norm": 1.0932308054647146, "learning_rate": 4.990390745582427e-05, "loss": 0.641, "num_input_tokens_seen": 46127200, "step": 255 }, { "epoch": 0.028024850160102904, "grad_norm": 1.3353284373226164, "learning_rate": 4.990315279896709e-05, "loss": 0.7197, "num_input_tokens_seen": 46290272, "step": 256 }, { "epoch": 0.028134322231040804, "grad_norm": 1.2790069950529865, "learning_rate": 4.990239519611454e-05, "loss": 0.7023, "num_input_tokens_seen": 46454016, "step": 257 }, { "epoch": 0.02824379430197871, "grad_norm": 1.3019729516608185, "learning_rate": 4.990163464735624e-05, "loss": 0.6438, "num_input_tokens_seen": 46614400, "step": 258 }, { "epoch": 0.02835326637291661, "grad_norm": 1.1552818988093567, "learning_rate": 4.990087115278218e-05, "loss": 0.7123, "num_input_tokens_seen": 46835712, "step": 259 }, { "epoch": 0.02846273844385451, "grad_norm": 1.358224579293587, "learning_rate": 4.9900104712482656e-05, "loss": 0.678, "num_input_tokens_seen": 47001024, "step": 260 }, { "epoch": 0.028572210514792414, "grad_norm": 1.326768743615148, "learning_rate": 4.9899335326548346e-05, "loss": 0.9007, "num_input_tokens_seen": 47192768, "step": 261 }, { "epoch": 0.028681682585730315, "grad_norm": 1.2723063962808452, "learning_rate": 4.9898562995070264e-05, "loss": 0.7573, "num_input_tokens_seen": 47388320, "step": 262 }, { "epoch": 0.02879115465666822, "grad_norm": 1.2027914402288304, "learning_rate": 4.9897787718139774e-05, "loss": 0.7314, "num_input_tokens_seen": 47572672, "step": 263 }, { "epoch": 0.02890062672760612, "grad_norm": 1.276393665368892, "learning_rate": 4.98970094958486e-05, "loss": 0.8009, "num_input_tokens_seen": 47783904, "step": 264 }, { "epoch": 0.02901009879854402, "grad_norm": 1.267165717938441, "learning_rate": 4.98962283282888e-05, "loss": 0.6173, "num_input_tokens_seen": 47981248, "step": 265 }, { "epoch": 0.029119570869481925, "grad_norm": 1.123672921207357, "learning_rate": 4.989544421555278e-05, "loss": 0.7067, "num_input_tokens_seen": 48189792, "step": 266 }, { "epoch": 0.029229042940419826, "grad_norm": 1.1009516002781823, "learning_rate": 4.989465715773331e-05, "loss": 0.724, "num_input_tokens_seen": 48370560, "step": 267 }, { "epoch": 0.029338515011357726, "grad_norm": 1.2609093493943713, "learning_rate": 4.989386715492347e-05, "loss": 0.7957, "num_input_tokens_seen": 48534528, "step": 268 }, { "epoch": 0.02944798708229563, "grad_norm": 1.3028893328561522, "learning_rate": 4.9893074207216745e-05, "loss": 0.7524, "num_input_tokens_seen": 48705888, "step": 269 }, { "epoch": 0.02955745915323353, "grad_norm": 1.1488976108325306, "learning_rate": 4.989227831470692e-05, "loss": 0.5817, "num_input_tokens_seen": 48887104, "step": 270 }, { "epoch": 0.029666931224171432, "grad_norm": 1.1117681024551747, "learning_rate": 4.989147947748817e-05, "loss": 0.6459, "num_input_tokens_seen": 49045472, "step": 271 }, { "epoch": 0.029776403295109336, "grad_norm": 1.220128233767854, "learning_rate": 4.989067769565498e-05, "loss": 0.7373, "num_input_tokens_seen": 49247744, "step": 272 }, { "epoch": 0.029885875366047237, "grad_norm": 1.255122892814321, "learning_rate": 4.9889872969302195e-05, "loss": 0.6751, "num_input_tokens_seen": 49440160, "step": 273 }, { "epoch": 0.029995347436985138, "grad_norm": 1.1315785494690456, "learning_rate": 4.988906529852502e-05, "loss": 0.6307, "num_input_tokens_seen": 49644224, "step": 274 }, { "epoch": 0.030104819507923042, "grad_norm": 1.2651994719818682, "learning_rate": 4.9888254683419e-05, "loss": 0.8009, "num_input_tokens_seen": 49827008, "step": 275 }, { "epoch": 0.030214291578860943, "grad_norm": 1.4032940208398472, "learning_rate": 4.988744112408003e-05, "loss": 0.7892, "num_input_tokens_seen": 49971264, "step": 276 }, { "epoch": 0.030323763649798847, "grad_norm": 1.2247778671654364, "learning_rate": 4.9886624620604354e-05, "loss": 0.6269, "num_input_tokens_seen": 50165920, "step": 277 }, { "epoch": 0.030433235720736748, "grad_norm": 1.301488185927137, "learning_rate": 4.9885805173088563e-05, "loss": 0.659, "num_input_tokens_seen": 50359456, "step": 278 }, { "epoch": 0.030542707791674648, "grad_norm": 1.3276435428176037, "learning_rate": 4.988498278162959e-05, "loss": 0.8028, "num_input_tokens_seen": 50552320, "step": 279 }, { "epoch": 0.030652179862612552, "grad_norm": 1.4044970713225344, "learning_rate": 4.988415744632472e-05, "loss": 0.7648, "num_input_tokens_seen": 50724128, "step": 280 }, { "epoch": 0.030761651933550453, "grad_norm": 1.1980381226818484, "learning_rate": 4.9883329167271595e-05, "loss": 0.6505, "num_input_tokens_seen": 50906912, "step": 281 }, { "epoch": 0.030871124004488354, "grad_norm": 1.2422600491048768, "learning_rate": 4.988249794456821e-05, "loss": 0.8363, "num_input_tokens_seen": 51079392, "step": 282 }, { "epoch": 0.030980596075426258, "grad_norm": 1.341817175477006, "learning_rate": 4.988166377831288e-05, "loss": 0.7892, "num_input_tokens_seen": 51275840, "step": 283 }, { "epoch": 0.03109006814636416, "grad_norm": 1.3366942911234634, "learning_rate": 4.988082666860429e-05, "loss": 0.7993, "num_input_tokens_seen": 51464896, "step": 284 }, { "epoch": 0.03119954021730206, "grad_norm": 1.4543506826507941, "learning_rate": 4.9879986615541464e-05, "loss": 1.0127, "num_input_tokens_seen": 51674336, "step": 285 }, { "epoch": 0.031309012288239964, "grad_norm": 1.169815746454706, "learning_rate": 4.987914361922379e-05, "loss": 0.6851, "num_input_tokens_seen": 51849280, "step": 286 }, { "epoch": 0.031418484359177865, "grad_norm": 1.3680953581044277, "learning_rate": 4.9878297679750986e-05, "loss": 0.8241, "num_input_tokens_seen": 52045280, "step": 287 }, { "epoch": 0.031527956430115765, "grad_norm": 1.3444737547883776, "learning_rate": 4.987744879722312e-05, "loss": 0.7814, "num_input_tokens_seen": 52237920, "step": 288 }, { "epoch": 0.031637428501053666, "grad_norm": 1.286449720634428, "learning_rate": 4.987659697174063e-05, "loss": 0.7596, "num_input_tokens_seen": 52413088, "step": 289 }, { "epoch": 0.031746900571991574, "grad_norm": 1.2511854272847438, "learning_rate": 4.987574220340427e-05, "loss": 0.817, "num_input_tokens_seen": 52598336, "step": 290 }, { "epoch": 0.031856372642929474, "grad_norm": 1.2419001408850954, "learning_rate": 4.9874884492315155e-05, "loss": 0.7345, "num_input_tokens_seen": 52771936, "step": 291 }, { "epoch": 0.031965844713867375, "grad_norm": 1.145282693972077, "learning_rate": 4.987402383857477e-05, "loss": 0.6996, "num_input_tokens_seen": 52943296, "step": 292 }, { "epoch": 0.032075316784805276, "grad_norm": 1.433084007505404, "learning_rate": 4.98731602422849e-05, "loss": 1.0966, "num_input_tokens_seen": 53133696, "step": 293 }, { "epoch": 0.03218478885574318, "grad_norm": 1.148897247673468, "learning_rate": 4.9872293703547735e-05, "loss": 0.8492, "num_input_tokens_seen": 53339552, "step": 294 }, { "epoch": 0.03229426092668108, "grad_norm": 1.2915470570384724, "learning_rate": 4.987142422246577e-05, "loss": 0.7442, "num_input_tokens_seen": 53519200, "step": 295 }, { "epoch": 0.032403732997618985, "grad_norm": 1.2387927710639086, "learning_rate": 4.987055179914186e-05, "loss": 0.8712, "num_input_tokens_seen": 53711392, "step": 296 }, { "epoch": 0.032513205068556886, "grad_norm": 1.194623761184304, "learning_rate": 4.9869676433679225e-05, "loss": 0.7391, "num_input_tokens_seen": 53878944, "step": 297 }, { "epoch": 0.032622677139494786, "grad_norm": 1.5045169531645761, "learning_rate": 4.98687981261814e-05, "loss": 0.7223, "num_input_tokens_seen": 54061504, "step": 298 }, { "epoch": 0.03273214921043269, "grad_norm": 1.1700454162683926, "learning_rate": 4.9867916876752306e-05, "loss": 0.6751, "num_input_tokens_seen": 54237120, "step": 299 }, { "epoch": 0.03284162128137059, "grad_norm": 1.3449521564638944, "learning_rate": 4.9867032685496185e-05, "loss": 0.8674, "num_input_tokens_seen": 54426400, "step": 300 }, { "epoch": 0.032951093352308496, "grad_norm": 1.3031536840122362, "learning_rate": 4.986614555251763e-05, "loss": 0.7386, "num_input_tokens_seen": 54585664, "step": 301 }, { "epoch": 0.033060565423246396, "grad_norm": 1.2556425533330426, "learning_rate": 4.98652554779216e-05, "loss": 0.871, "num_input_tokens_seen": 54773600, "step": 302 }, { "epoch": 0.0331700374941843, "grad_norm": 1.2368933182757873, "learning_rate": 4.9864362461813373e-05, "loss": 0.7653, "num_input_tokens_seen": 54960416, "step": 303 }, { "epoch": 0.0332795095651222, "grad_norm": 1.151554212190988, "learning_rate": 4.9863466504298604e-05, "loss": 0.6705, "num_input_tokens_seen": 55148352, "step": 304 }, { "epoch": 0.0333889816360601, "grad_norm": 1.2471428643184332, "learning_rate": 4.9862567605483277e-05, "loss": 0.7387, "num_input_tokens_seen": 55343008, "step": 305 }, { "epoch": 0.033498453706998, "grad_norm": 1.1767630802527322, "learning_rate": 4.986166576547373e-05, "loss": 0.7375, "num_input_tokens_seen": 55535200, "step": 306 }, { "epoch": 0.03360792577793591, "grad_norm": 1.1443740684717993, "learning_rate": 4.9860760984376656e-05, "loss": 0.6705, "num_input_tokens_seen": 55732768, "step": 307 }, { "epoch": 0.03371739784887381, "grad_norm": 1.3209934622915407, "learning_rate": 4.985985326229907e-05, "loss": 0.8133, "num_input_tokens_seen": 55904576, "step": 308 }, { "epoch": 0.03382686991981171, "grad_norm": 1.2601774740116845, "learning_rate": 4.985894259934838e-05, "loss": 0.7343, "num_input_tokens_seen": 56068992, "step": 309 }, { "epoch": 0.03393634199074961, "grad_norm": 1.176101590238498, "learning_rate": 4.98580289956323e-05, "loss": 0.6467, "num_input_tokens_seen": 56259168, "step": 310 }, { "epoch": 0.03404581406168751, "grad_norm": 1.345867263240265, "learning_rate": 4.985711245125891e-05, "loss": 0.7879, "num_input_tokens_seen": 56464576, "step": 311 }, { "epoch": 0.03415528613262541, "grad_norm": 1.2606013874647302, "learning_rate": 4.9856192966336634e-05, "loss": 0.7653, "num_input_tokens_seen": 56620256, "step": 312 }, { "epoch": 0.03426475820356332, "grad_norm": 1.2462876028588312, "learning_rate": 4.985527054097425e-05, "loss": 0.8726, "num_input_tokens_seen": 56830592, "step": 313 }, { "epoch": 0.03437423027450122, "grad_norm": 1.3290626357612387, "learning_rate": 4.985434517528087e-05, "loss": 0.7298, "num_input_tokens_seen": 57016512, "step": 314 }, { "epoch": 0.03448370234543912, "grad_norm": 1.2304029174632443, "learning_rate": 4.985341686936598e-05, "loss": 0.7489, "num_input_tokens_seen": 57160096, "step": 315 }, { "epoch": 0.03459317441637702, "grad_norm": 1.4706401431515475, "learning_rate": 4.9852485623339376e-05, "loss": 0.8126, "num_input_tokens_seen": 57318016, "step": 316 }, { "epoch": 0.03470264648731492, "grad_norm": 1.3214381458789868, "learning_rate": 4.985155143731124e-05, "loss": 0.727, "num_input_tokens_seen": 57530816, "step": 317 }, { "epoch": 0.03481211855825283, "grad_norm": 1.3309534360420159, "learning_rate": 4.985061431139207e-05, "loss": 0.5959, "num_input_tokens_seen": 57644608, "step": 318 }, { "epoch": 0.03492159062919073, "grad_norm": 1.1780759401503405, "learning_rate": 4.9849674245692735e-05, "loss": 0.701, "num_input_tokens_seen": 57848000, "step": 319 }, { "epoch": 0.03503106270012863, "grad_norm": 1.4244923239407739, "learning_rate": 4.9848731240324444e-05, "loss": 0.793, "num_input_tokens_seen": 58049824, "step": 320 }, { "epoch": 0.03514053477106653, "grad_norm": 1.3973502082594418, "learning_rate": 4.984778529539875e-05, "loss": 0.6787, "num_input_tokens_seen": 58230816, "step": 321 }, { "epoch": 0.03525000684200443, "grad_norm": 1.1945119433342848, "learning_rate": 4.984683641102755e-05, "loss": 0.6446, "num_input_tokens_seen": 58373504, "step": 322 }, { "epoch": 0.03535947891294233, "grad_norm": 1.2692012416632752, "learning_rate": 4.984588458732311e-05, "loss": 0.8173, "num_input_tokens_seen": 58537920, "step": 323 }, { "epoch": 0.03546895098388024, "grad_norm": 1.2326143963787772, "learning_rate": 4.984492982439802e-05, "loss": 0.6818, "num_input_tokens_seen": 58722496, "step": 324 }, { "epoch": 0.03557842305481814, "grad_norm": 1.5308261985522558, "learning_rate": 4.984397212236522e-05, "loss": 0.9523, "num_input_tokens_seen": 58920064, "step": 325 }, { "epoch": 0.03568789512575604, "grad_norm": 1.2088151113810222, "learning_rate": 4.984301148133802e-05, "loss": 0.8442, "num_input_tokens_seen": 59106208, "step": 326 }, { "epoch": 0.03579736719669394, "grad_norm": 1.211121821988035, "learning_rate": 4.9842047901430044e-05, "loss": 0.7644, "num_input_tokens_seen": 59300864, "step": 327 }, { "epoch": 0.03590683926763184, "grad_norm": 1.0882803802596297, "learning_rate": 4.98410813827553e-05, "loss": 0.5848, "num_input_tokens_seen": 59449600, "step": 328 }, { "epoch": 0.036016311338569744, "grad_norm": 1.1651982137175938, "learning_rate": 4.984011192542811e-05, "loss": 0.8383, "num_input_tokens_seen": 59623872, "step": 329 }, { "epoch": 0.03612578340950765, "grad_norm": 1.1633255437100112, "learning_rate": 4.983913952956317e-05, "loss": 0.6117, "num_input_tokens_seen": 59810240, "step": 330 }, { "epoch": 0.03623525548044555, "grad_norm": 1.3228628142602232, "learning_rate": 4.983816419527551e-05, "loss": 0.778, "num_input_tokens_seen": 59987200, "step": 331 }, { "epoch": 0.03634472755138345, "grad_norm": 1.2695243952639461, "learning_rate": 4.983718592268051e-05, "loss": 0.9366, "num_input_tokens_seen": 60187680, "step": 332 }, { "epoch": 0.036454199622321354, "grad_norm": 1.3638051073109894, "learning_rate": 4.983620471189389e-05, "loss": 0.867, "num_input_tokens_seen": 60373376, "step": 333 }, { "epoch": 0.036563671693259255, "grad_norm": 1.0659383229275146, "learning_rate": 4.9835220563031726e-05, "loss": 0.6922, "num_input_tokens_seen": 60568032, "step": 334 }, { "epoch": 0.03667314376419716, "grad_norm": 1.1704396931243783, "learning_rate": 4.9834233476210456e-05, "loss": 0.7688, "num_input_tokens_seen": 60769632, "step": 335 }, { "epoch": 0.03678261583513506, "grad_norm": 1.2292017635356394, "learning_rate": 4.9833243451546834e-05, "loss": 0.7368, "num_input_tokens_seen": 60954208, "step": 336 }, { "epoch": 0.036892087906072964, "grad_norm": 1.355771165691354, "learning_rate": 4.9832250489157994e-05, "loss": 0.7211, "num_input_tokens_seen": 61097120, "step": 337 }, { "epoch": 0.037001559977010864, "grad_norm": 1.149985535157496, "learning_rate": 4.983125458916138e-05, "loss": 0.7405, "num_input_tokens_seen": 61266464, "step": 338 }, { "epoch": 0.037111032047948765, "grad_norm": 1.465483340186568, "learning_rate": 4.9830255751674825e-05, "loss": 0.8812, "num_input_tokens_seen": 61469856, "step": 339 }, { "epoch": 0.037220504118886666, "grad_norm": 1.1616895414168138, "learning_rate": 4.982925397681648e-05, "loss": 0.6313, "num_input_tokens_seen": 61642336, "step": 340 }, { "epoch": 0.037329976189824574, "grad_norm": 1.3530234670979289, "learning_rate": 4.982824926470486e-05, "loss": 0.6163, "num_input_tokens_seen": 61794432, "step": 341 }, { "epoch": 0.037439448260762474, "grad_norm": 1.2309877300255225, "learning_rate": 4.982724161545881e-05, "loss": 0.851, "num_input_tokens_seen": 61994912, "step": 342 }, { "epoch": 0.037548920331700375, "grad_norm": 1.165854335856589, "learning_rate": 4.982623102919754e-05, "loss": 0.5694, "num_input_tokens_seen": 62163584, "step": 343 }, { "epoch": 0.037658392402638276, "grad_norm": 1.2877582772458636, "learning_rate": 4.98252175060406e-05, "loss": 0.6746, "num_input_tokens_seen": 62340096, "step": 344 }, { "epoch": 0.037767864473576176, "grad_norm": 1.3084733594486284, "learning_rate": 4.9824201046107885e-05, "loss": 0.7022, "num_input_tokens_seen": 62522880, "step": 345 }, { "epoch": 0.037877336544514084, "grad_norm": 1.4946208506245262, "learning_rate": 4.9823181649519645e-05, "loss": 0.7107, "num_input_tokens_seen": 62707232, "step": 346 }, { "epoch": 0.037986808615451985, "grad_norm": 1.4122702690259603, "learning_rate": 4.9822159316396465e-05, "loss": 0.8567, "num_input_tokens_seen": 62868064, "step": 347 }, { "epoch": 0.038096280686389886, "grad_norm": 1.351113525965865, "learning_rate": 4.9821134046859295e-05, "loss": 0.6395, "num_input_tokens_seen": 63034496, "step": 348 }, { "epoch": 0.038205752757327786, "grad_norm": 1.3495345584872556, "learning_rate": 4.9820105841029416e-05, "loss": 0.7132, "num_input_tokens_seen": 63191072, "step": 349 }, { "epoch": 0.03831522482826569, "grad_norm": 1.2242482700939392, "learning_rate": 4.9819074699028455e-05, "loss": 0.6755, "num_input_tokens_seen": 63329280, "step": 350 }, { "epoch": 0.03842469689920359, "grad_norm": 1.1848727325311514, "learning_rate": 4.981804062097841e-05, "loss": 0.757, "num_input_tokens_seen": 63528864, "step": 351 }, { "epoch": 0.038534168970141496, "grad_norm": 1.2119747026812357, "learning_rate": 4.9817003607001614e-05, "loss": 0.6294, "num_input_tokens_seen": 63730240, "step": 352 }, { "epoch": 0.038643641041079396, "grad_norm": 1.4057230915938506, "learning_rate": 4.981596365722072e-05, "loss": 0.9477, "num_input_tokens_seen": 63911008, "step": 353 }, { "epoch": 0.0387531131120173, "grad_norm": 1.2076062064525301, "learning_rate": 4.981492077175877e-05, "loss": 0.7056, "num_input_tokens_seen": 64113952, "step": 354 }, { "epoch": 0.0388625851829552, "grad_norm": 1.1555517511172717, "learning_rate": 4.9813874950739124e-05, "loss": 0.7086, "num_input_tokens_seen": 64312192, "step": 355 }, { "epoch": 0.0389720572538931, "grad_norm": 1.2747213526647274, "learning_rate": 4.9812826194285515e-05, "loss": 0.7446, "num_input_tokens_seen": 64516032, "step": 356 }, { "epoch": 0.039081529324831, "grad_norm": 1.214009283561341, "learning_rate": 4.9811774502522e-05, "loss": 0.6315, "num_input_tokens_seen": 64714272, "step": 357 }, { "epoch": 0.03919100139576891, "grad_norm": 1.1722387639355545, "learning_rate": 4.9810719875573e-05, "loss": 0.6953, "num_input_tokens_seen": 64905120, "step": 358 }, { "epoch": 0.03930047346670681, "grad_norm": 1.3651850395402259, "learning_rate": 4.980966231356326e-05, "loss": 0.9811, "num_input_tokens_seen": 65097760, "step": 359 }, { "epoch": 0.03940994553764471, "grad_norm": 1.389781481766964, "learning_rate": 4.98086018166179e-05, "loss": 0.734, "num_input_tokens_seen": 65270464, "step": 360 }, { "epoch": 0.03951941760858261, "grad_norm": 1.2461416301078714, "learning_rate": 4.980753838486236e-05, "loss": 0.6851, "num_input_tokens_seen": 65447872, "step": 361 }, { "epoch": 0.03962888967952051, "grad_norm": 1.1248373337870143, "learning_rate": 4.980647201842247e-05, "loss": 0.721, "num_input_tokens_seen": 65661344, "step": 362 }, { "epoch": 0.03973836175045842, "grad_norm": 1.4575673771198345, "learning_rate": 4.980540271742435e-05, "loss": 0.7708, "num_input_tokens_seen": 65852192, "step": 363 }, { "epoch": 0.03984783382139632, "grad_norm": 1.26735202083492, "learning_rate": 4.980433048199451e-05, "loss": 0.5962, "num_input_tokens_seen": 66034304, "step": 364 }, { "epoch": 0.03995730589233422, "grad_norm": 1.2587341343960408, "learning_rate": 4.98032553122598e-05, "loss": 0.7858, "num_input_tokens_seen": 66215296, "step": 365 }, { "epoch": 0.04006677796327212, "grad_norm": 1.3417287306715895, "learning_rate": 4.98021772083474e-05, "loss": 0.8307, "num_input_tokens_seen": 66372320, "step": 366 }, { "epoch": 0.04017625003421002, "grad_norm": 1.241964356155135, "learning_rate": 4.980109617038484e-05, "loss": 0.8013, "num_input_tokens_seen": 66570560, "step": 367 }, { "epoch": 0.04028572210514792, "grad_norm": 1.3731424181757574, "learning_rate": 4.980001219850002e-05, "loss": 0.9296, "num_input_tokens_seen": 66772160, "step": 368 }, { "epoch": 0.04039519417608583, "grad_norm": 1.2230850296623865, "learning_rate": 4.979892529282117e-05, "loss": 0.7534, "num_input_tokens_seen": 66947104, "step": 369 }, { "epoch": 0.04050466624702373, "grad_norm": 1.2586696855214523, "learning_rate": 4.979783545347686e-05, "loss": 0.8313, "num_input_tokens_seen": 67143328, "step": 370 }, { "epoch": 0.04061413831796163, "grad_norm": 1.2508590130814083, "learning_rate": 4.9796742680596034e-05, "loss": 0.5908, "num_input_tokens_seen": 67328576, "step": 371 }, { "epoch": 0.04072361038889953, "grad_norm": 1.2909696400542299, "learning_rate": 4.9795646974307936e-05, "loss": 0.6283, "num_input_tokens_seen": 67503968, "step": 372 }, { "epoch": 0.04083308245983743, "grad_norm": 1.1881689897688337, "learning_rate": 4.979454833474221e-05, "loss": 0.5829, "num_input_tokens_seen": 67653600, "step": 373 }, { "epoch": 0.04094255453077534, "grad_norm": 1.2759572007645803, "learning_rate": 4.9793446762028816e-05, "loss": 0.7869, "num_input_tokens_seen": 67869312, "step": 374 }, { "epoch": 0.04105202660171324, "grad_norm": 1.1344336124126786, "learning_rate": 4.9792342256298064e-05, "loss": 0.6395, "num_input_tokens_seen": 68077856, "step": 375 }, { "epoch": 0.04116149867265114, "grad_norm": 1.3911581410451872, "learning_rate": 4.979123481768062e-05, "loss": 0.9702, "num_input_tokens_seen": 68269376, "step": 376 }, { "epoch": 0.04127097074358904, "grad_norm": 1.2514068316209317, "learning_rate": 4.979012444630748e-05, "loss": 0.6403, "num_input_tokens_seen": 68445664, "step": 377 }, { "epoch": 0.04138044281452694, "grad_norm": 1.2438801203765992, "learning_rate": 4.978901114231003e-05, "loss": 0.6456, "num_input_tokens_seen": 68643232, "step": 378 }, { "epoch": 0.04148991488546484, "grad_norm": 1.2697865860263406, "learning_rate": 4.978789490581993e-05, "loss": 0.589, "num_input_tokens_seen": 68792640, "step": 379 }, { "epoch": 0.04159938695640275, "grad_norm": 1.2117188916676658, "learning_rate": 4.978677573696926e-05, "loss": 0.7765, "num_input_tokens_seen": 68967584, "step": 380 }, { "epoch": 0.04170885902734065, "grad_norm": 1.2426002824253441, "learning_rate": 4.978565363589041e-05, "loss": 0.7147, "num_input_tokens_seen": 69158656, "step": 381 }, { "epoch": 0.04181833109827855, "grad_norm": 1.2314611883145954, "learning_rate": 4.97845286027161e-05, "loss": 0.5852, "num_input_tokens_seen": 69353984, "step": 382 }, { "epoch": 0.04192780316921645, "grad_norm": 1.4894521517666635, "learning_rate": 4.978340063757945e-05, "loss": 1.0631, "num_input_tokens_seen": 69513472, "step": 383 }, { "epoch": 0.042037275240154354, "grad_norm": 1.3309451958674974, "learning_rate": 4.978226974061388e-05, "loss": 0.8497, "num_input_tokens_seen": 69715744, "step": 384 }, { "epoch": 0.042146747311092254, "grad_norm": 1.1721790682174904, "learning_rate": 4.978113591195317e-05, "loss": 0.7276, "num_input_tokens_seen": 69892704, "step": 385 }, { "epoch": 0.04225621938203016, "grad_norm": 1.3413446322259721, "learning_rate": 4.9779999151731456e-05, "loss": 0.8139, "num_input_tokens_seen": 70064288, "step": 386 }, { "epoch": 0.04236569145296806, "grad_norm": 1.200805409671764, "learning_rate": 4.977885946008322e-05, "loss": 0.6561, "num_input_tokens_seen": 70246848, "step": 387 }, { "epoch": 0.042475163523905964, "grad_norm": 1.1915468727045486, "learning_rate": 4.977771683714327e-05, "loss": 0.8013, "num_input_tokens_seen": 70442176, "step": 388 }, { "epoch": 0.042584635594843864, "grad_norm": 1.2839875168119266, "learning_rate": 4.9776571283046794e-05, "loss": 0.65, "num_input_tokens_seen": 70567616, "step": 389 }, { "epoch": 0.042694107665781765, "grad_norm": 1.312239212285397, "learning_rate": 4.977542279792929e-05, "loss": 0.7558, "num_input_tokens_seen": 70780192, "step": 390 }, { "epoch": 0.04280357973671967, "grad_norm": 1.2041577452138825, "learning_rate": 4.9774271381926644e-05, "loss": 0.5578, "num_input_tokens_seen": 70967680, "step": 391 }, { "epoch": 0.04291305180765757, "grad_norm": 1.1542764264924168, "learning_rate": 4.977311703517504e-05, "loss": 0.6114, "num_input_tokens_seen": 71117312, "step": 392 }, { "epoch": 0.043022523878595474, "grad_norm": 1.381836411978165, "learning_rate": 4.977195975781106e-05, "loss": 0.7356, "num_input_tokens_seen": 71279712, "step": 393 }, { "epoch": 0.043131995949533375, "grad_norm": 1.4904673782919784, "learning_rate": 4.977079954997159e-05, "loss": 0.9309, "num_input_tokens_seen": 71480192, "step": 394 }, { "epoch": 0.043241468020471276, "grad_norm": 1.1434260946759527, "learning_rate": 4.9769636411793894e-05, "loss": 0.5393, "num_input_tokens_seen": 71618176, "step": 395 }, { "epoch": 0.043350940091409176, "grad_norm": 1.2110931788632529, "learning_rate": 4.976847034341555e-05, "loss": 0.6832, "num_input_tokens_seen": 71796256, "step": 396 }, { "epoch": 0.043460412162347084, "grad_norm": 1.4374812581043006, "learning_rate": 4.976730134497453e-05, "loss": 0.9092, "num_input_tokens_seen": 72005472, "step": 397 }, { "epoch": 0.043569884233284985, "grad_norm": 1.2451390642226412, "learning_rate": 4.97661294166091e-05, "loss": 0.7243, "num_input_tokens_seen": 72199904, "step": 398 }, { "epoch": 0.043679356304222886, "grad_norm": 1.2014433763284218, "learning_rate": 4.97649545584579e-05, "loss": 0.808, "num_input_tokens_seen": 72400384, "step": 399 }, { "epoch": 0.043788828375160786, "grad_norm": 1.2762535645886295, "learning_rate": 4.976377677065992e-05, "loss": 0.8544, "num_input_tokens_seen": 72584288, "step": 400 }, { "epoch": 0.04389830044609869, "grad_norm": 1.4835513590481024, "learning_rate": 4.9762596053354496e-05, "loss": 0.8907, "num_input_tokens_seen": 72768416, "step": 401 }, { "epoch": 0.04400777251703659, "grad_norm": 1.2900772262345568, "learning_rate": 4.976141240668129e-05, "loss": 0.7397, "num_input_tokens_seen": 72907072, "step": 402 }, { "epoch": 0.044117244587974495, "grad_norm": 1.200652120145898, "learning_rate": 4.976022583078033e-05, "loss": 0.6544, "num_input_tokens_seen": 73077088, "step": 403 }, { "epoch": 0.044226716658912396, "grad_norm": 1.156125139247784, "learning_rate": 4.975903632579199e-05, "loss": 0.8024, "num_input_tokens_seen": 73252032, "step": 404 }, { "epoch": 0.0443361887298503, "grad_norm": 1.1928257724742113, "learning_rate": 4.9757843891856986e-05, "loss": 0.7861, "num_input_tokens_seen": 73444672, "step": 405 }, { "epoch": 0.0444456608007882, "grad_norm": 1.1931692618167635, "learning_rate": 4.975664852911638e-05, "loss": 0.594, "num_input_tokens_seen": 73597440, "step": 406 }, { "epoch": 0.0445551328717261, "grad_norm": 1.2925863939552813, "learning_rate": 4.9755450237711575e-05, "loss": 0.8075, "num_input_tokens_seen": 73773504, "step": 407 }, { "epoch": 0.044664604942664006, "grad_norm": 1.4103032508376063, "learning_rate": 4.975424901778434e-05, "loss": 0.7298, "num_input_tokens_seen": 73987200, "step": 408 }, { "epoch": 0.04477407701360191, "grad_norm": 1.231666248658711, "learning_rate": 4.975304486947676e-05, "loss": 0.6233, "num_input_tokens_seen": 74141088, "step": 409 }, { "epoch": 0.04488354908453981, "grad_norm": 1.2021138289278537, "learning_rate": 4.975183779293129e-05, "loss": 0.7581, "num_input_tokens_seen": 74304608, "step": 410 }, { "epoch": 0.04499302115547771, "grad_norm": 1.335698201124187, "learning_rate": 4.975062778829073e-05, "loss": 0.7161, "num_input_tokens_seen": 74489408, "step": 411 }, { "epoch": 0.04510249322641561, "grad_norm": 1.2748633517172567, "learning_rate": 4.9749414855698216e-05, "loss": 0.6762, "num_input_tokens_seen": 74692800, "step": 412 }, { "epoch": 0.04521196529735351, "grad_norm": 1.2898234577766194, "learning_rate": 4.974819899529725e-05, "loss": 0.6548, "num_input_tokens_seen": 74869984, "step": 413 }, { "epoch": 0.04532143736829142, "grad_norm": 1.2797452883312295, "learning_rate": 4.9746980207231634e-05, "loss": 0.7919, "num_input_tokens_seen": 75069792, "step": 414 }, { "epoch": 0.04543090943922932, "grad_norm": 1.3125094004703117, "learning_rate": 4.9745758491645576e-05, "loss": 0.7385, "num_input_tokens_seen": 75261536, "step": 415 }, { "epoch": 0.04554038151016722, "grad_norm": 1.1094454890678003, "learning_rate": 4.97445338486836e-05, "loss": 0.6277, "num_input_tokens_seen": 75421920, "step": 416 }, { "epoch": 0.04564985358110512, "grad_norm": 1.3827649124491792, "learning_rate": 4.974330627849057e-05, "loss": 0.9815, "num_input_tokens_seen": 75626880, "step": 417 }, { "epoch": 0.04575932565204302, "grad_norm": 1.149022375116371, "learning_rate": 4.974207578121171e-05, "loss": 0.7137, "num_input_tokens_seen": 75814592, "step": 418 }, { "epoch": 0.04586879772298093, "grad_norm": 1.220575383941238, "learning_rate": 4.974084235699258e-05, "loss": 0.6356, "num_input_tokens_seen": 75995584, "step": 419 }, { "epoch": 0.04597826979391883, "grad_norm": 1.16018481492349, "learning_rate": 4.973960600597909e-05, "loss": 0.6455, "num_input_tokens_seen": 76195840, "step": 420 }, { "epoch": 0.04608774186485673, "grad_norm": 1.1156656530330245, "learning_rate": 4.973836672831751e-05, "loss": 0.6132, "num_input_tokens_seen": 76358912, "step": 421 }, { "epoch": 0.04619721393579463, "grad_norm": 1.242815446780603, "learning_rate": 4.973712452415444e-05, "loss": 0.7666, "num_input_tokens_seen": 76554016, "step": 422 }, { "epoch": 0.04630668600673253, "grad_norm": 1.3795927678350834, "learning_rate": 4.9735879393636826e-05, "loss": 0.7632, "num_input_tokens_seen": 76725152, "step": 423 }, { "epoch": 0.04641615807767043, "grad_norm": 1.1096753533186534, "learning_rate": 4.9734631336911964e-05, "loss": 0.5968, "num_input_tokens_seen": 76925856, "step": 424 }, { "epoch": 0.04652563014860834, "grad_norm": 1.3731264007241732, "learning_rate": 4.97333803541275e-05, "loss": 0.7795, "num_input_tokens_seen": 77099680, "step": 425 }, { "epoch": 0.04663510221954624, "grad_norm": 1.2315516475111248, "learning_rate": 4.973212644543143e-05, "loss": 0.8507, "num_input_tokens_seen": 77301280, "step": 426 }, { "epoch": 0.04674457429048414, "grad_norm": 1.2928011229496035, "learning_rate": 4.973086961097207e-05, "loss": 0.6908, "num_input_tokens_seen": 77466816, "step": 427 }, { "epoch": 0.04685404636142204, "grad_norm": 1.3160059289074018, "learning_rate": 4.972960985089812e-05, "loss": 0.7843, "num_input_tokens_seen": 77651392, "step": 428 }, { "epoch": 0.04696351843235994, "grad_norm": 1.381453196221142, "learning_rate": 4.97283471653586e-05, "loss": 0.741, "num_input_tokens_seen": 77816256, "step": 429 }, { "epoch": 0.04707299050329784, "grad_norm": 1.2848142963212599, "learning_rate": 4.972708155450288e-05, "loss": 0.8123, "num_input_tokens_seen": 77988960, "step": 430 }, { "epoch": 0.04718246257423575, "grad_norm": 1.2320781052992047, "learning_rate": 4.972581301848068e-05, "loss": 0.7105, "num_input_tokens_seen": 78155392, "step": 431 }, { "epoch": 0.04729193464517365, "grad_norm": 1.1839771216411668, "learning_rate": 4.972454155744207e-05, "loss": 0.6943, "num_input_tokens_seen": 78351840, "step": 432 }, { "epoch": 0.04740140671611155, "grad_norm": 1.2871689731999223, "learning_rate": 4.9723267171537455e-05, "loss": 0.8766, "num_input_tokens_seen": 78511328, "step": 433 }, { "epoch": 0.04751087878704945, "grad_norm": 1.24918276774943, "learning_rate": 4.9721989860917605e-05, "loss": 0.7637, "num_input_tokens_seen": 78708000, "step": 434 }, { "epoch": 0.047620350857987354, "grad_norm": 1.224305378296055, "learning_rate": 4.9720709625733614e-05, "loss": 0.6539, "num_input_tokens_seen": 78898400, "step": 435 }, { "epoch": 0.04772982292892526, "grad_norm": 1.140988696915777, "learning_rate": 4.971942646613693e-05, "loss": 0.5621, "num_input_tokens_seen": 79078272, "step": 436 }, { "epoch": 0.04783929499986316, "grad_norm": 1.356373465758899, "learning_rate": 4.971814038227934e-05, "loss": 0.757, "num_input_tokens_seen": 79252544, "step": 437 }, { "epoch": 0.04794876707080106, "grad_norm": 1.198563387350333, "learning_rate": 4.971685137431301e-05, "loss": 0.5888, "num_input_tokens_seen": 79420096, "step": 438 }, { "epoch": 0.04805823914173896, "grad_norm": 1.2042754976575845, "learning_rate": 4.971555944239041e-05, "loss": 0.7311, "num_input_tokens_seen": 79601760, "step": 439 }, { "epoch": 0.048167711212676864, "grad_norm": 1.3334662798325192, "learning_rate": 4.971426458666437e-05, "loss": 0.8492, "num_input_tokens_seen": 79765056, "step": 440 }, { "epoch": 0.048277183283614765, "grad_norm": 1.3399496231288672, "learning_rate": 4.9712966807288085e-05, "loss": 0.6302, "num_input_tokens_seen": 79953664, "step": 441 }, { "epoch": 0.04838665535455267, "grad_norm": 1.2187090157684715, "learning_rate": 4.971166610441507e-05, "loss": 0.7277, "num_input_tokens_seen": 80133536, "step": 442 }, { "epoch": 0.04849612742549057, "grad_norm": 1.3320444056065437, "learning_rate": 4.9710362478199186e-05, "loss": 0.7773, "num_input_tokens_seen": 80327296, "step": 443 }, { "epoch": 0.048605599496428474, "grad_norm": 1.2876784784254371, "learning_rate": 4.9709055928794664e-05, "loss": 1.0182, "num_input_tokens_seen": 80531136, "step": 444 }, { "epoch": 0.048715071567366375, "grad_norm": 1.1137770770000033, "learning_rate": 4.970774645635606e-05, "loss": 0.6257, "num_input_tokens_seen": 80725120, "step": 445 }, { "epoch": 0.048824543638304276, "grad_norm": 1.3695275471490918, "learning_rate": 4.970643406103828e-05, "loss": 0.9036, "num_input_tokens_seen": 80884384, "step": 446 }, { "epoch": 0.048934015709242176, "grad_norm": 1.2203794004342803, "learning_rate": 4.970511874299659e-05, "loss": 0.7362, "num_input_tokens_seen": 81054624, "step": 447 }, { "epoch": 0.049043487780180084, "grad_norm": 1.339326719837054, "learning_rate": 4.9703800502386574e-05, "loss": 0.7956, "num_input_tokens_seen": 81238528, "step": 448 }, { "epoch": 0.049152959851117985, "grad_norm": 1.1636567400677655, "learning_rate": 4.970247933936418e-05, "loss": 0.5557, "num_input_tokens_seen": 81419968, "step": 449 }, { "epoch": 0.049262431922055885, "grad_norm": 1.2588433253250597, "learning_rate": 4.970115525408572e-05, "loss": 0.6252, "num_input_tokens_seen": 81599392, "step": 450 }, { "epoch": 0.049371903992993786, "grad_norm": 1.3163463907187487, "learning_rate": 4.96998282467078e-05, "loss": 0.76, "num_input_tokens_seen": 81751040, "step": 451 }, { "epoch": 0.04948137606393169, "grad_norm": 1.4627118275143405, "learning_rate": 4.969849831738742e-05, "loss": 0.9865, "num_input_tokens_seen": 81934944, "step": 452 }, { "epoch": 0.049590848134869595, "grad_norm": 1.1828924940666528, "learning_rate": 4.96971654662819e-05, "loss": 0.6187, "num_input_tokens_seen": 82089056, "step": 453 }, { "epoch": 0.049700320205807495, "grad_norm": 1.1559826309786794, "learning_rate": 4.969582969354892e-05, "loss": 0.6141, "num_input_tokens_seen": 82282816, "step": 454 }, { "epoch": 0.049809792276745396, "grad_norm": 1.2812394862790335, "learning_rate": 4.96944909993465e-05, "loss": 0.7181, "num_input_tokens_seen": 82441408, "step": 455 }, { "epoch": 0.0499192643476833, "grad_norm": 1.1770025354972242, "learning_rate": 4.969314938383301e-05, "loss": 0.8364, "num_input_tokens_seen": 82648160, "step": 456 }, { "epoch": 0.0500287364186212, "grad_norm": 1.3396659710886956, "learning_rate": 4.9691804847167146e-05, "loss": 0.6264, "num_input_tokens_seen": 82783232, "step": 457 }, { "epoch": 0.0501382084895591, "grad_norm": 1.1734990374281211, "learning_rate": 4.969045738950797e-05, "loss": 0.5421, "num_input_tokens_seen": 82939584, "step": 458 }, { "epoch": 0.050247680560497006, "grad_norm": 1.2176875839010162, "learning_rate": 4.968910701101489e-05, "loss": 0.8048, "num_input_tokens_seen": 83106464, "step": 459 }, { "epoch": 0.05035715263143491, "grad_norm": 1.4112865958186573, "learning_rate": 4.968775371184764e-05, "loss": 0.7047, "num_input_tokens_seen": 83278048, "step": 460 }, { "epoch": 0.05046662470237281, "grad_norm": 1.221565564479385, "learning_rate": 4.968639749216632e-05, "loss": 0.7417, "num_input_tokens_seen": 83439776, "step": 461 }, { "epoch": 0.05057609677331071, "grad_norm": 1.3664550126951365, "learning_rate": 4.968503835213138e-05, "loss": 0.9568, "num_input_tokens_seen": 83641600, "step": 462 }, { "epoch": 0.05068556884424861, "grad_norm": 1.2083154655100108, "learning_rate": 4.9683676291903594e-05, "loss": 0.676, "num_input_tokens_seen": 83836032, "step": 463 }, { "epoch": 0.050795040915186516, "grad_norm": 1.214547516745658, "learning_rate": 4.968231131164408e-05, "loss": 0.6472, "num_input_tokens_seen": 84042560, "step": 464 }, { "epoch": 0.05090451298612442, "grad_norm": 1.2887102512775095, "learning_rate": 4.968094341151433e-05, "loss": 0.7965, "num_input_tokens_seen": 84230048, "step": 465 }, { "epoch": 0.05101398505706232, "grad_norm": 1.1261028606056307, "learning_rate": 4.967957259167615e-05, "loss": 0.7549, "num_input_tokens_seen": 84424480, "step": 466 }, { "epoch": 0.05112345712800022, "grad_norm": 1.2550900774237321, "learning_rate": 4.967819885229171e-05, "loss": 0.8055, "num_input_tokens_seen": 84596736, "step": 467 }, { "epoch": 0.05123292919893812, "grad_norm": 1.2014939783447853, "learning_rate": 4.967682219352353e-05, "loss": 0.789, "num_input_tokens_seen": 84760256, "step": 468 }, { "epoch": 0.05134240126987602, "grad_norm": 1.1361987310746595, "learning_rate": 4.967544261553445e-05, "loss": 0.6364, "num_input_tokens_seen": 84909664, "step": 469 }, { "epoch": 0.05145187334081393, "grad_norm": 1.2186495910755282, "learning_rate": 4.967406011848769e-05, "loss": 0.715, "num_input_tokens_seen": 85081920, "step": 470 }, { "epoch": 0.05156134541175183, "grad_norm": 1.3481396236696488, "learning_rate": 4.967267470254678e-05, "loss": 0.8409, "num_input_tokens_seen": 85282848, "step": 471 }, { "epoch": 0.05167081748268973, "grad_norm": 1.1130553970123127, "learning_rate": 4.967128636787562e-05, "loss": 0.6408, "num_input_tokens_seen": 85435168, "step": 472 }, { "epoch": 0.05178028955362763, "grad_norm": 1.2831634097289402, "learning_rate": 4.9669895114638445e-05, "loss": 0.7552, "num_input_tokens_seen": 85615936, "step": 473 }, { "epoch": 0.05188976162456553, "grad_norm": 1.156044091113109, "learning_rate": 4.966850094299984e-05, "loss": 0.74, "num_input_tokens_seen": 85806112, "step": 474 }, { "epoch": 0.05199923369550343, "grad_norm": 1.2111151288099076, "learning_rate": 4.966710385312473e-05, "loss": 0.7766, "num_input_tokens_seen": 86004352, "step": 475 }, { "epoch": 0.05210870576644134, "grad_norm": 1.3218997240572548, "learning_rate": 4.966570384517839e-05, "loss": 0.9263, "num_input_tokens_seen": 86220064, "step": 476 }, { "epoch": 0.05221817783737924, "grad_norm": 1.3821811851307186, "learning_rate": 4.966430091932645e-05, "loss": 0.8275, "num_input_tokens_seen": 86406208, "step": 477 }, { "epoch": 0.05232764990831714, "grad_norm": 1.1432745482084243, "learning_rate": 4.9662895075734844e-05, "loss": 0.5834, "num_input_tokens_seen": 86605792, "step": 478 }, { "epoch": 0.05243712197925504, "grad_norm": 1.0993033435537147, "learning_rate": 4.9661486314569904e-05, "loss": 0.7686, "num_input_tokens_seen": 86778048, "step": 479 }, { "epoch": 0.05254659405019294, "grad_norm": 1.1109290722233565, "learning_rate": 4.966007463599828e-05, "loss": 0.645, "num_input_tokens_seen": 86978304, "step": 480 }, { "epoch": 0.05265606612113085, "grad_norm": 1.1978019272407063, "learning_rate": 4.9658660040186967e-05, "loss": 0.6926, "num_input_tokens_seen": 87170720, "step": 481 }, { "epoch": 0.05276553819206875, "grad_norm": 1.3149854197255881, "learning_rate": 4.965724252730331e-05, "loss": 0.8736, "num_input_tokens_seen": 87327968, "step": 482 }, { "epoch": 0.05287501026300665, "grad_norm": 1.2373697193039621, "learning_rate": 4.9655822097515e-05, "loss": 0.7194, "num_input_tokens_seen": 87485888, "step": 483 }, { "epoch": 0.05298448233394455, "grad_norm": 1.3246633857823369, "learning_rate": 4.9654398750990075e-05, "loss": 0.9113, "num_input_tokens_seen": 87643808, "step": 484 }, { "epoch": 0.05309395440488245, "grad_norm": 1.0467233120923776, "learning_rate": 4.96529724878969e-05, "loss": 0.6416, "num_input_tokens_seen": 87852128, "step": 485 }, { "epoch": 0.05320342647582035, "grad_norm": 1.325313869224656, "learning_rate": 4.9651543308404217e-05, "loss": 0.707, "num_input_tokens_seen": 88062912, "step": 486 }, { "epoch": 0.05331289854675826, "grad_norm": 1.3522424035136646, "learning_rate": 4.9650111212681073e-05, "loss": 0.7899, "num_input_tokens_seen": 88247936, "step": 487 }, { "epoch": 0.05342237061769616, "grad_norm": 1.4227289004265313, "learning_rate": 4.96486762008969e-05, "loss": 0.8035, "num_input_tokens_seen": 88433184, "step": 488 }, { "epoch": 0.05353184268863406, "grad_norm": 1.2273310786320994, "learning_rate": 4.964723827322147e-05, "loss": 0.6418, "num_input_tokens_seen": 88586848, "step": 489 }, { "epoch": 0.05364131475957196, "grad_norm": 1.1809519881293233, "learning_rate": 4.9645797429824846e-05, "loss": 0.6706, "num_input_tokens_seen": 88767392, "step": 490 }, { "epoch": 0.053750786830509864, "grad_norm": 1.4211338333256656, "learning_rate": 4.964435367087751e-05, "loss": 0.9752, "num_input_tokens_seen": 88959808, "step": 491 }, { "epoch": 0.053860258901447765, "grad_norm": 1.1989822402987793, "learning_rate": 4.9642906996550256e-05, "loss": 0.8196, "num_input_tokens_seen": 89153568, "step": 492 }, { "epoch": 0.05396973097238567, "grad_norm": 1.303699919121861, "learning_rate": 4.96414574070142e-05, "loss": 0.8177, "num_input_tokens_seen": 89306112, "step": 493 }, { "epoch": 0.05407920304332357, "grad_norm": 1.2540891519885076, "learning_rate": 4.964000490244084e-05, "loss": 0.6267, "num_input_tokens_seen": 89473440, "step": 494 }, { "epoch": 0.054188675114261474, "grad_norm": 1.2534911517366185, "learning_rate": 4.963854948300201e-05, "loss": 0.7453, "num_input_tokens_seen": 89633376, "step": 495 }, { "epoch": 0.054298147185199375, "grad_norm": 1.3473382104624632, "learning_rate": 4.963709114886988e-05, "loss": 0.7179, "num_input_tokens_seen": 89796224, "step": 496 }, { "epoch": 0.054407619256137275, "grad_norm": 1.3032269554596096, "learning_rate": 4.9635629900216956e-05, "loss": 0.707, "num_input_tokens_seen": 89959296, "step": 497 }, { "epoch": 0.05451709132707518, "grad_norm": 1.2329024694222008, "learning_rate": 4.963416573721611e-05, "loss": 0.6263, "num_input_tokens_seen": 90131776, "step": 498 }, { "epoch": 0.054626563398013084, "grad_norm": 1.2014511155983865, "learning_rate": 4.9632698660040556e-05, "loss": 0.6307, "num_input_tokens_seen": 90306720, "step": 499 }, { "epoch": 0.054736035468950985, "grad_norm": 1.2426576984041653, "learning_rate": 4.963122866886384e-05, "loss": 0.7336, "num_input_tokens_seen": 90500480, "step": 500 }, { "epoch": 0.054845507539888885, "grad_norm": 1.2723004934553266, "learning_rate": 4.9629755763859855e-05, "loss": 0.8413, "num_input_tokens_seen": 90666688, "step": 501 }, { "epoch": 0.054954979610826786, "grad_norm": 1.3471012907820437, "learning_rate": 4.9628279945202856e-05, "loss": 0.8012, "num_input_tokens_seen": 90863360, "step": 502 }, { "epoch": 0.05506445168176469, "grad_norm": 1.2328606986521993, "learning_rate": 4.962680121306741e-05, "loss": 0.7848, "num_input_tokens_seen": 91072128, "step": 503 }, { "epoch": 0.055173923752702594, "grad_norm": 1.3750287399756314, "learning_rate": 4.962531956762847e-05, "loss": 0.8546, "num_input_tokens_seen": 91237216, "step": 504 }, { "epoch": 0.055283395823640495, "grad_norm": 1.3098890583730216, "learning_rate": 4.9623835009061294e-05, "loss": 0.5563, "num_input_tokens_seen": 91396480, "step": 505 }, { "epoch": 0.055392867894578396, "grad_norm": 1.4117447631668554, "learning_rate": 4.962234753754151e-05, "loss": 1.0451, "num_input_tokens_seen": 91609504, "step": 506 }, { "epoch": 0.0555023399655163, "grad_norm": 1.2457478201531482, "learning_rate": 4.962085715324508e-05, "loss": 0.7494, "num_input_tokens_seen": 91806400, "step": 507 }, { "epoch": 0.0556118120364542, "grad_norm": 1.3290086835688026, "learning_rate": 4.9619363856348324e-05, "loss": 0.7988, "num_input_tokens_seen": 91965216, "step": 508 }, { "epoch": 0.055721284107392105, "grad_norm": 1.286109547560318, "learning_rate": 4.9617867647027876e-05, "loss": 0.6388, "num_input_tokens_seen": 92125824, "step": 509 }, { "epoch": 0.055830756178330006, "grad_norm": 1.2368408875389982, "learning_rate": 4.961636852546075e-05, "loss": 0.6663, "num_input_tokens_seen": 92264256, "step": 510 }, { "epoch": 0.055940228249267906, "grad_norm": 1.1053227843846107, "learning_rate": 4.961486649182429e-05, "loss": 0.6275, "num_input_tokens_seen": 92449952, "step": 511 }, { "epoch": 0.05604970032020581, "grad_norm": 1.3391077450171411, "learning_rate": 4.961336154629618e-05, "loss": 0.9085, "num_input_tokens_seen": 92635200, "step": 512 }, { "epoch": 0.05615917239114371, "grad_norm": 1.0747433023255157, "learning_rate": 4.961185368905445e-05, "loss": 0.6674, "num_input_tokens_seen": 92854720, "step": 513 }, { "epoch": 0.05626864446208161, "grad_norm": 1.2264472761520264, "learning_rate": 4.9610342920277475e-05, "loss": 0.8299, "num_input_tokens_seen": 93066848, "step": 514 }, { "epoch": 0.056378116533019516, "grad_norm": 1.2042295661698892, "learning_rate": 4.960882924014398e-05, "loss": 0.6689, "num_input_tokens_seen": 93251648, "step": 515 }, { "epoch": 0.05648758860395742, "grad_norm": 1.2064608117927165, "learning_rate": 4.960731264883304e-05, "loss": 0.5849, "num_input_tokens_seen": 93399040, "step": 516 }, { "epoch": 0.05659706067489532, "grad_norm": 1.2345367518810426, "learning_rate": 4.960579314652405e-05, "loss": 0.689, "num_input_tokens_seen": 93571968, "step": 517 }, { "epoch": 0.05670653274583322, "grad_norm": 1.3525682356680055, "learning_rate": 4.960427073339676e-05, "loss": 1.0275, "num_input_tokens_seen": 93755200, "step": 518 }, { "epoch": 0.05681600481677112, "grad_norm": 1.2105201740403702, "learning_rate": 4.960274540963129e-05, "loss": 0.7858, "num_input_tokens_seen": 93911776, "step": 519 }, { "epoch": 0.05692547688770902, "grad_norm": 1.1440662051164054, "learning_rate": 4.9601217175408064e-05, "loss": 0.6032, "num_input_tokens_seen": 94054016, "step": 520 }, { "epoch": 0.05703494895864693, "grad_norm": 1.293435649183875, "learning_rate": 4.959968603090788e-05, "loss": 0.9303, "num_input_tokens_seen": 94269952, "step": 521 }, { "epoch": 0.05714442102958483, "grad_norm": 1.1848379211805589, "learning_rate": 4.959815197631186e-05, "loss": 0.7497, "num_input_tokens_seen": 94468416, "step": 522 }, { "epoch": 0.05725389310052273, "grad_norm": 1.4218374394077413, "learning_rate": 4.9596615011801486e-05, "loss": 0.5854, "num_input_tokens_seen": 94661280, "step": 523 }, { "epoch": 0.05736336517146063, "grad_norm": 1.3382827518274327, "learning_rate": 4.959507513755858e-05, "loss": 0.6438, "num_input_tokens_seen": 94835776, "step": 524 }, { "epoch": 0.05747283724239853, "grad_norm": 1.3646530638294632, "learning_rate": 4.95935323537653e-05, "loss": 0.8989, "num_input_tokens_seen": 95017216, "step": 525 }, { "epoch": 0.05758230931333644, "grad_norm": 1.2211975073099546, "learning_rate": 4.9591986660604164e-05, "loss": 0.6791, "num_input_tokens_seen": 95220384, "step": 526 }, { "epoch": 0.05769178138427434, "grad_norm": 1.3806699112943515, "learning_rate": 4.959043805825801e-05, "loss": 0.7801, "num_input_tokens_seen": 95403392, "step": 527 }, { "epoch": 0.05780125345521224, "grad_norm": 1.3207649042753284, "learning_rate": 4.958888654691004e-05, "loss": 0.8164, "num_input_tokens_seen": 95595808, "step": 528 }, { "epoch": 0.05791072552615014, "grad_norm": 1.3293664403441656, "learning_rate": 4.95873321267438e-05, "loss": 0.7436, "num_input_tokens_seen": 95775680, "step": 529 }, { "epoch": 0.05802019759708804, "grad_norm": 1.1851265427670206, "learning_rate": 4.958577479794317e-05, "loss": 0.6967, "num_input_tokens_seen": 95961152, "step": 530 }, { "epoch": 0.05812966966802594, "grad_norm": 1.162349182518244, "learning_rate": 4.958421456069239e-05, "loss": 0.6337, "num_input_tokens_seen": 96172384, "step": 531 }, { "epoch": 0.05823914173896385, "grad_norm": 1.1574289651911693, "learning_rate": 4.9582651415176026e-05, "loss": 0.6596, "num_input_tokens_seen": 96363680, "step": 532 }, { "epoch": 0.05834861380990175, "grad_norm": 1.3005525771750652, "learning_rate": 4.958108536157899e-05, "loss": 0.664, "num_input_tokens_seen": 96523392, "step": 533 }, { "epoch": 0.05845808588083965, "grad_norm": 1.302405823508044, "learning_rate": 4.9579516400086545e-05, "loss": 0.687, "num_input_tokens_seen": 96688256, "step": 534 }, { "epoch": 0.05856755795177755, "grad_norm": 1.2646026249544748, "learning_rate": 4.9577944530884295e-05, "loss": 0.6981, "num_input_tokens_seen": 96857824, "step": 535 }, { "epoch": 0.05867703002271545, "grad_norm": 1.263244509472325, "learning_rate": 4.9576369754158194e-05, "loss": 0.6295, "num_input_tokens_seen": 97045312, "step": 536 }, { "epoch": 0.05878650209365335, "grad_norm": 1.4742862170478368, "learning_rate": 4.9574792070094534e-05, "loss": 0.9087, "num_input_tokens_seen": 97227200, "step": 537 }, { "epoch": 0.05889597416459126, "grad_norm": 1.4720544369572752, "learning_rate": 4.9573211478879955e-05, "loss": 0.7898, "num_input_tokens_seen": 97385792, "step": 538 }, { "epoch": 0.05900544623552916, "grad_norm": 1.2699016793556923, "learning_rate": 4.9571627980701426e-05, "loss": 0.63, "num_input_tokens_seen": 97566336, "step": 539 }, { "epoch": 0.05911491830646706, "grad_norm": 1.4325391335992232, "learning_rate": 4.9570041575746285e-05, "loss": 0.6892, "num_input_tokens_seen": 97741504, "step": 540 }, { "epoch": 0.05922439037740496, "grad_norm": 1.3527222877643184, "learning_rate": 4.9568452264202194e-05, "loss": 0.7597, "num_input_tokens_seen": 97925856, "step": 541 }, { "epoch": 0.059333862448342864, "grad_norm": 1.31062378067884, "learning_rate": 4.9566860046257166e-05, "loss": 0.756, "num_input_tokens_seen": 98098560, "step": 542 }, { "epoch": 0.05944333451928077, "grad_norm": 1.318625551833145, "learning_rate": 4.956526492209956e-05, "loss": 0.6924, "num_input_tokens_seen": 98290528, "step": 543 }, { "epoch": 0.05955280659021867, "grad_norm": 1.2371307226663255, "learning_rate": 4.956366689191808e-05, "loss": 0.5497, "num_input_tokens_seen": 98457408, "step": 544 }, { "epoch": 0.05966227866115657, "grad_norm": 1.3231171833973292, "learning_rate": 4.956206595590176e-05, "loss": 0.8472, "num_input_tokens_seen": 98638848, "step": 545 }, { "epoch": 0.059771750732094474, "grad_norm": 1.212281545642788, "learning_rate": 4.9560462114239995e-05, "loss": 0.7207, "num_input_tokens_seen": 98807520, "step": 546 }, { "epoch": 0.059881222803032375, "grad_norm": 1.1346462272296558, "learning_rate": 4.9558855367122505e-05, "loss": 0.6397, "num_input_tokens_seen": 99027936, "step": 547 }, { "epoch": 0.059990694873970275, "grad_norm": 1.1905080943146855, "learning_rate": 4.9557245714739374e-05, "loss": 0.5435, "num_input_tokens_seen": 99191232, "step": 548 }, { "epoch": 0.06010016694490818, "grad_norm": 1.2576022238921627, "learning_rate": 4.955563315728103e-05, "loss": 0.7598, "num_input_tokens_seen": 99371552, "step": 549 }, { "epoch": 0.060209639015846084, "grad_norm": 1.2995079613236615, "learning_rate": 4.955401769493822e-05, "loss": 0.7534, "num_input_tokens_seen": 99544032, "step": 550 }, { "epoch": 0.060319111086783984, "grad_norm": 1.4260527051893648, "learning_rate": 4.9552399327902054e-05, "loss": 0.6839, "num_input_tokens_seen": 99720096, "step": 551 }, { "epoch": 0.060428583157721885, "grad_norm": 1.1883042774063424, "learning_rate": 4.955077805636399e-05, "loss": 0.5825, "num_input_tokens_seen": 99881152, "step": 552 }, { "epoch": 0.060538055228659786, "grad_norm": 1.2686955444976922, "learning_rate": 4.954915388051581e-05, "loss": 0.9622, "num_input_tokens_seen": 100044672, "step": 553 }, { "epoch": 0.060647527299597694, "grad_norm": 1.346623873919876, "learning_rate": 4.954752680054966e-05, "loss": 0.6319, "num_input_tokens_seen": 100227680, "step": 554 }, { "epoch": 0.060756999370535594, "grad_norm": 1.165587789233046, "learning_rate": 4.954589681665801e-05, "loss": 0.7092, "num_input_tokens_seen": 100405312, "step": 555 }, { "epoch": 0.060866471441473495, "grad_norm": 1.30317714778928, "learning_rate": 4.95442639290337e-05, "loss": 0.7141, "num_input_tokens_seen": 100553600, "step": 556 }, { "epoch": 0.060975943512411396, "grad_norm": 1.4728388584561842, "learning_rate": 4.954262813786988e-05, "loss": 0.6965, "num_input_tokens_seen": 100721152, "step": 557 }, { "epoch": 0.061085415583349296, "grad_norm": 1.2233418745181017, "learning_rate": 4.954098944336007e-05, "loss": 0.7073, "num_input_tokens_seen": 100899456, "step": 558 }, { "epoch": 0.0611948876542872, "grad_norm": 1.0650290514784861, "learning_rate": 4.953934784569812e-05, "loss": 0.8067, "num_input_tokens_seen": 101121440, "step": 559 }, { "epoch": 0.061304359725225105, "grad_norm": 1.1240339900100607, "learning_rate": 4.953770334507824e-05, "loss": 0.5724, "num_input_tokens_seen": 101304448, "step": 560 }, { "epoch": 0.061413831796163006, "grad_norm": 1.2714864845082587, "learning_rate": 4.9536055941694956e-05, "loss": 0.6555, "num_input_tokens_seen": 101481184, "step": 561 }, { "epoch": 0.061523303867100906, "grad_norm": 1.2102645973281874, "learning_rate": 4.9534405635743165e-05, "loss": 0.6492, "num_input_tokens_seen": 101678976, "step": 562 }, { "epoch": 0.06163277593803881, "grad_norm": 1.240845152877832, "learning_rate": 4.953275242741808e-05, "loss": 0.7577, "num_input_tokens_seen": 101853472, "step": 563 }, { "epoch": 0.06174224800897671, "grad_norm": 1.373306835846376, "learning_rate": 4.953109631691529e-05, "loss": 0.7937, "num_input_tokens_seen": 102000192, "step": 564 }, { "epoch": 0.06185172007991461, "grad_norm": 1.3464411025568177, "learning_rate": 4.952943730443069e-05, "loss": 0.6746, "num_input_tokens_seen": 102169984, "step": 565 }, { "epoch": 0.061961192150852516, "grad_norm": 1.1813805159868969, "learning_rate": 4.952777539016056e-05, "loss": 0.6961, "num_input_tokens_seen": 102331712, "step": 566 }, { "epoch": 0.06207066422179042, "grad_norm": 1.1575424085226345, "learning_rate": 4.9526110574301485e-05, "loss": 0.5973, "num_input_tokens_seen": 102522560, "step": 567 }, { "epoch": 0.06218013629272832, "grad_norm": 1.1619561615289118, "learning_rate": 4.9524442857050424e-05, "loss": 0.7482, "num_input_tokens_seen": 102707584, "step": 568 }, { "epoch": 0.06228960836366622, "grad_norm": 1.3020662026867909, "learning_rate": 4.952277223860465e-05, "loss": 0.9118, "num_input_tokens_seen": 102924192, "step": 569 }, { "epoch": 0.06239908043460412, "grad_norm": 1.3280953347876074, "learning_rate": 4.9521098719161795e-05, "loss": 0.8055, "num_input_tokens_seen": 103119968, "step": 570 }, { "epoch": 0.06250855250554202, "grad_norm": 1.347484898385263, "learning_rate": 4.9519422298919844e-05, "loss": 0.7654, "num_input_tokens_seen": 103300736, "step": 571 }, { "epoch": 0.06261802457647993, "grad_norm": 1.3129331691378334, "learning_rate": 4.9517742978077106e-05, "loss": 0.7273, "num_input_tokens_seen": 103480832, "step": 572 }, { "epoch": 0.06272749664741782, "grad_norm": 1.1931661646092753, "learning_rate": 4.951606075683224e-05, "loss": 0.7502, "num_input_tokens_seen": 103681536, "step": 573 }, { "epoch": 0.06283696871835573, "grad_norm": 1.2345933841721484, "learning_rate": 4.9514375635384255e-05, "loss": 0.8977, "num_input_tokens_seen": 103860512, "step": 574 }, { "epoch": 0.06294644078929364, "grad_norm": 1.2563752676534932, "learning_rate": 4.9512687613932505e-05, "loss": 0.6015, "num_input_tokens_seen": 104043072, "step": 575 }, { "epoch": 0.06305591286023153, "grad_norm": 1.4793820381077714, "learning_rate": 4.951099669267666e-05, "loss": 0.6893, "num_input_tokens_seen": 104190464, "step": 576 }, { "epoch": 0.06316538493116944, "grad_norm": 1.2868563645038535, "learning_rate": 4.950930287181677e-05, "loss": 0.8904, "num_input_tokens_seen": 104391616, "step": 577 }, { "epoch": 0.06327485700210733, "grad_norm": 1.2652407404288106, "learning_rate": 4.95076061515532e-05, "loss": 0.6306, "num_input_tokens_seen": 104574400, "step": 578 }, { "epoch": 0.06338432907304524, "grad_norm": 1.2052388632733992, "learning_rate": 4.9505906532086676e-05, "loss": 0.679, "num_input_tokens_seen": 104740384, "step": 579 }, { "epoch": 0.06349380114398315, "grad_norm": 1.2658041838972334, "learning_rate": 4.950420401361825e-05, "loss": 0.7566, "num_input_tokens_seen": 104888448, "step": 580 }, { "epoch": 0.06360327321492104, "grad_norm": 1.2305015922886398, "learning_rate": 4.950249859634934e-05, "loss": 0.7865, "num_input_tokens_seen": 105069888, "step": 581 }, { "epoch": 0.06371274528585895, "grad_norm": 1.3130773909161704, "learning_rate": 4.9500790280481684e-05, "loss": 0.7362, "num_input_tokens_seen": 105263424, "step": 582 }, { "epoch": 0.06382221735679684, "grad_norm": 1.4173851809009426, "learning_rate": 4.9499079066217374e-05, "loss": 0.8289, "num_input_tokens_seen": 105433216, "step": 583 }, { "epoch": 0.06393168942773475, "grad_norm": 1.2489114312959668, "learning_rate": 4.9497364953758854e-05, "loss": 0.9543, "num_input_tokens_seen": 105652736, "step": 584 }, { "epoch": 0.06404116149867266, "grad_norm": 1.3432026568968705, "learning_rate": 4.9495647943308877e-05, "loss": 0.802, "num_input_tokens_seen": 105832608, "step": 585 }, { "epoch": 0.06415063356961055, "grad_norm": 1.292794253280704, "learning_rate": 4.949392803507058e-05, "loss": 0.6763, "num_input_tokens_seen": 106029056, "step": 586 }, { "epoch": 0.06426010564054846, "grad_norm": 1.2514864190035697, "learning_rate": 4.949220522924742e-05, "loss": 0.6194, "num_input_tokens_seen": 106190560, "step": 587 }, { "epoch": 0.06436957771148635, "grad_norm": 1.2874865984913633, "learning_rate": 4.949047952604321e-05, "loss": 0.7022, "num_input_tokens_seen": 106366624, "step": 588 }, { "epoch": 0.06447904978242426, "grad_norm": 1.4054010159687644, "learning_rate": 4.9488750925662083e-05, "loss": 0.9235, "num_input_tokens_seen": 106554560, "step": 589 }, { "epoch": 0.06458852185336215, "grad_norm": 1.1791585279437715, "learning_rate": 4.9487019428308547e-05, "loss": 0.7205, "num_input_tokens_seen": 106761984, "step": 590 }, { "epoch": 0.06469799392430006, "grad_norm": 1.1778984904407739, "learning_rate": 4.948528503418741e-05, "loss": 0.8584, "num_input_tokens_seen": 106939392, "step": 591 }, { "epoch": 0.06480746599523797, "grad_norm": 1.1466248408348334, "learning_rate": 4.9483547743503874e-05, "loss": 0.7038, "num_input_tokens_seen": 107119040, "step": 592 }, { "epoch": 0.06491693806617586, "grad_norm": 1.2351484651941531, "learning_rate": 4.9481807556463435e-05, "loss": 0.7352, "num_input_tokens_seen": 107268224, "step": 593 }, { "epoch": 0.06502641013711377, "grad_norm": 1.1479865505779772, "learning_rate": 4.948006447327197e-05, "loss": 0.6351, "num_input_tokens_seen": 107481920, "step": 594 }, { "epoch": 0.06513588220805167, "grad_norm": 1.0914209363491285, "learning_rate": 4.947831849413567e-05, "loss": 0.667, "num_input_tokens_seen": 107689344, "step": 595 }, { "epoch": 0.06524535427898957, "grad_norm": 1.327316037809871, "learning_rate": 4.947656961926109e-05, "loss": 0.8247, "num_input_tokens_seen": 107880640, "step": 596 }, { "epoch": 0.06535482634992748, "grad_norm": 1.9169247771355789, "learning_rate": 4.947481784885511e-05, "loss": 0.7345, "num_input_tokens_seen": 108056480, "step": 597 }, { "epoch": 0.06546429842086537, "grad_norm": 1.4359147826546896, "learning_rate": 4.947306318312497e-05, "loss": 0.7812, "num_input_tokens_seen": 108231648, "step": 598 }, { "epoch": 0.06557377049180328, "grad_norm": 1.3018658672567327, "learning_rate": 4.947130562227824e-05, "loss": 0.9278, "num_input_tokens_seen": 108421824, "step": 599 }, { "epoch": 0.06568324256274118, "grad_norm": 1.4700120414188804, "learning_rate": 4.9469545166522836e-05, "loss": 0.8514, "num_input_tokens_seen": 108588928, "step": 600 }, { "epoch": 0.06579271463367908, "grad_norm": 1.250539424852838, "learning_rate": 4.946778181606702e-05, "loss": 0.8285, "num_input_tokens_seen": 108764768, "step": 601 }, { "epoch": 0.06590218670461699, "grad_norm": 1.2325688439229106, "learning_rate": 4.946601557111938e-05, "loss": 0.6031, "num_input_tokens_seen": 108962560, "step": 602 }, { "epoch": 0.06601165877555489, "grad_norm": 1.424146409951856, "learning_rate": 4.9464246431888885e-05, "loss": 0.744, "num_input_tokens_seen": 109157440, "step": 603 }, { "epoch": 0.06612113084649279, "grad_norm": 1.2655459720757416, "learning_rate": 4.946247439858479e-05, "loss": 0.829, "num_input_tokens_seen": 109349408, "step": 604 }, { "epoch": 0.06623060291743069, "grad_norm": 1.1456937126868159, "learning_rate": 4.9460699471416745e-05, "loss": 0.5337, "num_input_tokens_seen": 109546976, "step": 605 }, { "epoch": 0.0663400749883686, "grad_norm": 1.3118746039547624, "learning_rate": 4.945892165059472e-05, "loss": 0.9072, "num_input_tokens_seen": 109713184, "step": 606 }, { "epoch": 0.06644954705930649, "grad_norm": 1.2852696887565285, "learning_rate": 4.9457140936329004e-05, "loss": 0.8036, "num_input_tokens_seen": 109901120, "step": 607 }, { "epoch": 0.0665590191302444, "grad_norm": 1.3242845997149462, "learning_rate": 4.9455357328830275e-05, "loss": 0.7907, "num_input_tokens_seen": 110082784, "step": 608 }, { "epoch": 0.0666684912011823, "grad_norm": 1.1412861096956648, "learning_rate": 4.9453570828309536e-05, "loss": 0.584, "num_input_tokens_seen": 110241152, "step": 609 }, { "epoch": 0.0667779632721202, "grad_norm": 1.1893810131343958, "learning_rate": 4.9451781434978104e-05, "loss": 0.6707, "num_input_tokens_seen": 110409824, "step": 610 }, { "epoch": 0.0668874353430581, "grad_norm": 1.1534451151482514, "learning_rate": 4.944998914904768e-05, "loss": 0.6707, "num_input_tokens_seen": 110580960, "step": 611 }, { "epoch": 0.066996907413996, "grad_norm": 1.1805146708919898, "learning_rate": 4.944819397073027e-05, "loss": 0.6299, "num_input_tokens_seen": 110778752, "step": 612 }, { "epoch": 0.0671063794849339, "grad_norm": 1.3814390441502067, "learning_rate": 4.944639590023826e-05, "loss": 0.7124, "num_input_tokens_seen": 110918528, "step": 613 }, { "epoch": 0.06721585155587181, "grad_norm": 1.2043078169324068, "learning_rate": 4.9444594937784336e-05, "loss": 0.6447, "num_input_tokens_seen": 111130432, "step": 614 }, { "epoch": 0.06732532362680971, "grad_norm": 1.3062325654394964, "learning_rate": 4.9442791083581575e-05, "loss": 0.6201, "num_input_tokens_seen": 111285664, "step": 615 }, { "epoch": 0.06743479569774762, "grad_norm": 1.3307173169150301, "learning_rate": 4.944098433784335e-05, "loss": 0.8074, "num_input_tokens_seen": 111470688, "step": 616 }, { "epoch": 0.06754426776868551, "grad_norm": 1.249080128758881, "learning_rate": 4.94391747007834e-05, "loss": 0.7161, "num_input_tokens_seen": 111681472, "step": 617 }, { "epoch": 0.06765373983962342, "grad_norm": 1.326819687210161, "learning_rate": 4.9437362172615806e-05, "loss": 0.7683, "num_input_tokens_seen": 111847008, "step": 618 }, { "epoch": 0.06776321191056132, "grad_norm": 1.326941198206124, "learning_rate": 4.9435546753554985e-05, "loss": 0.7126, "num_input_tokens_seen": 112023072, "step": 619 }, { "epoch": 0.06787268398149922, "grad_norm": 1.2992797525275548, "learning_rate": 4.943372844381568e-05, "loss": 0.7069, "num_input_tokens_seen": 112181664, "step": 620 }, { "epoch": 0.06798215605243713, "grad_norm": 1.1874583889547299, "learning_rate": 4.943190724361303e-05, "loss": 0.6956, "num_input_tokens_seen": 112396928, "step": 621 }, { "epoch": 0.06809162812337502, "grad_norm": 1.163400653887849, "learning_rate": 4.9430083153162456e-05, "loss": 0.7277, "num_input_tokens_seen": 112597632, "step": 622 }, { "epoch": 0.06820110019431293, "grad_norm": 1.1811871627967805, "learning_rate": 4.942825617267973e-05, "loss": 0.7074, "num_input_tokens_seen": 112773248, "step": 623 }, { "epoch": 0.06831057226525082, "grad_norm": 1.1610733158149706, "learning_rate": 4.9426426302381014e-05, "loss": 0.6544, "num_input_tokens_seen": 112939456, "step": 624 }, { "epoch": 0.06842004433618873, "grad_norm": 1.1701053736584912, "learning_rate": 4.9424593542482754e-05, "loss": 0.7088, "num_input_tokens_seen": 113123584, "step": 625 }, { "epoch": 0.06852951640712664, "grad_norm": 1.1717645211594758, "learning_rate": 4.942275789320178e-05, "loss": 0.7974, "num_input_tokens_seen": 113329888, "step": 626 }, { "epoch": 0.06863898847806453, "grad_norm": 1.3476177265355571, "learning_rate": 4.9420919354755225e-05, "loss": 0.7588, "num_input_tokens_seen": 113494304, "step": 627 }, { "epoch": 0.06874846054900244, "grad_norm": 1.210971757957954, "learning_rate": 4.9419077927360605e-05, "loss": 0.724, "num_input_tokens_seen": 113697696, "step": 628 }, { "epoch": 0.06885793261994033, "grad_norm": 1.2312897864560137, "learning_rate": 4.9417233611235735e-05, "loss": 0.7036, "num_input_tokens_seen": 113878464, "step": 629 }, { "epoch": 0.06896740469087824, "grad_norm": 1.2412145871184541, "learning_rate": 4.9415386406598816e-05, "loss": 0.8195, "num_input_tokens_seen": 114079168, "step": 630 }, { "epoch": 0.06907687676181615, "grad_norm": 1.2570574846013547, "learning_rate": 4.941353631366836e-05, "loss": 0.7532, "num_input_tokens_seen": 114235744, "step": 631 }, { "epoch": 0.06918634883275404, "grad_norm": 1.1984786443100726, "learning_rate": 4.9411683332663225e-05, "loss": 0.6755, "num_input_tokens_seen": 114414944, "step": 632 }, { "epoch": 0.06929582090369195, "grad_norm": 1.304859355640883, "learning_rate": 4.940982746380262e-05, "loss": 0.9562, "num_input_tokens_seen": 114598624, "step": 633 }, { "epoch": 0.06940529297462984, "grad_norm": 1.172316658716726, "learning_rate": 4.9407968707306085e-05, "loss": 0.8508, "num_input_tokens_seen": 114815008, "step": 634 }, { "epoch": 0.06951476504556775, "grad_norm": 1.1858179828796716, "learning_rate": 4.940610706339351e-05, "loss": 0.6447, "num_input_tokens_seen": 114956352, "step": 635 }, { "epoch": 0.06962423711650566, "grad_norm": 1.324518355513468, "learning_rate": 4.940424253228514e-05, "loss": 0.9054, "num_input_tokens_seen": 115127488, "step": 636 }, { "epoch": 0.06973370918744355, "grad_norm": 1.238731422785432, "learning_rate": 4.940237511420152e-05, "loss": 0.816, "num_input_tokens_seen": 115307136, "step": 637 }, { "epoch": 0.06984318125838146, "grad_norm": 1.3152400434443812, "learning_rate": 4.9400504809363576e-05, "loss": 0.693, "num_input_tokens_seen": 115510080, "step": 638 }, { "epoch": 0.06995265332931935, "grad_norm": 1.3375275279269363, "learning_rate": 4.9398631617992565e-05, "loss": 0.825, "num_input_tokens_seen": 115663744, "step": 639 }, { "epoch": 0.07006212540025726, "grad_norm": 1.4292233239927798, "learning_rate": 4.939675554031007e-05, "loss": 0.9089, "num_input_tokens_seen": 115873408, "step": 640 }, { "epoch": 0.07017159747119515, "grad_norm": 1.3234145611753318, "learning_rate": 4.939487657653803e-05, "loss": 0.6972, "num_input_tokens_seen": 116007136, "step": 641 }, { "epoch": 0.07028106954213306, "grad_norm": 1.316000946769258, "learning_rate": 4.9392994726898735e-05, "loss": 0.7176, "num_input_tokens_seen": 116174912, "step": 642 }, { "epoch": 0.07039054161307097, "grad_norm": 1.3336942700770582, "learning_rate": 4.939110999161479e-05, "loss": 0.8517, "num_input_tokens_seen": 116358816, "step": 643 }, { "epoch": 0.07050001368400886, "grad_norm": 1.1936019426172775, "learning_rate": 4.938922237090916e-05, "loss": 0.615, "num_input_tokens_seen": 116531072, "step": 644 }, { "epoch": 0.07060948575494677, "grad_norm": 1.3389585812995142, "learning_rate": 4.938733186500515e-05, "loss": 0.6759, "num_input_tokens_seen": 116700864, "step": 645 }, { "epoch": 0.07071895782588467, "grad_norm": 1.2223701995558744, "learning_rate": 4.93854384741264e-05, "loss": 0.7622, "num_input_tokens_seen": 116912320, "step": 646 }, { "epoch": 0.07082842989682257, "grad_norm": 1.2585075306263565, "learning_rate": 4.938354219849689e-05, "loss": 0.6514, "num_input_tokens_seen": 117083232, "step": 647 }, { "epoch": 0.07093790196776048, "grad_norm": 1.1661207704405006, "learning_rate": 4.9381643038340966e-05, "loss": 0.5842, "num_input_tokens_seen": 117249664, "step": 648 }, { "epoch": 0.07104737403869837, "grad_norm": 1.27622115241765, "learning_rate": 4.937974099388326e-05, "loss": 0.849, "num_input_tokens_seen": 117439616, "step": 649 }, { "epoch": 0.07115684610963628, "grad_norm": 1.2704499661810846, "learning_rate": 4.9377836065348814e-05, "loss": 0.6723, "num_input_tokens_seen": 117614560, "step": 650 }, { "epoch": 0.07126631818057418, "grad_norm": 1.6549038004503818, "learning_rate": 4.937592825296297e-05, "loss": 1.2026, "num_input_tokens_seen": 117789280, "step": 651 }, { "epoch": 0.07137579025151208, "grad_norm": 1.3464189682300394, "learning_rate": 4.93740175569514e-05, "loss": 0.7484, "num_input_tokens_seen": 117956384, "step": 652 }, { "epoch": 0.07148526232244999, "grad_norm": 1.3087219630957325, "learning_rate": 4.9372103977540154e-05, "loss": 0.8211, "num_input_tokens_seen": 118143424, "step": 653 }, { "epoch": 0.07159473439338788, "grad_norm": 1.1784062785906544, "learning_rate": 4.937018751495559e-05, "loss": 0.684, "num_input_tokens_seen": 118340096, "step": 654 }, { "epoch": 0.07170420646432579, "grad_norm": 1.122320746486529, "learning_rate": 4.9368268169424444e-05, "loss": 0.7986, "num_input_tokens_seen": 118531168, "step": 655 }, { "epoch": 0.07181367853526369, "grad_norm": 1.396174512511345, "learning_rate": 4.936634594117375e-05, "loss": 0.7139, "num_input_tokens_seen": 118688640, "step": 656 }, { "epoch": 0.0719231506062016, "grad_norm": 1.2073821017756186, "learning_rate": 4.936442083043091e-05, "loss": 0.8582, "num_input_tokens_seen": 118901888, "step": 657 }, { "epoch": 0.07203262267713949, "grad_norm": 1.1526899431973523, "learning_rate": 4.936249283742367e-05, "loss": 0.6959, "num_input_tokens_seen": 119069216, "step": 658 }, { "epoch": 0.0721420947480774, "grad_norm": 1.2936636449032681, "learning_rate": 4.93605619623801e-05, "loss": 0.6639, "num_input_tokens_seen": 119239456, "step": 659 }, { "epoch": 0.0722515668190153, "grad_norm": 1.1325249668762911, "learning_rate": 4.935862820552861e-05, "loss": 0.564, "num_input_tokens_seen": 119405664, "step": 660 }, { "epoch": 0.0723610388899532, "grad_norm": 1.3408023085639171, "learning_rate": 4.935669156709798e-05, "loss": 0.7854, "num_input_tokens_seen": 119570976, "step": 661 }, { "epoch": 0.0724705109608911, "grad_norm": 1.4512964865166293, "learning_rate": 4.93547520473173e-05, "loss": 0.7308, "num_input_tokens_seen": 119732928, "step": 662 }, { "epoch": 0.072579983031829, "grad_norm": 1.4501109705587412, "learning_rate": 4.9352809646416e-05, "loss": 0.7933, "num_input_tokens_seen": 119936768, "step": 663 }, { "epoch": 0.0726894551027669, "grad_norm": 1.2803268828828327, "learning_rate": 4.935086436462388e-05, "loss": 0.9965, "num_input_tokens_seen": 120138816, "step": 664 }, { "epoch": 0.07279892717370481, "grad_norm": 1.3209136858224562, "learning_rate": 4.934891620217106e-05, "loss": 0.8763, "num_input_tokens_seen": 120327648, "step": 665 }, { "epoch": 0.07290839924464271, "grad_norm": 1.2087405055875402, "learning_rate": 4.934696515928799e-05, "loss": 0.7658, "num_input_tokens_seen": 120532832, "step": 666 }, { "epoch": 0.07301787131558062, "grad_norm": 1.3318247704989479, "learning_rate": 4.93450112362055e-05, "loss": 0.9149, "num_input_tokens_seen": 120699936, "step": 667 }, { "epoch": 0.07312734338651851, "grad_norm": 1.13740178197698, "learning_rate": 4.934305443315471e-05, "loss": 0.6005, "num_input_tokens_seen": 120858304, "step": 668 }, { "epoch": 0.07323681545745642, "grad_norm": 1.7187939724932568, "learning_rate": 4.9341094750367126e-05, "loss": 0.7429, "num_input_tokens_seen": 121033248, "step": 669 }, { "epoch": 0.07334628752839432, "grad_norm": 1.2183971872482595, "learning_rate": 4.9339132188074556e-05, "loss": 0.7919, "num_input_tokens_seen": 121201024, "step": 670 }, { "epoch": 0.07345575959933222, "grad_norm": 1.213552690491384, "learning_rate": 4.933716674650918e-05, "loss": 0.8113, "num_input_tokens_seen": 121359840, "step": 671 }, { "epoch": 0.07356523167027013, "grad_norm": 1.2236823508060142, "learning_rate": 4.9335198425903497e-05, "loss": 0.6825, "num_input_tokens_seen": 121547104, "step": 672 }, { "epoch": 0.07367470374120802, "grad_norm": 1.1765998702681237, "learning_rate": 4.933322722649037e-05, "loss": 0.5768, "num_input_tokens_seen": 121743552, "step": 673 }, { "epoch": 0.07378417581214593, "grad_norm": 1.2111227078470743, "learning_rate": 4.933125314850297e-05, "loss": 0.6768, "num_input_tokens_seen": 121933280, "step": 674 }, { "epoch": 0.07389364788308384, "grad_norm": 1.1932347508864136, "learning_rate": 4.9329276192174845e-05, "loss": 0.7264, "num_input_tokens_seen": 122102848, "step": 675 }, { "epoch": 0.07400311995402173, "grad_norm": 1.2572595079676514, "learning_rate": 4.932729635773985e-05, "loss": 0.5635, "num_input_tokens_seen": 122308704, "step": 676 }, { "epoch": 0.07411259202495964, "grad_norm": 1.2071867907406242, "learning_rate": 4.93253136454322e-05, "loss": 0.5059, "num_input_tokens_seen": 122492384, "step": 677 }, { "epoch": 0.07422206409589753, "grad_norm": 1.4031255131574003, "learning_rate": 4.9323328055486464e-05, "loss": 0.7137, "num_input_tokens_seen": 122689728, "step": 678 }, { "epoch": 0.07433153616683544, "grad_norm": 1.4353964798185597, "learning_rate": 4.93213395881375e-05, "loss": 0.7443, "num_input_tokens_seen": 122862432, "step": 679 }, { "epoch": 0.07444100823777333, "grad_norm": 1.3992561629259987, "learning_rate": 4.9319348243620566e-05, "loss": 0.8724, "num_input_tokens_seen": 123083968, "step": 680 }, { "epoch": 0.07455048030871124, "grad_norm": 1.2900556033371344, "learning_rate": 4.931735402217122e-05, "loss": 0.7734, "num_input_tokens_seen": 123272128, "step": 681 }, { "epoch": 0.07465995237964915, "grad_norm": 1.2812828897116018, "learning_rate": 4.931535692402538e-05, "loss": 0.7111, "num_input_tokens_seen": 123468352, "step": 682 }, { "epoch": 0.07476942445058704, "grad_norm": 1.1783071361976705, "learning_rate": 4.93133569494193e-05, "loss": 0.6262, "num_input_tokens_seen": 123663680, "step": 683 }, { "epoch": 0.07487889652152495, "grad_norm": 1.2314681097184779, "learning_rate": 4.931135409858958e-05, "loss": 0.7148, "num_input_tokens_seen": 123802560, "step": 684 }, { "epoch": 0.07498836859246284, "grad_norm": 1.3036014810968666, "learning_rate": 4.930934837177313e-05, "loss": 0.7853, "num_input_tokens_seen": 123977728, "step": 685 }, { "epoch": 0.07509784066340075, "grad_norm": 1.2459612450060422, "learning_rate": 4.9307339769207257e-05, "loss": 0.8035, "num_input_tokens_seen": 124157824, "step": 686 }, { "epoch": 0.07520731273433866, "grad_norm": 1.248838648126801, "learning_rate": 4.930532829112955e-05, "loss": 0.9152, "num_input_tokens_seen": 124358976, "step": 687 }, { "epoch": 0.07531678480527655, "grad_norm": 1.1410900892219602, "learning_rate": 4.930331393777796e-05, "loss": 0.7773, "num_input_tokens_seen": 124570208, "step": 688 }, { "epoch": 0.07542625687621446, "grad_norm": 1.1958687180036813, "learning_rate": 4.93012967093908e-05, "loss": 0.7395, "num_input_tokens_seen": 124765984, "step": 689 }, { "epoch": 0.07553572894715235, "grad_norm": 1.3082851672367326, "learning_rate": 4.92992766062067e-05, "loss": 0.7581, "num_input_tokens_seen": 124943616, "step": 690 }, { "epoch": 0.07564520101809026, "grad_norm": 1.335366690420128, "learning_rate": 4.9297253628464624e-05, "loss": 0.7719, "num_input_tokens_seen": 125125280, "step": 691 }, { "epoch": 0.07575467308902817, "grad_norm": 1.289531229514433, "learning_rate": 4.9295227776403893e-05, "loss": 0.7598, "num_input_tokens_seen": 125272448, "step": 692 }, { "epoch": 0.07586414515996606, "grad_norm": 1.135713900003085, "learning_rate": 4.929319905026416e-05, "loss": 0.6961, "num_input_tokens_seen": 125444704, "step": 693 }, { "epoch": 0.07597361723090397, "grad_norm": 1.2613612279626847, "learning_rate": 4.929116745028542e-05, "loss": 0.681, "num_input_tokens_seen": 125590976, "step": 694 }, { "epoch": 0.07608308930184186, "grad_norm": 1.3110200167160078, "learning_rate": 4.928913297670801e-05, "loss": 0.7037, "num_input_tokens_seen": 125737024, "step": 695 }, { "epoch": 0.07619256137277977, "grad_norm": 1.257798331044637, "learning_rate": 4.92870956297726e-05, "loss": 0.6665, "num_input_tokens_seen": 125925184, "step": 696 }, { "epoch": 0.07630203344371767, "grad_norm": 1.260164892518217, "learning_rate": 4.92850554097202e-05, "loss": 0.6778, "num_input_tokens_seen": 126109536, "step": 697 }, { "epoch": 0.07641150551465557, "grad_norm": 1.338476924097404, "learning_rate": 4.928301231679218e-05, "loss": 0.688, "num_input_tokens_seen": 126295904, "step": 698 }, { "epoch": 0.07652097758559348, "grad_norm": 1.2499255635032878, "learning_rate": 4.9280966351230226e-05, "loss": 0.6903, "num_input_tokens_seen": 126487424, "step": 699 }, { "epoch": 0.07663044965653137, "grad_norm": 1.1642380140464534, "learning_rate": 4.927891751327636e-05, "loss": 0.6162, "num_input_tokens_seen": 126685664, "step": 700 }, { "epoch": 0.07673992172746928, "grad_norm": 1.3168204813255495, "learning_rate": 4.9276865803172965e-05, "loss": 0.8016, "num_input_tokens_seen": 126863744, "step": 701 }, { "epoch": 0.07684939379840718, "grad_norm": 1.3163957906698032, "learning_rate": 4.9274811221162764e-05, "loss": 0.858, "num_input_tokens_seen": 127062208, "step": 702 }, { "epoch": 0.07695886586934508, "grad_norm": 1.3982356687698347, "learning_rate": 4.92727537674888e-05, "loss": 0.8053, "num_input_tokens_seen": 127258880, "step": 703 }, { "epoch": 0.07706833794028299, "grad_norm": 1.296808355986015, "learning_rate": 4.927069344239447e-05, "loss": 0.8269, "num_input_tokens_seen": 127457568, "step": 704 }, { "epoch": 0.07717781001122088, "grad_norm": 1.159575757024653, "learning_rate": 4.9268630246123495e-05, "loss": 0.7908, "num_input_tokens_seen": 127640128, "step": 705 }, { "epoch": 0.07728728208215879, "grad_norm": 1.3826799490810884, "learning_rate": 4.926656417891996e-05, "loss": 0.8948, "num_input_tokens_seen": 127787968, "step": 706 }, { "epoch": 0.07739675415309669, "grad_norm": 1.1756282466050645, "learning_rate": 4.926449524102826e-05, "loss": 0.765, "num_input_tokens_seen": 127941632, "step": 707 }, { "epoch": 0.0775062262240346, "grad_norm": 1.2941152784891943, "learning_rate": 4.9262423432693175e-05, "loss": 0.6304, "num_input_tokens_seen": 128130464, "step": 708 }, { "epoch": 0.0776156982949725, "grad_norm": 1.1988072388222641, "learning_rate": 4.926034875415977e-05, "loss": 0.9715, "num_input_tokens_seen": 128338784, "step": 709 }, { "epoch": 0.0777251703659104, "grad_norm": 1.1402284115631782, "learning_rate": 4.925827120567349e-05, "loss": 0.748, "num_input_tokens_seen": 128541280, "step": 710 }, { "epoch": 0.0778346424368483, "grad_norm": 1.1787467388540809, "learning_rate": 4.9256190787480104e-05, "loss": 0.7341, "num_input_tokens_seen": 128745792, "step": 711 }, { "epoch": 0.0779441145077862, "grad_norm": 1.3596553732588554, "learning_rate": 4.9254107499825705e-05, "loss": 0.7121, "num_input_tokens_seen": 128944928, "step": 712 }, { "epoch": 0.0780535865787241, "grad_norm": 1.5628303178680372, "learning_rate": 4.925202134295677e-05, "loss": 1.073, "num_input_tokens_seen": 129112032, "step": 713 }, { "epoch": 0.078163058649662, "grad_norm": 1.3912120394399108, "learning_rate": 4.924993231712006e-05, "loss": 0.7132, "num_input_tokens_seen": 129271072, "step": 714 }, { "epoch": 0.0782725307205999, "grad_norm": 1.2277195812163006, "learning_rate": 4.924784042256273e-05, "loss": 0.7086, "num_input_tokens_seen": 129470432, "step": 715 }, { "epoch": 0.07838200279153781, "grad_norm": 1.2264116036478923, "learning_rate": 4.9245745659532214e-05, "loss": 0.708, "num_input_tokens_seen": 129628576, "step": 716 }, { "epoch": 0.07849147486247571, "grad_norm": 1.2093236197663673, "learning_rate": 4.924364802827635e-05, "loss": 0.8432, "num_input_tokens_seen": 129841376, "step": 717 }, { "epoch": 0.07860094693341362, "grad_norm": 1.2658401588940673, "learning_rate": 4.924154752904326e-05, "loss": 0.8947, "num_input_tokens_seen": 130029536, "step": 718 }, { "epoch": 0.07871041900435151, "grad_norm": 1.2711900408472923, "learning_rate": 4.923944416208145e-05, "loss": 0.7372, "num_input_tokens_seen": 130197536, "step": 719 }, { "epoch": 0.07881989107528942, "grad_norm": 1.148299487897358, "learning_rate": 4.9237337927639725e-05, "loss": 0.6077, "num_input_tokens_seen": 130362400, "step": 720 }, { "epoch": 0.07892936314622732, "grad_norm": 1.2702800244523906, "learning_rate": 4.923522882596726e-05, "loss": 0.757, "num_input_tokens_seen": 130571840, "step": 721 }, { "epoch": 0.07903883521716522, "grad_norm": 1.1588986448974745, "learning_rate": 4.9233116857313554e-05, "loss": 0.6372, "num_input_tokens_seen": 130753952, "step": 722 }, { "epoch": 0.07914830728810313, "grad_norm": 1.2302513778266408, "learning_rate": 4.923100202192845e-05, "loss": 0.5178, "num_input_tokens_seen": 130910304, "step": 723 }, { "epoch": 0.07925777935904102, "grad_norm": 1.1584267617472213, "learning_rate": 4.922888432006213e-05, "loss": 0.6108, "num_input_tokens_seen": 131084352, "step": 724 }, { "epoch": 0.07936725142997893, "grad_norm": 1.551549764313919, "learning_rate": 4.922676375196511e-05, "loss": 0.6605, "num_input_tokens_seen": 131263776, "step": 725 }, { "epoch": 0.07947672350091683, "grad_norm": 1.3283443494186002, "learning_rate": 4.922464031788826e-05, "loss": 0.7337, "num_input_tokens_seen": 131479264, "step": 726 }, { "epoch": 0.07958619557185473, "grad_norm": 1.2437549514288562, "learning_rate": 4.922251401808276e-05, "loss": 0.6106, "num_input_tokens_seen": 131640992, "step": 727 }, { "epoch": 0.07969566764279264, "grad_norm": 1.1289905133858236, "learning_rate": 4.922038485280016e-05, "loss": 0.6467, "num_input_tokens_seen": 131836768, "step": 728 }, { "epoch": 0.07980513971373053, "grad_norm": 1.2411957146336114, "learning_rate": 4.921825282229233e-05, "loss": 0.7412, "num_input_tokens_seen": 131998272, "step": 729 }, { "epoch": 0.07991461178466844, "grad_norm": 1.0246292048348578, "learning_rate": 4.92161179268115e-05, "loss": 0.54, "num_input_tokens_seen": 132182848, "step": 730 }, { "epoch": 0.08002408385560633, "grad_norm": 1.2843596241719786, "learning_rate": 4.921398016661021e-05, "loss": 0.7165, "num_input_tokens_seen": 132351968, "step": 731 }, { "epoch": 0.08013355592654424, "grad_norm": 1.3008935095198846, "learning_rate": 4.9211839541941345e-05, "loss": 0.8466, "num_input_tokens_seen": 132549088, "step": 732 }, { "epoch": 0.08024302799748215, "grad_norm": 1.265308670260493, "learning_rate": 4.920969605305815e-05, "loss": 0.7257, "num_input_tokens_seen": 132720672, "step": 733 }, { "epoch": 0.08035250006842004, "grad_norm": 1.3101878918151895, "learning_rate": 4.92075497002142e-05, "loss": 0.7737, "num_input_tokens_seen": 132905024, "step": 734 }, { "epoch": 0.08046197213935795, "grad_norm": 1.0475730845918771, "learning_rate": 4.92054004836634e-05, "loss": 0.5127, "num_input_tokens_seen": 133055552, "step": 735 }, { "epoch": 0.08057144421029584, "grad_norm": 1.186945836111294, "learning_rate": 4.920324840365998e-05, "loss": 0.5494, "num_input_tokens_seen": 133226464, "step": 736 }, { "epoch": 0.08068091628123375, "grad_norm": 1.0890877675610215, "learning_rate": 4.9201093460458555e-05, "loss": 0.6358, "num_input_tokens_seen": 133423584, "step": 737 }, { "epoch": 0.08079038835217166, "grad_norm": 1.26387161492268, "learning_rate": 4.9198935654314036e-05, "loss": 0.7337, "num_input_tokens_seen": 133594048, "step": 738 }, { "epoch": 0.08089986042310955, "grad_norm": 1.2055254682447782, "learning_rate": 4.919677498548169e-05, "loss": 0.6787, "num_input_tokens_seen": 133781312, "step": 739 }, { "epoch": 0.08100933249404746, "grad_norm": 1.4923391215201003, "learning_rate": 4.9194611454217124e-05, "loss": 0.9397, "num_input_tokens_seen": 133964096, "step": 740 }, { "epoch": 0.08111880456498535, "grad_norm": 1.3352677746796564, "learning_rate": 4.9192445060776264e-05, "loss": 0.7926, "num_input_tokens_seen": 134168160, "step": 741 }, { "epoch": 0.08122827663592326, "grad_norm": 1.2615119599279265, "learning_rate": 4.919027580541541e-05, "loss": 0.7434, "num_input_tokens_seen": 134362144, "step": 742 }, { "epoch": 0.08133774870686117, "grad_norm": 1.4338579895803654, "learning_rate": 4.918810368839117e-05, "loss": 0.8817, "num_input_tokens_seen": 134545600, "step": 743 }, { "epoch": 0.08144722077779906, "grad_norm": 1.1648333256726455, "learning_rate": 4.91859287099605e-05, "loss": 0.6353, "num_input_tokens_seen": 134693216, "step": 744 }, { "epoch": 0.08155669284873697, "grad_norm": 1.0726908967826114, "learning_rate": 4.9183750870380704e-05, "loss": 0.5205, "num_input_tokens_seen": 134882048, "step": 745 }, { "epoch": 0.08166616491967486, "grad_norm": 1.3234200558644338, "learning_rate": 4.918157016990941e-05, "loss": 1.1545, "num_input_tokens_seen": 135092608, "step": 746 }, { "epoch": 0.08177563699061277, "grad_norm": 1.2334515461243938, "learning_rate": 4.917938660880459e-05, "loss": 0.8898, "num_input_tokens_seen": 135255008, "step": 747 }, { "epoch": 0.08188510906155068, "grad_norm": 1.2840966955715916, "learning_rate": 4.9177200187324556e-05, "loss": 0.7135, "num_input_tokens_seen": 135431296, "step": 748 }, { "epoch": 0.08199458113248857, "grad_norm": 1.191942988328398, "learning_rate": 4.917501090572797e-05, "loss": 0.6785, "num_input_tokens_seen": 135585184, "step": 749 }, { "epoch": 0.08210405320342648, "grad_norm": 1.1738948453960796, "learning_rate": 4.91728187642738e-05, "loss": 0.6401, "num_input_tokens_seen": 135749152, "step": 750 }, { "epoch": 0.08221352527436437, "grad_norm": 1.1733253151085008, "learning_rate": 4.917062376322138e-05, "loss": 0.6404, "num_input_tokens_seen": 135965312, "step": 751 }, { "epoch": 0.08232299734530228, "grad_norm": 1.4027982522500546, "learning_rate": 4.916842590283037e-05, "loss": 0.9701, "num_input_tokens_seen": 136155936, "step": 752 }, { "epoch": 0.08243246941624018, "grad_norm": 1.2911566629862228, "learning_rate": 4.916622518336079e-05, "loss": 0.8394, "num_input_tokens_seen": 136329088, "step": 753 }, { "epoch": 0.08254194148717808, "grad_norm": 1.3042369224385537, "learning_rate": 4.916402160507296e-05, "loss": 0.9354, "num_input_tokens_seen": 136530240, "step": 754 }, { "epoch": 0.08265141355811599, "grad_norm": 1.333643097458927, "learning_rate": 4.9161815168227576e-05, "loss": 0.6836, "num_input_tokens_seen": 136715264, "step": 755 }, { "epoch": 0.08276088562905388, "grad_norm": 1.3516139202589865, "learning_rate": 4.915960587308564e-05, "loss": 0.6397, "num_input_tokens_seen": 136866688, "step": 756 }, { "epoch": 0.08287035769999179, "grad_norm": 1.186535845405159, "learning_rate": 4.915739371990852e-05, "loss": 0.5921, "num_input_tokens_seen": 137036256, "step": 757 }, { "epoch": 0.08297982977092969, "grad_norm": 1.3825563096483446, "learning_rate": 4.9155178708957896e-05, "loss": 0.6946, "num_input_tokens_seen": 137239424, "step": 758 }, { "epoch": 0.0830893018418676, "grad_norm": 1.588440371759743, "learning_rate": 4.915296084049582e-05, "loss": 0.9498, "num_input_tokens_seen": 137408992, "step": 759 }, { "epoch": 0.0831987739128055, "grad_norm": 1.243834261699686, "learning_rate": 4.915074011478463e-05, "loss": 0.673, "num_input_tokens_seen": 137578112, "step": 760 }, { "epoch": 0.0833082459837434, "grad_norm": 1.1729087339215427, "learning_rate": 4.914851653208707e-05, "loss": 0.7599, "num_input_tokens_seen": 137744992, "step": 761 }, { "epoch": 0.0834177180546813, "grad_norm": 1.0741041642543987, "learning_rate": 4.9146290092666163e-05, "loss": 0.6717, "num_input_tokens_seen": 137918144, "step": 762 }, { "epoch": 0.0835271901256192, "grad_norm": 1.1300925387410379, "learning_rate": 4.91440607967853e-05, "loss": 0.6558, "num_input_tokens_seen": 138106080, "step": 763 }, { "epoch": 0.0836366621965571, "grad_norm": 1.2267573499504083, "learning_rate": 4.91418286447082e-05, "loss": 0.5753, "num_input_tokens_seen": 138282144, "step": 764 }, { "epoch": 0.08374613426749501, "grad_norm": 1.277546857732223, "learning_rate": 4.913959363669892e-05, "loss": 0.7834, "num_input_tokens_seen": 138457760, "step": 765 }, { "epoch": 0.0838556063384329, "grad_norm": 1.324788154005103, "learning_rate": 4.9137355773021856e-05, "loss": 0.8654, "num_input_tokens_seen": 138656672, "step": 766 }, { "epoch": 0.08396507840937081, "grad_norm": 1.206818250115041, "learning_rate": 4.913511505394175e-05, "loss": 0.7416, "num_input_tokens_seen": 138854016, "step": 767 }, { "epoch": 0.08407455048030871, "grad_norm": 1.2401822777566882, "learning_rate": 4.9132871479723675e-05, "loss": 0.6422, "num_input_tokens_seen": 139026048, "step": 768 }, { "epoch": 0.08418402255124662, "grad_norm": 1.2560074927993379, "learning_rate": 4.9130625050633036e-05, "loss": 0.8059, "num_input_tokens_seen": 139206592, "step": 769 }, { "epoch": 0.08429349462218451, "grad_norm": 1.2427999078478624, "learning_rate": 4.912837576693559e-05, "loss": 0.7513, "num_input_tokens_seen": 139348384, "step": 770 }, { "epoch": 0.08440296669312242, "grad_norm": 1.443966047627523, "learning_rate": 4.9126123628897406e-05, "loss": 0.8439, "num_input_tokens_seen": 139513024, "step": 771 }, { "epoch": 0.08451243876406032, "grad_norm": 1.2563678192762369, "learning_rate": 4.912386863678492e-05, "loss": 0.6829, "num_input_tokens_seen": 139649888, "step": 772 }, { "epoch": 0.08462191083499822, "grad_norm": 1.419291654194922, "learning_rate": 4.91216107908649e-05, "loss": 0.9824, "num_input_tokens_seen": 139833120, "step": 773 }, { "epoch": 0.08473138290593613, "grad_norm": 1.289219008683699, "learning_rate": 4.911935009140443e-05, "loss": 0.7618, "num_input_tokens_seen": 140047040, "step": 774 }, { "epoch": 0.08484085497687402, "grad_norm": 1.268298821491457, "learning_rate": 4.911708653867095e-05, "loss": 0.9318, "num_input_tokens_seen": 140231840, "step": 775 }, { "epoch": 0.08495032704781193, "grad_norm": 1.2539066302812898, "learning_rate": 4.911482013293224e-05, "loss": 0.768, "num_input_tokens_seen": 140409472, "step": 776 }, { "epoch": 0.08505979911874983, "grad_norm": 1.422615696739116, "learning_rate": 4.91125508744564e-05, "loss": 0.8267, "num_input_tokens_seen": 140611744, "step": 777 }, { "epoch": 0.08516927118968773, "grad_norm": 1.27614072557913, "learning_rate": 4.9110278763511897e-05, "loss": 0.8866, "num_input_tokens_seen": 140774368, "step": 778 }, { "epoch": 0.08527874326062564, "grad_norm": 1.1423420458397293, "learning_rate": 4.910800380036751e-05, "loss": 0.6919, "num_input_tokens_seen": 140976416, "step": 779 }, { "epoch": 0.08538821533156353, "grad_norm": 1.212614313616261, "learning_rate": 4.910572598529235e-05, "loss": 0.6341, "num_input_tokens_seen": 141143296, "step": 780 }, { "epoch": 0.08549768740250144, "grad_norm": 1.1090653215610515, "learning_rate": 4.910344531855589e-05, "loss": 0.5611, "num_input_tokens_seen": 141325632, "step": 781 }, { "epoch": 0.08560715947343935, "grad_norm": 1.1316794681433646, "learning_rate": 4.910116180042793e-05, "loss": 0.7065, "num_input_tokens_seen": 141512896, "step": 782 }, { "epoch": 0.08571663154437724, "grad_norm": 1.2812186446656335, "learning_rate": 4.90988754311786e-05, "loss": 0.6342, "num_input_tokens_seen": 141688512, "step": 783 }, { "epoch": 0.08582610361531515, "grad_norm": 1.353860566193388, "learning_rate": 4.9096586211078376e-05, "loss": 0.615, "num_input_tokens_seen": 141845536, "step": 784 }, { "epoch": 0.08593557568625304, "grad_norm": 1.146457548331264, "learning_rate": 4.9094294140398075e-05, "loss": 0.7083, "num_input_tokens_seen": 142004800, "step": 785 }, { "epoch": 0.08604504775719095, "grad_norm": 1.2460992029722477, "learning_rate": 4.909199921940883e-05, "loss": 0.6695, "num_input_tokens_seen": 142201472, "step": 786 }, { "epoch": 0.08615451982812884, "grad_norm": 1.4336069714510806, "learning_rate": 4.908970144838214e-05, "loss": 0.9538, "num_input_tokens_seen": 142384256, "step": 787 }, { "epoch": 0.08626399189906675, "grad_norm": 1.4821857570933117, "learning_rate": 4.9087400827589814e-05, "loss": 0.9346, "num_input_tokens_seen": 142551360, "step": 788 }, { "epoch": 0.08637346397000466, "grad_norm": 1.2529948597442577, "learning_rate": 4.908509735730402e-05, "loss": 0.7631, "num_input_tokens_seen": 142762816, "step": 789 }, { "epoch": 0.08648293604094255, "grad_norm": 1.2034761486122711, "learning_rate": 4.908279103779725e-05, "loss": 0.6486, "num_input_tokens_seen": 142943360, "step": 790 }, { "epoch": 0.08659240811188046, "grad_norm": 1.2698685887450223, "learning_rate": 4.908048186934234e-05, "loss": 0.7172, "num_input_tokens_seen": 143127040, "step": 791 }, { "epoch": 0.08670188018281835, "grad_norm": 1.1946912160661454, "learning_rate": 4.9078169852212454e-05, "loss": 0.7696, "num_input_tokens_seen": 143330880, "step": 792 }, { "epoch": 0.08681135225375626, "grad_norm": 1.4363763813028423, "learning_rate": 4.907585498668111e-05, "loss": 0.81, "num_input_tokens_seen": 143491264, "step": 793 }, { "epoch": 0.08692082432469417, "grad_norm": 1.211931817157542, "learning_rate": 4.907353727302214e-05, "loss": 0.6442, "num_input_tokens_seen": 143648288, "step": 794 }, { "epoch": 0.08703029639563206, "grad_norm": 1.1848248918137543, "learning_rate": 4.907121671150974e-05, "loss": 0.5882, "num_input_tokens_seen": 143825696, "step": 795 }, { "epoch": 0.08713976846656997, "grad_norm": 1.2124176823309833, "learning_rate": 4.906889330241842e-05, "loss": 0.627, "num_input_tokens_seen": 143986080, "step": 796 }, { "epoch": 0.08724924053750786, "grad_norm": 1.1876417878480787, "learning_rate": 4.9066567046023025e-05, "loss": 0.8028, "num_input_tokens_seen": 144177824, "step": 797 }, { "epoch": 0.08735871260844577, "grad_norm": 1.3504895046889795, "learning_rate": 4.906423794259876e-05, "loss": 0.879, "num_input_tokens_seen": 144341792, "step": 798 }, { "epoch": 0.08746818467938368, "grad_norm": 1.1809497673727962, "learning_rate": 4.906190599242115e-05, "loss": 0.8659, "num_input_tokens_seen": 144533984, "step": 799 }, { "epoch": 0.08757765675032157, "grad_norm": 1.2484174724399963, "learning_rate": 4.9059571195766066e-05, "loss": 0.8295, "num_input_tokens_seen": 144721024, "step": 800 }, { "epoch": 0.08768712882125948, "grad_norm": 1.1612614976623759, "learning_rate": 4.90572335529097e-05, "loss": 0.6388, "num_input_tokens_seen": 144908288, "step": 801 }, { "epoch": 0.08779660089219737, "grad_norm": 1.2768293739117702, "learning_rate": 4.9054893064128584e-05, "loss": 0.5966, "num_input_tokens_seen": 145097792, "step": 802 }, { "epoch": 0.08790607296313528, "grad_norm": 1.1640415573796425, "learning_rate": 4.905254972969962e-05, "loss": 0.6211, "num_input_tokens_seen": 145303424, "step": 803 }, { "epoch": 0.08801554503407318, "grad_norm": 1.2617221513353922, "learning_rate": 4.9050203549899984e-05, "loss": 0.7511, "num_input_tokens_seen": 145508160, "step": 804 }, { "epoch": 0.08812501710501108, "grad_norm": 1.3907638651917673, "learning_rate": 4.904785452500726e-05, "loss": 0.733, "num_input_tokens_seen": 145673920, "step": 805 }, { "epoch": 0.08823448917594899, "grad_norm": 1.2671817577151991, "learning_rate": 4.904550265529932e-05, "loss": 0.6224, "num_input_tokens_seen": 145849760, "step": 806 }, { "epoch": 0.08834396124688688, "grad_norm": 1.268424904801417, "learning_rate": 4.904314794105437e-05, "loss": 0.798, "num_input_tokens_seen": 146045536, "step": 807 }, { "epoch": 0.08845343331782479, "grad_norm": 1.379289611607141, "learning_rate": 4.9040790382550985e-05, "loss": 0.8995, "num_input_tokens_seen": 146232576, "step": 808 }, { "epoch": 0.08856290538876269, "grad_norm": 1.2280165685101323, "learning_rate": 4.903842998006806e-05, "loss": 0.6881, "num_input_tokens_seen": 146427456, "step": 809 }, { "epoch": 0.0886723774597006, "grad_norm": 1.1547320428291532, "learning_rate": 4.903606673388482e-05, "loss": 0.7038, "num_input_tokens_seen": 146614048, "step": 810 }, { "epoch": 0.0887818495306385, "grad_norm": 1.47495630490513, "learning_rate": 4.903370064428083e-05, "loss": 1.0258, "num_input_tokens_seen": 146790784, "step": 811 }, { "epoch": 0.0888913216015764, "grad_norm": 1.3454314512683099, "learning_rate": 4.903133171153601e-05, "loss": 0.9551, "num_input_tokens_seen": 146999328, "step": 812 }, { "epoch": 0.0890007936725143, "grad_norm": 1.1417099482738813, "learning_rate": 4.902895993593058e-05, "loss": 0.5975, "num_input_tokens_seen": 147183904, "step": 813 }, { "epoch": 0.0891102657434522, "grad_norm": 1.210804741257785, "learning_rate": 4.902658531774512e-05, "loss": 0.6499, "num_input_tokens_seen": 147375648, "step": 814 }, { "epoch": 0.0892197378143901, "grad_norm": 1.263113284578123, "learning_rate": 4.902420785726056e-05, "loss": 0.8148, "num_input_tokens_seen": 147548128, "step": 815 }, { "epoch": 0.08932920988532801, "grad_norm": 1.2665833506455015, "learning_rate": 4.902182755475813e-05, "loss": 0.8289, "num_input_tokens_seen": 147737856, "step": 816 }, { "epoch": 0.0894386819562659, "grad_norm": 1.382550058407096, "learning_rate": 4.9019444410519425e-05, "loss": 0.9015, "num_input_tokens_seen": 147918400, "step": 817 }, { "epoch": 0.08954815402720381, "grad_norm": 1.2752201731696762, "learning_rate": 4.9017058424826366e-05, "loss": 0.6044, "num_input_tokens_seen": 148081024, "step": 818 }, { "epoch": 0.08965762609814171, "grad_norm": 1.0831481457420262, "learning_rate": 4.901466959796121e-05, "loss": 0.6394, "num_input_tokens_seen": 148276352, "step": 819 }, { "epoch": 0.08976709816907961, "grad_norm": 1.2932123605857773, "learning_rate": 4.9012277930206536e-05, "loss": 0.7425, "num_input_tokens_seen": 148465632, "step": 820 }, { "epoch": 0.08987657024001751, "grad_norm": 1.3096755836725669, "learning_rate": 4.900988342184529e-05, "loss": 0.7632, "num_input_tokens_seen": 148653120, "step": 821 }, { "epoch": 0.08998604231095542, "grad_norm": 1.3152016581541128, "learning_rate": 4.9007486073160746e-05, "loss": 0.6866, "num_input_tokens_seen": 148813952, "step": 822 }, { "epoch": 0.09009551438189332, "grad_norm": 1.3537884061654197, "learning_rate": 4.900508588443649e-05, "loss": 0.707, "num_input_tokens_seen": 148983744, "step": 823 }, { "epoch": 0.09020498645283122, "grad_norm": 1.3651076709634902, "learning_rate": 4.900268285595645e-05, "loss": 0.7952, "num_input_tokens_seen": 149200800, "step": 824 }, { "epoch": 0.09031445852376913, "grad_norm": 1.3318919894899437, "learning_rate": 4.9000276988004925e-05, "loss": 0.6926, "num_input_tokens_seen": 149385824, "step": 825 }, { "epoch": 0.09042393059470702, "grad_norm": 1.341742946201787, "learning_rate": 4.899786828086651e-05, "loss": 1.0813, "num_input_tokens_seen": 149601984, "step": 826 }, { "epoch": 0.09053340266564493, "grad_norm": 1.5622520250004222, "learning_rate": 4.899545673482616e-05, "loss": 0.7668, "num_input_tokens_seen": 149744672, "step": 827 }, { "epoch": 0.09064287473658283, "grad_norm": 1.4232415978367792, "learning_rate": 4.8993042350169145e-05, "loss": 0.8394, "num_input_tokens_seen": 149964192, "step": 828 }, { "epoch": 0.09075234680752073, "grad_norm": 1.2662216004842508, "learning_rate": 4.899062512718109e-05, "loss": 0.6735, "num_input_tokens_seen": 150138912, "step": 829 }, { "epoch": 0.09086181887845864, "grad_norm": 1.3164025371736865, "learning_rate": 4.898820506614794e-05, "loss": 0.6986, "num_input_tokens_seen": 150272864, "step": 830 }, { "epoch": 0.09097129094939653, "grad_norm": 1.2757134722653818, "learning_rate": 4.898578216735599e-05, "loss": 0.8561, "num_input_tokens_seen": 150478496, "step": 831 }, { "epoch": 0.09108076302033444, "grad_norm": 1.2431544253327116, "learning_rate": 4.8983356431091864e-05, "loss": 0.7374, "num_input_tokens_seen": 150650304, "step": 832 }, { "epoch": 0.09119023509127235, "grad_norm": 1.358872550975483, "learning_rate": 4.8980927857642514e-05, "loss": 0.7869, "num_input_tokens_seen": 150804192, "step": 833 }, { "epoch": 0.09129970716221024, "grad_norm": 1.1919895432736831, "learning_rate": 4.897849644729525e-05, "loss": 0.6249, "num_input_tokens_seen": 150946208, "step": 834 }, { "epoch": 0.09140917923314815, "grad_norm": 1.2930218089300873, "learning_rate": 4.8976062200337695e-05, "loss": 0.5683, "num_input_tokens_seen": 151109952, "step": 835 }, { "epoch": 0.09151865130408604, "grad_norm": 1.135807681634811, "learning_rate": 4.897362511705781e-05, "loss": 0.7016, "num_input_tokens_seen": 151293856, "step": 836 }, { "epoch": 0.09162812337502395, "grad_norm": 1.236315879339285, "learning_rate": 4.897118519774391e-05, "loss": 0.7584, "num_input_tokens_seen": 151474848, "step": 837 }, { "epoch": 0.09173759544596186, "grad_norm": 1.2983191848264728, "learning_rate": 4.8968742442684625e-05, "loss": 0.873, "num_input_tokens_seen": 151634112, "step": 838 }, { "epoch": 0.09184706751689975, "grad_norm": 1.3008830629203165, "learning_rate": 4.896629685216892e-05, "loss": 0.7044, "num_input_tokens_seen": 151794048, "step": 839 }, { "epoch": 0.09195653958783766, "grad_norm": 1.1253238847533245, "learning_rate": 4.896384842648612e-05, "loss": 0.5683, "num_input_tokens_seen": 151947936, "step": 840 }, { "epoch": 0.09206601165877555, "grad_norm": 1.2186873493230679, "learning_rate": 4.8961397165925874e-05, "loss": 0.6199, "num_input_tokens_seen": 152135648, "step": 841 }, { "epoch": 0.09217548372971346, "grad_norm": 1.1402029536937175, "learning_rate": 4.895894307077814e-05, "loss": 0.6604, "num_input_tokens_seen": 152325152, "step": 842 }, { "epoch": 0.09228495580065135, "grad_norm": 1.3229138632116375, "learning_rate": 4.895648614133324e-05, "loss": 0.7524, "num_input_tokens_seen": 152524512, "step": 843 }, { "epoch": 0.09239442787158926, "grad_norm": 1.3356570568530768, "learning_rate": 4.895402637788183e-05, "loss": 0.6847, "num_input_tokens_seen": 152703936, "step": 844 }, { "epoch": 0.09250389994252717, "grad_norm": 1.182628174785098, "learning_rate": 4.895156378071489e-05, "loss": 0.6819, "num_input_tokens_seen": 152910688, "step": 845 }, { "epoch": 0.09261337201346506, "grad_norm": 1.3630595641803553, "learning_rate": 4.894909835012374e-05, "loss": 0.7543, "num_input_tokens_seen": 153072416, "step": 846 }, { "epoch": 0.09272284408440297, "grad_norm": 1.1947005456945063, "learning_rate": 4.894663008640004e-05, "loss": 0.5573, "num_input_tokens_seen": 153241088, "step": 847 }, { "epoch": 0.09283231615534086, "grad_norm": 1.3869286176881777, "learning_rate": 4.894415898983578e-05, "loss": 0.8251, "num_input_tokens_seen": 153431264, "step": 848 }, { "epoch": 0.09294178822627877, "grad_norm": 1.258401199851704, "learning_rate": 4.894168506072329e-05, "loss": 0.6088, "num_input_tokens_seen": 153616064, "step": 849 }, { "epoch": 0.09305126029721668, "grad_norm": 1.1125934282997818, "learning_rate": 4.8939208299355215e-05, "loss": 0.7778, "num_input_tokens_seen": 153795936, "step": 850 }, { "epoch": 0.09316073236815457, "grad_norm": 1.2507231527890117, "learning_rate": 4.893672870602457e-05, "loss": 0.659, "num_input_tokens_seen": 153950272, "step": 851 }, { "epoch": 0.09327020443909248, "grad_norm": 1.2999321626423141, "learning_rate": 4.893424628102468e-05, "loss": 0.7125, "num_input_tokens_seen": 154124544, "step": 852 }, { "epoch": 0.09337967651003037, "grad_norm": 1.2413904764026746, "learning_rate": 4.8931761024649206e-05, "loss": 0.706, "num_input_tokens_seen": 154330176, "step": 853 }, { "epoch": 0.09348914858096828, "grad_norm": 1.4672027717096154, "learning_rate": 4.8929272937192147e-05, "loss": 0.8021, "num_input_tokens_seen": 154488096, "step": 854 }, { "epoch": 0.09359862065190619, "grad_norm": 1.5036540030440384, "learning_rate": 4.892678201894785e-05, "loss": 0.8338, "num_input_tokens_seen": 154652512, "step": 855 }, { "epoch": 0.09370809272284408, "grad_norm": 1.3196280697433382, "learning_rate": 4.892428827021098e-05, "loss": 0.7688, "num_input_tokens_seen": 154825216, "step": 856 }, { "epoch": 0.09381756479378199, "grad_norm": 1.2182167549474137, "learning_rate": 4.892179169127654e-05, "loss": 0.6916, "num_input_tokens_seen": 155017184, "step": 857 }, { "epoch": 0.09392703686471988, "grad_norm": 1.3375957526041429, "learning_rate": 4.891929228243988e-05, "loss": 0.7637, "num_input_tokens_seen": 155211168, "step": 858 }, { "epoch": 0.09403650893565779, "grad_norm": 1.2909056000564647, "learning_rate": 4.8916790043996665e-05, "loss": 0.6796, "num_input_tokens_seen": 155384320, "step": 859 }, { "epoch": 0.09414598100659569, "grad_norm": 1.1745247447889384, "learning_rate": 4.891428497624291e-05, "loss": 0.5568, "num_input_tokens_seen": 155527680, "step": 860 }, { "epoch": 0.0942554530775336, "grad_norm": 1.3367470413326512, "learning_rate": 4.891177707947496e-05, "loss": 0.6695, "num_input_tokens_seen": 155728384, "step": 861 }, { "epoch": 0.0943649251484715, "grad_norm": 1.3051381960222632, "learning_rate": 4.890926635398949e-05, "loss": 0.6203, "num_input_tokens_seen": 155873984, "step": 862 }, { "epoch": 0.0944743972194094, "grad_norm": 1.2545543196173956, "learning_rate": 4.890675280008352e-05, "loss": 0.7613, "num_input_tokens_seen": 156072672, "step": 863 }, { "epoch": 0.0945838692903473, "grad_norm": 1.3425484985598892, "learning_rate": 4.8904236418054395e-05, "loss": 0.7572, "num_input_tokens_seen": 156239552, "step": 864 }, { "epoch": 0.0946933413612852, "grad_norm": 1.1401662557235295, "learning_rate": 4.890171720819979e-05, "loss": 0.6268, "num_input_tokens_seen": 156433984, "step": 865 }, { "epoch": 0.0948028134322231, "grad_norm": 1.3519354509654928, "learning_rate": 4.889919517081775e-05, "loss": 0.8435, "num_input_tokens_seen": 156588992, "step": 866 }, { "epoch": 0.09491228550316101, "grad_norm": 1.3728937576458784, "learning_rate": 4.889667030620659e-05, "loss": 0.8546, "num_input_tokens_seen": 156771776, "step": 867 }, { "epoch": 0.0950217575740989, "grad_norm": 1.3846705825106247, "learning_rate": 4.889414261466503e-05, "loss": 0.8201, "num_input_tokens_seen": 156948512, "step": 868 }, { "epoch": 0.09513122964503681, "grad_norm": 1.1584287684162742, "learning_rate": 4.8891612096492066e-05, "loss": 0.553, "num_input_tokens_seen": 157098816, "step": 869 }, { "epoch": 0.09524070171597471, "grad_norm": 1.1247287529208052, "learning_rate": 4.8889078751987074e-05, "loss": 0.5557, "num_input_tokens_seen": 157295712, "step": 870 }, { "epoch": 0.09535017378691261, "grad_norm": 1.3104988464559975, "learning_rate": 4.8886542581449726e-05, "loss": 0.7024, "num_input_tokens_seen": 157470656, "step": 871 }, { "epoch": 0.09545964585785052, "grad_norm": 1.2277290967972865, "learning_rate": 4.8884003585180053e-05, "loss": 0.7516, "num_input_tokens_seen": 157678304, "step": 872 }, { "epoch": 0.09556911792878842, "grad_norm": 1.2719484762576951, "learning_rate": 4.888146176347842e-05, "loss": 0.7469, "num_input_tokens_seen": 157853024, "step": 873 }, { "epoch": 0.09567858999972632, "grad_norm": 1.2988070149247817, "learning_rate": 4.8878917116645514e-05, "loss": 0.7961, "num_input_tokens_seen": 158029088, "step": 874 }, { "epoch": 0.09578806207066422, "grad_norm": 1.5410966162919377, "learning_rate": 4.887636964498236e-05, "loss": 0.9732, "num_input_tokens_seen": 158202016, "step": 875 }, { "epoch": 0.09589753414160213, "grad_norm": 1.2235468341794697, "learning_rate": 4.887381934879032e-05, "loss": 0.605, "num_input_tokens_seen": 158357248, "step": 876 }, { "epoch": 0.09600700621254002, "grad_norm": 1.086006327032834, "learning_rate": 4.887126622837109e-05, "loss": 0.624, "num_input_tokens_seen": 158556384, "step": 877 }, { "epoch": 0.09611647828347793, "grad_norm": 1.5304641264007814, "learning_rate": 4.88687102840267e-05, "loss": 0.7061, "num_input_tokens_seen": 158700864, "step": 878 }, { "epoch": 0.09622595035441583, "grad_norm": 1.2213667141636533, "learning_rate": 4.886615151605951e-05, "loss": 0.8227, "num_input_tokens_seen": 158870432, "step": 879 }, { "epoch": 0.09633542242535373, "grad_norm": 1.2929751116547141, "learning_rate": 4.886358992477222e-05, "loss": 0.738, "num_input_tokens_seen": 159068896, "step": 880 }, { "epoch": 0.09644489449629164, "grad_norm": 1.2866709864169987, "learning_rate": 4.886102551046786e-05, "loss": 0.6958, "num_input_tokens_seen": 159242048, "step": 881 }, { "epoch": 0.09655436656722953, "grad_norm": 1.1535300145735674, "learning_rate": 4.8858458273449806e-05, "loss": 0.5828, "num_input_tokens_seen": 159412064, "step": 882 }, { "epoch": 0.09666383863816744, "grad_norm": 1.2175678486492871, "learning_rate": 4.885588821402174e-05, "loss": 0.898, "num_input_tokens_seen": 159625312, "step": 883 }, { "epoch": 0.09677331070910535, "grad_norm": 1.3318796379191193, "learning_rate": 4.88533153324877e-05, "loss": 0.8381, "num_input_tokens_seen": 159813472, "step": 884 }, { "epoch": 0.09688278278004324, "grad_norm": 1.0756042977645486, "learning_rate": 4.885073962915207e-05, "loss": 0.555, "num_input_tokens_seen": 160008800, "step": 885 }, { "epoch": 0.09699225485098115, "grad_norm": 1.3002185692812374, "learning_rate": 4.8848161104319525e-05, "loss": 0.6893, "num_input_tokens_seen": 160192704, "step": 886 }, { "epoch": 0.09710172692191904, "grad_norm": 1.2672096034143663, "learning_rate": 4.8845579758295114e-05, "loss": 0.6842, "num_input_tokens_seen": 160349280, "step": 887 }, { "epoch": 0.09721119899285695, "grad_norm": 1.2505971435921774, "learning_rate": 4.88429955913842e-05, "loss": 0.8236, "num_input_tokens_seen": 160572608, "step": 888 }, { "epoch": 0.09732067106379486, "grad_norm": 1.3572855709443572, "learning_rate": 4.8840408603892495e-05, "loss": 0.7238, "num_input_tokens_seen": 160731648, "step": 889 }, { "epoch": 0.09743014313473275, "grad_norm": 1.3190747390031055, "learning_rate": 4.883781879612602e-05, "loss": 0.7868, "num_input_tokens_seen": 160928992, "step": 890 }, { "epoch": 0.09753961520567066, "grad_norm": 1.3137162340871882, "learning_rate": 4.883522616839116e-05, "loss": 0.8868, "num_input_tokens_seen": 161118720, "step": 891 }, { "epoch": 0.09764908727660855, "grad_norm": 1.4073180487263695, "learning_rate": 4.88326307209946e-05, "loss": 0.6829, "num_input_tokens_seen": 161288736, "step": 892 }, { "epoch": 0.09775855934754646, "grad_norm": 1.2139060765341296, "learning_rate": 4.883003245424339e-05, "loss": 0.6839, "num_input_tokens_seen": 161476224, "step": 893 }, { "epoch": 0.09786803141848435, "grad_norm": 1.2518908980641335, "learning_rate": 4.8827431368444896e-05, "loss": 0.879, "num_input_tokens_seen": 161682080, "step": 894 }, { "epoch": 0.09797750348942226, "grad_norm": 1.380787762588453, "learning_rate": 4.882482746390682e-05, "loss": 0.9603, "num_input_tokens_seen": 161846720, "step": 895 }, { "epoch": 0.09808697556036017, "grad_norm": 1.2537833655682205, "learning_rate": 4.8822220740937195e-05, "loss": 0.6528, "num_input_tokens_seen": 161958720, "step": 896 }, { "epoch": 0.09819644763129806, "grad_norm": 1.129217718643049, "learning_rate": 4.8819611199844406e-05, "loss": 0.7086, "num_input_tokens_seen": 162172416, "step": 897 }, { "epoch": 0.09830591970223597, "grad_norm": 1.3460339498050224, "learning_rate": 4.881699884093715e-05, "loss": 0.8896, "num_input_tokens_seen": 162360800, "step": 898 }, { "epoch": 0.09841539177317386, "grad_norm": 1.2008561378412044, "learning_rate": 4.881438366452446e-05, "loss": 0.7655, "num_input_tokens_seen": 162525216, "step": 899 }, { "epoch": 0.09852486384411177, "grad_norm": 1.3613339920683738, "learning_rate": 4.88117656709157e-05, "loss": 0.9436, "num_input_tokens_seen": 162717184, "step": 900 }, { "epoch": 0.09863433591504968, "grad_norm": 1.3329206612850972, "learning_rate": 4.880914486042059e-05, "loss": 0.6904, "num_input_tokens_seen": 162904000, "step": 901 }, { "epoch": 0.09874380798598757, "grad_norm": 1.2789171893025388, "learning_rate": 4.8806521233349146e-05, "loss": 0.8643, "num_input_tokens_seen": 163096416, "step": 902 }, { "epoch": 0.09885328005692548, "grad_norm": 1.3181317620354924, "learning_rate": 4.880389479001176e-05, "loss": 0.7828, "num_input_tokens_seen": 163276960, "step": 903 }, { "epoch": 0.09896275212786337, "grad_norm": 1.2084213202877752, "learning_rate": 4.880126553071912e-05, "loss": 0.8228, "num_input_tokens_seen": 163496256, "step": 904 }, { "epoch": 0.09907222419880128, "grad_norm": 1.3070105697537173, "learning_rate": 4.879863345578227e-05, "loss": 0.752, "num_input_tokens_seen": 163654176, "step": 905 }, { "epoch": 0.09918169626973919, "grad_norm": 1.3629092170669566, "learning_rate": 4.879599856551258e-05, "loss": 0.8627, "num_input_tokens_seen": 163834496, "step": 906 }, { "epoch": 0.09929116834067708, "grad_norm": 1.2550279110392277, "learning_rate": 4.879336086022175e-05, "loss": 0.6578, "num_input_tokens_seen": 164006528, "step": 907 }, { "epoch": 0.09940064041161499, "grad_norm": 1.2801070348870096, "learning_rate": 4.879072034022182e-05, "loss": 0.8276, "num_input_tokens_seen": 164215520, "step": 908 }, { "epoch": 0.09951011248255288, "grad_norm": 1.698222672984846, "learning_rate": 4.8788077005825146e-05, "loss": 0.7947, "num_input_tokens_seen": 164395168, "step": 909 }, { "epoch": 0.09961958455349079, "grad_norm": 1.2818927083310452, "learning_rate": 4.878543085734444e-05, "loss": 0.6657, "num_input_tokens_seen": 164566528, "step": 910 }, { "epoch": 0.09972905662442869, "grad_norm": 1.2852236373533517, "learning_rate": 4.8782781895092734e-05, "loss": 0.9609, "num_input_tokens_seen": 164762528, "step": 911 }, { "epoch": 0.0998385286953666, "grad_norm": 1.2355349438525702, "learning_rate": 4.878013011938339e-05, "loss": 0.6463, "num_input_tokens_seen": 164933664, "step": 912 }, { "epoch": 0.0999480007663045, "grad_norm": 1.5445833281331884, "learning_rate": 4.877747553053012e-05, "loss": 0.8373, "num_input_tokens_seen": 165124512, "step": 913 }, { "epoch": 0.1000574728372424, "grad_norm": 1.250051497352295, "learning_rate": 4.877481812884695e-05, "loss": 0.7064, "num_input_tokens_seen": 165311776, "step": 914 }, { "epoch": 0.1001669449081803, "grad_norm": 1.2846823981129738, "learning_rate": 4.877215791464824e-05, "loss": 0.8218, "num_input_tokens_seen": 165483584, "step": 915 }, { "epoch": 0.1002764169791182, "grad_norm": 1.3256895286469361, "learning_rate": 4.876949488824869e-05, "loss": 0.7181, "num_input_tokens_seen": 165640384, "step": 916 }, { "epoch": 0.1003858890500561, "grad_norm": 1.2535704165971993, "learning_rate": 4.8766829049963344e-05, "loss": 0.7043, "num_input_tokens_seen": 165851168, "step": 917 }, { "epoch": 0.10049536112099401, "grad_norm": 1.2898514154161373, "learning_rate": 4.876416040010755e-05, "loss": 0.7461, "num_input_tokens_seen": 166040672, "step": 918 }, { "epoch": 0.1006048331919319, "grad_norm": 1.2277950241542317, "learning_rate": 4.876148893899701e-05, "loss": 0.6692, "num_input_tokens_seen": 166219424, "step": 919 }, { "epoch": 0.10071430526286981, "grad_norm": 1.2941962002497558, "learning_rate": 4.8758814666947756e-05, "loss": 0.8863, "num_input_tokens_seen": 166421248, "step": 920 }, { "epoch": 0.10082377733380771, "grad_norm": 1.3211918898753643, "learning_rate": 4.875613758427614e-05, "loss": 0.7305, "num_input_tokens_seen": 166598208, "step": 921 }, { "epoch": 0.10093324940474561, "grad_norm": 1.109651933947143, "learning_rate": 4.875345769129887e-05, "loss": 0.6228, "num_input_tokens_seen": 166793536, "step": 922 }, { "epoch": 0.10104272147568352, "grad_norm": 1.3041746307877378, "learning_rate": 4.875077498833296e-05, "loss": 0.8925, "num_input_tokens_seen": 166970496, "step": 923 }, { "epoch": 0.10115219354662142, "grad_norm": 1.3469222129105436, "learning_rate": 4.874808947569577e-05, "loss": 0.8568, "num_input_tokens_seen": 167159552, "step": 924 }, { "epoch": 0.10126166561755932, "grad_norm": 1.2002238680714796, "learning_rate": 4.8745401153704996e-05, "loss": 0.624, "num_input_tokens_seen": 167337632, "step": 925 }, { "epoch": 0.10137113768849722, "grad_norm": 1.2514967540374249, "learning_rate": 4.874271002267866e-05, "loss": 0.6356, "num_input_tokens_seen": 167505408, "step": 926 }, { "epoch": 0.10148060975943513, "grad_norm": 1.1662822271639217, "learning_rate": 4.874001608293511e-05, "loss": 0.7896, "num_input_tokens_seen": 167707456, "step": 927 }, { "epoch": 0.10159008183037303, "grad_norm": 1.4355046064940031, "learning_rate": 4.873731933479305e-05, "loss": 0.7985, "num_input_tokens_seen": 167886208, "step": 928 }, { "epoch": 0.10169955390131093, "grad_norm": 1.3802946627506991, "learning_rate": 4.873461977857149e-05, "loss": 0.8132, "num_input_tokens_seen": 168025536, "step": 929 }, { "epoch": 0.10180902597224883, "grad_norm": 1.276692168841782, "learning_rate": 4.8731917414589776e-05, "loss": 0.7088, "num_input_tokens_seen": 168209216, "step": 930 }, { "epoch": 0.10191849804318673, "grad_norm": 1.1717711847733017, "learning_rate": 4.872921224316761e-05, "loss": 0.5851, "num_input_tokens_seen": 168390880, "step": 931 }, { "epoch": 0.10202797011412464, "grad_norm": 1.1486843129369155, "learning_rate": 4.872650426462499e-05, "loss": 0.6904, "num_input_tokens_seen": 168590240, "step": 932 }, { "epoch": 0.10213744218506253, "grad_norm": 1.2590068099070286, "learning_rate": 4.8723793479282274e-05, "loss": 0.7619, "num_input_tokens_seen": 168796544, "step": 933 }, { "epoch": 0.10224691425600044, "grad_norm": 1.403207751852087, "learning_rate": 4.872107988746014e-05, "loss": 0.6939, "num_input_tokens_seen": 168956704, "step": 934 }, { "epoch": 0.10235638632693835, "grad_norm": 1.2999746668621421, "learning_rate": 4.871836348947961e-05, "loss": 0.6586, "num_input_tokens_seen": 169133664, "step": 935 }, { "epoch": 0.10246585839787624, "grad_norm": 1.1805066428928659, "learning_rate": 4.871564428566201e-05, "loss": 0.579, "num_input_tokens_seen": 169330112, "step": 936 }, { "epoch": 0.10257533046881415, "grad_norm": 1.1283295950459058, "learning_rate": 4.8712922276329035e-05, "loss": 0.6513, "num_input_tokens_seen": 169520512, "step": 937 }, { "epoch": 0.10268480253975204, "grad_norm": 1.2174271471288474, "learning_rate": 4.8710197461802686e-05, "loss": 0.7276, "num_input_tokens_seen": 169714272, "step": 938 }, { "epoch": 0.10279427461068995, "grad_norm": 1.0902498643923586, "learning_rate": 4.8707469842405304e-05, "loss": 0.5897, "num_input_tokens_seen": 169885408, "step": 939 }, { "epoch": 0.10290374668162786, "grad_norm": 1.2203392524082484, "learning_rate": 4.870473941845955e-05, "loss": 0.7027, "num_input_tokens_seen": 170049376, "step": 940 }, { "epoch": 0.10301321875256575, "grad_norm": 1.2806162778856154, "learning_rate": 4.870200619028845e-05, "loss": 0.6164, "num_input_tokens_seen": 170244928, "step": 941 }, { "epoch": 0.10312269082350366, "grad_norm": 1.495071819073051, "learning_rate": 4.869927015821533e-05, "loss": 0.9216, "num_input_tokens_seen": 170375296, "step": 942 }, { "epoch": 0.10323216289444155, "grad_norm": 1.1784312146246056, "learning_rate": 4.8696531322563857e-05, "loss": 0.6821, "num_input_tokens_seen": 170571744, "step": 943 }, { "epoch": 0.10334163496537946, "grad_norm": 1.304127967227795, "learning_rate": 4.869378968365802e-05, "loss": 0.6696, "num_input_tokens_seen": 170708384, "step": 944 }, { "epoch": 0.10345110703631737, "grad_norm": 1.2588005686871093, "learning_rate": 4.869104524182216e-05, "loss": 0.7371, "num_input_tokens_seen": 170902816, "step": 945 }, { "epoch": 0.10356057910725526, "grad_norm": 1.2386213732011966, "learning_rate": 4.868829799738094e-05, "loss": 0.6522, "num_input_tokens_seen": 171067680, "step": 946 }, { "epoch": 0.10367005117819317, "grad_norm": 1.1962541349333569, "learning_rate": 4.8685547950659346e-05, "loss": 0.6111, "num_input_tokens_seen": 171226272, "step": 947 }, { "epoch": 0.10377952324913106, "grad_norm": 1.3179640339674858, "learning_rate": 4.868279510198271e-05, "loss": 0.5951, "num_input_tokens_seen": 171408160, "step": 948 }, { "epoch": 0.10388899532006897, "grad_norm": 1.4114232592664757, "learning_rate": 4.8680039451676695e-05, "loss": 0.7885, "num_input_tokens_seen": 171588704, "step": 949 }, { "epoch": 0.10399846739100686, "grad_norm": 1.3491836958563093, "learning_rate": 4.867728100006728e-05, "loss": 0.6358, "num_input_tokens_seen": 171774848, "step": 950 }, { "epoch": 0.10410793946194477, "grad_norm": 1.2675684853585456, "learning_rate": 4.8674519747480774e-05, "loss": 0.7533, "num_input_tokens_seen": 171970400, "step": 951 }, { "epoch": 0.10421741153288268, "grad_norm": 1.2342665902348482, "learning_rate": 4.867175569424385e-05, "loss": 0.6149, "num_input_tokens_seen": 172117120, "step": 952 }, { "epoch": 0.10432688360382057, "grad_norm": 1.2384806090443572, "learning_rate": 4.866898884068348e-05, "loss": 0.715, "num_input_tokens_seen": 172268096, "step": 953 }, { "epoch": 0.10443635567475848, "grad_norm": 1.2826400752470273, "learning_rate": 4.866621918712697e-05, "loss": 0.7553, "num_input_tokens_seen": 172415488, "step": 954 }, { "epoch": 0.10454582774569637, "grad_norm": 1.2677159873628945, "learning_rate": 4.866344673390198e-05, "loss": 0.6576, "num_input_tokens_seen": 172624928, "step": 955 }, { "epoch": 0.10465529981663428, "grad_norm": 1.3895583187217604, "learning_rate": 4.8660671481336475e-05, "loss": 0.6706, "num_input_tokens_seen": 172812192, "step": 956 }, { "epoch": 0.10476477188757219, "grad_norm": 1.3270570388097325, "learning_rate": 4.865789342975877e-05, "loss": 0.6402, "num_input_tokens_seen": 173019168, "step": 957 }, { "epoch": 0.10487424395851008, "grad_norm": 1.2261571518086074, "learning_rate": 4.865511257949749e-05, "loss": 0.8545, "num_input_tokens_seen": 173223008, "step": 958 }, { "epoch": 0.10498371602944799, "grad_norm": 1.2363148186857988, "learning_rate": 4.865232893088162e-05, "loss": 0.7539, "num_input_tokens_seen": 173417888, "step": 959 }, { "epoch": 0.10509318810038588, "grad_norm": 1.1261174691958653, "learning_rate": 4.864954248424045e-05, "loss": 0.6802, "num_input_tokens_seen": 173625536, "step": 960 }, { "epoch": 0.10520266017132379, "grad_norm": 1.287553956465964, "learning_rate": 4.864675323990361e-05, "loss": 0.7212, "num_input_tokens_seen": 173815712, "step": 961 }, { "epoch": 0.1053121322422617, "grad_norm": 1.1735464256312476, "learning_rate": 4.864396119820108e-05, "loss": 0.6268, "num_input_tokens_seen": 174001184, "step": 962 }, { "epoch": 0.1054216043131996, "grad_norm": 1.3259011892668853, "learning_rate": 4.864116635946313e-05, "loss": 0.6949, "num_input_tokens_seen": 174220032, "step": 963 }, { "epoch": 0.1055310763841375, "grad_norm": 1.2055880612992829, "learning_rate": 4.863836872402039e-05, "loss": 0.5744, "num_input_tokens_seen": 174402368, "step": 964 }, { "epoch": 0.1056405484550754, "grad_norm": 1.3388281440189491, "learning_rate": 4.863556829220383e-05, "loss": 0.9442, "num_input_tokens_seen": 174593440, "step": 965 }, { "epoch": 0.1057500205260133, "grad_norm": 1.2805831848734925, "learning_rate": 4.863276506434471e-05, "loss": 0.6836, "num_input_tokens_seen": 174748224, "step": 966 }, { "epoch": 0.1058594925969512, "grad_norm": 1.3352311704272521, "learning_rate": 4.862995904077468e-05, "loss": 0.7871, "num_input_tokens_seen": 174914432, "step": 967 }, { "epoch": 0.1059689646678891, "grad_norm": 1.4122926374155322, "learning_rate": 4.8627150221825654e-05, "loss": 0.7583, "num_input_tokens_seen": 175066752, "step": 968 }, { "epoch": 0.10607843673882701, "grad_norm": 1.3398300926468887, "learning_rate": 4.862433860782993e-05, "loss": 0.6804, "num_input_tokens_seen": 175255584, "step": 969 }, { "epoch": 0.1061879088097649, "grad_norm": 1.3480835168192296, "learning_rate": 4.8621524199120106e-05, "loss": 0.7711, "num_input_tokens_seen": 175453152, "step": 970 }, { "epoch": 0.10629738088070281, "grad_norm": 1.3240551242618432, "learning_rate": 4.861870699602913e-05, "loss": 0.7371, "num_input_tokens_seen": 175644000, "step": 971 }, { "epoch": 0.1064068529516407, "grad_norm": 1.236230156317847, "learning_rate": 4.8615886998890266e-05, "loss": 0.8159, "num_input_tokens_seen": 175830592, "step": 972 }, { "epoch": 0.10651632502257861, "grad_norm": 1.1780394606562814, "learning_rate": 4.861306420803712e-05, "loss": 0.6739, "num_input_tokens_seen": 176043616, "step": 973 }, { "epoch": 0.10662579709351652, "grad_norm": 1.295463943371306, "learning_rate": 4.861023862380361e-05, "loss": 0.8644, "num_input_tokens_seen": 176254624, "step": 974 }, { "epoch": 0.10673526916445442, "grad_norm": 1.1985161428498285, "learning_rate": 4.860741024652401e-05, "loss": 0.7162, "num_input_tokens_seen": 176423072, "step": 975 }, { "epoch": 0.10684474123539232, "grad_norm": 1.3126598896547834, "learning_rate": 4.860457907653291e-05, "loss": 0.9685, "num_input_tokens_seen": 176637440, "step": 976 }, { "epoch": 0.10695421330633022, "grad_norm": 1.2878300179037752, "learning_rate": 4.860174511416523e-05, "loss": 0.7168, "num_input_tokens_seen": 176812384, "step": 977 }, { "epoch": 0.10706368537726813, "grad_norm": 1.327133212307466, "learning_rate": 4.8598908359756226e-05, "loss": 0.7853, "num_input_tokens_seen": 177001664, "step": 978 }, { "epoch": 0.10717315744820603, "grad_norm": 1.281851882986516, "learning_rate": 4.859606881364146e-05, "loss": 0.7571, "num_input_tokens_seen": 177153536, "step": 979 }, { "epoch": 0.10728262951914393, "grad_norm": 1.2766298404707541, "learning_rate": 4.859322647615687e-05, "loss": 0.6039, "num_input_tokens_seen": 177336544, "step": 980 }, { "epoch": 0.10739210159008183, "grad_norm": 1.4063261223429055, "learning_rate": 4.85903813476387e-05, "loss": 0.8699, "num_input_tokens_seen": 177516640, "step": 981 }, { "epoch": 0.10750157366101973, "grad_norm": 1.40743320279482, "learning_rate": 4.8587533428423504e-05, "loss": 0.7725, "num_input_tokens_seen": 177671424, "step": 982 }, { "epoch": 0.10761104573195764, "grad_norm": 1.3211939200172191, "learning_rate": 4.85846827188482e-05, "loss": 0.818, "num_input_tokens_seen": 177877728, "step": 983 }, { "epoch": 0.10772051780289553, "grad_norm": 1.3206806008314564, "learning_rate": 4.858182921925001e-05, "loss": 0.6957, "num_input_tokens_seen": 178066560, "step": 984 }, { "epoch": 0.10782998987383344, "grad_norm": 1.3426051030848907, "learning_rate": 4.857897292996651e-05, "loss": 0.8008, "num_input_tokens_seen": 178253376, "step": 985 }, { "epoch": 0.10793946194477134, "grad_norm": 1.3110898026029727, "learning_rate": 4.857611385133559e-05, "loss": 0.6187, "num_input_tokens_seen": 178434816, "step": 986 }, { "epoch": 0.10804893401570924, "grad_norm": 1.1398141147246283, "learning_rate": 4.857325198369546e-05, "loss": 0.7601, "num_input_tokens_seen": 178634848, "step": 987 }, { "epoch": 0.10815840608664715, "grad_norm": 1.3302042224952948, "learning_rate": 4.8570387327384695e-05, "loss": 0.8135, "num_input_tokens_seen": 178818752, "step": 988 }, { "epoch": 0.10826787815758504, "grad_norm": 1.2574514498732952, "learning_rate": 4.856751988274216e-05, "loss": 0.7095, "num_input_tokens_seen": 179019680, "step": 989 }, { "epoch": 0.10837735022852295, "grad_norm": 1.249889178131839, "learning_rate": 4.8564649650107084e-05, "loss": 0.6195, "num_input_tokens_seen": 179179840, "step": 990 }, { "epoch": 0.10848682229946086, "grad_norm": 1.24312872585469, "learning_rate": 4.8561776629819e-05, "loss": 0.6993, "num_input_tokens_seen": 179350528, "step": 991 }, { "epoch": 0.10859629437039875, "grad_norm": 1.2541090661611436, "learning_rate": 4.855890082221778e-05, "loss": 0.5838, "num_input_tokens_seen": 179524576, "step": 992 }, { "epoch": 0.10870576644133666, "grad_norm": 1.0915661945165824, "learning_rate": 4.8556022227643636e-05, "loss": 0.7074, "num_input_tokens_seen": 179731776, "step": 993 }, { "epoch": 0.10881523851227455, "grad_norm": 1.3705914656002247, "learning_rate": 4.8553140846437094e-05, "loss": 0.7154, "num_input_tokens_seen": 179870880, "step": 994 }, { "epoch": 0.10892471058321246, "grad_norm": 1.2189702055140015, "learning_rate": 4.855025667893901e-05, "loss": 0.7043, "num_input_tokens_seen": 180044032, "step": 995 }, { "epoch": 0.10903418265415037, "grad_norm": 1.505804830273296, "learning_rate": 4.854736972549058e-05, "loss": 0.9028, "num_input_tokens_seen": 180214720, "step": 996 }, { "epoch": 0.10914365472508826, "grad_norm": 1.336941614974714, "learning_rate": 4.854447998643333e-05, "loss": 0.8202, "num_input_tokens_seen": 180392800, "step": 997 }, { "epoch": 0.10925312679602617, "grad_norm": 1.3277951896905564, "learning_rate": 4.8541587462109105e-05, "loss": 0.8383, "num_input_tokens_seen": 180569312, "step": 998 }, { "epoch": 0.10936259886696406, "grad_norm": 1.4176738001391838, "learning_rate": 4.8538692152860094e-05, "loss": 0.8878, "num_input_tokens_seen": 180761952, "step": 999 }, { "epoch": 0.10947207093790197, "grad_norm": 1.3087005571646475, "learning_rate": 4.853579405902879e-05, "loss": 0.6891, "num_input_tokens_seen": 180949216, "step": 1000 }, { "epoch": 0.10958154300883986, "grad_norm": 1.264189058623954, "learning_rate": 4.853289318095805e-05, "loss": 0.6913, "num_input_tokens_seen": 181110944, "step": 1001 }, { "epoch": 0.10969101507977777, "grad_norm": 1.2916122189192794, "learning_rate": 4.8529989518991033e-05, "loss": 0.6892, "num_input_tokens_seen": 181279392, "step": 1002 }, { "epoch": 0.10980048715071568, "grad_norm": 1.2239247527148993, "learning_rate": 4.8527083073471236e-05, "loss": 0.7772, "num_input_tokens_seen": 181448064, "step": 1003 }, { "epoch": 0.10990995922165357, "grad_norm": 1.330045007861966, "learning_rate": 4.852417384474248e-05, "loss": 0.6991, "num_input_tokens_seen": 181609344, "step": 1004 }, { "epoch": 0.11001943129259148, "grad_norm": 1.3924964570526033, "learning_rate": 4.852126183314894e-05, "loss": 0.6477, "num_input_tokens_seen": 181806016, "step": 1005 }, { "epoch": 0.11012890336352937, "grad_norm": 1.2223970181048058, "learning_rate": 4.851834703903508e-05, "loss": 0.6692, "num_input_tokens_seen": 181990816, "step": 1006 }, { "epoch": 0.11023837543446728, "grad_norm": 1.2670770956879187, "learning_rate": 4.851542946274573e-05, "loss": 0.616, "num_input_tokens_seen": 182144480, "step": 1007 }, { "epoch": 0.11034784750540519, "grad_norm": 1.261682988182599, "learning_rate": 4.8512509104626036e-05, "loss": 0.7609, "num_input_tokens_seen": 182318528, "step": 1008 }, { "epoch": 0.11045731957634308, "grad_norm": 1.2300999164316744, "learning_rate": 4.850958596502145e-05, "loss": 0.6533, "num_input_tokens_seen": 182477120, "step": 1009 }, { "epoch": 0.11056679164728099, "grad_norm": 1.314663375658542, "learning_rate": 4.85066600442778e-05, "loss": 0.6286, "num_input_tokens_seen": 182640416, "step": 1010 }, { "epoch": 0.11067626371821888, "grad_norm": 1.2297566124229764, "learning_rate": 4.8503731342741195e-05, "loss": 0.6152, "num_input_tokens_seen": 182837760, "step": 1011 }, { "epoch": 0.11078573578915679, "grad_norm": 1.180298400746217, "learning_rate": 4.8500799860758105e-05, "loss": 0.8555, "num_input_tokens_seen": 183048096, "step": 1012 }, { "epoch": 0.1108952078600947, "grad_norm": 1.2699334549177896, "learning_rate": 4.849786559867532e-05, "loss": 0.7277, "num_input_tokens_seen": 183228416, "step": 1013 }, { "epoch": 0.1110046799310326, "grad_norm": 1.5127357393776852, "learning_rate": 4.8494928556839946e-05, "loss": 0.8214, "num_input_tokens_seen": 183383200, "step": 1014 }, { "epoch": 0.1111141520019705, "grad_norm": 1.1505597804626555, "learning_rate": 4.849198873559945e-05, "loss": 0.7543, "num_input_tokens_seen": 183595552, "step": 1015 }, { "epoch": 0.1112236240729084, "grad_norm": 1.246835087493518, "learning_rate": 4.848904613530159e-05, "loss": 0.6592, "num_input_tokens_seen": 183762432, "step": 1016 }, { "epoch": 0.1113330961438463, "grad_norm": 1.15868291839693, "learning_rate": 4.848610075629447e-05, "loss": 0.8744, "num_input_tokens_seen": 183970080, "step": 1017 }, { "epoch": 0.11144256821478421, "grad_norm": 1.2497388566938374, "learning_rate": 4.848315259892654e-05, "loss": 0.6931, "num_input_tokens_seen": 184178624, "step": 1018 }, { "epoch": 0.1115520402857221, "grad_norm": 1.3191940717005268, "learning_rate": 4.848020166354654e-05, "loss": 0.8305, "num_input_tokens_seen": 184336320, "step": 1019 }, { "epoch": 0.11166151235666001, "grad_norm": 1.1145008567645895, "learning_rate": 4.847724795050358e-05, "loss": 0.5545, "num_input_tokens_seen": 184497376, "step": 1020 }, { "epoch": 0.1117709844275979, "grad_norm": 1.3624914554013046, "learning_rate": 4.847429146014706e-05, "loss": 0.8031, "num_input_tokens_seen": 184703680, "step": 1021 }, { "epoch": 0.11188045649853581, "grad_norm": 1.1807010666922617, "learning_rate": 4.847133219282674e-05, "loss": 0.6681, "num_input_tokens_seen": 184894752, "step": 1022 }, { "epoch": 0.1119899285694737, "grad_norm": 1.2824166266722916, "learning_rate": 4.846837014889269e-05, "loss": 0.7536, "num_input_tokens_seen": 185073504, "step": 1023 }, { "epoch": 0.11209940064041161, "grad_norm": 1.3562868051764763, "learning_rate": 4.8465405328695315e-05, "loss": 0.8242, "num_input_tokens_seen": 185254944, "step": 1024 }, { "epoch": 0.11220887271134952, "grad_norm": 1.2934251884322432, "learning_rate": 4.8462437732585345e-05, "loss": 0.8374, "num_input_tokens_seen": 185456320, "step": 1025 }, { "epoch": 0.11231834478228742, "grad_norm": 1.2319330131538466, "learning_rate": 4.845946736091384e-05, "loss": 0.6488, "num_input_tokens_seen": 185636640, "step": 1026 }, { "epoch": 0.11242781685322532, "grad_norm": 1.1456855967676867, "learning_rate": 4.8456494214032205e-05, "loss": 0.5977, "num_input_tokens_seen": 185827936, "step": 1027 }, { "epoch": 0.11253728892416322, "grad_norm": 1.256286548443438, "learning_rate": 4.8453518292292146e-05, "loss": 0.7064, "num_input_tokens_seen": 186013184, "step": 1028 }, { "epoch": 0.11264676099510113, "grad_norm": 1.2097970329644323, "learning_rate": 4.8450539596045694e-05, "loss": 0.7034, "num_input_tokens_seen": 186166624, "step": 1029 }, { "epoch": 0.11275623306603903, "grad_norm": 1.212476773383669, "learning_rate": 4.844755812564525e-05, "loss": 0.6872, "num_input_tokens_seen": 186342240, "step": 1030 }, { "epoch": 0.11286570513697693, "grad_norm": 1.3027351070556414, "learning_rate": 4.84445738814435e-05, "loss": 0.7132, "num_input_tokens_seen": 186536896, "step": 1031 }, { "epoch": 0.11297517720791483, "grad_norm": 1.167696815908066, "learning_rate": 4.8441586863793475e-05, "loss": 0.7331, "num_input_tokens_seen": 186743200, "step": 1032 }, { "epoch": 0.11308464927885273, "grad_norm": 1.3243208764229133, "learning_rate": 4.843859707304854e-05, "loss": 0.6708, "num_input_tokens_seen": 186953536, "step": 1033 }, { "epoch": 0.11319412134979064, "grad_norm": 1.3088936533235824, "learning_rate": 4.843560450956238e-05, "loss": 1.0204, "num_input_tokens_seen": 187169472, "step": 1034 }, { "epoch": 0.11330359342072854, "grad_norm": 1.2057832852680448, "learning_rate": 4.8432609173689004e-05, "loss": 0.7375, "num_input_tokens_seen": 187349120, "step": 1035 }, { "epoch": 0.11341306549166644, "grad_norm": 1.4046843820437578, "learning_rate": 4.8429611065782765e-05, "loss": 0.7696, "num_input_tokens_seen": 187537056, "step": 1036 }, { "epoch": 0.11352253756260434, "grad_norm": 1.1007081634971505, "learning_rate": 4.8426610186198315e-05, "loss": 0.6767, "num_input_tokens_seen": 187717824, "step": 1037 }, { "epoch": 0.11363200963354224, "grad_norm": 1.176999142541954, "learning_rate": 4.8423606535290675e-05, "loss": 0.5902, "num_input_tokens_seen": 187853792, "step": 1038 }, { "epoch": 0.11374148170448015, "grad_norm": 1.1898485350183865, "learning_rate": 4.842060011341516e-05, "loss": 0.7321, "num_input_tokens_seen": 188039936, "step": 1039 }, { "epoch": 0.11385095377541804, "grad_norm": 1.2649578172639167, "learning_rate": 4.841759092092741e-05, "loss": 0.8522, "num_input_tokens_seen": 188225856, "step": 1040 }, { "epoch": 0.11396042584635595, "grad_norm": 1.195403002136428, "learning_rate": 4.841457895818344e-05, "loss": 0.6799, "num_input_tokens_seen": 188357120, "step": 1041 }, { "epoch": 0.11406989791729386, "grad_norm": 1.2081684076727561, "learning_rate": 4.841156422553953e-05, "loss": 0.7518, "num_input_tokens_seen": 188529376, "step": 1042 }, { "epoch": 0.11417936998823175, "grad_norm": 1.2161054529624125, "learning_rate": 4.840854672335233e-05, "loss": 0.6675, "num_input_tokens_seen": 188713280, "step": 1043 }, { "epoch": 0.11428884205916966, "grad_norm": 1.1678200821685252, "learning_rate": 4.84055264519788e-05, "loss": 0.632, "num_input_tokens_seen": 188861120, "step": 1044 }, { "epoch": 0.11439831413010755, "grad_norm": 1.2505189295315606, "learning_rate": 4.8402503411776235e-05, "loss": 0.5964, "num_input_tokens_seen": 189041440, "step": 1045 }, { "epoch": 0.11450778620104546, "grad_norm": 1.2643411034619259, "learning_rate": 4.839947760310226e-05, "loss": 0.9238, "num_input_tokens_seen": 189237888, "step": 1046 }, { "epoch": 0.11461725827198337, "grad_norm": 1.199134600044696, "learning_rate": 4.8396449026314803e-05, "loss": 0.5199, "num_input_tokens_seen": 189417760, "step": 1047 }, { "epoch": 0.11472673034292126, "grad_norm": 1.310206270940614, "learning_rate": 4.839341768177217e-05, "loss": 0.7467, "num_input_tokens_seen": 189599872, "step": 1048 }, { "epoch": 0.11483620241385917, "grad_norm": 1.2074556561338148, "learning_rate": 4.839038356983293e-05, "loss": 0.5758, "num_input_tokens_seen": 189784000, "step": 1049 }, { "epoch": 0.11494567448479706, "grad_norm": 1.1103251511947896, "learning_rate": 4.838734669085604e-05, "loss": 0.6175, "num_input_tokens_seen": 189969024, "step": 1050 }, { "epoch": 0.11505514655573497, "grad_norm": 1.21754014044982, "learning_rate": 4.838430704520074e-05, "loss": 0.7275, "num_input_tokens_seen": 190148672, "step": 1051 }, { "epoch": 0.11516461862667288, "grad_norm": 1.2452185735376655, "learning_rate": 4.838126463322662e-05, "loss": 0.6634, "num_input_tokens_seen": 190325632, "step": 1052 }, { "epoch": 0.11527409069761077, "grad_norm": 1.3871403013115746, "learning_rate": 4.8378219455293595e-05, "loss": 0.7543, "num_input_tokens_seen": 190502368, "step": 1053 }, { "epoch": 0.11538356276854868, "grad_norm": 1.2417094912387214, "learning_rate": 4.8375171511761895e-05, "loss": 0.8233, "num_input_tokens_seen": 190693216, "step": 1054 }, { "epoch": 0.11549303483948657, "grad_norm": 1.1855139437128566, "learning_rate": 4.837212080299209e-05, "loss": 0.5905, "num_input_tokens_seen": 190835232, "step": 1055 }, { "epoch": 0.11560250691042448, "grad_norm": 1.1836262814926477, "learning_rate": 4.836906732934508e-05, "loss": 0.7737, "num_input_tokens_seen": 191023616, "step": 1056 }, { "epoch": 0.11571197898136237, "grad_norm": 1.9669874869813078, "learning_rate": 4.836601109118208e-05, "loss": 1.0295, "num_input_tokens_seen": 191180640, "step": 1057 }, { "epoch": 0.11582145105230028, "grad_norm": 1.283276327939582, "learning_rate": 4.836295208886463e-05, "loss": 0.6463, "num_input_tokens_seen": 191370144, "step": 1058 }, { "epoch": 0.11593092312323819, "grad_norm": 1.3119514318030585, "learning_rate": 4.835989032275461e-05, "loss": 0.7186, "num_input_tokens_seen": 191548224, "step": 1059 }, { "epoch": 0.11604039519417608, "grad_norm": 1.1668720007900275, "learning_rate": 4.835682579321423e-05, "loss": 0.5458, "num_input_tokens_seen": 191724512, "step": 1060 }, { "epoch": 0.11614986726511399, "grad_norm": 1.3011486920616566, "learning_rate": 4.8353758500606e-05, "loss": 0.8529, "num_input_tokens_seen": 191916032, "step": 1061 }, { "epoch": 0.11625933933605188, "grad_norm": 1.3733464181532842, "learning_rate": 4.8350688445292794e-05, "loss": 0.7331, "num_input_tokens_seen": 192100832, "step": 1062 }, { "epoch": 0.11636881140698979, "grad_norm": 1.2121973190834348, "learning_rate": 4.834761562763777e-05, "loss": 0.6914, "num_input_tokens_seen": 192265472, "step": 1063 }, { "epoch": 0.1164782834779277, "grad_norm": 1.235193681240226, "learning_rate": 4.834454004800446e-05, "loss": 0.5881, "num_input_tokens_seen": 192450048, "step": 1064 }, { "epoch": 0.1165877555488656, "grad_norm": 1.211889274398994, "learning_rate": 4.8341461706756686e-05, "loss": 0.6059, "num_input_tokens_seen": 192640224, "step": 1065 }, { "epoch": 0.1166972276198035, "grad_norm": 1.2655524715787536, "learning_rate": 4.833838060425862e-05, "loss": 0.6311, "num_input_tokens_seen": 192838464, "step": 1066 }, { "epoch": 0.1168066996907414, "grad_norm": 1.2882532527326565, "learning_rate": 4.8335296740874735e-05, "loss": 0.7326, "num_input_tokens_seen": 193019232, "step": 1067 }, { "epoch": 0.1169161717616793, "grad_norm": 1.2564311053556372, "learning_rate": 4.8332210116969855e-05, "loss": 0.7613, "num_input_tokens_seen": 193217472, "step": 1068 }, { "epoch": 0.11702564383261721, "grad_norm": 1.2392114460765598, "learning_rate": 4.832912073290913e-05, "loss": 0.7442, "num_input_tokens_seen": 193380992, "step": 1069 }, { "epoch": 0.1171351159035551, "grad_norm": 1.3870622236380215, "learning_rate": 4.832602858905801e-05, "loss": 0.7619, "num_input_tokens_seen": 193566240, "step": 1070 }, { "epoch": 0.11724458797449301, "grad_norm": 1.3514575366237442, "learning_rate": 4.8322933685782304e-05, "loss": 0.6023, "num_input_tokens_seen": 193738720, "step": 1071 }, { "epoch": 0.1173540600454309, "grad_norm": 1.451138194374879, "learning_rate": 4.831983602344813e-05, "loss": 0.7588, "num_input_tokens_seen": 193910080, "step": 1072 }, { "epoch": 0.11746353211636881, "grad_norm": 1.1446549801134556, "learning_rate": 4.8316735602421935e-05, "loss": 0.7136, "num_input_tokens_seen": 194097568, "step": 1073 }, { "epoch": 0.1175730041873067, "grad_norm": 1.224792272121286, "learning_rate": 4.831363242307049e-05, "loss": 0.7162, "num_input_tokens_seen": 194257728, "step": 1074 }, { "epoch": 0.11768247625824461, "grad_norm": 1.3113023652565572, "learning_rate": 4.83105264857609e-05, "loss": 0.8369, "num_input_tokens_seen": 194465600, "step": 1075 }, { "epoch": 0.11779194832918252, "grad_norm": 1.2316532791315185, "learning_rate": 4.8307417790860586e-05, "loss": 0.5408, "num_input_tokens_seen": 194622624, "step": 1076 }, { "epoch": 0.11790142040012042, "grad_norm": 1.2929406292672283, "learning_rate": 4.830430633873731e-05, "loss": 0.6627, "num_input_tokens_seen": 194790848, "step": 1077 }, { "epoch": 0.11801089247105832, "grad_norm": 1.3054353318197554, "learning_rate": 4.830119212975914e-05, "loss": 0.8924, "num_input_tokens_seen": 194989984, "step": 1078 }, { "epoch": 0.11812036454199622, "grad_norm": 1.326150885571772, "learning_rate": 4.8298075164294484e-05, "loss": 0.7917, "num_input_tokens_seen": 195196064, "step": 1079 }, { "epoch": 0.11822983661293412, "grad_norm": 1.204399474231348, "learning_rate": 4.829495544271208e-05, "loss": 0.728, "num_input_tokens_seen": 195366304, "step": 1080 }, { "epoch": 0.11833930868387203, "grad_norm": 1.2662489495080054, "learning_rate": 4.829183296538097e-05, "loss": 0.7152, "num_input_tokens_seen": 195539456, "step": 1081 }, { "epoch": 0.11844878075480993, "grad_norm": 1.1783654086555315, "learning_rate": 4.828870773267056e-05, "loss": 0.5901, "num_input_tokens_seen": 195724256, "step": 1082 }, { "epoch": 0.11855825282574783, "grad_norm": 1.2312733275241727, "learning_rate": 4.8285579744950535e-05, "loss": 0.7107, "num_input_tokens_seen": 195867616, "step": 1083 }, { "epoch": 0.11866772489668573, "grad_norm": 1.2899569096132852, "learning_rate": 4.828244900259094e-05, "loss": 0.6382, "num_input_tokens_seen": 196049728, "step": 1084 }, { "epoch": 0.11877719696762364, "grad_norm": 1.4033384487862277, "learning_rate": 4.827931550596214e-05, "loss": 0.6994, "num_input_tokens_seen": 196196224, "step": 1085 }, { "epoch": 0.11888666903856154, "grad_norm": 1.4503480362026706, "learning_rate": 4.827617925543482e-05, "loss": 0.751, "num_input_tokens_seen": 196380352, "step": 1086 }, { "epoch": 0.11899614110949944, "grad_norm": 1.174158592485147, "learning_rate": 4.8273040251379985e-05, "loss": 0.5442, "num_input_tokens_seen": 196557312, "step": 1087 }, { "epoch": 0.11910561318043734, "grad_norm": 1.2124326274283683, "learning_rate": 4.826989849416899e-05, "loss": 0.6971, "num_input_tokens_seen": 196709408, "step": 1088 }, { "epoch": 0.11921508525137524, "grad_norm": 1.3224291115783262, "learning_rate": 4.826675398417347e-05, "loss": 0.8622, "num_input_tokens_seen": 196915040, "step": 1089 }, { "epoch": 0.11932455732231315, "grad_norm": 1.350830497307284, "learning_rate": 4.826360672176544e-05, "loss": 0.7381, "num_input_tokens_seen": 197111488, "step": 1090 }, { "epoch": 0.11943402939325104, "grad_norm": 1.5349976746482508, "learning_rate": 4.826045670731722e-05, "loss": 0.8174, "num_input_tokens_seen": 197276576, "step": 1091 }, { "epoch": 0.11954350146418895, "grad_norm": 1.3377939008290014, "learning_rate": 4.825730394120142e-05, "loss": 0.7172, "num_input_tokens_seen": 197459136, "step": 1092 }, { "epoch": 0.11965297353512686, "grad_norm": 1.149706029636365, "learning_rate": 4.8254148423791035e-05, "loss": 0.5935, "num_input_tokens_seen": 197653792, "step": 1093 }, { "epoch": 0.11976244560606475, "grad_norm": 1.3045242116018374, "learning_rate": 4.825099015545934e-05, "loss": 0.8727, "num_input_tokens_seen": 197833216, "step": 1094 }, { "epoch": 0.11987191767700266, "grad_norm": 1.2942273332844745, "learning_rate": 4.824782913657996e-05, "loss": 0.813, "num_input_tokens_seen": 198018688, "step": 1095 }, { "epoch": 0.11998138974794055, "grad_norm": 1.1065504086471327, "learning_rate": 4.824466536752683e-05, "loss": 0.7642, "num_input_tokens_seen": 198196544, "step": 1096 }, { "epoch": 0.12009086181887846, "grad_norm": 1.2432812368065265, "learning_rate": 4.8241498848674236e-05, "loss": 0.7276, "num_input_tokens_seen": 198361408, "step": 1097 }, { "epoch": 0.12020033388981637, "grad_norm": 1.38715267645568, "learning_rate": 4.823832958039675e-05, "loss": 0.7087, "num_input_tokens_seen": 198539712, "step": 1098 }, { "epoch": 0.12030980596075426, "grad_norm": 1.115772231188005, "learning_rate": 4.82351575630693e-05, "loss": 0.5878, "num_input_tokens_seen": 198714656, "step": 1099 }, { "epoch": 0.12041927803169217, "grad_norm": 1.1899278817642895, "learning_rate": 4.823198279706713e-05, "loss": 0.7793, "num_input_tokens_seen": 198896096, "step": 1100 }, { "epoch": 0.12052875010263006, "grad_norm": 1.1710326155266544, "learning_rate": 4.8228805282765803e-05, "loss": 0.6473, "num_input_tokens_seen": 199071936, "step": 1101 }, { "epoch": 0.12063822217356797, "grad_norm": 1.322514926054578, "learning_rate": 4.822562502054122e-05, "loss": 0.8471, "num_input_tokens_seen": 199265920, "step": 1102 }, { "epoch": 0.12074769424450588, "grad_norm": 1.1620392080970354, "learning_rate": 4.82224420107696e-05, "loss": 0.7165, "num_input_tokens_seen": 199454752, "step": 1103 }, { "epoch": 0.12085716631544377, "grad_norm": 1.2289457553690428, "learning_rate": 4.821925625382748e-05, "loss": 0.7643, "num_input_tokens_seen": 199638880, "step": 1104 }, { "epoch": 0.12096663838638168, "grad_norm": 1.2483827619524253, "learning_rate": 4.821606775009173e-05, "loss": 0.6976, "num_input_tokens_seen": 199809120, "step": 1105 }, { "epoch": 0.12107611045731957, "grad_norm": 1.2966261475581047, "learning_rate": 4.8212876499939555e-05, "loss": 0.6002, "num_input_tokens_seen": 199952256, "step": 1106 }, { "epoch": 0.12118558252825748, "grad_norm": 1.278236800249669, "learning_rate": 4.8209682503748455e-05, "loss": 0.668, "num_input_tokens_seen": 200120480, "step": 1107 }, { "epoch": 0.12129505459919539, "grad_norm": 1.3354627870083122, "learning_rate": 4.820648576189629e-05, "loss": 0.7596, "num_input_tokens_seen": 200282880, "step": 1108 }, { "epoch": 0.12140452667013328, "grad_norm": 1.44440823697087, "learning_rate": 4.820328627476122e-05, "loss": 0.8501, "num_input_tokens_seen": 200468576, "step": 1109 }, { "epoch": 0.12151399874107119, "grad_norm": 1.2855926514794742, "learning_rate": 4.820008404272175e-05, "loss": 0.7494, "num_input_tokens_seen": 200686976, "step": 1110 }, { "epoch": 0.12162347081200908, "grad_norm": 1.2877126388506628, "learning_rate": 4.819687906615668e-05, "loss": 0.6706, "num_input_tokens_seen": 200863264, "step": 1111 }, { "epoch": 0.12173294288294699, "grad_norm": 1.2605888574853705, "learning_rate": 4.819367134544516e-05, "loss": 0.648, "num_input_tokens_seen": 201078976, "step": 1112 }, { "epoch": 0.12184241495388488, "grad_norm": 1.2468947942018274, "learning_rate": 4.819046088096666e-05, "loss": 0.7267, "num_input_tokens_seen": 201249216, "step": 1113 }, { "epoch": 0.12195188702482279, "grad_norm": 1.2730149112046862, "learning_rate": 4.818724767310098e-05, "loss": 0.7926, "num_input_tokens_seen": 201444320, "step": 1114 }, { "epoch": 0.1220613590957607, "grad_norm": 1.117676047895474, "learning_rate": 4.8184031722228216e-05, "loss": 0.5634, "num_input_tokens_seen": 201629344, "step": 1115 }, { "epoch": 0.12217083116669859, "grad_norm": 1.2019422588284927, "learning_rate": 4.818081302872882e-05, "loss": 0.6993, "num_input_tokens_seen": 201821088, "step": 1116 }, { "epoch": 0.1222803032376365, "grad_norm": 1.3334778121315265, "learning_rate": 4.817759159298356e-05, "loss": 0.6645, "num_input_tokens_seen": 201992896, "step": 1117 }, { "epoch": 0.1223897753085744, "grad_norm": 1.194620967112966, "learning_rate": 4.817436741537352e-05, "loss": 0.6137, "num_input_tokens_seen": 202160224, "step": 1118 }, { "epoch": 0.1224992473795123, "grad_norm": 1.2189971966574324, "learning_rate": 4.817114049628012e-05, "loss": 0.5721, "num_input_tokens_seen": 202324416, "step": 1119 }, { "epoch": 0.12260871945045021, "grad_norm": 1.2073892105803354, "learning_rate": 4.81679108360851e-05, "loss": 0.7126, "num_input_tokens_seen": 202502720, "step": 1120 }, { "epoch": 0.1227181915213881, "grad_norm": 1.4162713602581432, "learning_rate": 4.8164678435170505e-05, "loss": 0.8152, "num_input_tokens_seen": 202690208, "step": 1121 }, { "epoch": 0.12282766359232601, "grad_norm": 1.3503406271535456, "learning_rate": 4.8161443293918746e-05, "loss": 0.7046, "num_input_tokens_seen": 202841408, "step": 1122 }, { "epoch": 0.1229371356632639, "grad_norm": 1.2555034127046607, "learning_rate": 4.815820541271252e-05, "loss": 0.7138, "num_input_tokens_seen": 202994400, "step": 1123 }, { "epoch": 0.12304660773420181, "grad_norm": 1.4571195273976028, "learning_rate": 4.815496479193486e-05, "loss": 0.826, "num_input_tokens_seen": 203139552, "step": 1124 }, { "epoch": 0.12315607980513972, "grad_norm": 1.3006386435295856, "learning_rate": 4.815172143196913e-05, "loss": 0.6858, "num_input_tokens_seen": 203332192, "step": 1125 }, { "epoch": 0.12326555187607761, "grad_norm": 1.2867434362048054, "learning_rate": 4.814847533319902e-05, "loss": 0.7775, "num_input_tokens_seen": 203503328, "step": 1126 }, { "epoch": 0.12337502394701552, "grad_norm": 1.2466394072797622, "learning_rate": 4.814522649600852e-05, "loss": 0.7706, "num_input_tokens_seen": 203723520, "step": 1127 }, { "epoch": 0.12348449601795342, "grad_norm": 1.249158003465074, "learning_rate": 4.814197492078198e-05, "loss": 0.8207, "num_input_tokens_seen": 203896448, "step": 1128 }, { "epoch": 0.12359396808889132, "grad_norm": 1.2049452153490186, "learning_rate": 4.813872060790404e-05, "loss": 0.7653, "num_input_tokens_seen": 204089760, "step": 1129 }, { "epoch": 0.12370344015982922, "grad_norm": 1.2989291732602692, "learning_rate": 4.813546355775969e-05, "loss": 0.6552, "num_input_tokens_seen": 204246336, "step": 1130 }, { "epoch": 0.12381291223076712, "grad_norm": 1.1764386642556488, "learning_rate": 4.813220377073423e-05, "loss": 0.6613, "num_input_tokens_seen": 204424416, "step": 1131 }, { "epoch": 0.12392238430170503, "grad_norm": 1.3132452438474604, "learning_rate": 4.8128941247213286e-05, "loss": 0.6629, "num_input_tokens_seen": 204594656, "step": 1132 }, { "epoch": 0.12403185637264293, "grad_norm": 1.271374581983222, "learning_rate": 4.812567598758281e-05, "loss": 0.8369, "num_input_tokens_seen": 204794912, "step": 1133 }, { "epoch": 0.12414132844358083, "grad_norm": 1.2166912590223924, "learning_rate": 4.812240799222906e-05, "loss": 0.6613, "num_input_tokens_seen": 204964928, "step": 1134 }, { "epoch": 0.12425080051451873, "grad_norm": 1.3109742333377357, "learning_rate": 4.811913726153866e-05, "loss": 0.5747, "num_input_tokens_seen": 205109632, "step": 1135 }, { "epoch": 0.12436027258545664, "grad_norm": 1.3690515508259613, "learning_rate": 4.8115863795898514e-05, "loss": 0.6956, "num_input_tokens_seen": 205309888, "step": 1136 }, { "epoch": 0.12446974465639454, "grad_norm": 1.277932360991897, "learning_rate": 4.811258759569587e-05, "loss": 0.8307, "num_input_tokens_seen": 205496256, "step": 1137 }, { "epoch": 0.12457921672733244, "grad_norm": 1.4191097156404167, "learning_rate": 4.8109308661318296e-05, "loss": 0.9829, "num_input_tokens_seen": 205669632, "step": 1138 }, { "epoch": 0.12468868879827034, "grad_norm": 1.3726112684023843, "learning_rate": 4.810602699315369e-05, "loss": 0.8558, "num_input_tokens_seen": 205815904, "step": 1139 }, { "epoch": 0.12479816086920824, "grad_norm": 1.4383680790888584, "learning_rate": 4.810274259159026e-05, "loss": 0.8638, "num_input_tokens_seen": 206006976, "step": 1140 }, { "epoch": 0.12490763294014615, "grad_norm": 1.1893712408634822, "learning_rate": 4.809945545701654e-05, "loss": 0.6241, "num_input_tokens_seen": 206184832, "step": 1141 }, { "epoch": 0.12501710501108404, "grad_norm": 1.1792413739553476, "learning_rate": 4.8096165589821404e-05, "loss": 0.6551, "num_input_tokens_seen": 206352832, "step": 1142 }, { "epoch": 0.12512657708202196, "grad_norm": 1.2615983273698874, "learning_rate": 4.809287299039403e-05, "loss": 0.7376, "num_input_tokens_seen": 206538752, "step": 1143 }, { "epoch": 0.12523604915295986, "grad_norm": 1.1522014465398438, "learning_rate": 4.808957765912393e-05, "loss": 0.7462, "num_input_tokens_seen": 206732288, "step": 1144 }, { "epoch": 0.12534552122389775, "grad_norm": 1.3162953134762971, "learning_rate": 4.808627959640093e-05, "loss": 0.8108, "num_input_tokens_seen": 206917760, "step": 1145 }, { "epoch": 0.12545499329483564, "grad_norm": 1.2170226743305133, "learning_rate": 4.808297880261518e-05, "loss": 0.6857, "num_input_tokens_seen": 207113984, "step": 1146 }, { "epoch": 0.12556446536577356, "grad_norm": 1.2283727323216735, "learning_rate": 4.807967527815718e-05, "loss": 0.5671, "num_input_tokens_seen": 207319840, "step": 1147 }, { "epoch": 0.12567393743671146, "grad_norm": 1.1166835795492929, "learning_rate": 4.807636902341771e-05, "loss": 0.5684, "num_input_tokens_seen": 207490752, "step": 1148 }, { "epoch": 0.12578340950764935, "grad_norm": 1.3619846856884947, "learning_rate": 4.80730600387879e-05, "loss": 0.6801, "num_input_tokens_seen": 207681824, "step": 1149 }, { "epoch": 0.12589288157858727, "grad_norm": 1.2771523708499368, "learning_rate": 4.8069748324659193e-05, "loss": 0.7386, "num_input_tokens_seen": 207873568, "step": 1150 }, { "epoch": 0.12600235364952517, "grad_norm": 1.2051526113084512, "learning_rate": 4.8066433881423354e-05, "loss": 0.6109, "num_input_tokens_seen": 208036192, "step": 1151 }, { "epoch": 0.12611182572046306, "grad_norm": 1.1711754300247579, "learning_rate": 4.806311670947249e-05, "loss": 0.6863, "num_input_tokens_seen": 208189184, "step": 1152 }, { "epoch": 0.12622129779140098, "grad_norm": 1.1879155787036257, "learning_rate": 4.805979680919901e-05, "loss": 0.6409, "num_input_tokens_seen": 208350688, "step": 1153 }, { "epoch": 0.12633076986233888, "grad_norm": 1.4005037950997512, "learning_rate": 4.8056474180995645e-05, "loss": 0.5998, "num_input_tokens_seen": 208500096, "step": 1154 }, { "epoch": 0.12644024193327677, "grad_norm": 1.2669821162357928, "learning_rate": 4.8053148825255466e-05, "loss": 0.7037, "num_input_tokens_seen": 208676384, "step": 1155 }, { "epoch": 0.12654971400421466, "grad_norm": 1.250493286075132, "learning_rate": 4.804982074237185e-05, "loss": 0.6879, "num_input_tokens_seen": 208852448, "step": 1156 }, { "epoch": 0.12665918607515259, "grad_norm": 1.282407795750059, "learning_rate": 4.8046489932738504e-05, "loss": 0.6181, "num_input_tokens_seen": 209027168, "step": 1157 }, { "epoch": 0.12676865814609048, "grad_norm": 1.8037633470026928, "learning_rate": 4.8043156396749454e-05, "loss": 1.3005, "num_input_tokens_seen": 209203008, "step": 1158 }, { "epoch": 0.12687813021702837, "grad_norm": 1.301401488750797, "learning_rate": 4.8039820134799054e-05, "loss": 0.7303, "num_input_tokens_seen": 209386912, "step": 1159 }, { "epoch": 0.1269876022879663, "grad_norm": 1.14946695496134, "learning_rate": 4.8036481147281975e-05, "loss": 0.7447, "num_input_tokens_seen": 209587840, "step": 1160 }, { "epoch": 0.1270970743589042, "grad_norm": 1.257691276821772, "learning_rate": 4.8033139434593224e-05, "loss": 0.693, "num_input_tokens_seen": 209749120, "step": 1161 }, { "epoch": 0.12720654642984208, "grad_norm": 1.2410345905164766, "learning_rate": 4.8029794997128096e-05, "loss": 0.9108, "num_input_tokens_seen": 209935712, "step": 1162 }, { "epoch": 0.12731601850077998, "grad_norm": 1.1695539249319244, "learning_rate": 4.8026447835282256e-05, "loss": 0.6871, "num_input_tokens_seen": 210093408, "step": 1163 }, { "epoch": 0.1274254905717179, "grad_norm": 1.1568360261210238, "learning_rate": 4.802309794945165e-05, "loss": 0.8759, "num_input_tokens_seen": 210305536, "step": 1164 }, { "epoch": 0.1275349626426558, "grad_norm": 1.2191310678075058, "learning_rate": 4.8019745340032574e-05, "loss": 0.7458, "num_input_tokens_seen": 210506240, "step": 1165 }, { "epoch": 0.12764443471359369, "grad_norm": 1.2162981557379047, "learning_rate": 4.801639000742163e-05, "loss": 0.7389, "num_input_tokens_seen": 210697760, "step": 1166 }, { "epoch": 0.1277539067845316, "grad_norm": 1.3483807371708305, "learning_rate": 4.801303195201574e-05, "loss": 0.7821, "num_input_tokens_seen": 210884352, "step": 1167 }, { "epoch": 0.1278633788554695, "grad_norm": 1.276849726997726, "learning_rate": 4.8009671174212176e-05, "loss": 0.6153, "num_input_tokens_seen": 211022336, "step": 1168 }, { "epoch": 0.1279728509264074, "grad_norm": 1.2671390002055734, "learning_rate": 4.8006307674408494e-05, "loss": 0.6053, "num_input_tokens_seen": 211192352, "step": 1169 }, { "epoch": 0.12808232299734532, "grad_norm": 1.26596982991396, "learning_rate": 4.800294145300259e-05, "loss": 0.7252, "num_input_tokens_seen": 211379840, "step": 1170 }, { "epoch": 0.1281917950682832, "grad_norm": 1.3891186645961995, "learning_rate": 4.799957251039269e-05, "loss": 0.8351, "num_input_tokens_seen": 211567328, "step": 1171 }, { "epoch": 0.1283012671392211, "grad_norm": 1.3186948448326272, "learning_rate": 4.799620084697732e-05, "loss": 0.697, "num_input_tokens_seen": 211754592, "step": 1172 }, { "epoch": 0.128410739210159, "grad_norm": 1.3229626574994937, "learning_rate": 4.799282646315537e-05, "loss": 0.8519, "num_input_tokens_seen": 211957088, "step": 1173 }, { "epoch": 0.12852021128109692, "grad_norm": 1.2322448506602734, "learning_rate": 4.798944935932599e-05, "loss": 0.7239, "num_input_tokens_seen": 212139648, "step": 1174 }, { "epoch": 0.1286296833520348, "grad_norm": 1.16145390850416, "learning_rate": 4.798606953588871e-05, "loss": 0.7863, "num_input_tokens_seen": 212337440, "step": 1175 }, { "epoch": 0.1287391554229727, "grad_norm": 1.3589132116533529, "learning_rate": 4.7982686993243335e-05, "loss": 0.7712, "num_input_tokens_seen": 212512384, "step": 1176 }, { "epoch": 0.12884862749391063, "grad_norm": 1.2575147288163278, "learning_rate": 4.797930173179003e-05, "loss": 0.8943, "num_input_tokens_seen": 212702784, "step": 1177 }, { "epoch": 0.12895809956484852, "grad_norm": 1.2576966321189296, "learning_rate": 4.797591375192926e-05, "loss": 0.6998, "num_input_tokens_seen": 212879296, "step": 1178 }, { "epoch": 0.12906757163578642, "grad_norm": 1.186718683701112, "learning_rate": 4.7972523054061815e-05, "loss": 0.7541, "num_input_tokens_seen": 213052000, "step": 1179 }, { "epoch": 0.1291770437067243, "grad_norm": 1.273209018921855, "learning_rate": 4.7969129638588805e-05, "loss": 0.6441, "num_input_tokens_seen": 213231872, "step": 1180 }, { "epoch": 0.12928651577766223, "grad_norm": 1.341535739947239, "learning_rate": 4.796573350591167e-05, "loss": 0.6092, "num_input_tokens_seen": 213431904, "step": 1181 }, { "epoch": 0.12939598784860012, "grad_norm": 1.340797863110385, "learning_rate": 4.796233465643216e-05, "loss": 0.6959, "num_input_tokens_seen": 213620288, "step": 1182 }, { "epoch": 0.12950545991953802, "grad_norm": 1.2438843850997057, "learning_rate": 4.7958933090552365e-05, "loss": 0.6207, "num_input_tokens_seen": 213790752, "step": 1183 }, { "epoch": 0.12961493199047594, "grad_norm": 1.470541814744604, "learning_rate": 4.795552880867467e-05, "loss": 0.8396, "num_input_tokens_seen": 214008256, "step": 1184 }, { "epoch": 0.12972440406141383, "grad_norm": 1.2364968038657251, "learning_rate": 4.795212181120181e-05, "loss": 0.5226, "num_input_tokens_seen": 214210976, "step": 1185 }, { "epoch": 0.12983387613235173, "grad_norm": 1.3619624538624022, "learning_rate": 4.79487120985368e-05, "loss": 0.8702, "num_input_tokens_seen": 214397344, "step": 1186 }, { "epoch": 0.12994334820328965, "grad_norm": 1.3621547821589408, "learning_rate": 4.7945299671083036e-05, "loss": 0.8755, "num_input_tokens_seen": 214589088, "step": 1187 }, { "epoch": 0.13005282027422754, "grad_norm": 1.282265981400936, "learning_rate": 4.7941884529244175e-05, "loss": 0.8339, "num_input_tokens_seen": 214773888, "step": 1188 }, { "epoch": 0.13016229234516544, "grad_norm": 1.2663443134277332, "learning_rate": 4.793846667342423e-05, "loss": 0.7587, "num_input_tokens_seen": 214955552, "step": 1189 }, { "epoch": 0.13027176441610333, "grad_norm": 1.2959265860380156, "learning_rate": 4.793504610402754e-05, "loss": 0.7152, "num_input_tokens_seen": 215124896, "step": 1190 }, { "epoch": 0.13038123648704125, "grad_norm": 1.2114623870300714, "learning_rate": 4.7931622821458726e-05, "loss": 0.717, "num_input_tokens_seen": 215307904, "step": 1191 }, { "epoch": 0.13049070855797915, "grad_norm": 1.2400243866737433, "learning_rate": 4.7928196826122775e-05, "loss": 0.8882, "num_input_tokens_seen": 215475456, "step": 1192 }, { "epoch": 0.13060018062891704, "grad_norm": 1.203729254839941, "learning_rate": 4.7924768118424975e-05, "loss": 0.7955, "num_input_tokens_seen": 215666528, "step": 1193 }, { "epoch": 0.13070965269985496, "grad_norm": 1.162527621361517, "learning_rate": 4.7921336698770926e-05, "loss": 0.9059, "num_input_tokens_seen": 215871712, "step": 1194 }, { "epoch": 0.13081912477079286, "grad_norm": 1.0809053698680653, "learning_rate": 4.791790256756657e-05, "loss": 0.7021, "num_input_tokens_seen": 216077568, "step": 1195 }, { "epoch": 0.13092859684173075, "grad_norm": 1.073556591907116, "learning_rate": 4.791446572521815e-05, "loss": 0.7179, "num_input_tokens_seen": 216239744, "step": 1196 }, { "epoch": 0.13103806891266864, "grad_norm": 1.174011876200535, "learning_rate": 4.791102617213223e-05, "loss": 0.7936, "num_input_tokens_seen": 216435744, "step": 1197 }, { "epoch": 0.13114754098360656, "grad_norm": 1.1697277487562339, "learning_rate": 4.7907583908715725e-05, "loss": 0.5434, "num_input_tokens_seen": 216578208, "step": 1198 }, { "epoch": 0.13125701305454446, "grad_norm": 1.4920246774407053, "learning_rate": 4.790413893537583e-05, "loss": 0.7392, "num_input_tokens_seen": 216755840, "step": 1199 }, { "epoch": 0.13136648512548235, "grad_norm": 1.2962596766405885, "learning_rate": 4.790069125252009e-05, "loss": 0.8309, "num_input_tokens_seen": 216965504, "step": 1200 }, { "epoch": 0.13147595719642027, "grad_norm": 1.2611610431887732, "learning_rate": 4.7897240860556345e-05, "loss": 0.7778, "num_input_tokens_seen": 217160832, "step": 1201 }, { "epoch": 0.13158542926735817, "grad_norm": 1.3479557754919818, "learning_rate": 4.789378775989278e-05, "loss": 0.6507, "num_input_tokens_seen": 217335552, "step": 1202 }, { "epoch": 0.13169490133829606, "grad_norm": 1.332762039499424, "learning_rate": 4.789033195093789e-05, "loss": 0.6143, "num_input_tokens_seen": 217514752, "step": 1203 }, { "epoch": 0.13180437340923398, "grad_norm": 1.226499713821471, "learning_rate": 4.7886873434100486e-05, "loss": 0.6973, "num_input_tokens_seen": 217678272, "step": 1204 }, { "epoch": 0.13191384548017188, "grad_norm": 1.2548155057794819, "learning_rate": 4.7883412209789714e-05, "loss": 0.6889, "num_input_tokens_seen": 217859488, "step": 1205 }, { "epoch": 0.13202331755110977, "grad_norm": 1.4049841375575491, "learning_rate": 4.787994827841502e-05, "loss": 0.6443, "num_input_tokens_seen": 218029504, "step": 1206 }, { "epoch": 0.13213278962204766, "grad_norm": 1.1889022779354503, "learning_rate": 4.7876481640386184e-05, "loss": 0.8995, "num_input_tokens_seen": 218243424, "step": 1207 }, { "epoch": 0.13224226169298559, "grad_norm": 1.2059260373090972, "learning_rate": 4.78730122961133e-05, "loss": 0.7954, "num_input_tokens_seen": 218408512, "step": 1208 }, { "epoch": 0.13235173376392348, "grad_norm": 1.0879557673950566, "learning_rate": 4.78695402460068e-05, "loss": 0.5628, "num_input_tokens_seen": 218589728, "step": 1209 }, { "epoch": 0.13246120583486137, "grad_norm": 1.1469855959419364, "learning_rate": 4.7866065490477386e-05, "loss": 0.6415, "num_input_tokens_seen": 218760416, "step": 1210 }, { "epoch": 0.1325706779057993, "grad_norm": 1.3804933313708145, "learning_rate": 4.786258802993615e-05, "loss": 0.8429, "num_input_tokens_seen": 218953952, "step": 1211 }, { "epoch": 0.1326801499767372, "grad_norm": 1.488090362722494, "learning_rate": 4.785910786479445e-05, "loss": 0.8278, "num_input_tokens_seen": 219122176, "step": 1212 }, { "epoch": 0.13278962204767508, "grad_norm": 1.3001012084151362, "learning_rate": 4.7855624995464e-05, "loss": 0.6846, "num_input_tokens_seen": 219303168, "step": 1213 }, { "epoch": 0.13289909411861298, "grad_norm": 1.3351778692701606, "learning_rate": 4.785213942235679e-05, "loss": 0.6599, "num_input_tokens_seen": 219511712, "step": 1214 }, { "epoch": 0.1330085661895509, "grad_norm": 1.3161328311947678, "learning_rate": 4.784865114588518e-05, "loss": 0.668, "num_input_tokens_seen": 219695168, "step": 1215 }, { "epoch": 0.1331180382604888, "grad_norm": 1.254951113018465, "learning_rate": 4.784516016646182e-05, "loss": 0.7701, "num_input_tokens_seen": 219835616, "step": 1216 }, { "epoch": 0.13322751033142669, "grad_norm": 1.2789643998578775, "learning_rate": 4.784166648449969e-05, "loss": 0.7214, "num_input_tokens_seen": 220009440, "step": 1217 }, { "epoch": 0.1333369824023646, "grad_norm": 1.220015626713465, "learning_rate": 4.783817010041207e-05, "loss": 0.7259, "num_input_tokens_seen": 220192000, "step": 1218 }, { "epoch": 0.1334464544733025, "grad_norm": 1.11807876994567, "learning_rate": 4.783467101461259e-05, "loss": 0.71, "num_input_tokens_seen": 220387104, "step": 1219 }, { "epoch": 0.1335559265442404, "grad_norm": 1.223774212996101, "learning_rate": 4.783116922751518e-05, "loss": 0.7189, "num_input_tokens_seen": 220552864, "step": 1220 }, { "epoch": 0.13366539861517832, "grad_norm": 1.1922481876390711, "learning_rate": 4.78276647395341e-05, "loss": 0.6644, "num_input_tokens_seen": 220743264, "step": 1221 }, { "epoch": 0.1337748706861162, "grad_norm": 1.2669237796103936, "learning_rate": 4.782415755108392e-05, "loss": 0.7144, "num_input_tokens_seen": 220916192, "step": 1222 }, { "epoch": 0.1338843427570541, "grad_norm": 1.413152623267223, "learning_rate": 4.782064766257953e-05, "loss": 0.6621, "num_input_tokens_seen": 221097408, "step": 1223 }, { "epoch": 0.133993814827992, "grad_norm": 1.2863371273296862, "learning_rate": 4.781713507443615e-05, "loss": 0.8934, "num_input_tokens_seen": 221309984, "step": 1224 }, { "epoch": 0.13410328689892992, "grad_norm": 1.342255448740804, "learning_rate": 4.7813619787069314e-05, "loss": 0.7203, "num_input_tokens_seen": 221499488, "step": 1225 }, { "epoch": 0.1342127589698678, "grad_norm": 1.1910661055494343, "learning_rate": 4.781010180089487e-05, "loss": 0.6175, "num_input_tokens_seen": 221637248, "step": 1226 }, { "epoch": 0.1343222310408057, "grad_norm": 1.3370000172103618, "learning_rate": 4.7806581116328976e-05, "loss": 0.7926, "num_input_tokens_seen": 221845120, "step": 1227 }, { "epoch": 0.13443170311174363, "grad_norm": 1.210057092375844, "learning_rate": 4.780305773378815e-05, "loss": 0.7109, "num_input_tokens_seen": 222038208, "step": 1228 }, { "epoch": 0.13454117518268152, "grad_norm": 1.301280982448442, "learning_rate": 4.779953165368917e-05, "loss": 0.6611, "num_input_tokens_seen": 222203072, "step": 1229 }, { "epoch": 0.13465064725361942, "grad_norm": 1.271622610708077, "learning_rate": 4.779600287644919e-05, "loss": 0.7126, "num_input_tokens_seen": 222397728, "step": 1230 }, { "epoch": 0.1347601193245573, "grad_norm": 1.2623793346037115, "learning_rate": 4.779247140248565e-05, "loss": 0.7454, "num_input_tokens_seen": 222568640, "step": 1231 }, { "epoch": 0.13486959139549523, "grad_norm": 1.2540019346432159, "learning_rate": 4.778893723221631e-05, "loss": 0.8002, "num_input_tokens_seen": 222744480, "step": 1232 }, { "epoch": 0.13497906346643312, "grad_norm": 1.234620564818056, "learning_rate": 4.7785400366059266e-05, "loss": 0.6589, "num_input_tokens_seen": 222900160, "step": 1233 }, { "epoch": 0.13508853553737102, "grad_norm": 1.1811602664349057, "learning_rate": 4.778186080443291e-05, "loss": 0.616, "num_input_tokens_seen": 223074208, "step": 1234 }, { "epoch": 0.13519800760830894, "grad_norm": 1.3846277940430414, "learning_rate": 4.777831854775598e-05, "loss": 0.6828, "num_input_tokens_seen": 223231008, "step": 1235 }, { "epoch": 0.13530747967924683, "grad_norm": 1.3802256121243637, "learning_rate": 4.777477359644751e-05, "loss": 0.8746, "num_input_tokens_seen": 223399680, "step": 1236 }, { "epoch": 0.13541695175018473, "grad_norm": 1.1944172816674965, "learning_rate": 4.7771225950926854e-05, "loss": 0.6213, "num_input_tokens_seen": 223582016, "step": 1237 }, { "epoch": 0.13552642382112265, "grad_norm": 1.2991014236077105, "learning_rate": 4.7767675611613704e-05, "loss": 0.7855, "num_input_tokens_seen": 223764576, "step": 1238 }, { "epoch": 0.13563589589206054, "grad_norm": 1.2863480164130678, "learning_rate": 4.776412257892805e-05, "loss": 0.7594, "num_input_tokens_seen": 223937280, "step": 1239 }, { "epoch": 0.13574536796299844, "grad_norm": 1.1471407049824307, "learning_rate": 4.7760566853290215e-05, "loss": 0.6476, "num_input_tokens_seen": 224157920, "step": 1240 }, { "epoch": 0.13585484003393633, "grad_norm": 1.292954828008743, "learning_rate": 4.775700843512084e-05, "loss": 0.6917, "num_input_tokens_seen": 224325248, "step": 1241 }, { "epoch": 0.13596431210487425, "grad_norm": 1.2510786647656589, "learning_rate": 4.775344732484086e-05, "loss": 0.8417, "num_input_tokens_seen": 224527296, "step": 1242 }, { "epoch": 0.13607378417581215, "grad_norm": 1.2095574667636704, "learning_rate": 4.774988352287156e-05, "loss": 0.6032, "num_input_tokens_seen": 224714112, "step": 1243 }, { "epoch": 0.13618325624675004, "grad_norm": 1.2729944968063416, "learning_rate": 4.774631702963453e-05, "loss": 0.6384, "num_input_tokens_seen": 224904960, "step": 1244 }, { "epoch": 0.13629272831768796, "grad_norm": 1.2799400518207416, "learning_rate": 4.7742747845551685e-05, "loss": 0.8046, "num_input_tokens_seen": 225053696, "step": 1245 }, { "epoch": 0.13640220038862585, "grad_norm": 1.2038616550907244, "learning_rate": 4.773917597104525e-05, "loss": 0.695, "num_input_tokens_seen": 225238944, "step": 1246 }, { "epoch": 0.13651167245956375, "grad_norm": 1.2013096179914688, "learning_rate": 4.773560140653775e-05, "loss": 0.6213, "num_input_tokens_seen": 225415680, "step": 1247 }, { "epoch": 0.13662114453050164, "grad_norm": 1.3310943619992779, "learning_rate": 4.773202415245208e-05, "loss": 0.7096, "num_input_tokens_seen": 225587040, "step": 1248 }, { "epoch": 0.13673061660143956, "grad_norm": 1.1944007138568868, "learning_rate": 4.772844420921141e-05, "loss": 0.5362, "num_input_tokens_seen": 225751680, "step": 1249 }, { "epoch": 0.13684008867237746, "grad_norm": 1.0938074914263827, "learning_rate": 4.772486157723923e-05, "loss": 0.5776, "num_input_tokens_seen": 225953728, "step": 1250 }, { "epoch": 0.13694956074331535, "grad_norm": 1.466329893909562, "learning_rate": 4.772127625695937e-05, "loss": 0.801, "num_input_tokens_seen": 226098656, "step": 1251 }, { "epoch": 0.13705903281425327, "grad_norm": 1.3077182858894472, "learning_rate": 4.771768824879597e-05, "loss": 0.645, "num_input_tokens_seen": 226261504, "step": 1252 }, { "epoch": 0.13716850488519117, "grad_norm": 1.3145303916044164, "learning_rate": 4.771409755317348e-05, "loss": 0.5859, "num_input_tokens_seen": 226434656, "step": 1253 }, { "epoch": 0.13727797695612906, "grad_norm": 1.1505927797164894, "learning_rate": 4.771050417051667e-05, "loss": 0.6591, "num_input_tokens_seen": 226594368, "step": 1254 }, { "epoch": 0.13738744902706698, "grad_norm": 1.1101988835910028, "learning_rate": 4.770690810125062e-05, "loss": 0.6216, "num_input_tokens_seen": 226786336, "step": 1255 }, { "epoch": 0.13749692109800488, "grad_norm": 1.4231378166549722, "learning_rate": 4.7703309345800766e-05, "loss": 0.7215, "num_input_tokens_seen": 226956576, "step": 1256 }, { "epoch": 0.13760639316894277, "grad_norm": 1.4217905711165215, "learning_rate": 4.769970790459282e-05, "loss": 0.9164, "num_input_tokens_seen": 227139808, "step": 1257 }, { "epoch": 0.13771586523988066, "grad_norm": 1.3545536173096833, "learning_rate": 4.769610377805281e-05, "loss": 0.7662, "num_input_tokens_seen": 227322592, "step": 1258 }, { "epoch": 0.13782533731081859, "grad_norm": 1.1983193867755657, "learning_rate": 4.769249696660711e-05, "loss": 0.806, "num_input_tokens_seen": 227517920, "step": 1259 }, { "epoch": 0.13793480938175648, "grad_norm": 1.2323856499572858, "learning_rate": 4.768888747068241e-05, "loss": 0.6878, "num_input_tokens_seen": 227684128, "step": 1260 }, { "epoch": 0.13804428145269437, "grad_norm": 1.1153906931445934, "learning_rate": 4.7685275290705686e-05, "loss": 0.577, "num_input_tokens_seen": 227868256, "step": 1261 }, { "epoch": 0.1381537535236323, "grad_norm": 1.3301956640171428, "learning_rate": 4.7681660427104266e-05, "loss": 0.6753, "num_input_tokens_seen": 228028192, "step": 1262 }, { "epoch": 0.1382632255945702, "grad_norm": 1.1826005047756274, "learning_rate": 4.7678042880305785e-05, "loss": 0.7942, "num_input_tokens_seen": 228209184, "step": 1263 }, { "epoch": 0.13837269766550808, "grad_norm": 1.317554414627979, "learning_rate": 4.767442265073818e-05, "loss": 0.6687, "num_input_tokens_seen": 228361504, "step": 1264 }, { "epoch": 0.13848216973644598, "grad_norm": 1.2593787257137041, "learning_rate": 4.767079973882972e-05, "loss": 0.6364, "num_input_tokens_seen": 228532416, "step": 1265 }, { "epoch": 0.1385916418073839, "grad_norm": 1.2173951761446271, "learning_rate": 4.766717414500898e-05, "loss": 0.7894, "num_input_tokens_seen": 228704224, "step": 1266 }, { "epoch": 0.1387011138783218, "grad_norm": 1.1126440533587043, "learning_rate": 4.766354586970489e-05, "loss": 0.6682, "num_input_tokens_seen": 228890368, "step": 1267 }, { "epoch": 0.13881058594925968, "grad_norm": 1.3151186297033706, "learning_rate": 4.7659914913346634e-05, "loss": 0.6803, "num_input_tokens_seen": 229068224, "step": 1268 }, { "epoch": 0.1389200580201976, "grad_norm": 1.2222754522253447, "learning_rate": 4.7656281276363765e-05, "loss": 0.666, "num_input_tokens_seen": 229266688, "step": 1269 }, { "epoch": 0.1390295300911355, "grad_norm": 1.333952069764051, "learning_rate": 4.7652644959186146e-05, "loss": 0.6594, "num_input_tokens_seen": 229446784, "step": 1270 }, { "epoch": 0.1391390021620734, "grad_norm": 1.2084809026823002, "learning_rate": 4.764900596224392e-05, "loss": 0.7602, "num_input_tokens_seen": 229598208, "step": 1271 }, { "epoch": 0.13924847423301132, "grad_norm": 1.2587338029260686, "learning_rate": 4.7645364285967584e-05, "loss": 0.788, "num_input_tokens_seen": 229783904, "step": 1272 }, { "epoch": 0.1393579463039492, "grad_norm": 1.2659979376753512, "learning_rate": 4.764171993078795e-05, "loss": 0.6389, "num_input_tokens_seen": 229948320, "step": 1273 }, { "epoch": 0.1394674183748871, "grad_norm": 1.3428975439806086, "learning_rate": 4.763807289713613e-05, "loss": 0.9654, "num_input_tokens_seen": 230137824, "step": 1274 }, { "epoch": 0.139576890445825, "grad_norm": 1.236987765098268, "learning_rate": 4.763442318544356e-05, "loss": 0.7913, "num_input_tokens_seen": 230325088, "step": 1275 }, { "epoch": 0.13968636251676292, "grad_norm": 1.2407801110762366, "learning_rate": 4.7630770796142e-05, "loss": 0.7346, "num_input_tokens_seen": 230518624, "step": 1276 }, { "epoch": 0.1397958345877008, "grad_norm": 1.2054405718394554, "learning_rate": 4.762711572966352e-05, "loss": 0.5652, "num_input_tokens_seen": 230701408, "step": 1277 }, { "epoch": 0.1399053066586387, "grad_norm": 1.20486329573483, "learning_rate": 4.76234579864405e-05, "loss": 0.7643, "num_input_tokens_seen": 230885312, "step": 1278 }, { "epoch": 0.14001477872957663, "grad_norm": 1.2571950783194157, "learning_rate": 4.761979756690565e-05, "loss": 0.7445, "num_input_tokens_seen": 231066752, "step": 1279 }, { "epoch": 0.14012425080051452, "grad_norm": 1.1514348988120833, "learning_rate": 4.761613447149199e-05, "loss": 0.7607, "num_input_tokens_seen": 231277088, "step": 1280 }, { "epoch": 0.14023372287145242, "grad_norm": 1.1544033459713374, "learning_rate": 4.761246870063286e-05, "loss": 0.7039, "num_input_tokens_seen": 231484064, "step": 1281 }, { "epoch": 0.1403431949423903, "grad_norm": 1.2397545073900942, "learning_rate": 4.760880025476191e-05, "loss": 0.658, "num_input_tokens_seen": 231650496, "step": 1282 }, { "epoch": 0.14045266701332823, "grad_norm": 1.2520186813258727, "learning_rate": 4.76051291343131e-05, "loss": 0.7526, "num_input_tokens_seen": 231841344, "step": 1283 }, { "epoch": 0.14056213908426612, "grad_norm": 1.3993690840207198, "learning_rate": 4.7601455339720736e-05, "loss": 0.7538, "num_input_tokens_seen": 231998144, "step": 1284 }, { "epoch": 0.14067161115520402, "grad_norm": 1.3464820058351927, "learning_rate": 4.759777887141941e-05, "loss": 0.5775, "num_input_tokens_seen": 232174880, "step": 1285 }, { "epoch": 0.14078108322614194, "grad_norm": 1.300562109831334, "learning_rate": 4.7594099729844045e-05, "loss": 0.6722, "num_input_tokens_seen": 232333696, "step": 1286 }, { "epoch": 0.14089055529707983, "grad_norm": 1.391919208881406, "learning_rate": 4.759041791542987e-05, "loss": 0.6954, "num_input_tokens_seen": 232527680, "step": 1287 }, { "epoch": 0.14100002736801773, "grad_norm": 1.2004187727413926, "learning_rate": 4.7586733428612454e-05, "loss": 0.637, "num_input_tokens_seen": 232734432, "step": 1288 }, { "epoch": 0.14110949943895565, "grad_norm": 1.189456031432461, "learning_rate": 4.758304626982764e-05, "loss": 0.6359, "num_input_tokens_seen": 232939168, "step": 1289 }, { "epoch": 0.14121897150989354, "grad_norm": 1.2555134298258799, "learning_rate": 4.757935643951163e-05, "loss": 0.7564, "num_input_tokens_seen": 233103136, "step": 1290 }, { "epoch": 0.14132844358083144, "grad_norm": 1.4503569952746342, "learning_rate": 4.757566393810091e-05, "loss": 0.9284, "num_input_tokens_seen": 233278080, "step": 1291 }, { "epoch": 0.14143791565176933, "grad_norm": 1.3231387164774853, "learning_rate": 4.75719687660323e-05, "loss": 0.8183, "num_input_tokens_seen": 233470272, "step": 1292 }, { "epoch": 0.14154738772270725, "grad_norm": 1.2865800014933446, "learning_rate": 4.756827092374295e-05, "loss": 0.8962, "num_input_tokens_seen": 233653280, "step": 1293 }, { "epoch": 0.14165685979364515, "grad_norm": 1.237149898056879, "learning_rate": 4.7564570411670284e-05, "loss": 0.8683, "num_input_tokens_seen": 233832704, "step": 1294 }, { "epoch": 0.14176633186458304, "grad_norm": 1.219030152424392, "learning_rate": 4.756086723025208e-05, "loss": 0.6789, "num_input_tokens_seen": 234021088, "step": 1295 }, { "epoch": 0.14187580393552096, "grad_norm": 1.231360218517389, "learning_rate": 4.755716137992641e-05, "loss": 0.764, "num_input_tokens_seen": 234191776, "step": 1296 }, { "epoch": 0.14198527600645885, "grad_norm": 1.1503419201940408, "learning_rate": 4.755345286113166e-05, "loss": 0.8036, "num_input_tokens_seen": 234346560, "step": 1297 }, { "epoch": 0.14209474807739675, "grad_norm": 1.2243846827110032, "learning_rate": 4.7549741674306567e-05, "loss": 0.7765, "num_input_tokens_seen": 234530464, "step": 1298 }, { "epoch": 0.14220422014833464, "grad_norm": 1.2130704245852162, "learning_rate": 4.754602781989013e-05, "loss": 0.7371, "num_input_tokens_seen": 234709440, "step": 1299 }, { "epoch": 0.14231369221927256, "grad_norm": 1.2503312148924817, "learning_rate": 4.754231129832171e-05, "loss": 0.6459, "num_input_tokens_seen": 234885504, "step": 1300 }, { "epoch": 0.14242316429021046, "grad_norm": 1.3428682051075442, "learning_rate": 4.753859211004096e-05, "loss": 0.8833, "num_input_tokens_seen": 235082624, "step": 1301 }, { "epoch": 0.14253263636114835, "grad_norm": 1.1703341208321023, "learning_rate": 4.753487025548784e-05, "loss": 0.722, "num_input_tokens_seen": 235257792, "step": 1302 }, { "epoch": 0.14264210843208627, "grad_norm": 1.6495064979973328, "learning_rate": 4.753114573510265e-05, "loss": 0.8241, "num_input_tokens_seen": 235410560, "step": 1303 }, { "epoch": 0.14275158050302417, "grad_norm": 1.0491553565080525, "learning_rate": 4.7527418549326e-05, "loss": 0.5141, "num_input_tokens_seen": 235607456, "step": 1304 }, { "epoch": 0.14286105257396206, "grad_norm": 1.2474636093978255, "learning_rate": 4.752368869859879e-05, "loss": 0.7885, "num_input_tokens_seen": 235801440, "step": 1305 }, { "epoch": 0.14297052464489998, "grad_norm": 1.3513384947616276, "learning_rate": 4.751995618336227e-05, "loss": 0.7191, "num_input_tokens_seen": 236018944, "step": 1306 }, { "epoch": 0.14307999671583788, "grad_norm": 1.3018159159394331, "learning_rate": 4.751622100405798e-05, "loss": 0.6686, "num_input_tokens_seen": 236209344, "step": 1307 }, { "epoch": 0.14318946878677577, "grad_norm": 1.1609775192325082, "learning_rate": 4.7512483161127794e-05, "loss": 0.5666, "num_input_tokens_seen": 236404000, "step": 1308 }, { "epoch": 0.14329894085771366, "grad_norm": 1.2545826831327334, "learning_rate": 4.750874265501389e-05, "loss": 0.6433, "num_input_tokens_seen": 236577600, "step": 1309 }, { "epoch": 0.14340841292865159, "grad_norm": 1.4550856622078279, "learning_rate": 4.750499948615875e-05, "loss": 0.7324, "num_input_tokens_seen": 236741568, "step": 1310 }, { "epoch": 0.14351788499958948, "grad_norm": 1.2955718789663584, "learning_rate": 4.750125365500521e-05, "loss": 0.6275, "num_input_tokens_seen": 236910912, "step": 1311 }, { "epoch": 0.14362735707052737, "grad_norm": 1.3801428245496934, "learning_rate": 4.7497505161996356e-05, "loss": 0.7143, "num_input_tokens_seen": 237111616, "step": 1312 }, { "epoch": 0.1437368291414653, "grad_norm": 1.309201045532928, "learning_rate": 4.749375400757566e-05, "loss": 0.8587, "num_input_tokens_seen": 237275584, "step": 1313 }, { "epoch": 0.1438463012124032, "grad_norm": 1.3334576081931535, "learning_rate": 4.749000019218687e-05, "loss": 0.6385, "num_input_tokens_seen": 237445824, "step": 1314 }, { "epoch": 0.14395577328334108, "grad_norm": 1.40318787227879, "learning_rate": 4.7486243716274036e-05, "loss": 0.788, "num_input_tokens_seen": 237612928, "step": 1315 }, { "epoch": 0.14406524535427898, "grad_norm": 1.3055126755799247, "learning_rate": 4.748248458028157e-05, "loss": 0.7473, "num_input_tokens_seen": 237807584, "step": 1316 }, { "epoch": 0.1441747174252169, "grad_norm": 1.2036907797850487, "learning_rate": 4.747872278465416e-05, "loss": 0.6219, "num_input_tokens_seen": 237949152, "step": 1317 }, { "epoch": 0.1442841894961548, "grad_norm": 1.310611410584441, "learning_rate": 4.7474958329836805e-05, "loss": 0.6392, "num_input_tokens_seen": 238133280, "step": 1318 }, { "epoch": 0.14439366156709268, "grad_norm": 1.164915911296146, "learning_rate": 4.747119121627485e-05, "loss": 0.656, "num_input_tokens_seen": 238327936, "step": 1319 }, { "epoch": 0.1445031336380306, "grad_norm": 1.2989643685150458, "learning_rate": 4.746742144441393e-05, "loss": 0.7357, "num_input_tokens_seen": 238488096, "step": 1320 }, { "epoch": 0.1446126057089685, "grad_norm": 1.1910489906849633, "learning_rate": 4.7463649014700004e-05, "loss": 0.6705, "num_input_tokens_seen": 238660352, "step": 1321 }, { "epoch": 0.1447220777799064, "grad_norm": 1.2109378445532524, "learning_rate": 4.7459873927579345e-05, "loss": 0.6446, "num_input_tokens_seen": 238847616, "step": 1322 }, { "epoch": 0.14483154985084432, "grad_norm": 1.2300344518163016, "learning_rate": 4.745609618349853e-05, "loss": 0.6836, "num_input_tokens_seen": 239023008, "step": 1323 }, { "epoch": 0.1449410219217822, "grad_norm": 1.3663297741941238, "learning_rate": 4.7452315782904477e-05, "loss": 0.8015, "num_input_tokens_seen": 239192800, "step": 1324 }, { "epoch": 0.1450504939927201, "grad_norm": 1.2884539005587476, "learning_rate": 4.744853272624438e-05, "loss": 0.6096, "num_input_tokens_seen": 239378048, "step": 1325 }, { "epoch": 0.145159966063658, "grad_norm": 1.2045939012079516, "learning_rate": 4.7444747013965776e-05, "loss": 0.7963, "num_input_tokens_seen": 239589280, "step": 1326 }, { "epoch": 0.14526943813459592, "grad_norm": 1.2749347426974065, "learning_rate": 4.744095864651651e-05, "loss": 0.7845, "num_input_tokens_seen": 239787968, "step": 1327 }, { "epoch": 0.1453789102055338, "grad_norm": 1.2111722657068265, "learning_rate": 4.7437167624344736e-05, "loss": 0.598, "num_input_tokens_seen": 239953952, "step": 1328 }, { "epoch": 0.1454883822764717, "grad_norm": 1.3765646528558426, "learning_rate": 4.743337394789892e-05, "loss": 1.0846, "num_input_tokens_seen": 240152640, "step": 1329 }, { "epoch": 0.14559785434740963, "grad_norm": 1.198433236682774, "learning_rate": 4.7429577617627864e-05, "loss": 0.5813, "num_input_tokens_seen": 240359616, "step": 1330 }, { "epoch": 0.14570732641834752, "grad_norm": 1.2159438472632138, "learning_rate": 4.7425778633980636e-05, "loss": 0.7968, "num_input_tokens_seen": 240537696, "step": 1331 }, { "epoch": 0.14581679848928542, "grad_norm": 1.278872680223479, "learning_rate": 4.742197699740668e-05, "loss": 0.8136, "num_input_tokens_seen": 240732800, "step": 1332 }, { "epoch": 0.14592627056022334, "grad_norm": 1.0917278124644823, "learning_rate": 4.74181727083557e-05, "loss": 0.6057, "num_input_tokens_seen": 240911776, "step": 1333 }, { "epoch": 0.14603574263116123, "grad_norm": 1.1570769265564937, "learning_rate": 4.741436576727775e-05, "loss": 0.756, "num_input_tokens_seen": 241118304, "step": 1334 }, { "epoch": 0.14614521470209912, "grad_norm": 1.2257750103161376, "learning_rate": 4.741055617462318e-05, "loss": 0.8881, "num_input_tokens_seen": 241301760, "step": 1335 }, { "epoch": 0.14625468677303702, "grad_norm": 1.1399478208579854, "learning_rate": 4.7406743930842655e-05, "loss": 0.6046, "num_input_tokens_seen": 241471776, "step": 1336 }, { "epoch": 0.14636415884397494, "grad_norm": 1.2439901838865937, "learning_rate": 4.740292903638716e-05, "loss": 0.6165, "num_input_tokens_seen": 241629920, "step": 1337 }, { "epoch": 0.14647363091491283, "grad_norm": 1.2330285950227589, "learning_rate": 4.739911149170798e-05, "loss": 0.772, "num_input_tokens_seen": 241810464, "step": 1338 }, { "epoch": 0.14658310298585073, "grad_norm": 1.3054081645355493, "learning_rate": 4.7395291297256725e-05, "loss": 0.749, "num_input_tokens_seen": 241981600, "step": 1339 }, { "epoch": 0.14669257505678865, "grad_norm": 1.22530850009708, "learning_rate": 4.7391468453485334e-05, "loss": 0.6604, "num_input_tokens_seen": 242140192, "step": 1340 }, { "epoch": 0.14680204712772654, "grad_norm": 1.3138826898119882, "learning_rate": 4.738764296084603e-05, "loss": 0.8946, "num_input_tokens_seen": 242310656, "step": 1341 }, { "epoch": 0.14691151919866444, "grad_norm": 1.1975487627840486, "learning_rate": 4.738381481979136e-05, "loss": 0.6092, "num_input_tokens_seen": 242472832, "step": 1342 }, { "epoch": 0.14702099126960233, "grad_norm": 1.424768208173345, "learning_rate": 4.7379984030774184e-05, "loss": 0.7997, "num_input_tokens_seen": 242664128, "step": 1343 }, { "epoch": 0.14713046334054025, "grad_norm": 1.3908222787203268, "learning_rate": 4.737615059424768e-05, "loss": 0.7865, "num_input_tokens_seen": 242851840, "step": 1344 }, { "epoch": 0.14723993541147815, "grad_norm": 1.2604312528076251, "learning_rate": 4.737231451066534e-05, "loss": 0.7408, "num_input_tokens_seen": 243059488, "step": 1345 }, { "epoch": 0.14734940748241604, "grad_norm": 1.300599598157628, "learning_rate": 4.7368475780480956e-05, "loss": 0.8787, "num_input_tokens_seen": 243233760, "step": 1346 }, { "epoch": 0.14745887955335396, "grad_norm": 1.3729174188004991, "learning_rate": 4.7364634404148655e-05, "loss": 0.7509, "num_input_tokens_seen": 243382048, "step": 1347 }, { "epoch": 0.14756835162429185, "grad_norm": 1.1777287573229671, "learning_rate": 4.736079038212286e-05, "loss": 0.7923, "num_input_tokens_seen": 243578496, "step": 1348 }, { "epoch": 0.14767782369522975, "grad_norm": 1.2454448194098706, "learning_rate": 4.7356943714858306e-05, "loss": 0.773, "num_input_tokens_seen": 243770240, "step": 1349 }, { "epoch": 0.14778729576616767, "grad_norm": 1.26645581326697, "learning_rate": 4.735309440281005e-05, "loss": 0.7505, "num_input_tokens_seen": 243940704, "step": 1350 }, { "epoch": 0.14789676783710556, "grad_norm": 1.2980063685509278, "learning_rate": 4.7349242446433464e-05, "loss": 0.9581, "num_input_tokens_seen": 244129760, "step": 1351 }, { "epoch": 0.14800623990804346, "grad_norm": 1.150484764508859, "learning_rate": 4.734538784618421e-05, "loss": 0.8142, "num_input_tokens_seen": 244339872, "step": 1352 }, { "epoch": 0.14811571197898135, "grad_norm": 1.0547787662153967, "learning_rate": 4.734153060251829e-05, "loss": 0.6503, "num_input_tokens_seen": 244539904, "step": 1353 }, { "epoch": 0.14822518404991927, "grad_norm": 1.1287519620115816, "learning_rate": 4.733767071589202e-05, "loss": 0.7656, "num_input_tokens_seen": 244741952, "step": 1354 }, { "epoch": 0.14833465612085717, "grad_norm": 1.222570446769614, "learning_rate": 4.7333808186761996e-05, "loss": 0.5994, "num_input_tokens_seen": 244903456, "step": 1355 }, { "epoch": 0.14844412819179506, "grad_norm": 1.4143577815350834, "learning_rate": 4.732994301558516e-05, "loss": 0.8564, "num_input_tokens_seen": 245054656, "step": 1356 }, { "epoch": 0.14855360026273298, "grad_norm": 1.2344770751468455, "learning_rate": 4.7326075202818765e-05, "loss": 0.7511, "num_input_tokens_seen": 245230272, "step": 1357 }, { "epoch": 0.14866307233367088, "grad_norm": 1.1613342264466149, "learning_rate": 4.7322204748920345e-05, "loss": 0.7001, "num_input_tokens_seen": 245412160, "step": 1358 }, { "epoch": 0.14877254440460877, "grad_norm": 1.310761799365042, "learning_rate": 4.731833165434778e-05, "loss": 0.8003, "num_input_tokens_seen": 245590688, "step": 1359 }, { "epoch": 0.14888201647554666, "grad_norm": 1.307485629380211, "learning_rate": 4.731445591955924e-05, "loss": 0.8152, "num_input_tokens_seen": 245785120, "step": 1360 }, { "epoch": 0.14899148854648459, "grad_norm": 1.2493661704044814, "learning_rate": 4.7310577545013224e-05, "loss": 0.6393, "num_input_tokens_seen": 245950656, "step": 1361 }, { "epoch": 0.14910096061742248, "grad_norm": 1.3754320766244807, "learning_rate": 4.7306696531168535e-05, "loss": 0.7065, "num_input_tokens_seen": 246095808, "step": 1362 }, { "epoch": 0.14921043268836037, "grad_norm": 1.237156305847267, "learning_rate": 4.7302812878484294e-05, "loss": 0.7815, "num_input_tokens_seen": 246301888, "step": 1363 }, { "epoch": 0.1493199047592983, "grad_norm": 1.1852847065522054, "learning_rate": 4.7298926587419924e-05, "loss": 0.7585, "num_input_tokens_seen": 246471008, "step": 1364 }, { "epoch": 0.1494293768302362, "grad_norm": 1.183489854740935, "learning_rate": 4.729503765843516e-05, "loss": 0.6733, "num_input_tokens_seen": 246626688, "step": 1365 }, { "epoch": 0.14953884890117408, "grad_norm": 1.1552969639140847, "learning_rate": 4.7291146091990066e-05, "loss": 0.6085, "num_input_tokens_seen": 246809920, "step": 1366 }, { "epoch": 0.149648320972112, "grad_norm": 1.3636221910231474, "learning_rate": 4.7287251888545005e-05, "loss": 0.6327, "num_input_tokens_seen": 246989120, "step": 1367 }, { "epoch": 0.1497577930430499, "grad_norm": 1.1997835401173815, "learning_rate": 4.728335504856065e-05, "loss": 0.6761, "num_input_tokens_seen": 247184672, "step": 1368 }, { "epoch": 0.1498672651139878, "grad_norm": 1.2830448604629014, "learning_rate": 4.727945557249799e-05, "loss": 0.5573, "num_input_tokens_seen": 247376864, "step": 1369 }, { "epoch": 0.14997673718492568, "grad_norm": 1.126840939549202, "learning_rate": 4.727555346081833e-05, "loss": 0.6686, "num_input_tokens_seen": 247565024, "step": 1370 }, { "epoch": 0.1500862092558636, "grad_norm": 1.2038474452901735, "learning_rate": 4.7271648713983276e-05, "loss": 0.8797, "num_input_tokens_seen": 247761696, "step": 1371 }, { "epoch": 0.1501956813268015, "grad_norm": 1.2143731386304912, "learning_rate": 4.726774133245476e-05, "loss": 0.6095, "num_input_tokens_seen": 247919168, "step": 1372 }, { "epoch": 0.1503051533977394, "grad_norm": 1.3978117496480789, "learning_rate": 4.7263831316695005e-05, "loss": 0.8636, "num_input_tokens_seen": 248106432, "step": 1373 }, { "epoch": 0.15041462546867732, "grad_norm": 1.1592634985431765, "learning_rate": 4.725991866716657e-05, "loss": 0.7765, "num_input_tokens_seen": 248286080, "step": 1374 }, { "epoch": 0.1505240975396152, "grad_norm": 1.238869897176999, "learning_rate": 4.7256003384332314e-05, "loss": 0.6218, "num_input_tokens_seen": 248476704, "step": 1375 }, { "epoch": 0.1506335696105531, "grad_norm": 1.4917869629471943, "learning_rate": 4.72520854686554e-05, "loss": 0.9153, "num_input_tokens_seen": 248655680, "step": 1376 }, { "epoch": 0.150743041681491, "grad_norm": 1.2356319064158108, "learning_rate": 4.724816492059932e-05, "loss": 0.6723, "num_input_tokens_seen": 248838688, "step": 1377 }, { "epoch": 0.15085251375242892, "grad_norm": 1.2027904491231851, "learning_rate": 4.724424174062786e-05, "loss": 0.6343, "num_input_tokens_seen": 249004672, "step": 1378 }, { "epoch": 0.1509619858233668, "grad_norm": 1.2050020017646845, "learning_rate": 4.724031592920512e-05, "loss": 0.6019, "num_input_tokens_seen": 249170880, "step": 1379 }, { "epoch": 0.1510714578943047, "grad_norm": 1.2409950146571131, "learning_rate": 4.7236387486795525e-05, "loss": 0.7691, "num_input_tokens_seen": 249360160, "step": 1380 }, { "epoch": 0.15118092996524263, "grad_norm": 1.1711723255469342, "learning_rate": 4.72324564138638e-05, "loss": 0.6459, "num_input_tokens_seen": 249556832, "step": 1381 }, { "epoch": 0.15129040203618052, "grad_norm": 1.35485628125996, "learning_rate": 4.722852271087498e-05, "loss": 0.7898, "num_input_tokens_seen": 249722592, "step": 1382 }, { "epoch": 0.15139987410711842, "grad_norm": 1.397161274362872, "learning_rate": 4.722458637829442e-05, "loss": 0.8013, "num_input_tokens_seen": 249896864, "step": 1383 }, { "epoch": 0.15150934617805634, "grad_norm": 1.167082485392511, "learning_rate": 4.722064741658777e-05, "loss": 0.6231, "num_input_tokens_seen": 250087488, "step": 1384 }, { "epoch": 0.15161881824899423, "grad_norm": 1.2958822472142364, "learning_rate": 4.721670582622102e-05, "loss": 0.6553, "num_input_tokens_seen": 250248544, "step": 1385 }, { "epoch": 0.15172829031993212, "grad_norm": 1.2307405201951513, "learning_rate": 4.721276160766043e-05, "loss": 0.618, "num_input_tokens_seen": 250422368, "step": 1386 }, { "epoch": 0.15183776239087002, "grad_norm": 1.182558620524138, "learning_rate": 4.720881476137261e-05, "loss": 0.8347, "num_input_tokens_seen": 250632928, "step": 1387 }, { "epoch": 0.15194723446180794, "grad_norm": 1.2140209195787128, "learning_rate": 4.720486528782447e-05, "loss": 0.6543, "num_input_tokens_seen": 250820416, "step": 1388 }, { "epoch": 0.15205670653274583, "grad_norm": 1.2020188237714107, "learning_rate": 4.720091318748321e-05, "loss": 0.8169, "num_input_tokens_seen": 251015296, "step": 1389 }, { "epoch": 0.15216617860368373, "grad_norm": 1.4546971993783697, "learning_rate": 4.7196958460816356e-05, "loss": 0.8252, "num_input_tokens_seen": 251222944, "step": 1390 }, { "epoch": 0.15227565067462165, "grad_norm": 1.2499545565927819, "learning_rate": 4.719300110829174e-05, "loss": 0.6979, "num_input_tokens_seen": 251431264, "step": 1391 }, { "epoch": 0.15238512274555954, "grad_norm": 1.3713415633782824, "learning_rate": 4.718904113037754e-05, "loss": 0.6846, "num_input_tokens_seen": 251605760, "step": 1392 }, { "epoch": 0.15249459481649744, "grad_norm": 1.2921286443847324, "learning_rate": 4.718507852754218e-05, "loss": 0.7524, "num_input_tokens_seen": 251784064, "step": 1393 }, { "epoch": 0.15260406688743533, "grad_norm": 1.2650055016710866, "learning_rate": 4.718111330025444e-05, "loss": 0.5712, "num_input_tokens_seen": 251938176, "step": 1394 }, { "epoch": 0.15271353895837325, "grad_norm": 1.3184121354460054, "learning_rate": 4.717714544898341e-05, "loss": 0.6583, "num_input_tokens_seen": 252096992, "step": 1395 }, { "epoch": 0.15282301102931115, "grad_norm": 1.3764447511503541, "learning_rate": 4.717317497419846e-05, "loss": 0.8149, "num_input_tokens_seen": 252281792, "step": 1396 }, { "epoch": 0.15293248310024904, "grad_norm": 1.3395875735930178, "learning_rate": 4.7169201876369295e-05, "loss": 0.6314, "num_input_tokens_seen": 252428288, "step": 1397 }, { "epoch": 0.15304195517118696, "grad_norm": 1.1463060906517382, "learning_rate": 4.7165226155965936e-05, "loss": 0.5287, "num_input_tokens_seen": 252591136, "step": 1398 }, { "epoch": 0.15315142724212485, "grad_norm": 1.4707910681137177, "learning_rate": 4.7161247813458696e-05, "loss": 0.9543, "num_input_tokens_seen": 252768320, "step": 1399 }, { "epoch": 0.15326089931306275, "grad_norm": 1.3121691695566209, "learning_rate": 4.71572668493182e-05, "loss": 0.8586, "num_input_tokens_seen": 252968800, "step": 1400 }, { "epoch": 0.15337037138400067, "grad_norm": 1.3521580789573022, "learning_rate": 4.7153283264015394e-05, "loss": 0.9379, "num_input_tokens_seen": 253184288, "step": 1401 }, { "epoch": 0.15347984345493856, "grad_norm": 1.3537419117609317, "learning_rate": 4.714929705802153e-05, "loss": 0.8419, "num_input_tokens_seen": 253361696, "step": 1402 }, { "epoch": 0.15358931552587646, "grad_norm": 1.273685735367256, "learning_rate": 4.714530823180816e-05, "loss": 0.7545, "num_input_tokens_seen": 253536864, "step": 1403 }, { "epoch": 0.15369878759681435, "grad_norm": 1.306186294827443, "learning_rate": 4.7141316785847176e-05, "loss": 0.7545, "num_input_tokens_seen": 253716064, "step": 1404 }, { "epoch": 0.15380825966775227, "grad_norm": 1.2576525568709414, "learning_rate": 4.713732272061073e-05, "loss": 0.6513, "num_input_tokens_seen": 253875776, "step": 1405 }, { "epoch": 0.15391773173869017, "grad_norm": 1.3336317006696436, "learning_rate": 4.713332603657133e-05, "loss": 0.6302, "num_input_tokens_seen": 254041760, "step": 1406 }, { "epoch": 0.15402720380962806, "grad_norm": 1.1624419946452886, "learning_rate": 4.712932673420177e-05, "loss": 0.6368, "num_input_tokens_seen": 254229920, "step": 1407 }, { "epoch": 0.15413667588056598, "grad_norm": 1.281965800119284, "learning_rate": 4.7125324813975155e-05, "loss": 0.5526, "num_input_tokens_seen": 254380672, "step": 1408 }, { "epoch": 0.15424614795150388, "grad_norm": 1.2632271929081917, "learning_rate": 4.712132027636492e-05, "loss": 0.7591, "num_input_tokens_seen": 254564352, "step": 1409 }, { "epoch": 0.15435562002244177, "grad_norm": 1.3673654059054248, "learning_rate": 4.711731312184479e-05, "loss": 0.7529, "num_input_tokens_seen": 254723840, "step": 1410 }, { "epoch": 0.15446509209337966, "grad_norm": 1.4013979812056585, "learning_rate": 4.711330335088879e-05, "loss": 0.9235, "num_input_tokens_seen": 254932608, "step": 1411 }, { "epoch": 0.15457456416431758, "grad_norm": 1.2709600771044636, "learning_rate": 4.710929096397127e-05, "loss": 0.6455, "num_input_tokens_seen": 255107552, "step": 1412 }, { "epoch": 0.15468403623525548, "grad_norm": 1.214909297241939, "learning_rate": 4.710527596156691e-05, "loss": 0.5425, "num_input_tokens_seen": 255292352, "step": 1413 }, { "epoch": 0.15479350830619337, "grad_norm": 1.2623270180669062, "learning_rate": 4.710125834415065e-05, "loss": 0.8238, "num_input_tokens_seen": 255472224, "step": 1414 }, { "epoch": 0.1549029803771313, "grad_norm": 1.3783059691204052, "learning_rate": 4.709723811219779e-05, "loss": 0.7698, "num_input_tokens_seen": 255626336, "step": 1415 }, { "epoch": 0.1550124524480692, "grad_norm": 1.2627427991300368, "learning_rate": 4.70932152661839e-05, "loss": 0.8038, "num_input_tokens_seen": 255816288, "step": 1416 }, { "epoch": 0.15512192451900708, "grad_norm": 1.341703231186909, "learning_rate": 4.7089189806584874e-05, "loss": 0.7319, "num_input_tokens_seen": 255962112, "step": 1417 }, { "epoch": 0.155231396589945, "grad_norm": 1.2213876498688434, "learning_rate": 4.708516173387692e-05, "loss": 0.922, "num_input_tokens_seen": 256176256, "step": 1418 }, { "epoch": 0.1553408686608829, "grad_norm": 1.1525062058585809, "learning_rate": 4.7081131048536564e-05, "loss": 0.5817, "num_input_tokens_seen": 256363968, "step": 1419 }, { "epoch": 0.1554503407318208, "grad_norm": 1.384329271099457, "learning_rate": 4.70770977510406e-05, "loss": 0.7691, "num_input_tokens_seen": 256512480, "step": 1420 }, { "epoch": 0.15555981280275868, "grad_norm": 1.1925848962246102, "learning_rate": 4.70730618418662e-05, "loss": 0.6224, "num_input_tokens_seen": 256665696, "step": 1421 }, { "epoch": 0.1556692848736966, "grad_norm": 1.119304278859051, "learning_rate": 4.7069023321490754e-05, "loss": 0.5609, "num_input_tokens_seen": 256837504, "step": 1422 }, { "epoch": 0.1557787569446345, "grad_norm": 1.3845616716375435, "learning_rate": 4.706498219039206e-05, "loss": 0.7151, "num_input_tokens_seen": 257006848, "step": 1423 }, { "epoch": 0.1558882290155724, "grad_norm": 1.1412626600451172, "learning_rate": 4.706093844904814e-05, "loss": 0.6813, "num_input_tokens_seen": 257175520, "step": 1424 }, { "epoch": 0.15599770108651032, "grad_norm": 1.2638653425787705, "learning_rate": 4.7056892097937376e-05, "loss": 0.6728, "num_input_tokens_seen": 257380256, "step": 1425 }, { "epoch": 0.1561071731574482, "grad_norm": 1.2169692895504736, "learning_rate": 4.705284313753845e-05, "loss": 0.6883, "num_input_tokens_seen": 257591712, "step": 1426 }, { "epoch": 0.1562166452283861, "grad_norm": 1.3117324310370848, "learning_rate": 4.7048791568330333e-05, "loss": 0.7034, "num_input_tokens_seen": 257786144, "step": 1427 }, { "epoch": 0.156326117299324, "grad_norm": 1.3045151648087747, "learning_rate": 4.7044737390792326e-05, "loss": 0.7254, "num_input_tokens_seen": 257976544, "step": 1428 }, { "epoch": 0.15643558937026192, "grad_norm": 1.2351223095857897, "learning_rate": 4.704068060540402e-05, "loss": 0.8161, "num_input_tokens_seen": 258157984, "step": 1429 }, { "epoch": 0.1565450614411998, "grad_norm": 1.2807819162711187, "learning_rate": 4.703662121264535e-05, "loss": 0.905, "num_input_tokens_seen": 258364736, "step": 1430 }, { "epoch": 0.1566545335121377, "grad_norm": 1.300213343280532, "learning_rate": 4.70325592129965e-05, "loss": 0.701, "num_input_tokens_seen": 258581344, "step": 1431 }, { "epoch": 0.15676400558307563, "grad_norm": 1.3520971574747662, "learning_rate": 4.7028494606938025e-05, "loss": 0.9636, "num_input_tokens_seen": 258758080, "step": 1432 }, { "epoch": 0.15687347765401352, "grad_norm": 1.332905457308302, "learning_rate": 4.7024427394950745e-05, "loss": 0.7459, "num_input_tokens_seen": 258933696, "step": 1433 }, { "epoch": 0.15698294972495141, "grad_norm": 1.3503088015130704, "learning_rate": 4.702035757751581e-05, "loss": 0.7365, "num_input_tokens_seen": 259109984, "step": 1434 }, { "epoch": 0.15709242179588934, "grad_norm": 1.3061527088200424, "learning_rate": 4.701628515511467e-05, "loss": 0.7074, "num_input_tokens_seen": 259321216, "step": 1435 }, { "epoch": 0.15720189386682723, "grad_norm": 1.2976687656375487, "learning_rate": 4.701221012822908e-05, "loss": 0.6889, "num_input_tokens_seen": 259504672, "step": 1436 }, { "epoch": 0.15731136593776512, "grad_norm": 1.358148262467214, "learning_rate": 4.7008132497341116e-05, "loss": 0.9133, "num_input_tokens_seen": 259678496, "step": 1437 }, { "epoch": 0.15742083800870302, "grad_norm": 1.1908834342125327, "learning_rate": 4.700405226293314e-05, "loss": 0.7161, "num_input_tokens_seen": 259845376, "step": 1438 }, { "epoch": 0.15753031007964094, "grad_norm": 1.3199925232444325, "learning_rate": 4.6999969425487864e-05, "loss": 0.6339, "num_input_tokens_seen": 259999936, "step": 1439 }, { "epoch": 0.15763978215057883, "grad_norm": 1.232860746998816, "learning_rate": 4.699588398548825e-05, "loss": 0.6262, "num_input_tokens_seen": 260186304, "step": 1440 }, { "epoch": 0.15774925422151673, "grad_norm": 1.2629791672378572, "learning_rate": 4.699179594341761e-05, "loss": 0.7425, "num_input_tokens_seen": 260391488, "step": 1441 }, { "epoch": 0.15785872629245465, "grad_norm": 1.2355075903504593, "learning_rate": 4.698770529975956e-05, "loss": 0.6812, "num_input_tokens_seen": 260565536, "step": 1442 }, { "epoch": 0.15796819836339254, "grad_norm": 1.3098123370861867, "learning_rate": 4.698361205499799e-05, "loss": 0.7717, "num_input_tokens_seen": 260740928, "step": 1443 }, { "epoch": 0.15807767043433044, "grad_norm": 1.2037535672714283, "learning_rate": 4.6979516209617144e-05, "loss": 0.7051, "num_input_tokens_seen": 260930208, "step": 1444 }, { "epoch": 0.15818714250526833, "grad_norm": 1.1275848786405476, "learning_rate": 4.697541776410156e-05, "loss": 0.5688, "num_input_tokens_seen": 261116800, "step": 1445 }, { "epoch": 0.15829661457620625, "grad_norm": 1.2158137922610055, "learning_rate": 4.697131671893605e-05, "loss": 0.5024, "num_input_tokens_seen": 261270464, "step": 1446 }, { "epoch": 0.15840608664714415, "grad_norm": 1.2986046161364526, "learning_rate": 4.696721307460579e-05, "loss": 0.7838, "num_input_tokens_seen": 261453920, "step": 1447 }, { "epoch": 0.15851555871808204, "grad_norm": 1.3654833390634793, "learning_rate": 4.6963106831596206e-05, "loss": 0.8274, "num_input_tokens_seen": 261604000, "step": 1448 }, { "epoch": 0.15862503078901996, "grad_norm": 1.3412000725404543, "learning_rate": 4.695899799039307e-05, "loss": 0.6234, "num_input_tokens_seen": 261785888, "step": 1449 }, { "epoch": 0.15873450285995785, "grad_norm": 1.357343493057917, "learning_rate": 4.695488655148245e-05, "loss": 0.8017, "num_input_tokens_seen": 261957920, "step": 1450 }, { "epoch": 0.15884397493089575, "grad_norm": 1.2539649545588016, "learning_rate": 4.695077251535073e-05, "loss": 0.7015, "num_input_tokens_seen": 262151232, "step": 1451 }, { "epoch": 0.15895344700183367, "grad_norm": 1.3909298851886331, "learning_rate": 4.6946655882484575e-05, "loss": 0.7989, "num_input_tokens_seen": 262328640, "step": 1452 }, { "epoch": 0.15906291907277156, "grad_norm": 1.3859090670966627, "learning_rate": 4.694253665337099e-05, "loss": 0.8251, "num_input_tokens_seen": 262537184, "step": 1453 }, { "epoch": 0.15917239114370946, "grad_norm": 1.5101924949121033, "learning_rate": 4.693841482849726e-05, "loss": 0.8237, "num_input_tokens_seen": 262702720, "step": 1454 }, { "epoch": 0.15928186321464735, "grad_norm": 1.29846544293061, "learning_rate": 4.6934290408351e-05, "loss": 0.8038, "num_input_tokens_seen": 262893344, "step": 1455 }, { "epoch": 0.15939133528558527, "grad_norm": 1.2067877761558066, "learning_rate": 4.693016339342011e-05, "loss": 0.5497, "num_input_tokens_seen": 263058208, "step": 1456 }, { "epoch": 0.15950080735652317, "grad_norm": 1.2761723925003017, "learning_rate": 4.692603378419282e-05, "loss": 0.5789, "num_input_tokens_seen": 263219712, "step": 1457 }, { "epoch": 0.15961027942746106, "grad_norm": 1.2239104836491879, "learning_rate": 4.692190158115765e-05, "loss": 0.7238, "num_input_tokens_seen": 263372256, "step": 1458 }, { "epoch": 0.15971975149839898, "grad_norm": 1.2584622049760372, "learning_rate": 4.691776678480343e-05, "loss": 0.7467, "num_input_tokens_seen": 263571168, "step": 1459 }, { "epoch": 0.15982922356933688, "grad_norm": 1.2627952400430766, "learning_rate": 4.69136293956193e-05, "loss": 0.7387, "num_input_tokens_seen": 263740288, "step": 1460 }, { "epoch": 0.15993869564027477, "grad_norm": 1.1856550139075401, "learning_rate": 4.6909489414094694e-05, "loss": 0.8547, "num_input_tokens_seen": 263924640, "step": 1461 }, { "epoch": 0.16004816771121266, "grad_norm": 1.1883600032073194, "learning_rate": 4.6905346840719386e-05, "loss": 0.6846, "num_input_tokens_seen": 264108544, "step": 1462 }, { "epoch": 0.16015763978215058, "grad_norm": 1.2459326852246653, "learning_rate": 4.690120167598341e-05, "loss": 0.6742, "num_input_tokens_seen": 264300064, "step": 1463 }, { "epoch": 0.16026711185308848, "grad_norm": 1.2385899011680386, "learning_rate": 4.689705392037716e-05, "loss": 0.7547, "num_input_tokens_seen": 264469408, "step": 1464 }, { "epoch": 0.16037658392402637, "grad_norm": 1.2310889240378007, "learning_rate": 4.689290357439128e-05, "loss": 0.5629, "num_input_tokens_seen": 264658240, "step": 1465 }, { "epoch": 0.1604860559949643, "grad_norm": 1.3447199580986828, "learning_rate": 4.688875063851676e-05, "loss": 0.7411, "num_input_tokens_seen": 264799584, "step": 1466 }, { "epoch": 0.1605955280659022, "grad_norm": 1.2983536161830642, "learning_rate": 4.688459511324489e-05, "loss": 0.7588, "num_input_tokens_seen": 264943616, "step": 1467 }, { "epoch": 0.16070500013684008, "grad_norm": 1.2472380642544434, "learning_rate": 4.688043699906725e-05, "loss": 0.6871, "num_input_tokens_seen": 265153280, "step": 1468 }, { "epoch": 0.160814472207778, "grad_norm": 1.3472038947275635, "learning_rate": 4.687627629647573e-05, "loss": 0.7719, "num_input_tokens_seen": 265312320, "step": 1469 }, { "epoch": 0.1609239442787159, "grad_norm": 1.3299923055469254, "learning_rate": 4.687211300596256e-05, "loss": 0.9027, "num_input_tokens_seen": 265487712, "step": 1470 }, { "epoch": 0.1610334163496538, "grad_norm": 1.4132678080310175, "learning_rate": 4.686794712802023e-05, "loss": 0.865, "num_input_tokens_seen": 265636672, "step": 1471 }, { "epoch": 0.16114288842059168, "grad_norm": 1.31331817919965, "learning_rate": 4.6863778663141556e-05, "loss": 0.6975, "num_input_tokens_seen": 265793696, "step": 1472 }, { "epoch": 0.1612523604915296, "grad_norm": 1.1240078471782073, "learning_rate": 4.6859607611819664e-05, "loss": 0.6301, "num_input_tokens_seen": 265956320, "step": 1473 }, { "epoch": 0.1613618325624675, "grad_norm": 1.2305431040747914, "learning_rate": 4.685543397454799e-05, "loss": 0.6863, "num_input_tokens_seen": 266108864, "step": 1474 }, { "epoch": 0.1614713046334054, "grad_norm": 1.1898055533216199, "learning_rate": 4.685125775182024e-05, "loss": 0.79, "num_input_tokens_seen": 266278880, "step": 1475 }, { "epoch": 0.16158077670434332, "grad_norm": 1.2888683750906125, "learning_rate": 4.684707894413048e-05, "loss": 0.7672, "num_input_tokens_seen": 266479808, "step": 1476 }, { "epoch": 0.1616902487752812, "grad_norm": 1.1406058740971943, "learning_rate": 4.684289755197305e-05, "loss": 0.6446, "num_input_tokens_seen": 266652960, "step": 1477 }, { "epoch": 0.1617997208462191, "grad_norm": 1.19959752565455, "learning_rate": 4.683871357584259e-05, "loss": 0.6489, "num_input_tokens_seen": 266805728, "step": 1478 }, { "epoch": 0.161909192917157, "grad_norm": 1.276795389136854, "learning_rate": 4.6834527016234065e-05, "loss": 0.644, "num_input_tokens_seen": 266987168, "step": 1479 }, { "epoch": 0.16201866498809492, "grad_norm": 1.2392268857110484, "learning_rate": 4.6830337873642724e-05, "loss": 0.6839, "num_input_tokens_seen": 267159872, "step": 1480 }, { "epoch": 0.1621281370590328, "grad_norm": 1.379482937570751, "learning_rate": 4.682614614856416e-05, "loss": 0.7335, "num_input_tokens_seen": 267318912, "step": 1481 }, { "epoch": 0.1622376091299707, "grad_norm": 1.3085712943356635, "learning_rate": 4.6821951841494225e-05, "loss": 0.7909, "num_input_tokens_seen": 267514464, "step": 1482 }, { "epoch": 0.16234708120090863, "grad_norm": 1.2715747525723349, "learning_rate": 4.6817754952929106e-05, "loss": 0.6752, "num_input_tokens_seen": 267699936, "step": 1483 }, { "epoch": 0.16245655327184652, "grad_norm": 1.245119006585681, "learning_rate": 4.681355548336528e-05, "loss": 0.8886, "num_input_tokens_seen": 267889216, "step": 1484 }, { "epoch": 0.16256602534278441, "grad_norm": 1.2562204556763046, "learning_rate": 4.680935343329954e-05, "loss": 0.6516, "num_input_tokens_seen": 268088576, "step": 1485 }, { "epoch": 0.16267549741372234, "grad_norm": 1.4681826368169115, "learning_rate": 4.680514880322898e-05, "loss": 0.7621, "num_input_tokens_seen": 268278752, "step": 1486 }, { "epoch": 0.16278496948466023, "grad_norm": 1.1888402101963236, "learning_rate": 4.680094159365101e-05, "loss": 0.6528, "num_input_tokens_seen": 268470048, "step": 1487 }, { "epoch": 0.16289444155559812, "grad_norm": 1.2624755101614147, "learning_rate": 4.679673180506332e-05, "loss": 0.7321, "num_input_tokens_seen": 268643872, "step": 1488 }, { "epoch": 0.16300391362653602, "grad_norm": 1.322320798908892, "learning_rate": 4.679251943796393e-05, "loss": 0.8103, "num_input_tokens_seen": 268818816, "step": 1489 }, { "epoch": 0.16311338569747394, "grad_norm": 1.1390723543303376, "learning_rate": 4.678830449285114e-05, "loss": 0.6323, "num_input_tokens_seen": 268993536, "step": 1490 }, { "epoch": 0.16322285776841183, "grad_norm": 1.197050987034131, "learning_rate": 4.6784086970223596e-05, "loss": 0.6342, "num_input_tokens_seen": 269173856, "step": 1491 }, { "epoch": 0.16333232983934973, "grad_norm": 1.3724053784882375, "learning_rate": 4.677986687058019e-05, "loss": 0.921, "num_input_tokens_seen": 269360672, "step": 1492 }, { "epoch": 0.16344180191028765, "grad_norm": 1.186605317230116, "learning_rate": 4.6775644194420184e-05, "loss": 0.6604, "num_input_tokens_seen": 269553312, "step": 1493 }, { "epoch": 0.16355127398122554, "grad_norm": 1.2238165374217027, "learning_rate": 4.6771418942243096e-05, "loss": 0.6726, "num_input_tokens_seen": 269760288, "step": 1494 }, { "epoch": 0.16366074605216344, "grad_norm": 1.217967442382737, "learning_rate": 4.6767191114548755e-05, "loss": 0.9044, "num_input_tokens_seen": 269953152, "step": 1495 }, { "epoch": 0.16377021812310136, "grad_norm": 1.1333488510041865, "learning_rate": 4.676296071183733e-05, "loss": 0.5553, "num_input_tokens_seen": 270132576, "step": 1496 }, { "epoch": 0.16387969019403925, "grad_norm": 1.1436302622670804, "learning_rate": 4.6758727734609256e-05, "loss": 0.5936, "num_input_tokens_seen": 270307968, "step": 1497 }, { "epoch": 0.16398916226497715, "grad_norm": 1.0697237051994772, "learning_rate": 4.675449218336528e-05, "loss": 0.5061, "num_input_tokens_seen": 270504192, "step": 1498 }, { "epoch": 0.16409863433591504, "grad_norm": 1.114854898376968, "learning_rate": 4.6750254058606467e-05, "loss": 0.507, "num_input_tokens_seen": 270676224, "step": 1499 }, { "epoch": 0.16420810640685296, "grad_norm": 1.2841752316937907, "learning_rate": 4.6746013360834184e-05, "loss": 0.7169, "num_input_tokens_seen": 270822720, "step": 1500 }, { "epoch": 0.16431757847779085, "grad_norm": 1.353951564235214, "learning_rate": 4.6741770090550084e-05, "loss": 0.7463, "num_input_tokens_seen": 271001248, "step": 1501 }, { "epoch": 0.16442705054872875, "grad_norm": 1.3256381819167207, "learning_rate": 4.673752424825615e-05, "loss": 0.7822, "num_input_tokens_seen": 271184032, "step": 1502 }, { "epoch": 0.16453652261966667, "grad_norm": 1.428835672394795, "learning_rate": 4.673327583445465e-05, "loss": 0.8156, "num_input_tokens_seen": 271376000, "step": 1503 }, { "epoch": 0.16464599469060456, "grad_norm": 1.3195648333633168, "learning_rate": 4.672902484964817e-05, "loss": 0.6269, "num_input_tokens_seen": 271561696, "step": 1504 }, { "epoch": 0.16475546676154246, "grad_norm": 1.42735274990107, "learning_rate": 4.672477129433959e-05, "loss": 0.8395, "num_input_tokens_seen": 271745600, "step": 1505 }, { "epoch": 0.16486493883248035, "grad_norm": 1.4922037173683116, "learning_rate": 4.672051516903209e-05, "loss": 0.7432, "num_input_tokens_seen": 271912704, "step": 1506 }, { "epoch": 0.16497441090341827, "grad_norm": 1.3512579955253516, "learning_rate": 4.671625647422917e-05, "loss": 0.7383, "num_input_tokens_seen": 272101088, "step": 1507 }, { "epoch": 0.16508388297435617, "grad_norm": 1.2987696125359718, "learning_rate": 4.6711995210434625e-05, "loss": 0.7614, "num_input_tokens_seen": 272298432, "step": 1508 }, { "epoch": 0.16519335504529406, "grad_norm": 1.3327150798939118, "learning_rate": 4.670773137815255e-05, "loss": 0.9494, "num_input_tokens_seen": 272484352, "step": 1509 }, { "epoch": 0.16530282711623198, "grad_norm": 1.2469393453033224, "learning_rate": 4.670346497788736e-05, "loss": 0.7336, "num_input_tokens_seen": 272658400, "step": 1510 }, { "epoch": 0.16541229918716988, "grad_norm": 1.3755374204980502, "learning_rate": 4.669919601014374e-05, "loss": 0.8216, "num_input_tokens_seen": 272849920, "step": 1511 }, { "epoch": 0.16552177125810777, "grad_norm": 1.2335905165626906, "learning_rate": 4.669492447542673e-05, "loss": 0.6921, "num_input_tokens_seen": 273032704, "step": 1512 }, { "epoch": 0.1656312433290457, "grad_norm": 1.1602877661146447, "learning_rate": 4.669065037424161e-05, "loss": 0.5839, "num_input_tokens_seen": 273241920, "step": 1513 }, { "epoch": 0.16574071539998358, "grad_norm": 1.1857333343029577, "learning_rate": 4.668637370709403e-05, "loss": 0.7662, "num_input_tokens_seen": 273432320, "step": 1514 }, { "epoch": 0.16585018747092148, "grad_norm": 1.3862314576954688, "learning_rate": 4.668209447448989e-05, "loss": 0.6626, "num_input_tokens_seen": 273617344, "step": 1515 }, { "epoch": 0.16595965954185937, "grad_norm": 1.4069815957872704, "learning_rate": 4.667781267693543e-05, "loss": 0.8782, "num_input_tokens_seen": 273786464, "step": 1516 }, { "epoch": 0.1660691316127973, "grad_norm": 1.2478583108358574, "learning_rate": 4.6673528314937166e-05, "loss": 0.6219, "num_input_tokens_seen": 273967456, "step": 1517 }, { "epoch": 0.1661786036837352, "grad_norm": 1.3163098942387763, "learning_rate": 4.666924138900194e-05, "loss": 0.7154, "num_input_tokens_seen": 274119776, "step": 1518 }, { "epoch": 0.16628807575467308, "grad_norm": 1.1875665545635514, "learning_rate": 4.666495189963688e-05, "loss": 0.7352, "num_input_tokens_seen": 274295840, "step": 1519 }, { "epoch": 0.166397547825611, "grad_norm": 1.1690057342134128, "learning_rate": 4.666065984734942e-05, "loss": 0.7133, "num_input_tokens_seen": 274492288, "step": 1520 }, { "epoch": 0.1665070198965489, "grad_norm": 1.1710212649459562, "learning_rate": 4.6656365232647316e-05, "loss": 0.6485, "num_input_tokens_seen": 274670592, "step": 1521 }, { "epoch": 0.1666164919674868, "grad_norm": 1.2328152519939926, "learning_rate": 4.66520680560386e-05, "loss": 0.6739, "num_input_tokens_seen": 274866592, "step": 1522 }, { "epoch": 0.16672596403842468, "grad_norm": 1.2287521255955982, "learning_rate": 4.664776831803163e-05, "loss": 0.6798, "num_input_tokens_seen": 275029440, "step": 1523 }, { "epoch": 0.1668354361093626, "grad_norm": 1.2309215862856018, "learning_rate": 4.664346601913504e-05, "loss": 0.6982, "num_input_tokens_seen": 275215136, "step": 1524 }, { "epoch": 0.1669449081803005, "grad_norm": 1.225426360300258, "learning_rate": 4.663916115985781e-05, "loss": 0.7057, "num_input_tokens_seen": 275398816, "step": 1525 }, { "epoch": 0.1670543802512384, "grad_norm": 1.287073670327634, "learning_rate": 4.663485374070917e-05, "loss": 0.7479, "num_input_tokens_seen": 275601984, "step": 1526 }, { "epoch": 0.16716385232217632, "grad_norm": 1.2138490683199934, "learning_rate": 4.66305437621987e-05, "loss": 0.6276, "num_input_tokens_seen": 275782976, "step": 1527 }, { "epoch": 0.1672733243931142, "grad_norm": 1.3061376952303583, "learning_rate": 4.6626231224836245e-05, "loss": 0.8127, "num_input_tokens_seen": 275975616, "step": 1528 }, { "epoch": 0.1673827964640521, "grad_norm": 1.350174383980448, "learning_rate": 4.662191612913199e-05, "loss": 0.827, "num_input_tokens_seen": 276165344, "step": 1529 }, { "epoch": 0.16749226853499002, "grad_norm": 1.2433251982943498, "learning_rate": 4.661759847559638e-05, "loss": 0.5903, "num_input_tokens_seen": 276365824, "step": 1530 }, { "epoch": 0.16760174060592792, "grad_norm": 1.5395954584135494, "learning_rate": 4.66132782647402e-05, "loss": 0.8826, "num_input_tokens_seen": 276572800, "step": 1531 }, { "epoch": 0.1677112126768658, "grad_norm": 1.1903930858715057, "learning_rate": 4.6608955497074526e-05, "loss": 0.6796, "num_input_tokens_seen": 276726688, "step": 1532 }, { "epoch": 0.1678206847478037, "grad_norm": 1.252008731458156, "learning_rate": 4.660463017311072e-05, "loss": 0.8858, "num_input_tokens_seen": 276923584, "step": 1533 }, { "epoch": 0.16793015681874163, "grad_norm": 1.060680458268902, "learning_rate": 4.660030229336046e-05, "loss": 0.5721, "num_input_tokens_seen": 277092480, "step": 1534 }, { "epoch": 0.16803962888967952, "grad_norm": 1.1328212343076534, "learning_rate": 4.659597185833574e-05, "loss": 0.6596, "num_input_tokens_seen": 277287136, "step": 1535 }, { "epoch": 0.16814910096061741, "grad_norm": 1.1800938089711797, "learning_rate": 4.6591638868548824e-05, "loss": 0.6495, "num_input_tokens_seen": 277486720, "step": 1536 }, { "epoch": 0.16825857303155534, "grad_norm": 1.2334823277834595, "learning_rate": 4.6587303324512324e-05, "loss": 0.9191, "num_input_tokens_seen": 277689664, "step": 1537 }, { "epoch": 0.16836804510249323, "grad_norm": 1.2408433275266493, "learning_rate": 4.6582965226739094e-05, "loss": 0.6179, "num_input_tokens_seen": 277872896, "step": 1538 }, { "epoch": 0.16847751717343112, "grad_norm": 1.0328698008190726, "learning_rate": 4.6578624575742335e-05, "loss": 0.5461, "num_input_tokens_seen": 278048960, "step": 1539 }, { "epoch": 0.16858698924436902, "grad_norm": 1.1906468206693912, "learning_rate": 4.6574281372035546e-05, "loss": 0.542, "num_input_tokens_seen": 278248320, "step": 1540 }, { "epoch": 0.16869646131530694, "grad_norm": 1.2080966235290234, "learning_rate": 4.6569935616132516e-05, "loss": 0.6508, "num_input_tokens_seen": 278406912, "step": 1541 }, { "epoch": 0.16880593338624483, "grad_norm": 1.4217814320389484, "learning_rate": 4.6565587308547334e-05, "loss": 0.9863, "num_input_tokens_seen": 278599776, "step": 1542 }, { "epoch": 0.16891540545718273, "grad_norm": 1.343300145278022, "learning_rate": 4.65612364497944e-05, "loss": 0.739, "num_input_tokens_seen": 278774944, "step": 1543 }, { "epoch": 0.16902487752812065, "grad_norm": 1.2744952062738897, "learning_rate": 4.655688304038841e-05, "loss": 0.7061, "num_input_tokens_seen": 278942496, "step": 1544 }, { "epoch": 0.16913434959905854, "grad_norm": 1.3543694515493097, "learning_rate": 4.6552527080844374e-05, "loss": 0.9761, "num_input_tokens_seen": 279133344, "step": 1545 }, { "epoch": 0.16924382166999644, "grad_norm": 1.2065483040655003, "learning_rate": 4.6548168571677574e-05, "loss": 0.584, "num_input_tokens_seen": 279330464, "step": 1546 }, { "epoch": 0.16935329374093436, "grad_norm": 1.2200309672881127, "learning_rate": 4.6543807513403636e-05, "loss": 0.6647, "num_input_tokens_seen": 279518400, "step": 1547 }, { "epoch": 0.16946276581187225, "grad_norm": 1.2063493994045023, "learning_rate": 4.653944390653845e-05, "loss": 0.6937, "num_input_tokens_seen": 279676992, "step": 1548 }, { "epoch": 0.16957223788281015, "grad_norm": 1.109813388654608, "learning_rate": 4.6535077751598224e-05, "loss": 0.6195, "num_input_tokens_seen": 279858432, "step": 1549 }, { "epoch": 0.16968170995374804, "grad_norm": 1.289694983477056, "learning_rate": 4.653070904909947e-05, "loss": 0.6947, "num_input_tokens_seen": 280040096, "step": 1550 }, { "epoch": 0.16979118202468596, "grad_norm": 1.2512120093066867, "learning_rate": 4.6526337799559e-05, "loss": 0.6698, "num_input_tokens_seen": 280220416, "step": 1551 }, { "epoch": 0.16990065409562385, "grad_norm": 1.1790705197948994, "learning_rate": 4.652196400349391e-05, "loss": 0.5624, "num_input_tokens_seen": 280369376, "step": 1552 }, { "epoch": 0.17001012616656175, "grad_norm": 1.2875361798582476, "learning_rate": 4.651758766142162e-05, "loss": 0.7419, "num_input_tokens_seen": 280562240, "step": 1553 }, { "epoch": 0.17011959823749967, "grad_norm": 1.3949515513412265, "learning_rate": 4.6513208773859854e-05, "loss": 0.7965, "num_input_tokens_seen": 280753984, "step": 1554 }, { "epoch": 0.17022907030843756, "grad_norm": 1.239691379657403, "learning_rate": 4.650882734132661e-05, "loss": 0.8029, "num_input_tokens_seen": 280964320, "step": 1555 }, { "epoch": 0.17033854237937546, "grad_norm": 1.2516004806641985, "learning_rate": 4.650444336434021e-05, "loss": 0.5798, "num_input_tokens_seen": 281123808, "step": 1556 }, { "epoch": 0.17044801445031335, "grad_norm": 1.2846428677677715, "learning_rate": 4.650005684341928e-05, "loss": 0.7068, "num_input_tokens_seen": 281309056, "step": 1557 }, { "epoch": 0.17055748652125127, "grad_norm": 1.3062979529673062, "learning_rate": 4.6495667779082716e-05, "loss": 0.711, "num_input_tokens_seen": 281483328, "step": 1558 }, { "epoch": 0.17066695859218917, "grad_norm": 1.3901749214735555, "learning_rate": 4.649127617184975e-05, "loss": 0.7761, "num_input_tokens_seen": 281652000, "step": 1559 }, { "epoch": 0.17077643066312706, "grad_norm": 1.3233961402888668, "learning_rate": 4.6486882022239895e-05, "loss": 0.733, "num_input_tokens_seen": 281862560, "step": 1560 }, { "epoch": 0.17088590273406498, "grad_norm": 1.3050668587493781, "learning_rate": 4.648248533077297e-05, "loss": 0.559, "num_input_tokens_seen": 282020032, "step": 1561 }, { "epoch": 0.17099537480500288, "grad_norm": 1.383801682979345, "learning_rate": 4.6478086097969104e-05, "loss": 0.9127, "num_input_tokens_seen": 282205280, "step": 1562 }, { "epoch": 0.17110484687594077, "grad_norm": 1.2825157147461632, "learning_rate": 4.647368432434871e-05, "loss": 0.7845, "num_input_tokens_seen": 282407328, "step": 1563 }, { "epoch": 0.1712143189468787, "grad_norm": 1.4722013302086405, "learning_rate": 4.646928001043251e-05, "loss": 0.6889, "num_input_tokens_seen": 282550464, "step": 1564 }, { "epoch": 0.17132379101781658, "grad_norm": 1.3254701446054593, "learning_rate": 4.646487315674153e-05, "loss": 0.7499, "num_input_tokens_seen": 282727648, "step": 1565 }, { "epoch": 0.17143326308875448, "grad_norm": 1.14861348159732, "learning_rate": 4.646046376379708e-05, "loss": 0.7225, "num_input_tokens_seen": 282921856, "step": 1566 }, { "epoch": 0.17154273515969237, "grad_norm": 1.2488494823492156, "learning_rate": 4.64560518321208e-05, "loss": 0.8488, "num_input_tokens_seen": 283110464, "step": 1567 }, { "epoch": 0.1716522072306303, "grad_norm": 1.214551540937966, "learning_rate": 4.6451637362234604e-05, "loss": 0.8527, "num_input_tokens_seen": 283308256, "step": 1568 }, { "epoch": 0.1717616793015682, "grad_norm": 1.161716478110826, "learning_rate": 4.644722035466072e-05, "loss": 0.5577, "num_input_tokens_seen": 283481408, "step": 1569 }, { "epoch": 0.17187115137250608, "grad_norm": 1.296953819361576, "learning_rate": 4.644280080992166e-05, "loss": 0.8266, "num_input_tokens_seen": 283679872, "step": 1570 }, { "epoch": 0.171980623443444, "grad_norm": 1.20145781300331, "learning_rate": 4.643837872854027e-05, "loss": 0.6238, "num_input_tokens_seen": 283857056, "step": 1571 }, { "epoch": 0.1720900955143819, "grad_norm": 1.2659760918310783, "learning_rate": 4.643395411103965e-05, "loss": 0.8533, "num_input_tokens_seen": 284046784, "step": 1572 }, { "epoch": 0.1721995675853198, "grad_norm": 1.2832206364892695, "learning_rate": 4.642952695794323e-05, "loss": 0.6206, "num_input_tokens_seen": 284218592, "step": 1573 }, { "epoch": 0.17230903965625768, "grad_norm": 1.2488144020374221, "learning_rate": 4.6425097269774744e-05, "loss": 0.7451, "num_input_tokens_seen": 284411232, "step": 1574 }, { "epoch": 0.1724185117271956, "grad_norm": 1.2556943414205635, "learning_rate": 4.64206650470582e-05, "loss": 0.7152, "num_input_tokens_seen": 284583712, "step": 1575 }, { "epoch": 0.1725279837981335, "grad_norm": 1.3827776284858688, "learning_rate": 4.6416230290317934e-05, "loss": 0.8709, "num_input_tokens_seen": 284754176, "step": 1576 }, { "epoch": 0.1726374558690714, "grad_norm": 1.2395400619434043, "learning_rate": 4.641179300007857e-05, "loss": 0.6065, "num_input_tokens_seen": 284897984, "step": 1577 }, { "epoch": 0.17274692794000931, "grad_norm": 1.3785152583393356, "learning_rate": 4.640735317686502e-05, "loss": 0.713, "num_input_tokens_seen": 285065536, "step": 1578 }, { "epoch": 0.1728564000109472, "grad_norm": 1.2886881891740243, "learning_rate": 4.6402910821202525e-05, "loss": 0.7582, "num_input_tokens_seen": 285237568, "step": 1579 }, { "epoch": 0.1729658720818851, "grad_norm": 1.2701818589040808, "learning_rate": 4.6398465933616585e-05, "loss": 0.8263, "num_input_tokens_seen": 285436032, "step": 1580 }, { "epoch": 0.17307534415282302, "grad_norm": 1.3159950134710434, "learning_rate": 4.6394018514633033e-05, "loss": 0.6285, "num_input_tokens_seen": 285630016, "step": 1581 }, { "epoch": 0.17318481622376092, "grad_norm": 1.4241910244350724, "learning_rate": 4.6389568564777994e-05, "loss": 0.7992, "num_input_tokens_seen": 285788608, "step": 1582 }, { "epoch": 0.1732942882946988, "grad_norm": 1.3345344269543515, "learning_rate": 4.6385116084577874e-05, "loss": 0.8261, "num_input_tokens_seen": 285963104, "step": 1583 }, { "epoch": 0.1734037603656367, "grad_norm": 1.4626524136670942, "learning_rate": 4.638066107455941e-05, "loss": 0.7439, "num_input_tokens_seen": 286128416, "step": 1584 }, { "epoch": 0.17351323243657463, "grad_norm": 1.3546521368107949, "learning_rate": 4.637620353524962e-05, "loss": 0.8627, "num_input_tokens_seen": 286308288, "step": 1585 }, { "epoch": 0.17362270450751252, "grad_norm": 1.3684858859877602, "learning_rate": 4.637174346717581e-05, "loss": 0.7389, "num_input_tokens_seen": 286471136, "step": 1586 }, { "epoch": 0.17373217657845041, "grad_norm": 1.2982083555668178, "learning_rate": 4.63672808708656e-05, "loss": 0.7723, "num_input_tokens_seen": 286649664, "step": 1587 }, { "epoch": 0.17384164864938834, "grad_norm": 1.0539612141409147, "learning_rate": 4.636281574684691e-05, "loss": 0.5475, "num_input_tokens_seen": 286856416, "step": 1588 }, { "epoch": 0.17395112072032623, "grad_norm": 1.0831770356231727, "learning_rate": 4.635834809564796e-05, "loss": 0.7104, "num_input_tokens_seen": 287033152, "step": 1589 }, { "epoch": 0.17406059279126412, "grad_norm": 1.1550257619651705, "learning_rate": 4.635387791779726e-05, "loss": 0.5432, "num_input_tokens_seen": 287200032, "step": 1590 }, { "epoch": 0.17417006486220202, "grad_norm": 1.2445372423888612, "learning_rate": 4.634940521382362e-05, "loss": 0.7116, "num_input_tokens_seen": 287390880, "step": 1591 }, { "epoch": 0.17427953693313994, "grad_norm": 1.1699452596884263, "learning_rate": 4.634492998425616e-05, "loss": 0.7421, "num_input_tokens_seen": 287597856, "step": 1592 }, { "epoch": 0.17438900900407783, "grad_norm": 1.255501608664858, "learning_rate": 4.6340452229624286e-05, "loss": 0.6394, "num_input_tokens_seen": 287777056, "step": 1593 }, { "epoch": 0.17449848107501573, "grad_norm": 1.215957523554606, "learning_rate": 4.6335971950457715e-05, "loss": 0.5943, "num_input_tokens_seen": 287943264, "step": 1594 }, { "epoch": 0.17460795314595365, "grad_norm": 1.231869968460158, "learning_rate": 4.6331489147286444e-05, "loss": 0.7475, "num_input_tokens_seen": 288130752, "step": 1595 }, { "epoch": 0.17471742521689154, "grad_norm": 1.821709812848131, "learning_rate": 4.632700382064079e-05, "loss": 0.9054, "num_input_tokens_seen": 288286208, "step": 1596 }, { "epoch": 0.17482689728782944, "grad_norm": 1.2452559569575719, "learning_rate": 4.632251597105135e-05, "loss": 0.7435, "num_input_tokens_seen": 288461376, "step": 1597 }, { "epoch": 0.17493636935876736, "grad_norm": 1.1001795752205836, "learning_rate": 4.631802559904903e-05, "loss": 0.7022, "num_input_tokens_seen": 288645056, "step": 1598 }, { "epoch": 0.17504584142970525, "grad_norm": 1.2069220632407736, "learning_rate": 4.631353270516504e-05, "loss": 0.6644, "num_input_tokens_seen": 288820000, "step": 1599 }, { "epoch": 0.17515531350064314, "grad_norm": 1.3015526211330481, "learning_rate": 4.6309037289930875e-05, "loss": 0.7047, "num_input_tokens_seen": 288994272, "step": 1600 }, { "epoch": 0.17526478557158104, "grad_norm": 1.169328491329954, "learning_rate": 4.630453935387833e-05, "loss": 0.7493, "num_input_tokens_seen": 289202816, "step": 1601 }, { "epoch": 0.17537425764251896, "grad_norm": 1.2433156103161311, "learning_rate": 4.630003889753951e-05, "loss": 0.6486, "num_input_tokens_seen": 289386496, "step": 1602 }, { "epoch": 0.17548372971345685, "grad_norm": 1.1595512379572457, "learning_rate": 4.629553592144681e-05, "loss": 0.6121, "num_input_tokens_seen": 289581824, "step": 1603 }, { "epoch": 0.17559320178439475, "grad_norm": 1.2315429358391232, "learning_rate": 4.629103042613292e-05, "loss": 0.6971, "num_input_tokens_seen": 289776480, "step": 1604 }, { "epoch": 0.17570267385533267, "grad_norm": 1.278931683547857, "learning_rate": 4.628652241213083e-05, "loss": 0.7046, "num_input_tokens_seen": 289928352, "step": 1605 }, { "epoch": 0.17581214592627056, "grad_norm": 1.260372802139238, "learning_rate": 4.6282011879973833e-05, "loss": 0.8522, "num_input_tokens_seen": 290111136, "step": 1606 }, { "epoch": 0.17592161799720846, "grad_norm": 1.3434535298319523, "learning_rate": 4.627749883019551e-05, "loss": 0.9065, "num_input_tokens_seen": 290292128, "step": 1607 }, { "epoch": 0.17603109006814635, "grad_norm": 1.2645514373695108, "learning_rate": 4.627298326332975e-05, "loss": 0.9542, "num_input_tokens_seen": 290497760, "step": 1608 }, { "epoch": 0.17614056213908427, "grad_norm": 1.1724388292049779, "learning_rate": 4.626846517991075e-05, "loss": 0.5236, "num_input_tokens_seen": 290698688, "step": 1609 }, { "epoch": 0.17625003421002217, "grad_norm": 1.24962381424332, "learning_rate": 4.626394458047296e-05, "loss": 0.5208, "num_input_tokens_seen": 290864448, "step": 1610 }, { "epoch": 0.17635950628096006, "grad_norm": 1.143927665627743, "learning_rate": 4.625942146555119e-05, "loss": 0.6694, "num_input_tokens_seen": 291056416, "step": 1611 }, { "epoch": 0.17646897835189798, "grad_norm": 1.438745953830097, "learning_rate": 4.62548958356805e-05, "loss": 0.7317, "num_input_tokens_seen": 291248832, "step": 1612 }, { "epoch": 0.17657845042283588, "grad_norm": 1.2866866440696276, "learning_rate": 4.625036769139626e-05, "loss": 0.6754, "num_input_tokens_seen": 291439232, "step": 1613 }, { "epoch": 0.17668792249377377, "grad_norm": 1.2544398613294057, "learning_rate": 4.624583703323415e-05, "loss": 0.8223, "num_input_tokens_seen": 291600960, "step": 1614 }, { "epoch": 0.1767973945647117, "grad_norm": 1.3081826546435158, "learning_rate": 4.624130386173013e-05, "loss": 0.7705, "num_input_tokens_seen": 291793824, "step": 1615 }, { "epoch": 0.17690686663564958, "grad_norm": 1.267527715917927, "learning_rate": 4.623676817742047e-05, "loss": 0.6863, "num_input_tokens_seen": 291940768, "step": 1616 }, { "epoch": 0.17701633870658748, "grad_norm": 1.2038586844145809, "learning_rate": 4.623222998084174e-05, "loss": 0.5966, "num_input_tokens_seen": 292139456, "step": 1617 }, { "epoch": 0.17712581077752537, "grad_norm": 1.2497784895132564, "learning_rate": 4.6227689272530785e-05, "loss": 0.7892, "num_input_tokens_seen": 292348448, "step": 1618 }, { "epoch": 0.1772352828484633, "grad_norm": 1.2248920801853898, "learning_rate": 4.622314605302477e-05, "loss": 0.6496, "num_input_tokens_seen": 292542880, "step": 1619 }, { "epoch": 0.1773447549194012, "grad_norm": 1.3860164529531207, "learning_rate": 4.621860032286115e-05, "loss": 0.7724, "num_input_tokens_seen": 292732384, "step": 1620 }, { "epoch": 0.17745422699033908, "grad_norm": 1.2500393384465465, "learning_rate": 4.621405208257767e-05, "loss": 0.667, "num_input_tokens_seen": 292902848, "step": 1621 }, { "epoch": 0.177563699061277, "grad_norm": 1.2375379672150537, "learning_rate": 4.620950133271239e-05, "loss": 0.6736, "num_input_tokens_seen": 293098624, "step": 1622 }, { "epoch": 0.1776731711322149, "grad_norm": 1.2655885008270256, "learning_rate": 4.620494807380365e-05, "loss": 0.7815, "num_input_tokens_seen": 293305152, "step": 1623 }, { "epoch": 0.1777826432031528, "grad_norm": 1.3023243134346283, "learning_rate": 4.620039230639008e-05, "loss": 0.6715, "num_input_tokens_seen": 293465760, "step": 1624 }, { "epoch": 0.17789211527409068, "grad_norm": 1.3745433309114565, "learning_rate": 4.619583403101063e-05, "loss": 0.8146, "num_input_tokens_seen": 293627712, "step": 1625 }, { "epoch": 0.1780015873450286, "grad_norm": 1.3175101702911047, "learning_rate": 4.619127324820454e-05, "loss": 0.9117, "num_input_tokens_seen": 293799744, "step": 1626 }, { "epoch": 0.1781110594159665, "grad_norm": 1.1979084622751768, "learning_rate": 4.6186709958511334e-05, "loss": 0.6862, "num_input_tokens_seen": 293968640, "step": 1627 }, { "epoch": 0.1782205314869044, "grad_norm": 1.2648896801003642, "learning_rate": 4.618214416247084e-05, "loss": 0.7929, "num_input_tokens_seen": 294150752, "step": 1628 }, { "epoch": 0.17833000355784231, "grad_norm": 1.4282733197477717, "learning_rate": 4.617757586062319e-05, "loss": 0.8336, "num_input_tokens_seen": 294321664, "step": 1629 }, { "epoch": 0.1784394756287802, "grad_norm": 1.2266350985867145, "learning_rate": 4.61730050535088e-05, "loss": 0.584, "num_input_tokens_seen": 294498400, "step": 1630 }, { "epoch": 0.1785489476997181, "grad_norm": 1.2207026367186524, "learning_rate": 4.6168431741668386e-05, "loss": 0.6597, "num_input_tokens_seen": 294658112, "step": 1631 }, { "epoch": 0.17865841977065602, "grad_norm": 1.221822387265037, "learning_rate": 4.6163855925642955e-05, "loss": 0.6758, "num_input_tokens_seen": 294864864, "step": 1632 }, { "epoch": 0.17876789184159392, "grad_norm": 1.3927278371323537, "learning_rate": 4.6159277605973836e-05, "loss": 0.8215, "num_input_tokens_seen": 295054592, "step": 1633 }, { "epoch": 0.1788773639125318, "grad_norm": 1.146294339236582, "learning_rate": 4.615469678320262e-05, "loss": 0.5943, "num_input_tokens_seen": 295243872, "step": 1634 }, { "epoch": 0.1789868359834697, "grad_norm": 1.3220612721606768, "learning_rate": 4.615011345787122e-05, "loss": 0.9439, "num_input_tokens_seen": 295434944, "step": 1635 }, { "epoch": 0.17909630805440763, "grad_norm": 1.3369819618297922, "learning_rate": 4.6145527630521834e-05, "loss": 0.7777, "num_input_tokens_seen": 295609888, "step": 1636 }, { "epoch": 0.17920578012534552, "grad_norm": 1.3558044018010096, "learning_rate": 4.614093930169695e-05, "loss": 0.7905, "num_input_tokens_seen": 295775424, "step": 1637 }, { "epoch": 0.17931525219628341, "grad_norm": 1.1934157637589229, "learning_rate": 4.613634847193936e-05, "loss": 0.6148, "num_input_tokens_seen": 295973888, "step": 1638 }, { "epoch": 0.17942472426722134, "grad_norm": 1.298461678804285, "learning_rate": 4.613175514179215e-05, "loss": 0.7418, "num_input_tokens_seen": 296153760, "step": 1639 }, { "epoch": 0.17953419633815923, "grad_norm": 1.2787911616736176, "learning_rate": 4.6127159311798705e-05, "loss": 0.7874, "num_input_tokens_seen": 296353568, "step": 1640 }, { "epoch": 0.17964366840909712, "grad_norm": 1.2289973104585623, "learning_rate": 4.61225609825027e-05, "loss": 0.6322, "num_input_tokens_seen": 296537248, "step": 1641 }, { "epoch": 0.17975314048003502, "grad_norm": 1.310124927708191, "learning_rate": 4.6117960154448115e-05, "loss": 0.7468, "num_input_tokens_seen": 296731008, "step": 1642 }, { "epoch": 0.17986261255097294, "grad_norm": 1.293272737569748, "learning_rate": 4.611335682817921e-05, "loss": 0.7362, "num_input_tokens_seen": 296926336, "step": 1643 }, { "epoch": 0.17997208462191083, "grad_norm": 1.3975503334439434, "learning_rate": 4.610875100424056e-05, "loss": 0.7212, "num_input_tokens_seen": 297065216, "step": 1644 }, { "epoch": 0.18008155669284873, "grad_norm": 1.2382543422531187, "learning_rate": 4.610414268317701e-05, "loss": 0.5876, "num_input_tokens_seen": 297235680, "step": 1645 }, { "epoch": 0.18019102876378665, "grad_norm": 1.230188392307427, "learning_rate": 4.609953186553373e-05, "loss": 0.6713, "num_input_tokens_seen": 297435040, "step": 1646 }, { "epoch": 0.18030050083472454, "grad_norm": 1.3598720474919421, "learning_rate": 4.609491855185616e-05, "loss": 0.587, "num_input_tokens_seen": 297590496, "step": 1647 }, { "epoch": 0.18040997290566244, "grad_norm": 1.2904010643086121, "learning_rate": 4.609030274269006e-05, "loss": 0.6131, "num_input_tokens_seen": 297746624, "step": 1648 }, { "epoch": 0.18051944497660036, "grad_norm": 1.2594788690712373, "learning_rate": 4.6085684438581464e-05, "loss": 0.6679, "num_input_tokens_seen": 297921120, "step": 1649 }, { "epoch": 0.18062891704753825, "grad_norm": 1.2163276073634308, "learning_rate": 4.60810636400767e-05, "loss": 0.6286, "num_input_tokens_seen": 298105696, "step": 1650 }, { "epoch": 0.18073838911847614, "grad_norm": 1.357602685097111, "learning_rate": 4.6076440347722415e-05, "loss": 0.7899, "num_input_tokens_seen": 298285792, "step": 1651 }, { "epoch": 0.18084786118941404, "grad_norm": 1.369596658418826, "learning_rate": 4.6071814562065524e-05, "loss": 0.7693, "num_input_tokens_seen": 298481568, "step": 1652 }, { "epoch": 0.18095733326035196, "grad_norm": 1.309070329567595, "learning_rate": 4.6067186283653255e-05, "loss": 0.707, "num_input_tokens_seen": 298643296, "step": 1653 }, { "epoch": 0.18106680533128985, "grad_norm": 1.3382309084107156, "learning_rate": 4.606255551303312e-05, "loss": 0.7795, "num_input_tokens_seen": 298839744, "step": 1654 }, { "epoch": 0.18117627740222775, "grad_norm": 1.275982052195494, "learning_rate": 4.6057922250752935e-05, "loss": 0.7214, "num_input_tokens_seen": 299018496, "step": 1655 }, { "epoch": 0.18128574947316567, "grad_norm": 1.303994183006652, "learning_rate": 4.60532864973608e-05, "loss": 0.7098, "num_input_tokens_seen": 299235776, "step": 1656 }, { "epoch": 0.18139522154410356, "grad_norm": 1.3966418197549053, "learning_rate": 4.604864825340512e-05, "loss": 0.7857, "num_input_tokens_seen": 299423936, "step": 1657 }, { "epoch": 0.18150469361504146, "grad_norm": 1.1260051475529202, "learning_rate": 4.6044007519434594e-05, "loss": 0.6768, "num_input_tokens_seen": 299578720, "step": 1658 }, { "epoch": 0.18161416568597935, "grad_norm": 1.152248779521353, "learning_rate": 4.603936429599821e-05, "loss": 0.6078, "num_input_tokens_seen": 299726112, "step": 1659 }, { "epoch": 0.18172363775691727, "grad_norm": 1.27556110078121, "learning_rate": 4.6034718583645244e-05, "loss": 0.818, "num_input_tokens_seen": 299916960, "step": 1660 }, { "epoch": 0.18183310982785517, "grad_norm": 1.267747958756934, "learning_rate": 4.603007038292528e-05, "loss": 0.6143, "num_input_tokens_seen": 300115648, "step": 1661 }, { "epoch": 0.18194258189879306, "grad_norm": 1.254027363759993, "learning_rate": 4.602541969438819e-05, "loss": 0.7028, "num_input_tokens_seen": 300279840, "step": 1662 }, { "epoch": 0.18205205396973098, "grad_norm": 1.201333086314019, "learning_rate": 4.602076651858416e-05, "loss": 0.7645, "num_input_tokens_seen": 300503392, "step": 1663 }, { "epoch": 0.18216152604066888, "grad_norm": 1.3195752224052082, "learning_rate": 4.601611085606362e-05, "loss": 0.7749, "num_input_tokens_seen": 300698496, "step": 1664 }, { "epoch": 0.18227099811160677, "grad_norm": 1.3085125798344788, "learning_rate": 4.601145270737735e-05, "loss": 0.6195, "num_input_tokens_seen": 300862016, "step": 1665 }, { "epoch": 0.1823804701825447, "grad_norm": 1.23178345214746, "learning_rate": 4.6006792073076385e-05, "loss": 0.7233, "num_input_tokens_seen": 301058688, "step": 1666 }, { "epoch": 0.18248994225348258, "grad_norm": 1.279424157253496, "learning_rate": 4.600212895371208e-05, "loss": 0.8297, "num_input_tokens_seen": 301235200, "step": 1667 }, { "epoch": 0.18259941432442048, "grad_norm": 1.256603346898116, "learning_rate": 4.5997463349836066e-05, "loss": 0.873, "num_input_tokens_seen": 301441504, "step": 1668 }, { "epoch": 0.18270888639535837, "grad_norm": 1.271898099797309, "learning_rate": 4.5992795262000285e-05, "loss": 0.6468, "num_input_tokens_seen": 301591808, "step": 1669 }, { "epoch": 0.1828183584662963, "grad_norm": 1.2704808366449465, "learning_rate": 4.598812469075695e-05, "loss": 0.8206, "num_input_tokens_seen": 301788032, "step": 1670 }, { "epoch": 0.1829278305372342, "grad_norm": 1.10654760764916, "learning_rate": 4.598345163665859e-05, "loss": 0.695, "num_input_tokens_seen": 301994784, "step": 1671 }, { "epoch": 0.18303730260817208, "grad_norm": 1.166384129145198, "learning_rate": 4.5978776100258006e-05, "loss": 0.6364, "num_input_tokens_seen": 302156960, "step": 1672 }, { "epoch": 0.18314677467911, "grad_norm": 1.3237428063043182, "learning_rate": 4.597409808210832e-05, "loss": 0.8763, "num_input_tokens_seen": 302351840, "step": 1673 }, { "epoch": 0.1832562467500479, "grad_norm": 1.341060030763405, "learning_rate": 4.596941758276293e-05, "loss": 0.7779, "num_input_tokens_seen": 302553664, "step": 1674 }, { "epoch": 0.1833657188209858, "grad_norm": 1.3461096250690552, "learning_rate": 4.596473460277553e-05, "loss": 0.7778, "num_input_tokens_seen": 302754144, "step": 1675 }, { "epoch": 0.1834751908919237, "grad_norm": 1.123581734047834, "learning_rate": 4.5960049142700096e-05, "loss": 0.5931, "num_input_tokens_seen": 302931776, "step": 1676 }, { "epoch": 0.1835846629628616, "grad_norm": 1.185551500389968, "learning_rate": 4.595536120309092e-05, "loss": 0.6655, "num_input_tokens_seen": 303115232, "step": 1677 }, { "epoch": 0.1836941350337995, "grad_norm": 1.4129627229426387, "learning_rate": 4.595067078450257e-05, "loss": 0.8402, "num_input_tokens_seen": 303300032, "step": 1678 }, { "epoch": 0.1838036071047374, "grad_norm": 1.3138374598579168, "learning_rate": 4.5945977887489925e-05, "loss": 0.8069, "num_input_tokens_seen": 303513504, "step": 1679 }, { "epoch": 0.18391307917567531, "grad_norm": 1.1105877869081329, "learning_rate": 4.594128251260813e-05, "loss": 0.4643, "num_input_tokens_seen": 303698976, "step": 1680 }, { "epoch": 0.1840225512466132, "grad_norm": 1.4573677226741393, "learning_rate": 4.593658466041265e-05, "loss": 0.7694, "num_input_tokens_seen": 303853536, "step": 1681 }, { "epoch": 0.1841320233175511, "grad_norm": 1.2086729964430527, "learning_rate": 4.593188433145923e-05, "loss": 0.6043, "num_input_tokens_seen": 304040128, "step": 1682 }, { "epoch": 0.18424149538848902, "grad_norm": 1.25872991998561, "learning_rate": 4.5927181526303906e-05, "loss": 0.5601, "num_input_tokens_seen": 304212608, "step": 1683 }, { "epoch": 0.18435096745942692, "grad_norm": 1.4165269651906323, "learning_rate": 4.592247624550301e-05, "loss": 1.0625, "num_input_tokens_seen": 304425184, "step": 1684 }, { "epoch": 0.1844604395303648, "grad_norm": 1.343291448377158, "learning_rate": 4.591776848961318e-05, "loss": 0.7646, "num_input_tokens_seen": 304639328, "step": 1685 }, { "epoch": 0.1845699116013027, "grad_norm": 1.3590601852513062, "learning_rate": 4.591305825919132e-05, "loss": 0.6684, "num_input_tokens_seen": 304817408, "step": 1686 }, { "epoch": 0.18467938367224063, "grad_norm": 1.4640171821607373, "learning_rate": 4.590834555479465e-05, "loss": 0.702, "num_input_tokens_seen": 305009824, "step": 1687 }, { "epoch": 0.18478885574317852, "grad_norm": 1.3727958959890478, "learning_rate": 4.590363037698067e-05, "loss": 0.604, "num_input_tokens_seen": 305182976, "step": 1688 }, { "epoch": 0.18489832781411641, "grad_norm": 1.3893636919763497, "learning_rate": 4.589891272630717e-05, "loss": 0.8205, "num_input_tokens_seen": 305355008, "step": 1689 }, { "epoch": 0.18500779988505434, "grad_norm": 1.4153135045393976, "learning_rate": 4.5894192603332254e-05, "loss": 0.8788, "num_input_tokens_seen": 305548544, "step": 1690 }, { "epoch": 0.18511727195599223, "grad_norm": 1.2153281077302076, "learning_rate": 4.58894700086143e-05, "loss": 0.6037, "num_input_tokens_seen": 305742304, "step": 1691 }, { "epoch": 0.18522674402693012, "grad_norm": 1.3114849888782016, "learning_rate": 4.5884744942711964e-05, "loss": 0.9564, "num_input_tokens_seen": 305950848, "step": 1692 }, { "epoch": 0.18533621609786805, "grad_norm": 1.3808225980228934, "learning_rate": 4.588001740618424e-05, "loss": 0.819, "num_input_tokens_seen": 306120640, "step": 1693 }, { "epoch": 0.18544568816880594, "grad_norm": 1.397379128376054, "learning_rate": 4.587528739959036e-05, "loss": 0.8601, "num_input_tokens_seen": 306301632, "step": 1694 }, { "epoch": 0.18555516023974383, "grad_norm": 1.1727222177552468, "learning_rate": 4.58705549234899e-05, "loss": 0.6594, "num_input_tokens_seen": 306441632, "step": 1695 }, { "epoch": 0.18566463231068173, "grad_norm": 1.2339179242999476, "learning_rate": 4.5865819978442685e-05, "loss": 0.6408, "num_input_tokens_seen": 306586784, "step": 1696 }, { "epoch": 0.18577410438161965, "grad_norm": 1.227020664050412, "learning_rate": 4.586108256500885e-05, "loss": 0.8256, "num_input_tokens_seen": 306764864, "step": 1697 }, { "epoch": 0.18588357645255754, "grad_norm": 1.1124577203496004, "learning_rate": 4.585634268374884e-05, "loss": 0.5926, "num_input_tokens_seen": 306965568, "step": 1698 }, { "epoch": 0.18599304852349544, "grad_norm": 1.1675860438201164, "learning_rate": 4.585160033522335e-05, "loss": 0.5739, "num_input_tokens_seen": 307133792, "step": 1699 }, { "epoch": 0.18610252059443336, "grad_norm": 1.3405853532383984, "learning_rate": 4.5846855519993404e-05, "loss": 0.7133, "num_input_tokens_seen": 307281632, "step": 1700 }, { "epoch": 0.18621199266537125, "grad_norm": 1.2221941577454007, "learning_rate": 4.584210823862031e-05, "loss": 0.7289, "num_input_tokens_seen": 307441792, "step": 1701 }, { "epoch": 0.18632146473630914, "grad_norm": 1.2094644754908535, "learning_rate": 4.583735849166564e-05, "loss": 0.5245, "num_input_tokens_seen": 307607328, "step": 1702 }, { "epoch": 0.18643093680724704, "grad_norm": 1.2825408108856495, "learning_rate": 4.583260627969131e-05, "loss": 0.7066, "num_input_tokens_seen": 307763008, "step": 1703 }, { "epoch": 0.18654040887818496, "grad_norm": 1.3042781438907747, "learning_rate": 4.5827851603259475e-05, "loss": 0.7144, "num_input_tokens_seen": 307930336, "step": 1704 }, { "epoch": 0.18664988094912285, "grad_norm": 1.440746538906385, "learning_rate": 4.582309446293261e-05, "loss": 0.9109, "num_input_tokens_seen": 308096320, "step": 1705 }, { "epoch": 0.18675935302006075, "grad_norm": 1.3154326327715031, "learning_rate": 4.581833485927348e-05, "loss": 0.6924, "num_input_tokens_seen": 308263648, "step": 1706 }, { "epoch": 0.18686882509099867, "grad_norm": 1.4989521022063588, "learning_rate": 4.5813572792845134e-05, "loss": 0.7552, "num_input_tokens_seen": 308417984, "step": 1707 }, { "epoch": 0.18697829716193656, "grad_norm": 1.2401478654621994, "learning_rate": 4.580880826421091e-05, "loss": 0.6805, "num_input_tokens_seen": 308620256, "step": 1708 }, { "epoch": 0.18708776923287446, "grad_norm": 1.3708310215166226, "learning_rate": 4.580404127393445e-05, "loss": 0.7154, "num_input_tokens_seen": 308807744, "step": 1709 }, { "epoch": 0.18719724130381238, "grad_norm": 1.2149983887210414, "learning_rate": 4.579927182257968e-05, "loss": 0.5956, "num_input_tokens_seen": 308975072, "step": 1710 }, { "epoch": 0.18730671337475027, "grad_norm": 1.5097150115576239, "learning_rate": 4.579449991071082e-05, "loss": 0.8904, "num_input_tokens_seen": 309158080, "step": 1711 }, { "epoch": 0.18741618544568817, "grad_norm": 1.2623070918843389, "learning_rate": 4.578972553889237e-05, "loss": 0.5257, "num_input_tokens_seen": 309339968, "step": 1712 }, { "epoch": 0.18752565751662606, "grad_norm": 1.318844673410409, "learning_rate": 4.578494870768912e-05, "loss": 0.8125, "num_input_tokens_seen": 309524096, "step": 1713 }, { "epoch": 0.18763512958756398, "grad_norm": 1.3605173068868186, "learning_rate": 4.578016941766619e-05, "loss": 0.9795, "num_input_tokens_seen": 309719424, "step": 1714 }, { "epoch": 0.18774460165850188, "grad_norm": 1.1809895384710347, "learning_rate": 4.5775387669388935e-05, "loss": 0.5501, "num_input_tokens_seen": 309914976, "step": 1715 }, { "epoch": 0.18785407372943977, "grad_norm": 1.1558247377002213, "learning_rate": 4.5770603463423035e-05, "loss": 0.7762, "num_input_tokens_seen": 310101344, "step": 1716 }, { "epoch": 0.1879635458003777, "grad_norm": 1.3285933622404302, "learning_rate": 4.576581680033445e-05, "loss": 0.7358, "num_input_tokens_seen": 310286816, "step": 1717 }, { "epoch": 0.18807301787131558, "grad_norm": 1.335264783882044, "learning_rate": 4.576102768068944e-05, "loss": 0.7913, "num_input_tokens_seen": 310462432, "step": 1718 }, { "epoch": 0.18818248994225348, "grad_norm": 1.3438167111892472, "learning_rate": 4.5756236105054534e-05, "loss": 0.6864, "num_input_tokens_seen": 310600864, "step": 1719 }, { "epoch": 0.18829196201319137, "grad_norm": 1.258148527574445, "learning_rate": 4.575144207399658e-05, "loss": 0.7649, "num_input_tokens_seen": 310794624, "step": 1720 }, { "epoch": 0.1884014340841293, "grad_norm": 1.1692135916492499, "learning_rate": 4.574664558808271e-05, "loss": 0.556, "num_input_tokens_seen": 310951424, "step": 1721 }, { "epoch": 0.1885109061550672, "grad_norm": 1.364403030869454, "learning_rate": 4.574184664788031e-05, "loss": 0.7297, "num_input_tokens_seen": 311130848, "step": 1722 }, { "epoch": 0.18862037822600508, "grad_norm": 1.2915789923086811, "learning_rate": 4.573704525395711e-05, "loss": 0.7508, "num_input_tokens_seen": 311330656, "step": 1723 }, { "epoch": 0.188729850296943, "grad_norm": 1.2245240493568545, "learning_rate": 4.573224140688111e-05, "loss": 0.7034, "num_input_tokens_seen": 311480288, "step": 1724 }, { "epoch": 0.1888393223678809, "grad_norm": 1.1876032433050214, "learning_rate": 4.5727435107220576e-05, "loss": 0.5854, "num_input_tokens_seen": 311653216, "step": 1725 }, { "epoch": 0.1889487944388188, "grad_norm": 1.3172828630745155, "learning_rate": 4.5722626355544085e-05, "loss": 0.6283, "num_input_tokens_seen": 311787616, "step": 1726 }, { "epoch": 0.1890582665097567, "grad_norm": 1.2900577748838022, "learning_rate": 4.5717815152420515e-05, "loss": 0.8857, "num_input_tokens_seen": 311982944, "step": 1727 }, { "epoch": 0.1891677385806946, "grad_norm": 1.3895309945486647, "learning_rate": 4.5713001498419025e-05, "loss": 0.8558, "num_input_tokens_seen": 312163040, "step": 1728 }, { "epoch": 0.1892772106516325, "grad_norm": 1.1814117966708257, "learning_rate": 4.570818539410905e-05, "loss": 0.7916, "num_input_tokens_seen": 312342240, "step": 1729 }, { "epoch": 0.1893866827225704, "grad_norm": 1.2136927116380762, "learning_rate": 4.5703366840060335e-05, "loss": 0.6366, "num_input_tokens_seen": 312512928, "step": 1730 }, { "epoch": 0.18949615479350831, "grad_norm": 1.3623152756364967, "learning_rate": 4.5698545836842896e-05, "loss": 0.7346, "num_input_tokens_seen": 312678016, "step": 1731 }, { "epoch": 0.1896056268644462, "grad_norm": 1.4040325908247366, "learning_rate": 4.569372238502705e-05, "loss": 0.7743, "num_input_tokens_seen": 312838400, "step": 1732 }, { "epoch": 0.1897150989353841, "grad_norm": 1.3044090659167191, "learning_rate": 4.568889648518341e-05, "loss": 0.7329, "num_input_tokens_seen": 312994528, "step": 1733 }, { "epoch": 0.18982457100632202, "grad_norm": 1.2043349882090377, "learning_rate": 4.568406813788287e-05, "loss": 0.8068, "num_input_tokens_seen": 313172384, "step": 1734 }, { "epoch": 0.18993404307725992, "grad_norm": 1.185773347091126, "learning_rate": 4.5679237343696604e-05, "loss": 0.7402, "num_input_tokens_seen": 313359872, "step": 1735 }, { "epoch": 0.1900435151481978, "grad_norm": 1.3483405545815013, "learning_rate": 4.567440410319609e-05, "loss": 0.7864, "num_input_tokens_seen": 313520704, "step": 1736 }, { "epoch": 0.1901529872191357, "grad_norm": 1.2627140052633197, "learning_rate": 4.56695684169531e-05, "loss": 0.9233, "num_input_tokens_seen": 313712224, "step": 1737 }, { "epoch": 0.19026245929007363, "grad_norm": 1.3193747898031163, "learning_rate": 4.5664730285539684e-05, "loss": 0.8014, "num_input_tokens_seen": 313900832, "step": 1738 }, { "epoch": 0.19037193136101152, "grad_norm": 1.256655095390317, "learning_rate": 4.565988970952817e-05, "loss": 0.7296, "num_input_tokens_seen": 314086304, "step": 1739 }, { "epoch": 0.19048140343194941, "grad_norm": 1.313999726999449, "learning_rate": 4.5655046689491204e-05, "loss": 0.761, "num_input_tokens_seen": 314264384, "step": 1740 }, { "epoch": 0.19059087550288734, "grad_norm": 1.3535448211559575, "learning_rate": 4.56502012260017e-05, "loss": 0.7569, "num_input_tokens_seen": 314404608, "step": 1741 }, { "epoch": 0.19070034757382523, "grad_norm": 1.2265642008192577, "learning_rate": 4.564535331963287e-05, "loss": 0.6949, "num_input_tokens_seen": 314576192, "step": 1742 }, { "epoch": 0.19080981964476312, "grad_norm": 1.180489475919094, "learning_rate": 4.56405029709582e-05, "loss": 0.6304, "num_input_tokens_seen": 314738592, "step": 1743 }, { "epoch": 0.19091929171570104, "grad_norm": 1.364391847332586, "learning_rate": 4.5635650180551494e-05, "loss": 0.6863, "num_input_tokens_seen": 314924960, "step": 1744 }, { "epoch": 0.19102876378663894, "grad_norm": 1.2266734856609496, "learning_rate": 4.5630794948986814e-05, "loss": 0.5743, "num_input_tokens_seen": 315058016, "step": 1745 }, { "epoch": 0.19113823585757683, "grad_norm": 1.2649992820677398, "learning_rate": 4.562593727683854e-05, "loss": 0.6203, "num_input_tokens_seen": 315254912, "step": 1746 }, { "epoch": 0.19124770792851473, "grad_norm": 1.3815159727147224, "learning_rate": 4.562107716468131e-05, "loss": 0.6785, "num_input_tokens_seen": 315426944, "step": 1747 }, { "epoch": 0.19135717999945265, "grad_norm": 1.1569094452761166, "learning_rate": 4.561621461309007e-05, "loss": 0.6488, "num_input_tokens_seen": 315621824, "step": 1748 }, { "epoch": 0.19146665207039054, "grad_norm": 1.2573091909751197, "learning_rate": 4.561134962264006e-05, "loss": 0.7032, "num_input_tokens_seen": 315816928, "step": 1749 }, { "epoch": 0.19157612414132844, "grad_norm": 1.24555436182395, "learning_rate": 4.560648219390678e-05, "loss": 0.6696, "num_input_tokens_seen": 315982688, "step": 1750 }, { "epoch": 0.19168559621226636, "grad_norm": 1.265288555496231, "learning_rate": 4.560161232746606e-05, "loss": 0.8413, "num_input_tokens_seen": 316194816, "step": 1751 }, { "epoch": 0.19179506828320425, "grad_norm": 1.2566237429835765, "learning_rate": 4.5596740023893986e-05, "loss": 0.8308, "num_input_tokens_seen": 316410752, "step": 1752 }, { "epoch": 0.19190454035414214, "grad_norm": 1.1322065640180912, "learning_rate": 4.559186528376694e-05, "loss": 0.54, "num_input_tokens_seen": 316593088, "step": 1753 }, { "epoch": 0.19201401242508004, "grad_norm": 1.2353629974921603, "learning_rate": 4.558698810766159e-05, "loss": 0.7964, "num_input_tokens_seen": 316767808, "step": 1754 }, { "epoch": 0.19212348449601796, "grad_norm": 1.133739381633188, "learning_rate": 4.558210849615491e-05, "loss": 0.4845, "num_input_tokens_seen": 316934688, "step": 1755 }, { "epoch": 0.19223295656695585, "grad_norm": 1.2772266205913851, "learning_rate": 4.557722644982414e-05, "loss": 0.7998, "num_input_tokens_seen": 317108288, "step": 1756 }, { "epoch": 0.19234242863789375, "grad_norm": 1.253848921276349, "learning_rate": 4.5572341969246814e-05, "loss": 0.6236, "num_input_tokens_seen": 317283456, "step": 1757 }, { "epoch": 0.19245190070883167, "grad_norm": 1.2685578355757465, "learning_rate": 4.556745505500076e-05, "loss": 0.8564, "num_input_tokens_seen": 317472960, "step": 1758 }, { "epoch": 0.19256137277976956, "grad_norm": 1.1408451860665263, "learning_rate": 4.55625657076641e-05, "loss": 0.5888, "num_input_tokens_seen": 317666496, "step": 1759 }, { "epoch": 0.19267084485070746, "grad_norm": 1.3125794704537757, "learning_rate": 4.555767392781522e-05, "loss": 0.8448, "num_input_tokens_seen": 317863840, "step": 1760 }, { "epoch": 0.19278031692164538, "grad_norm": 1.3263593268747846, "learning_rate": 4.5552779716032815e-05, "loss": 0.7811, "num_input_tokens_seen": 318042368, "step": 1761 }, { "epoch": 0.19288978899258327, "grad_norm": 1.3689743136886487, "learning_rate": 4.554788307289585e-05, "loss": 0.8339, "num_input_tokens_seen": 318253600, "step": 1762 }, { "epoch": 0.19299926106352117, "grad_norm": 1.1823214500585768, "learning_rate": 4.5542983998983605e-05, "loss": 0.7666, "num_input_tokens_seen": 318432576, "step": 1763 }, { "epoch": 0.19310873313445906, "grad_norm": 1.0951512760893545, "learning_rate": 4.5538082494875626e-05, "loss": 0.4976, "num_input_tokens_seen": 318582880, "step": 1764 }, { "epoch": 0.19321820520539698, "grad_norm": 1.402577882735999, "learning_rate": 4.553317856115176e-05, "loss": 0.8529, "num_input_tokens_seen": 318781568, "step": 1765 }, { "epoch": 0.19332767727633487, "grad_norm": 1.199040601588542, "learning_rate": 4.552827219839211e-05, "loss": 0.7746, "num_input_tokens_seen": 318959872, "step": 1766 }, { "epoch": 0.19343714934727277, "grad_norm": 1.1547898016225275, "learning_rate": 4.55233634071771e-05, "loss": 0.5816, "num_input_tokens_seen": 319149152, "step": 1767 }, { "epoch": 0.1935466214182107, "grad_norm": 1.3060656823616341, "learning_rate": 4.5518452188087444e-05, "loss": 0.89, "num_input_tokens_seen": 319311104, "step": 1768 }, { "epoch": 0.19365609348914858, "grad_norm": 1.302289941290542, "learning_rate": 4.551353854170411e-05, "loss": 0.7718, "num_input_tokens_seen": 319489184, "step": 1769 }, { "epoch": 0.19376556556008648, "grad_norm": 1.216233663498252, "learning_rate": 4.550862246860839e-05, "loss": 0.7554, "num_input_tokens_seen": 319679584, "step": 1770 }, { "epoch": 0.19387503763102437, "grad_norm": 1.2798970685491187, "learning_rate": 4.5503703969381826e-05, "loss": 0.7449, "num_input_tokens_seen": 319879392, "step": 1771 }, { "epoch": 0.1939845097019623, "grad_norm": 1.3103765840368895, "learning_rate": 4.5498783044606285e-05, "loss": 0.6338, "num_input_tokens_seen": 320044256, "step": 1772 }, { "epoch": 0.1940939817729002, "grad_norm": 1.2036539376532247, "learning_rate": 4.5493859694863894e-05, "loss": 0.6581, "num_input_tokens_seen": 320241152, "step": 1773 }, { "epoch": 0.19420345384383808, "grad_norm": 1.3460591458451832, "learning_rate": 4.5488933920737087e-05, "loss": 0.8496, "num_input_tokens_seen": 320391680, "step": 1774 }, { "epoch": 0.194312925914776, "grad_norm": 1.3270136053829515, "learning_rate": 4.5484005722808566e-05, "loss": 0.7812, "num_input_tokens_seen": 320592832, "step": 1775 }, { "epoch": 0.1944223979857139, "grad_norm": 1.2490906268577968, "learning_rate": 4.5479075101661316e-05, "loss": 0.7774, "num_input_tokens_seen": 320757696, "step": 1776 }, { "epoch": 0.1945318700566518, "grad_norm": 1.2421564542141665, "learning_rate": 4.5474142057878636e-05, "loss": 0.664, "num_input_tokens_seen": 320967584, "step": 1777 }, { "epoch": 0.1946413421275897, "grad_norm": 1.383317542656239, "learning_rate": 4.546920659204409e-05, "loss": 0.806, "num_input_tokens_seen": 321136704, "step": 1778 }, { "epoch": 0.1947508141985276, "grad_norm": 1.2963523271097281, "learning_rate": 4.546426870474154e-05, "loss": 0.7577, "num_input_tokens_seen": 321349504, "step": 1779 }, { "epoch": 0.1948602862694655, "grad_norm": 1.2200807983971018, "learning_rate": 4.5459328396555114e-05, "loss": 0.7678, "num_input_tokens_seen": 321558720, "step": 1780 }, { "epoch": 0.1949697583404034, "grad_norm": 1.1520201665152925, "learning_rate": 4.545438566806925e-05, "loss": 0.693, "num_input_tokens_seen": 321740160, "step": 1781 }, { "epoch": 0.19507923041134131, "grad_norm": 1.2752283995009912, "learning_rate": 4.5449440519868675e-05, "loss": 0.7572, "num_input_tokens_seen": 321929888, "step": 1782 }, { "epoch": 0.1951887024822792, "grad_norm": 1.2857947806864307, "learning_rate": 4.544449295253837e-05, "loss": 0.5926, "num_input_tokens_seen": 322055328, "step": 1783 }, { "epoch": 0.1952981745532171, "grad_norm": 1.251537045566439, "learning_rate": 4.543954296666363e-05, "loss": 0.708, "num_input_tokens_seen": 322217728, "step": 1784 }, { "epoch": 0.19540764662415502, "grad_norm": 1.1882681119193779, "learning_rate": 4.5434590562830035e-05, "loss": 0.6501, "num_input_tokens_seen": 322398048, "step": 1785 }, { "epoch": 0.19551711869509292, "grad_norm": 1.390195501630253, "learning_rate": 4.542963574162344e-05, "loss": 0.7077, "num_input_tokens_seen": 322562688, "step": 1786 }, { "epoch": 0.1956265907660308, "grad_norm": 1.262070267312561, "learning_rate": 4.542467850363e-05, "loss": 0.6376, "num_input_tokens_seen": 322744352, "step": 1787 }, { "epoch": 0.1957360628369687, "grad_norm": 1.9429088894680697, "learning_rate": 4.541971884943613e-05, "loss": 1.2777, "num_input_tokens_seen": 322963424, "step": 1788 }, { "epoch": 0.19584553490790663, "grad_norm": 1.2336852162366725, "learning_rate": 4.5414756779628556e-05, "loss": 0.8742, "num_input_tokens_seen": 323149792, "step": 1789 }, { "epoch": 0.19595500697884452, "grad_norm": 1.024073919768651, "learning_rate": 4.5409792294794284e-05, "loss": 0.6351, "num_input_tokens_seen": 323338176, "step": 1790 }, { "epoch": 0.19606447904978241, "grad_norm": 1.33444563345936, "learning_rate": 4.54048253955206e-05, "loss": 0.6919, "num_input_tokens_seen": 323499904, "step": 1791 }, { "epoch": 0.19617395112072034, "grad_norm": 1.099869800578315, "learning_rate": 4.5399856082395074e-05, "loss": 0.787, "num_input_tokens_seen": 323684480, "step": 1792 }, { "epoch": 0.19628342319165823, "grad_norm": 1.2160104135707372, "learning_rate": 4.5394884356005574e-05, "loss": 0.6584, "num_input_tokens_seen": 323868832, "step": 1793 }, { "epoch": 0.19639289526259612, "grad_norm": 1.3475664191782848, "learning_rate": 4.538991021694025e-05, "loss": 0.9239, "num_input_tokens_seen": 324041312, "step": 1794 }, { "epoch": 0.19650236733353404, "grad_norm": 1.2612991816418306, "learning_rate": 4.5384933665787524e-05, "loss": 0.764, "num_input_tokens_seen": 324250304, "step": 1795 }, { "epoch": 0.19661183940447194, "grad_norm": 1.257896894384011, "learning_rate": 4.537995470313611e-05, "loss": 0.7487, "num_input_tokens_seen": 324412032, "step": 1796 }, { "epoch": 0.19672131147540983, "grad_norm": 1.2217089606055234, "learning_rate": 4.537497332957501e-05, "loss": 0.6143, "num_input_tokens_seen": 324591008, "step": 1797 }, { "epoch": 0.19683078354634773, "grad_norm": 1.3054458333075145, "learning_rate": 4.536998954569353e-05, "loss": 0.8605, "num_input_tokens_seen": 324757888, "step": 1798 }, { "epoch": 0.19694025561728565, "grad_norm": 1.1003722124654851, "learning_rate": 4.536500335208121e-05, "loss": 0.5422, "num_input_tokens_seen": 324932160, "step": 1799 }, { "epoch": 0.19704972768822354, "grad_norm": 1.227926382840362, "learning_rate": 4.536001474932793e-05, "loss": 0.7896, "num_input_tokens_seen": 325134880, "step": 1800 }, { "epoch": 0.19715919975916144, "grad_norm": 1.356230771679966, "learning_rate": 4.535502373802383e-05, "loss": 0.92, "num_input_tokens_seen": 325292800, "step": 1801 }, { "epoch": 0.19726867183009936, "grad_norm": 1.251132642670986, "learning_rate": 4.535003031875934e-05, "loss": 0.9008, "num_input_tokens_seen": 325484096, "step": 1802 }, { "epoch": 0.19737814390103725, "grad_norm": 1.150463004822577, "learning_rate": 4.534503449212516e-05, "loss": 0.6776, "num_input_tokens_seen": 325674272, "step": 1803 }, { "epoch": 0.19748761597197514, "grad_norm": 1.076570523568471, "learning_rate": 4.534003625871229e-05, "loss": 0.6254, "num_input_tokens_seen": 325862656, "step": 1804 }, { "epoch": 0.19759708804291304, "grad_norm": 1.2415050815350765, "learning_rate": 4.533503561911202e-05, "loss": 0.8917, "num_input_tokens_seen": 326057984, "step": 1805 }, { "epoch": 0.19770656011385096, "grad_norm": 1.320878218039604, "learning_rate": 4.5330032573915903e-05, "loss": 0.8582, "num_input_tokens_seen": 326232256, "step": 1806 }, { "epoch": 0.19781603218478885, "grad_norm": 1.2037244021980877, "learning_rate": 4.53250271237158e-05, "loss": 0.734, "num_input_tokens_seen": 326412800, "step": 1807 }, { "epoch": 0.19792550425572675, "grad_norm": 1.1527712271528507, "learning_rate": 4.532001926910385e-05, "loss": 0.7288, "num_input_tokens_seen": 326589312, "step": 1808 }, { "epoch": 0.19803497632666467, "grad_norm": 1.1961174542092077, "learning_rate": 4.531500901067246e-05, "loss": 0.812, "num_input_tokens_seen": 326775008, "step": 1809 }, { "epoch": 0.19814444839760256, "grad_norm": 1.4125977828351783, "learning_rate": 4.5309996349014336e-05, "loss": 0.7664, "num_input_tokens_seen": 326953536, "step": 1810 }, { "epoch": 0.19825392046854046, "grad_norm": 1.1330017984010676, "learning_rate": 4.5304981284722484e-05, "loss": 0.684, "num_input_tokens_seen": 327135424, "step": 1811 }, { "epoch": 0.19836339253947838, "grad_norm": 1.2003143474998648, "learning_rate": 4.5299963818390144e-05, "loss": 0.8002, "num_input_tokens_seen": 327329632, "step": 1812 }, { "epoch": 0.19847286461041627, "grad_norm": 1.3107206908708386, "learning_rate": 4.5294943950610904e-05, "loss": 0.7857, "num_input_tokens_seen": 327479936, "step": 1813 }, { "epoch": 0.19858233668135417, "grad_norm": 1.2435609913506391, "learning_rate": 4.528992168197859e-05, "loss": 0.6425, "num_input_tokens_seen": 327668320, "step": 1814 }, { "epoch": 0.19869180875229206, "grad_norm": 1.333410896588379, "learning_rate": 4.5284897013087326e-05, "loss": 0.7205, "num_input_tokens_seen": 327841920, "step": 1815 }, { "epoch": 0.19880128082322998, "grad_norm": 1.2471229344962922, "learning_rate": 4.527986994453152e-05, "loss": 0.8387, "num_input_tokens_seen": 328045536, "step": 1816 }, { "epoch": 0.19891075289416787, "grad_norm": 1.2350041681288835, "learning_rate": 4.5274840476905873e-05, "loss": 0.5109, "num_input_tokens_seen": 328208832, "step": 1817 }, { "epoch": 0.19902022496510577, "grad_norm": 1.37072483761958, "learning_rate": 4.526980861080535e-05, "loss": 0.8093, "num_input_tokens_seen": 328397888, "step": 1818 }, { "epoch": 0.1991296970360437, "grad_norm": 1.2605366084931124, "learning_rate": 4.5264774346825226e-05, "loss": 0.6252, "num_input_tokens_seen": 328565216, "step": 1819 }, { "epoch": 0.19923916910698158, "grad_norm": 1.2905442478142337, "learning_rate": 4.5259737685561035e-05, "loss": 0.7201, "num_input_tokens_seen": 328753152, "step": 1820 }, { "epoch": 0.19934864117791948, "grad_norm": 1.2502566074195978, "learning_rate": 4.52546986276086e-05, "loss": 0.7332, "num_input_tokens_seen": 328952512, "step": 1821 }, { "epoch": 0.19945811324885737, "grad_norm": 1.0953153036084393, "learning_rate": 4.524965717356405e-05, "loss": 0.5897, "num_input_tokens_seen": 329144704, "step": 1822 }, { "epoch": 0.1995675853197953, "grad_norm": 1.0913203910855627, "learning_rate": 4.524461332402375e-05, "loss": 0.5396, "num_input_tokens_seen": 329324800, "step": 1823 }, { "epoch": 0.1996770573907332, "grad_norm": 1.2209278601721594, "learning_rate": 4.523956707958441e-05, "loss": 0.6519, "num_input_tokens_seen": 329487200, "step": 1824 }, { "epoch": 0.19978652946167108, "grad_norm": 1.2864859522687477, "learning_rate": 4.523451844084297e-05, "loss": 0.8408, "num_input_tokens_seen": 329679392, "step": 1825 }, { "epoch": 0.199896001532609, "grad_norm": 1.2888195849335313, "learning_rate": 4.5229467408396686e-05, "loss": 0.6825, "num_input_tokens_seen": 329848960, "step": 1826 }, { "epoch": 0.2000054736035469, "grad_norm": 1.2343866854730212, "learning_rate": 4.5224413982843075e-05, "loss": 0.7168, "num_input_tokens_seen": 330012704, "step": 1827 }, { "epoch": 0.2001149456744848, "grad_norm": 1.3061186655996957, "learning_rate": 4.521935816477995e-05, "loss": 0.6705, "num_input_tokens_seen": 330208704, "step": 1828 }, { "epoch": 0.2002244177454227, "grad_norm": 1.2068886780716892, "learning_rate": 4.5214299954805404e-05, "loss": 0.5973, "num_input_tokens_seen": 330363040, "step": 1829 }, { "epoch": 0.2003338898163606, "grad_norm": 1.3294822041302163, "learning_rate": 4.520923935351782e-05, "loss": 0.6642, "num_input_tokens_seen": 330551648, "step": 1830 }, { "epoch": 0.2004433618872985, "grad_norm": 1.2032846617343804, "learning_rate": 4.520417636151586e-05, "loss": 0.6393, "num_input_tokens_seen": 330732640, "step": 1831 }, { "epoch": 0.2005528339582364, "grad_norm": 1.1315290876047248, "learning_rate": 4.5199110979398454e-05, "loss": 0.728, "num_input_tokens_seen": 330914528, "step": 1832 }, { "epoch": 0.20066230602917431, "grad_norm": 1.384447930268508, "learning_rate": 4.5194043207764835e-05, "loss": 0.6575, "num_input_tokens_seen": 331119712, "step": 1833 }, { "epoch": 0.2007717781001122, "grad_norm": 1.3592579670655562, "learning_rate": 4.5188973047214514e-05, "loss": 0.7018, "num_input_tokens_seen": 331300704, "step": 1834 }, { "epoch": 0.2008812501710501, "grad_norm": 1.2033577235665667, "learning_rate": 4.518390049834727e-05, "loss": 0.6257, "num_input_tokens_seen": 331492224, "step": 1835 }, { "epoch": 0.20099072224198802, "grad_norm": 1.3640476466328242, "learning_rate": 4.517882556176318e-05, "loss": 0.7575, "num_input_tokens_seen": 331644768, "step": 1836 }, { "epoch": 0.20110019431292592, "grad_norm": 1.3768358978800495, "learning_rate": 4.51737482380626e-05, "loss": 0.7399, "num_input_tokens_seen": 331826432, "step": 1837 }, { "epoch": 0.2012096663838638, "grad_norm": 1.113670147346219, "learning_rate": 4.516866852784618e-05, "loss": 0.6275, "num_input_tokens_seen": 332001152, "step": 1838 }, { "epoch": 0.2013191384548017, "grad_norm": 1.3728450013569828, "learning_rate": 4.516358643171482e-05, "loss": 0.8783, "num_input_tokens_seen": 332196928, "step": 1839 }, { "epoch": 0.20142861052573963, "grad_norm": 1.21147916430962, "learning_rate": 4.515850195026974e-05, "loss": 0.6919, "num_input_tokens_seen": 332383744, "step": 1840 }, { "epoch": 0.20153808259667752, "grad_norm": 1.2210579074277892, "learning_rate": 4.5153415084112406e-05, "loss": 0.6655, "num_input_tokens_seen": 332574592, "step": 1841 }, { "epoch": 0.20164755466761541, "grad_norm": 1.2770458655697765, "learning_rate": 4.5148325833844595e-05, "loss": 0.7179, "num_input_tokens_seen": 332767456, "step": 1842 }, { "epoch": 0.20175702673855334, "grad_norm": 1.255429065126984, "learning_rate": 4.514323420006836e-05, "loss": 0.5503, "num_input_tokens_seen": 332904992, "step": 1843 }, { "epoch": 0.20186649880949123, "grad_norm": 1.4760159888913436, "learning_rate": 4.5138140183386025e-05, "loss": 1.0478, "num_input_tokens_seen": 333109952, "step": 1844 }, { "epoch": 0.20197597088042912, "grad_norm": 1.168133523092511, "learning_rate": 4.51330437844002e-05, "loss": 0.7447, "num_input_tokens_seen": 333285344, "step": 1845 }, { "epoch": 0.20208544295136704, "grad_norm": 1.1853813043201646, "learning_rate": 4.512794500371379e-05, "loss": 0.7165, "num_input_tokens_seen": 333478432, "step": 1846 }, { "epoch": 0.20219491502230494, "grad_norm": 1.229672710324915, "learning_rate": 4.5122843841929965e-05, "loss": 0.8159, "num_input_tokens_seen": 333687872, "step": 1847 }, { "epoch": 0.20230438709324283, "grad_norm": 1.306080742415381, "learning_rate": 4.5117740299652175e-05, "loss": 0.7467, "num_input_tokens_seen": 333809504, "step": 1848 }, { "epoch": 0.20241385916418073, "grad_norm": 1.4086696999925366, "learning_rate": 4.511263437748416e-05, "loss": 0.8614, "num_input_tokens_seen": 333958464, "step": 1849 }, { "epoch": 0.20252333123511865, "grad_norm": 1.280874429877787, "learning_rate": 4.510752607602996e-05, "loss": 0.7666, "num_input_tokens_seen": 334115264, "step": 1850 }, { "epoch": 0.20263280330605654, "grad_norm": 1.0672052206017542, "learning_rate": 4.510241539589386e-05, "loss": 0.5959, "num_input_tokens_seen": 334293792, "step": 1851 }, { "epoch": 0.20274227537699444, "grad_norm": 1.1691615925191852, "learning_rate": 4.509730233768045e-05, "loss": 0.662, "num_input_tokens_seen": 334467616, "step": 1852 }, { "epoch": 0.20285174744793236, "grad_norm": 1.2125496745994684, "learning_rate": 4.5092186901994594e-05, "loss": 0.551, "num_input_tokens_seen": 334666304, "step": 1853 }, { "epoch": 0.20296121951887025, "grad_norm": 1.3071609624238614, "learning_rate": 4.5087069089441434e-05, "loss": 0.6095, "num_input_tokens_seen": 334850880, "step": 1854 }, { "epoch": 0.20307069158980814, "grad_norm": 1.202581951893846, "learning_rate": 4.50819489006264e-05, "loss": 0.6165, "num_input_tokens_seen": 335013728, "step": 1855 }, { "epoch": 0.20318016366074607, "grad_norm": 1.4970842474023265, "learning_rate": 4.5076826336155196e-05, "loss": 0.8556, "num_input_tokens_seen": 335142752, "step": 1856 }, { "epoch": 0.20328963573168396, "grad_norm": 1.4861690721864915, "learning_rate": 4.507170139663382e-05, "loss": 0.8702, "num_input_tokens_seen": 335284320, "step": 1857 }, { "epoch": 0.20339910780262185, "grad_norm": 1.6231004910386817, "learning_rate": 4.506657408266855e-05, "loss": 0.7581, "num_input_tokens_seen": 335446944, "step": 1858 }, { "epoch": 0.20350857987355975, "grad_norm": 1.3355819050428244, "learning_rate": 4.506144439486591e-05, "loss": 1.0415, "num_input_tokens_seen": 335659296, "step": 1859 }, { "epoch": 0.20361805194449767, "grad_norm": 1.1058968298150083, "learning_rate": 4.5056312333832764e-05, "loss": 0.5702, "num_input_tokens_seen": 335815872, "step": 1860 }, { "epoch": 0.20372752401543556, "grad_norm": 1.141856169229549, "learning_rate": 4.505117790017621e-05, "loss": 0.631, "num_input_tokens_seen": 336027552, "step": 1861 }, { "epoch": 0.20383699608637346, "grad_norm": 1.2929250473519602, "learning_rate": 4.504604109450363e-05, "loss": 0.6895, "num_input_tokens_seen": 336215264, "step": 1862 }, { "epoch": 0.20394646815731138, "grad_norm": 1.4034654811668088, "learning_rate": 4.504090191742272e-05, "loss": 0.8398, "num_input_tokens_seen": 336386400, "step": 1863 }, { "epoch": 0.20405594022824927, "grad_norm": 1.2694455631389368, "learning_rate": 4.503576036954142e-05, "loss": 0.8084, "num_input_tokens_seen": 336585760, "step": 1864 }, { "epoch": 0.20416541229918717, "grad_norm": 1.221307418785342, "learning_rate": 4.5030616451467964e-05, "loss": 0.8067, "num_input_tokens_seen": 336792064, "step": 1865 }, { "epoch": 0.20427488437012506, "grad_norm": 1.4665574285979164, "learning_rate": 4.502547016381089e-05, "loss": 0.7804, "num_input_tokens_seen": 336964320, "step": 1866 }, { "epoch": 0.20438435644106298, "grad_norm": 1.3138340119740763, "learning_rate": 4.5020321507178965e-05, "loss": 0.8149, "num_input_tokens_seen": 337153824, "step": 1867 }, { "epoch": 0.20449382851200087, "grad_norm": 1.245377529078543, "learning_rate": 4.501517048218128e-05, "loss": 0.7003, "num_input_tokens_seen": 337320480, "step": 1868 }, { "epoch": 0.20460330058293877, "grad_norm": 1.4590860150130092, "learning_rate": 4.5010017089427195e-05, "loss": 0.7328, "num_input_tokens_seen": 337478848, "step": 1869 }, { "epoch": 0.2047127726538767, "grad_norm": 1.1359392357646374, "learning_rate": 4.500486132952634e-05, "loss": 0.6813, "num_input_tokens_seen": 337669248, "step": 1870 }, { "epoch": 0.20482224472481458, "grad_norm": 1.2348336442111272, "learning_rate": 4.499970320308863e-05, "loss": 0.5509, "num_input_tokens_seen": 337808128, "step": 1871 }, { "epoch": 0.20493171679575248, "grad_norm": 1.2922519414071296, "learning_rate": 4.4994542710724264e-05, "loss": 0.5976, "num_input_tokens_seen": 337979264, "step": 1872 }, { "epoch": 0.2050411888666904, "grad_norm": 1.2393593412132442, "learning_rate": 4.498937985304371e-05, "loss": 0.619, "num_input_tokens_seen": 338153088, "step": 1873 }, { "epoch": 0.2051506609376283, "grad_norm": 1.2177864080134486, "learning_rate": 4.4984214630657744e-05, "loss": 0.7421, "num_input_tokens_seen": 338347744, "step": 1874 }, { "epoch": 0.2052601330085662, "grad_norm": 1.4050769151560984, "learning_rate": 4.497904704417739e-05, "loss": 1.0407, "num_input_tokens_seen": 338535680, "step": 1875 }, { "epoch": 0.20536960507950408, "grad_norm": 1.3625250962772133, "learning_rate": 4.4973877094213954e-05, "loss": 0.6343, "num_input_tokens_seen": 338662240, "step": 1876 }, { "epoch": 0.205479077150442, "grad_norm": 1.3836102959081336, "learning_rate": 4.496870478137906e-05, "loss": 0.7782, "num_input_tokens_seen": 338840992, "step": 1877 }, { "epoch": 0.2055885492213799, "grad_norm": 1.2465219748434844, "learning_rate": 4.496353010628455e-05, "loss": 0.6019, "num_input_tokens_seen": 339032064, "step": 1878 }, { "epoch": 0.2056980212923178, "grad_norm": 1.2390294747683719, "learning_rate": 4.495835306954259e-05, "loss": 0.6098, "num_input_tokens_seen": 339198944, "step": 1879 }, { "epoch": 0.2058074933632557, "grad_norm": 1.351740566276479, "learning_rate": 4.495317367176562e-05, "loss": 0.6677, "num_input_tokens_seen": 339359552, "step": 1880 }, { "epoch": 0.2059169654341936, "grad_norm": 1.3224085134724883, "learning_rate": 4.4947991913566355e-05, "loss": 0.8619, "num_input_tokens_seen": 339544128, "step": 1881 }, { "epoch": 0.2060264375051315, "grad_norm": 1.3714232307000847, "learning_rate": 4.494280779555777e-05, "loss": 1.0113, "num_input_tokens_seen": 339720192, "step": 1882 }, { "epoch": 0.2061359095760694, "grad_norm": 1.1184978136937376, "learning_rate": 4.493762131835315e-05, "loss": 0.5132, "num_input_tokens_seen": 339892448, "step": 1883 }, { "epoch": 0.20624538164700731, "grad_norm": 1.2468893925115805, "learning_rate": 4.4932432482566045e-05, "loss": 0.5052, "num_input_tokens_seen": 340049024, "step": 1884 }, { "epoch": 0.2063548537179452, "grad_norm": 1.4199811458679166, "learning_rate": 4.492724128881029e-05, "loss": 0.7394, "num_input_tokens_seen": 340253312, "step": 1885 }, { "epoch": 0.2064643257888831, "grad_norm": 1.2779890291223666, "learning_rate": 4.492204773769997e-05, "loss": 0.6585, "num_input_tokens_seen": 340421088, "step": 1886 }, { "epoch": 0.20657379785982102, "grad_norm": 1.3117187081950978, "learning_rate": 4.491685182984949e-05, "loss": 0.6671, "num_input_tokens_seen": 340583488, "step": 1887 }, { "epoch": 0.20668326993075892, "grad_norm": 1.185059297904869, "learning_rate": 4.4911653565873524e-05, "loss": 0.5914, "num_input_tokens_seen": 340788448, "step": 1888 }, { "epoch": 0.2067927420016968, "grad_norm": 1.2621625468164477, "learning_rate": 4.4906452946386995e-05, "loss": 0.6703, "num_input_tokens_seen": 340983328, "step": 1889 }, { "epoch": 0.20690221407263473, "grad_norm": 1.196842336582463, "learning_rate": 4.490124997200514e-05, "loss": 0.6257, "num_input_tokens_seen": 341184256, "step": 1890 }, { "epoch": 0.20701168614357263, "grad_norm": 1.385871994035939, "learning_rate": 4.489604464334346e-05, "loss": 0.7084, "num_input_tokens_seen": 341379360, "step": 1891 }, { "epoch": 0.20712115821451052, "grad_norm": 1.2164829394127663, "learning_rate": 4.489083696101773e-05, "loss": 0.6465, "num_input_tokens_seen": 341516224, "step": 1892 }, { "epoch": 0.20723063028544841, "grad_norm": 1.3441101301348608, "learning_rate": 4.4885626925644016e-05, "loss": 0.7529, "num_input_tokens_seen": 341673920, "step": 1893 }, { "epoch": 0.20734010235638634, "grad_norm": 1.318819319001496, "learning_rate": 4.4880414537838643e-05, "loss": 0.7849, "num_input_tokens_seen": 341837888, "step": 1894 }, { "epoch": 0.20744957442732423, "grad_norm": 1.2064902071375563, "learning_rate": 4.487519979821824e-05, "loss": 0.6271, "num_input_tokens_seen": 342005664, "step": 1895 }, { "epoch": 0.20755904649826212, "grad_norm": 1.4366731338768453, "learning_rate": 4.486998270739971e-05, "loss": 1.0387, "num_input_tokens_seen": 342178592, "step": 1896 }, { "epoch": 0.20766851856920004, "grad_norm": 1.2134134882521663, "learning_rate": 4.486476326600019e-05, "loss": 0.8455, "num_input_tokens_seen": 342353312, "step": 1897 }, { "epoch": 0.20777799064013794, "grad_norm": 1.2129641401583389, "learning_rate": 4.4859541474637153e-05, "loss": 0.7172, "num_input_tokens_seen": 342528704, "step": 1898 }, { "epoch": 0.20788746271107583, "grad_norm": 1.1453582067593862, "learning_rate": 4.4854317333928335e-05, "loss": 0.6398, "num_input_tokens_seen": 342722464, "step": 1899 }, { "epoch": 0.20799693478201373, "grad_norm": 1.1517412065121027, "learning_rate": 4.484909084449172e-05, "loss": 0.6009, "num_input_tokens_seen": 342896960, "step": 1900 }, { "epoch": 0.20810640685295165, "grad_norm": 1.2380116647592938, "learning_rate": 4.484386200694561e-05, "loss": 0.8514, "num_input_tokens_seen": 343096544, "step": 1901 }, { "epoch": 0.20821587892388954, "grad_norm": 1.250549195759857, "learning_rate": 4.4838630821908564e-05, "loss": 0.6413, "num_input_tokens_seen": 343256704, "step": 1902 }, { "epoch": 0.20832535099482744, "grad_norm": 1.2710020495292411, "learning_rate": 4.483339728999941e-05, "loss": 0.7912, "num_input_tokens_seen": 343473536, "step": 1903 }, { "epoch": 0.20843482306576536, "grad_norm": 1.3000071286959696, "learning_rate": 4.482816141183728e-05, "loss": 0.7125, "num_input_tokens_seen": 343610624, "step": 1904 }, { "epoch": 0.20854429513670325, "grad_norm": 1.3547220069511896, "learning_rate": 4.4822923188041555e-05, "loss": 0.7315, "num_input_tokens_seen": 343779296, "step": 1905 }, { "epoch": 0.20865376720764114, "grad_norm": 1.2980917312994649, "learning_rate": 4.481768261923191e-05, "loss": 0.7621, "num_input_tokens_seen": 343981120, "step": 1906 }, { "epoch": 0.20876323927857907, "grad_norm": 1.241061822201285, "learning_rate": 4.48124397060283e-05, "loss": 0.5179, "num_input_tokens_seen": 344165920, "step": 1907 }, { "epoch": 0.20887271134951696, "grad_norm": 1.2647722927405276, "learning_rate": 4.4807194449050936e-05, "loss": 0.7012, "num_input_tokens_seen": 344377376, "step": 1908 }, { "epoch": 0.20898218342045485, "grad_norm": 1.3358976123945245, "learning_rate": 4.480194684892035e-05, "loss": 0.7162, "num_input_tokens_seen": 344533952, "step": 1909 }, { "epoch": 0.20909165549139275, "grad_norm": 1.4008256776469257, "learning_rate": 4.4796696906257294e-05, "loss": 0.8421, "num_input_tokens_seen": 344714272, "step": 1910 }, { "epoch": 0.20920112756233067, "grad_norm": 1.3611933068133595, "learning_rate": 4.479144462168284e-05, "loss": 0.6532, "num_input_tokens_seen": 344885632, "step": 1911 }, { "epoch": 0.20931059963326856, "grad_norm": 1.2296936015616324, "learning_rate": 4.478618999581833e-05, "loss": 0.7443, "num_input_tokens_seen": 345076032, "step": 1912 }, { "epoch": 0.20942007170420646, "grad_norm": 1.2401906884376446, "learning_rate": 4.4780933029285365e-05, "loss": 0.5801, "num_input_tokens_seen": 345253664, "step": 1913 }, { "epoch": 0.20952954377514438, "grad_norm": 1.1979047304747412, "learning_rate": 4.4775673722705836e-05, "loss": 0.7066, "num_input_tokens_seen": 345447200, "step": 1914 }, { "epoch": 0.20963901584608227, "grad_norm": 1.3522013659258894, "learning_rate": 4.47704120767019e-05, "loss": 0.8532, "num_input_tokens_seen": 345657984, "step": 1915 }, { "epoch": 0.20974848791702017, "grad_norm": 1.3069702553753755, "learning_rate": 4.476514809189603e-05, "loss": 0.7004, "num_input_tokens_seen": 345868992, "step": 1916 }, { "epoch": 0.20985795998795806, "grad_norm": 1.361492962483161, "learning_rate": 4.4759881768910915e-05, "loss": 0.8298, "num_input_tokens_seen": 346066112, "step": 1917 }, { "epoch": 0.20996743205889598, "grad_norm": 1.3849979523185714, "learning_rate": 4.475461310836957e-05, "loss": 0.6776, "num_input_tokens_seen": 346249120, "step": 1918 }, { "epoch": 0.21007690412983387, "grad_norm": 1.3719919419900024, "learning_rate": 4.4749342110895244e-05, "loss": 0.8439, "num_input_tokens_seen": 346409280, "step": 1919 }, { "epoch": 0.21018637620077177, "grad_norm": 1.2549720583993, "learning_rate": 4.4744068777111506e-05, "loss": 0.5991, "num_input_tokens_seen": 346559136, "step": 1920 }, { "epoch": 0.2102958482717097, "grad_norm": 1.1617133996631217, "learning_rate": 4.4738793107642174e-05, "loss": 0.5326, "num_input_tokens_seen": 346733856, "step": 1921 }, { "epoch": 0.21040532034264758, "grad_norm": 1.2241579528785842, "learning_rate": 4.4733515103111356e-05, "loss": 0.6439, "num_input_tokens_seen": 346907232, "step": 1922 }, { "epoch": 0.21051479241358548, "grad_norm": 1.3772741498147214, "learning_rate": 4.472823476414343e-05, "loss": 0.859, "num_input_tokens_seen": 347088448, "step": 1923 }, { "epoch": 0.2106242644845234, "grad_norm": 1.2441564346339018, "learning_rate": 4.4722952091363034e-05, "loss": 0.7572, "num_input_tokens_seen": 347292288, "step": 1924 }, { "epoch": 0.2107337365554613, "grad_norm": 1.3296601791354266, "learning_rate": 4.471766708539512e-05, "loss": 0.8705, "num_input_tokens_seen": 347478880, "step": 1925 }, { "epoch": 0.2108432086263992, "grad_norm": 1.5513949393064579, "learning_rate": 4.4712379746864876e-05, "loss": 0.8501, "num_input_tokens_seen": 347609472, "step": 1926 }, { "epoch": 0.21095268069733708, "grad_norm": 1.279963056359019, "learning_rate": 4.4707090076397795e-05, "loss": 0.8559, "num_input_tokens_seen": 347778368, "step": 1927 }, { "epoch": 0.211062152768275, "grad_norm": 1.3571650200303327, "learning_rate": 4.4701798074619626e-05, "loss": 0.6504, "num_input_tokens_seen": 347960256, "step": 1928 }, { "epoch": 0.2111716248392129, "grad_norm": 1.341442833269959, "learning_rate": 4.4696503742156414e-05, "loss": 0.6153, "num_input_tokens_seen": 348138336, "step": 1929 }, { "epoch": 0.2112810969101508, "grad_norm": 1.6587807376086285, "learning_rate": 4.469120707963447e-05, "loss": 0.8939, "num_input_tokens_seen": 348288416, "step": 1930 }, { "epoch": 0.2113905689810887, "grad_norm": 1.2099746321548952, "learning_rate": 4.468590808768036e-05, "loss": 0.5701, "num_input_tokens_seen": 348462464, "step": 1931 }, { "epoch": 0.2115000410520266, "grad_norm": 1.2984852732696168, "learning_rate": 4.4680606766920954e-05, "loss": 0.6949, "num_input_tokens_seen": 348642112, "step": 1932 }, { "epoch": 0.2116095131229645, "grad_norm": 1.3562438683437141, "learning_rate": 4.46753031179834e-05, "loss": 0.9169, "num_input_tokens_seen": 348832736, "step": 1933 }, { "epoch": 0.2117189851939024, "grad_norm": 1.346195169747301, "learning_rate": 4.4669997141495095e-05, "loss": 0.688, "num_input_tokens_seen": 349028512, "step": 1934 }, { "epoch": 0.21182845726484031, "grad_norm": 1.1081775058585908, "learning_rate": 4.466468883808373e-05, "loss": 0.77, "num_input_tokens_seen": 349212192, "step": 1935 }, { "epoch": 0.2119379293357782, "grad_norm": 1.2301439613514404, "learning_rate": 4.4659378208377276e-05, "loss": 0.7475, "num_input_tokens_seen": 349396768, "step": 1936 }, { "epoch": 0.2120474014067161, "grad_norm": 1.3244182695176554, "learning_rate": 4.465406525300395e-05, "loss": 0.7057, "num_input_tokens_seen": 349573952, "step": 1937 }, { "epoch": 0.21215687347765402, "grad_norm": 1.322851369653043, "learning_rate": 4.4648749972592286e-05, "loss": 0.9661, "num_input_tokens_seen": 349761664, "step": 1938 }, { "epoch": 0.21226634554859192, "grad_norm": 1.240921094714402, "learning_rate": 4.464343236777106e-05, "loss": 0.6221, "num_input_tokens_seen": 349921152, "step": 1939 }, { "epoch": 0.2123758176195298, "grad_norm": 1.2933141241334862, "learning_rate": 4.463811243916933e-05, "loss": 0.8982, "num_input_tokens_seen": 350095424, "step": 1940 }, { "epoch": 0.21248528969046773, "grad_norm": 1.3106040885250285, "learning_rate": 4.463279018741645e-05, "loss": 0.8376, "num_input_tokens_seen": 350275744, "step": 1941 }, { "epoch": 0.21259476176140563, "grad_norm": 1.2323314796824534, "learning_rate": 4.4627465613142014e-05, "loss": 0.6347, "num_input_tokens_seen": 350449792, "step": 1942 }, { "epoch": 0.21270423383234352, "grad_norm": 1.1340977185379006, "learning_rate": 4.462213871697592e-05, "loss": 0.6734, "num_input_tokens_seen": 350644000, "step": 1943 }, { "epoch": 0.2128137059032814, "grad_norm": 1.290215271575339, "learning_rate": 4.4616809499548334e-05, "loss": 0.798, "num_input_tokens_seen": 350821856, "step": 1944 }, { "epoch": 0.21292317797421934, "grad_norm": 1.2489489428008367, "learning_rate": 4.461147796148968e-05, "loss": 0.74, "num_input_tokens_seen": 350984928, "step": 1945 }, { "epoch": 0.21303265004515723, "grad_norm": 1.2992000190820598, "learning_rate": 4.460614410343067e-05, "loss": 0.6716, "num_input_tokens_seen": 351154496, "step": 1946 }, { "epoch": 0.21314212211609512, "grad_norm": 1.3648879412531802, "learning_rate": 4.46008079260023e-05, "loss": 0.703, "num_input_tokens_seen": 351318240, "step": 1947 }, { "epoch": 0.21325159418703304, "grad_norm": 1.3888530800759664, "learning_rate": 4.4595469429835826e-05, "loss": 0.6723, "num_input_tokens_seen": 351494752, "step": 1948 }, { "epoch": 0.21336106625797094, "grad_norm": 1.2049402192035288, "learning_rate": 4.4590128615562765e-05, "loss": 0.7073, "num_input_tokens_seen": 351673952, "step": 1949 }, { "epoch": 0.21347053832890883, "grad_norm": 1.1264142048618757, "learning_rate": 4.458478548381495e-05, "loss": 0.6136, "num_input_tokens_seen": 351849120, "step": 1950 }, { "epoch": 0.21358001039984673, "grad_norm": 1.136857748545596, "learning_rate": 4.4579440035224446e-05, "loss": 0.4825, "num_input_tokens_seen": 352012864, "step": 1951 }, { "epoch": 0.21368948247078465, "grad_norm": 1.236563613424506, "learning_rate": 4.457409227042362e-05, "loss": 0.7573, "num_input_tokens_seen": 352215584, "step": 1952 }, { "epoch": 0.21379895454172254, "grad_norm": 1.1725961627852526, "learning_rate": 4.456874219004509e-05, "loss": 0.514, "num_input_tokens_seen": 352405536, "step": 1953 }, { "epoch": 0.21390842661266044, "grad_norm": 1.2421680664845567, "learning_rate": 4.4563389794721776e-05, "loss": 0.7939, "num_input_tokens_seen": 352610272, "step": 1954 }, { "epoch": 0.21401789868359836, "grad_norm": 1.1934106694012583, "learning_rate": 4.455803508508685e-05, "loss": 0.8915, "num_input_tokens_seen": 352784544, "step": 1955 }, { "epoch": 0.21412737075453625, "grad_norm": 1.3952826604610669, "learning_rate": 4.455267806177376e-05, "loss": 0.8095, "num_input_tokens_seen": 352955680, "step": 1956 }, { "epoch": 0.21423684282547414, "grad_norm": 1.3026941113061945, "learning_rate": 4.454731872541622e-05, "loss": 0.76, "num_input_tokens_seen": 353110240, "step": 1957 }, { "epoch": 0.21434631489641207, "grad_norm": 1.2917391182463467, "learning_rate": 4.454195707664825e-05, "loss": 0.8617, "num_input_tokens_seen": 353297952, "step": 1958 }, { "epoch": 0.21445578696734996, "grad_norm": 1.3081590528288862, "learning_rate": 4.4536593116104125e-05, "loss": 0.658, "num_input_tokens_seen": 353442880, "step": 1959 }, { "epoch": 0.21456525903828785, "grad_norm": 1.23044535296766, "learning_rate": 4.453122684441837e-05, "loss": 0.6638, "num_input_tokens_seen": 353624320, "step": 1960 }, { "epoch": 0.21467473110922575, "grad_norm": 1.2540860626493833, "learning_rate": 4.452585826222583e-05, "loss": 0.723, "num_input_tokens_seen": 353829056, "step": 1961 }, { "epoch": 0.21478420318016367, "grad_norm": 1.2246169678667125, "learning_rate": 4.4520487370161576e-05, "loss": 0.7129, "num_input_tokens_seen": 354013632, "step": 1962 }, { "epoch": 0.21489367525110156, "grad_norm": 1.336567958618177, "learning_rate": 4.451511416886099e-05, "loss": 0.8509, "num_input_tokens_seen": 354243008, "step": 1963 }, { "epoch": 0.21500314732203946, "grad_norm": 1.204569061434021, "learning_rate": 4.45097386589597e-05, "loss": 0.7095, "num_input_tokens_seen": 354410336, "step": 1964 }, { "epoch": 0.21511261939297738, "grad_norm": 1.3564819594637845, "learning_rate": 4.450436084109362e-05, "loss": 0.8761, "num_input_tokens_seen": 354592448, "step": 1965 }, { "epoch": 0.21522209146391527, "grad_norm": 1.2281184441389061, "learning_rate": 4.449898071589894e-05, "loss": 0.6093, "num_input_tokens_seen": 354762240, "step": 1966 }, { "epoch": 0.21533156353485317, "grad_norm": 1.3423688133261293, "learning_rate": 4.449359828401212e-05, "loss": 0.7803, "num_input_tokens_seen": 354939648, "step": 1967 }, { "epoch": 0.21544103560579106, "grad_norm": 1.2020261130551073, "learning_rate": 4.4488213546069884e-05, "loss": 0.8137, "num_input_tokens_seen": 355115488, "step": 1968 }, { "epoch": 0.21555050767672898, "grad_norm": 1.2956611743814193, "learning_rate": 4.448282650270924e-05, "loss": 0.6437, "num_input_tokens_seen": 355254816, "step": 1969 }, { "epoch": 0.21565997974766687, "grad_norm": 1.2288692189898307, "learning_rate": 4.447743715456747e-05, "loss": 0.6666, "num_input_tokens_seen": 355415872, "step": 1970 }, { "epoch": 0.21576945181860477, "grad_norm": 1.2793626608204616, "learning_rate": 4.4472045502282115e-05, "loss": 0.7389, "num_input_tokens_seen": 355606720, "step": 1971 }, { "epoch": 0.2158789238895427, "grad_norm": 1.6983961898707067, "learning_rate": 4.4466651546491006e-05, "loss": 1.0017, "num_input_tokens_seen": 355809664, "step": 1972 }, { "epoch": 0.21598839596048058, "grad_norm": 1.2681127046954452, "learning_rate": 4.4461255287832235e-05, "loss": 0.8947, "num_input_tokens_seen": 355991552, "step": 1973 }, { "epoch": 0.21609786803141848, "grad_norm": 1.1753898805696432, "learning_rate": 4.4455856726944155e-05, "loss": 0.806, "num_input_tokens_seen": 356207040, "step": 1974 }, { "epoch": 0.2162073401023564, "grad_norm": 1.2428344865043057, "learning_rate": 4.445045586446543e-05, "loss": 0.8259, "num_input_tokens_seen": 356398784, "step": 1975 }, { "epoch": 0.2163168121732943, "grad_norm": 1.1907476887605426, "learning_rate": 4.4445052701034955e-05, "loss": 0.6571, "num_input_tokens_seen": 356596800, "step": 1976 }, { "epoch": 0.2164262842442322, "grad_norm": 1.236362065094954, "learning_rate": 4.443964723729191e-05, "loss": 0.9279, "num_input_tokens_seen": 356788096, "step": 1977 }, { "epoch": 0.21653575631517008, "grad_norm": 1.2550526544840037, "learning_rate": 4.443423947387577e-05, "loss": 0.8563, "num_input_tokens_seen": 356972672, "step": 1978 }, { "epoch": 0.216645228386108, "grad_norm": 1.1415879874486934, "learning_rate": 4.4428829411426254e-05, "loss": 0.6347, "num_input_tokens_seen": 357163968, "step": 1979 }, { "epoch": 0.2167547004570459, "grad_norm": 1.2429546170776897, "learning_rate": 4.442341705058335e-05, "loss": 0.7792, "num_input_tokens_seen": 357355936, "step": 1980 }, { "epoch": 0.2168641725279838, "grad_norm": 1.33125649571513, "learning_rate": 4.4418002391987345e-05, "loss": 0.8749, "num_input_tokens_seen": 357555072, "step": 1981 }, { "epoch": 0.2169736445989217, "grad_norm": 1.3130102982099041, "learning_rate": 4.441258543627879e-05, "loss": 0.7566, "num_input_tokens_seen": 357720608, "step": 1982 }, { "epoch": 0.2170831166698596, "grad_norm": 1.3165530793572569, "learning_rate": 4.440716618409847e-05, "loss": 0.8343, "num_input_tokens_seen": 357904512, "step": 1983 }, { "epoch": 0.2171925887407975, "grad_norm": 1.213964702344433, "learning_rate": 4.4401744636087495e-05, "loss": 0.6336, "num_input_tokens_seen": 358073632, "step": 1984 }, { "epoch": 0.2173020608117354, "grad_norm": 1.414865760905842, "learning_rate": 4.439632079288722e-05, "loss": 0.7854, "num_input_tokens_seen": 358283744, "step": 1985 }, { "epoch": 0.21741153288267331, "grad_norm": 1.3112428639147744, "learning_rate": 4.439089465513928e-05, "loss": 0.6696, "num_input_tokens_seen": 358437856, "step": 1986 }, { "epoch": 0.2175210049536112, "grad_norm": 1.3089071325577157, "learning_rate": 4.438546622348557e-05, "loss": 0.8466, "num_input_tokens_seen": 358619072, "step": 1987 }, { "epoch": 0.2176304770245491, "grad_norm": 1.2890701756104341, "learning_rate": 4.438003549856826e-05, "loss": 0.5911, "num_input_tokens_seen": 358778112, "step": 1988 }, { "epoch": 0.21773994909548702, "grad_norm": 1.340752829835186, "learning_rate": 4.4374602481029807e-05, "loss": 0.858, "num_input_tokens_seen": 358991136, "step": 1989 }, { "epoch": 0.21784942116642492, "grad_norm": 1.156582037186851, "learning_rate": 4.43691671715129e-05, "loss": 0.6973, "num_input_tokens_seen": 359128672, "step": 1990 }, { "epoch": 0.2179588932373628, "grad_norm": 1.265981459163956, "learning_rate": 4.436372957066056e-05, "loss": 0.6666, "num_input_tokens_seen": 359299360, "step": 1991 }, { "epoch": 0.21806836530830073, "grad_norm": 1.3168868367695583, "learning_rate": 4.4358289679116026e-05, "loss": 0.7876, "num_input_tokens_seen": 359490208, "step": 1992 }, { "epoch": 0.21817783737923863, "grad_norm": 1.2270958099421776, "learning_rate": 4.435284749752283e-05, "loss": 0.6753, "num_input_tokens_seen": 359661120, "step": 1993 }, { "epoch": 0.21828730945017652, "grad_norm": 1.5737881888073026, "learning_rate": 4.434740302652477e-05, "loss": 0.8226, "num_input_tokens_seen": 359840096, "step": 1994 }, { "epoch": 0.2183967815211144, "grad_norm": 1.1710525678502657, "learning_rate": 4.434195626676592e-05, "loss": 0.569, "num_input_tokens_seen": 360019520, "step": 1995 }, { "epoch": 0.21850625359205234, "grad_norm": 1.2646258613603854, "learning_rate": 4.4336507218890624e-05, "loss": 0.688, "num_input_tokens_seen": 360202528, "step": 1996 }, { "epoch": 0.21861572566299023, "grad_norm": 1.3055623028402725, "learning_rate": 4.433105588354348e-05, "loss": 0.7879, "num_input_tokens_seen": 360382848, "step": 1997 }, { "epoch": 0.21872519773392812, "grad_norm": 1.186013244702907, "learning_rate": 4.4325602261369384e-05, "loss": 0.8162, "num_input_tokens_seen": 360595200, "step": 1998 }, { "epoch": 0.21883466980486604, "grad_norm": 1.2035009118913873, "learning_rate": 4.432014635301348e-05, "loss": 0.4785, "num_input_tokens_seen": 360760960, "step": 1999 }, { "epoch": 0.21894414187580394, "grad_norm": 1.3128403267820437, "learning_rate": 4.43146881591212e-05, "loss": 0.8412, "num_input_tokens_seen": 360958752, "step": 2000 }, { "epoch": 0.21905361394674183, "grad_norm": 1.6039577529746962, "learning_rate": 4.430922768033824e-05, "loss": 1.052, "num_input_tokens_seen": 361143552, "step": 2001 }, { "epoch": 0.21916308601767973, "grad_norm": 1.1411288990393318, "learning_rate": 4.4303764917310555e-05, "loss": 0.711, "num_input_tokens_seen": 361340224, "step": 2002 }, { "epoch": 0.21927255808861765, "grad_norm": 1.3061230009116436, "learning_rate": 4.429829987068438e-05, "loss": 0.6528, "num_input_tokens_seen": 361512032, "step": 2003 }, { "epoch": 0.21938203015955554, "grad_norm": 1.3159784363281475, "learning_rate": 4.4292832541106214e-05, "loss": 0.8474, "num_input_tokens_seen": 361712288, "step": 2004 }, { "epoch": 0.21949150223049343, "grad_norm": 1.2990301218923364, "learning_rate": 4.428736292922285e-05, "loss": 0.8661, "num_input_tokens_seen": 361905600, "step": 2005 }, { "epoch": 0.21960097430143136, "grad_norm": 1.2791547157565992, "learning_rate": 4.428189103568132e-05, "loss": 0.6566, "num_input_tokens_seen": 362109216, "step": 2006 }, { "epoch": 0.21971044637236925, "grad_norm": 1.2151180828467785, "learning_rate": 4.427641686112894e-05, "loss": 0.6232, "num_input_tokens_seen": 362308576, "step": 2007 }, { "epoch": 0.21981991844330714, "grad_norm": 1.2985610113605868, "learning_rate": 4.4270940406213304e-05, "loss": 0.6635, "num_input_tokens_seen": 362481952, "step": 2008 }, { "epoch": 0.21992939051424507, "grad_norm": 1.3112984561603587, "learning_rate": 4.4265461671582254e-05, "loss": 0.6922, "num_input_tokens_seen": 362630912, "step": 2009 }, { "epoch": 0.22003886258518296, "grad_norm": 1.320706100031772, "learning_rate": 4.4259980657883916e-05, "loss": 1.0244, "num_input_tokens_seen": 362855136, "step": 2010 }, { "epoch": 0.22014833465612085, "grad_norm": 1.1695522940965075, "learning_rate": 4.425449736576668e-05, "loss": 0.7141, "num_input_tokens_seen": 363052256, "step": 2011 }, { "epoch": 0.22025780672705875, "grad_norm": 1.2660607423779093, "learning_rate": 4.424901179587922e-05, "loss": 0.8426, "num_input_tokens_seen": 363216000, "step": 2012 }, { "epoch": 0.22036727879799667, "grad_norm": 1.2762190040314065, "learning_rate": 4.4243523948870465e-05, "loss": 0.6569, "num_input_tokens_seen": 363372352, "step": 2013 }, { "epoch": 0.22047675086893456, "grad_norm": 1.2131255531997638, "learning_rate": 4.4238033825389605e-05, "loss": 0.6161, "num_input_tokens_seen": 363582464, "step": 2014 }, { "epoch": 0.22058622293987246, "grad_norm": 1.2390107133200514, "learning_rate": 4.423254142608613e-05, "loss": 0.6432, "num_input_tokens_seen": 363736352, "step": 2015 }, { "epoch": 0.22069569501081038, "grad_norm": 1.0665482087718874, "learning_rate": 4.422704675160976e-05, "loss": 0.6241, "num_input_tokens_seen": 363946688, "step": 2016 }, { "epoch": 0.22080516708174827, "grad_norm": 1.315577214838842, "learning_rate": 4.422154980261053e-05, "loss": 0.6576, "num_input_tokens_seen": 364140896, "step": 2017 }, { "epoch": 0.22091463915268617, "grad_norm": 1.2099495087484862, "learning_rate": 4.4216050579738685e-05, "loss": 0.7891, "num_input_tokens_seen": 364314272, "step": 2018 }, { "epoch": 0.22102411122362406, "grad_norm": 1.2522125213461284, "learning_rate": 4.42105490836448e-05, "loss": 0.6892, "num_input_tokens_seen": 364537376, "step": 2019 }, { "epoch": 0.22113358329456198, "grad_norm": 1.3871285706042964, "learning_rate": 4.420504531497969e-05, "loss": 0.7486, "num_input_tokens_seen": 364732480, "step": 2020 }, { "epoch": 0.22124305536549987, "grad_norm": 1.2576103758860755, "learning_rate": 4.419953927439443e-05, "loss": 0.6361, "num_input_tokens_seen": 364914144, "step": 2021 }, { "epoch": 0.22135252743643777, "grad_norm": 1.241518668287497, "learning_rate": 4.419403096254037e-05, "loss": 0.5429, "num_input_tokens_seen": 365098720, "step": 2022 }, { "epoch": 0.2214619995073757, "grad_norm": 1.2380847474149805, "learning_rate": 4.4188520380069145e-05, "loss": 0.7161, "num_input_tokens_seen": 365254400, "step": 2023 }, { "epoch": 0.22157147157831358, "grad_norm": 1.2480451556986396, "learning_rate": 4.418300752763264e-05, "loss": 0.7134, "num_input_tokens_seen": 365401120, "step": 2024 }, { "epoch": 0.22168094364925148, "grad_norm": 1.3397469888786053, "learning_rate": 4.4177492405883016e-05, "loss": 0.6245, "num_input_tokens_seen": 365595776, "step": 2025 }, { "epoch": 0.2217904157201894, "grad_norm": 1.2476404808925396, "learning_rate": 4.4171975015472705e-05, "loss": 0.5532, "num_input_tokens_seen": 365770048, "step": 2026 }, { "epoch": 0.2218998877911273, "grad_norm": 1.426647602396594, "learning_rate": 4.4166455357054394e-05, "loss": 0.6425, "num_input_tokens_seen": 365929312, "step": 2027 }, { "epoch": 0.2220093598620652, "grad_norm": 1.2953606461986147, "learning_rate": 4.416093343128106e-05, "loss": 0.5842, "num_input_tokens_seen": 366089248, "step": 2028 }, { "epoch": 0.22211883193300308, "grad_norm": 1.3663041667199873, "learning_rate": 4.415540923880593e-05, "loss": 0.8142, "num_input_tokens_seen": 366283232, "step": 2029 }, { "epoch": 0.222228304003941, "grad_norm": 1.5335191534616754, "learning_rate": 4.41498827802825e-05, "loss": 1.0114, "num_input_tokens_seen": 366466240, "step": 2030 }, { "epoch": 0.2223377760748789, "grad_norm": 1.4035567878783233, "learning_rate": 4.414435405636455e-05, "loss": 0.8991, "num_input_tokens_seen": 366662464, "step": 2031 }, { "epoch": 0.2224472481458168, "grad_norm": 1.3305338836334932, "learning_rate": 4.4138823067706116e-05, "loss": 0.6525, "num_input_tokens_seen": 366836960, "step": 2032 }, { "epoch": 0.2225567202167547, "grad_norm": 1.3098259433836064, "learning_rate": 4.413328981496149e-05, "loss": 0.5975, "num_input_tokens_seen": 366969120, "step": 2033 }, { "epoch": 0.2226661922876926, "grad_norm": 1.3787472554903217, "learning_rate": 4.412775429878527e-05, "loss": 0.7552, "num_input_tokens_seen": 367142496, "step": 2034 }, { "epoch": 0.2227756643586305, "grad_norm": 1.1571819055366903, "learning_rate": 4.412221651983227e-05, "loss": 0.6497, "num_input_tokens_seen": 367314528, "step": 2035 }, { "epoch": 0.22288513642956842, "grad_norm": 1.525905468518804, "learning_rate": 4.4116676478757616e-05, "loss": 0.7986, "num_input_tokens_seen": 367483200, "step": 2036 }, { "epoch": 0.22299460850050631, "grad_norm": 1.2284774410457298, "learning_rate": 4.4111134176216685e-05, "loss": 0.9344, "num_input_tokens_seen": 367675392, "step": 2037 }, { "epoch": 0.2231040805714442, "grad_norm": 1.1985462800831257, "learning_rate": 4.410558961286511e-05, "loss": 0.803, "num_input_tokens_seen": 367848544, "step": 2038 }, { "epoch": 0.2232135526423821, "grad_norm": 1.1603599395218414, "learning_rate": 4.41000427893588e-05, "loss": 0.4954, "num_input_tokens_seen": 368026624, "step": 2039 }, { "epoch": 0.22332302471332002, "grad_norm": 1.304950801755078, "learning_rate": 4.409449370635395e-05, "loss": 0.79, "num_input_tokens_seen": 368216128, "step": 2040 }, { "epoch": 0.22343249678425792, "grad_norm": 1.1938646311421013, "learning_rate": 4.4088942364506994e-05, "loss": 0.8221, "num_input_tokens_seen": 368432512, "step": 2041 }, { "epoch": 0.2235419688551958, "grad_norm": 1.093368354697796, "learning_rate": 4.408338876447465e-05, "loss": 0.7129, "num_input_tokens_seen": 368652032, "step": 2042 }, { "epoch": 0.22365144092613373, "grad_norm": 1.1968018970126402, "learning_rate": 4.4077832906913895e-05, "loss": 0.7354, "num_input_tokens_seen": 368857440, "step": 2043 }, { "epoch": 0.22376091299707163, "grad_norm": 1.4858470644541932, "learning_rate": 4.407227479248198e-05, "loss": 1.0417, "num_input_tokens_seen": 369048512, "step": 2044 }, { "epoch": 0.22387038506800952, "grad_norm": 1.2020078649692, "learning_rate": 4.406671442183642e-05, "loss": 0.8381, "num_input_tokens_seen": 369241600, "step": 2045 }, { "epoch": 0.2239798571389474, "grad_norm": 1.1881428534517366, "learning_rate": 4.4061151795634985e-05, "loss": 0.7005, "num_input_tokens_seen": 369445440, "step": 2046 }, { "epoch": 0.22408932920988534, "grad_norm": 1.2708081206441983, "learning_rate": 4.405558691453574e-05, "loss": 0.6792, "num_input_tokens_seen": 369628448, "step": 2047 }, { "epoch": 0.22419880128082323, "grad_norm": 1.2413650284987883, "learning_rate": 4.4050019779196984e-05, "loss": 0.6395, "num_input_tokens_seen": 369781216, "step": 2048 }, { "epoch": 0.22430827335176112, "grad_norm": 1.29413582029179, "learning_rate": 4.4044450390277306e-05, "loss": 0.7212, "num_input_tokens_seen": 369962208, "step": 2049 }, { "epoch": 0.22441774542269904, "grad_norm": 1.2452476283542708, "learning_rate": 4.403887874843556e-05, "loss": 0.8673, "num_input_tokens_seen": 370166048, "step": 2050 }, { "epoch": 0.22452721749363694, "grad_norm": 1.4821103136754281, "learning_rate": 4.403330485433085e-05, "loss": 0.7634, "num_input_tokens_seen": 370328448, "step": 2051 }, { "epoch": 0.22463668956457483, "grad_norm": 1.32614203125938, "learning_rate": 4.4027728708622555e-05, "loss": 0.7397, "num_input_tokens_seen": 370497792, "step": 2052 }, { "epoch": 0.22474616163551275, "grad_norm": 1.2553450271232638, "learning_rate": 4.4022150311970335e-05, "loss": 0.6868, "num_input_tokens_seen": 370686176, "step": 2053 }, { "epoch": 0.22485563370645065, "grad_norm": 1.259952596347952, "learning_rate": 4.4016569665034105e-05, "loss": 0.8084, "num_input_tokens_seen": 370887552, "step": 2054 }, { "epoch": 0.22496510577738854, "grad_norm": 1.3751753781920044, "learning_rate": 4.401098676847402e-05, "loss": 0.7042, "num_input_tokens_seen": 371058912, "step": 2055 }, { "epoch": 0.22507457784832643, "grad_norm": 1.3002546922906733, "learning_rate": 4.400540162295056e-05, "loss": 0.702, "num_input_tokens_seen": 371208096, "step": 2056 }, { "epoch": 0.22518404991926436, "grad_norm": 1.2061960732062293, "learning_rate": 4.399981422912441e-05, "loss": 0.5679, "num_input_tokens_seen": 371367136, "step": 2057 }, { "epoch": 0.22529352199020225, "grad_norm": 1.2822017391268608, "learning_rate": 4.3994224587656556e-05, "loss": 0.6447, "num_input_tokens_seen": 371550368, "step": 2058 }, { "epoch": 0.22540299406114014, "grad_norm": 1.1461159213157208, "learning_rate": 4.398863269920825e-05, "loss": 0.5606, "num_input_tokens_seen": 371719040, "step": 2059 }, { "epoch": 0.22551246613207807, "grad_norm": 1.2708629959063305, "learning_rate": 4.398303856444099e-05, "loss": 0.9015, "num_input_tokens_seen": 371906304, "step": 2060 }, { "epoch": 0.22562193820301596, "grad_norm": 1.1850681501018168, "learning_rate": 4.397744218401657e-05, "loss": 0.8008, "num_input_tokens_seen": 372101408, "step": 2061 }, { "epoch": 0.22573141027395385, "grad_norm": 1.3241206219979893, "learning_rate": 4.3971843558597e-05, "loss": 0.6843, "num_input_tokens_seen": 372314880, "step": 2062 }, { "epoch": 0.22584088234489175, "grad_norm": 1.1555975026582674, "learning_rate": 4.396624268884462e-05, "loss": 0.5991, "num_input_tokens_seen": 372480640, "step": 2063 }, { "epoch": 0.22595035441582967, "grad_norm": 1.4555189041120906, "learning_rate": 4.396063957542198e-05, "loss": 0.9296, "num_input_tokens_seen": 372680000, "step": 2064 }, { "epoch": 0.22605982648676756, "grad_norm": 1.3803226879418395, "learning_rate": 4.3955034218991934e-05, "loss": 0.8449, "num_input_tokens_seen": 372882944, "step": 2065 }, { "epoch": 0.22616929855770546, "grad_norm": 1.1550903691199836, "learning_rate": 4.394942662021756e-05, "loss": 0.7749, "num_input_tokens_seen": 373067968, "step": 2066 }, { "epoch": 0.22627877062864338, "grad_norm": 1.3493301831533453, "learning_rate": 4.3943816779762256e-05, "loss": 0.7659, "num_input_tokens_seen": 373221184, "step": 2067 }, { "epoch": 0.22638824269958127, "grad_norm": 1.340125674290003, "learning_rate": 4.393820469828964e-05, "loss": 0.9793, "num_input_tokens_seen": 373432416, "step": 2068 }, { "epoch": 0.22649771477051917, "grad_norm": 1.277782213277075, "learning_rate": 4.39325903764636e-05, "loss": 0.6913, "num_input_tokens_seen": 373606688, "step": 2069 }, { "epoch": 0.2266071868414571, "grad_norm": 1.3068245842365949, "learning_rate": 4.392697381494832e-05, "loss": 0.681, "num_input_tokens_seen": 373793728, "step": 2070 }, { "epoch": 0.22671665891239498, "grad_norm": 1.2733174366899058, "learning_rate": 4.3921355014408226e-05, "loss": 0.6642, "num_input_tokens_seen": 373954112, "step": 2071 }, { "epoch": 0.22682613098333287, "grad_norm": 1.3226218708615285, "learning_rate": 4.3915733975508e-05, "loss": 0.8819, "num_input_tokens_seen": 374159968, "step": 2072 }, { "epoch": 0.22693560305427077, "grad_norm": 1.2738096945329185, "learning_rate": 4.39101106989126e-05, "loss": 0.7061, "num_input_tokens_seen": 374335584, "step": 2073 }, { "epoch": 0.2270450751252087, "grad_norm": 1.2928524368384142, "learning_rate": 4.3904485185287256e-05, "loss": 0.7383, "num_input_tokens_seen": 374519040, "step": 2074 }, { "epoch": 0.22715454719614658, "grad_norm": 1.3477960486395308, "learning_rate": 4.389885743529746e-05, "loss": 0.8211, "num_input_tokens_seen": 374700480, "step": 2075 }, { "epoch": 0.22726401926708448, "grad_norm": 1.4026803380447395, "learning_rate": 4.389322744960895e-05, "loss": 0.7222, "num_input_tokens_seen": 374862432, "step": 2076 }, { "epoch": 0.2273734913380224, "grad_norm": 1.2762402542145663, "learning_rate": 4.388759522888776e-05, "loss": 0.7916, "num_input_tokens_seen": 375050816, "step": 2077 }, { "epoch": 0.2274829634089603, "grad_norm": 1.1766451579893678, "learning_rate": 4.3881960773800154e-05, "loss": 0.659, "num_input_tokens_seen": 375220384, "step": 2078 }, { "epoch": 0.2275924354798982, "grad_norm": 1.3065108846392777, "learning_rate": 4.387632408501269e-05, "loss": 0.8047, "num_input_tokens_seen": 375421312, "step": 2079 }, { "epoch": 0.22770190755083608, "grad_norm": 1.2887034985563852, "learning_rate": 4.3870685163192165e-05, "loss": 0.8133, "num_input_tokens_seen": 375584384, "step": 2080 }, { "epoch": 0.227811379621774, "grad_norm": 1.1491437543909009, "learning_rate": 4.386504400900566e-05, "loss": 0.6045, "num_input_tokens_seen": 375797632, "step": 2081 }, { "epoch": 0.2279208516927119, "grad_norm": 1.2148506679521929, "learning_rate": 4.3859400623120515e-05, "loss": 0.6957, "num_input_tokens_seen": 375979744, "step": 2082 }, { "epoch": 0.2280303237636498, "grad_norm": 1.3039643345004146, "learning_rate": 4.3853755006204334e-05, "loss": 0.8164, "num_input_tokens_seen": 376154016, "step": 2083 }, { "epoch": 0.2281397958345877, "grad_norm": 1.2608995169552213, "learning_rate": 4.384810715892498e-05, "loss": 0.6439, "num_input_tokens_seen": 376335904, "step": 2084 }, { "epoch": 0.2282492679055256, "grad_norm": 1.3093455873522604, "learning_rate": 4.3842457081950575e-05, "loss": 0.6883, "num_input_tokens_seen": 376517568, "step": 2085 }, { "epoch": 0.2283587399764635, "grad_norm": 1.331584463952241, "learning_rate": 4.383680477594951e-05, "loss": 0.6989, "num_input_tokens_seen": 376665632, "step": 2086 }, { "epoch": 0.22846821204740142, "grad_norm": 1.3471588986165082, "learning_rate": 4.3831150241590464e-05, "loss": 0.9081, "num_input_tokens_seen": 376847072, "step": 2087 }, { "epoch": 0.2285776841183393, "grad_norm": 1.1730572141432418, "learning_rate": 4.382549347954233e-05, "loss": 0.5016, "num_input_tokens_seen": 377031872, "step": 2088 }, { "epoch": 0.2286871561892772, "grad_norm": 1.4034415705674266, "learning_rate": 4.381983449047432e-05, "loss": 0.7132, "num_input_tokens_seen": 377209728, "step": 2089 }, { "epoch": 0.2287966282602151, "grad_norm": 1.243894687460368, "learning_rate": 4.381417327505586e-05, "loss": 0.7477, "num_input_tokens_seen": 377408864, "step": 2090 }, { "epoch": 0.22890610033115302, "grad_norm": 1.1176629821924433, "learning_rate": 4.3808509833956666e-05, "loss": 0.4857, "num_input_tokens_seen": 377609344, "step": 2091 }, { "epoch": 0.22901557240209092, "grad_norm": 1.306022691798272, "learning_rate": 4.380284416784672e-05, "loss": 0.6178, "num_input_tokens_seen": 377783616, "step": 2092 }, { "epoch": 0.2291250444730288, "grad_norm": 1.3357713830297655, "learning_rate": 4.3797176277396245e-05, "loss": 0.8217, "num_input_tokens_seen": 377975360, "step": 2093 }, { "epoch": 0.22923451654396673, "grad_norm": 1.4485346986817098, "learning_rate": 4.3791506163275764e-05, "loss": 0.8519, "num_input_tokens_seen": 378153888, "step": 2094 }, { "epoch": 0.22934398861490463, "grad_norm": 1.537950917932838, "learning_rate": 4.378583382615601e-05, "loss": 0.8439, "num_input_tokens_seen": 378318528, "step": 2095 }, { "epoch": 0.22945346068584252, "grad_norm": 1.1915424206570322, "learning_rate": 4.378015926670804e-05, "loss": 0.682, "num_input_tokens_seen": 378505120, "step": 2096 }, { "epoch": 0.2295629327567804, "grad_norm": 1.162405178345752, "learning_rate": 4.377448248560313e-05, "loss": 0.8057, "num_input_tokens_seen": 378686560, "step": 2097 }, { "epoch": 0.22967240482771833, "grad_norm": 1.307245590593551, "learning_rate": 4.376880348351283e-05, "loss": 0.7287, "num_input_tokens_seen": 378836416, "step": 2098 }, { "epoch": 0.22978187689865623, "grad_norm": 1.1523999733867596, "learning_rate": 4.376312226110895e-05, "loss": 0.5919, "num_input_tokens_seen": 378994336, "step": 2099 }, { "epoch": 0.22989134896959412, "grad_norm": 1.336734779212754, "learning_rate": 4.375743881906359e-05, "loss": 0.7046, "num_input_tokens_seen": 379149568, "step": 2100 }, { "epoch": 0.23000082104053204, "grad_norm": 1.6055175328326479, "learning_rate": 4.3751753158049065e-05, "loss": 1.1376, "num_input_tokens_seen": 379326304, "step": 2101 }, { "epoch": 0.23011029311146994, "grad_norm": 1.3370650591403406, "learning_rate": 4.374606527873799e-05, "loss": 0.7187, "num_input_tokens_seen": 379507968, "step": 2102 }, { "epoch": 0.23021976518240783, "grad_norm": 1.3106480202679232, "learning_rate": 4.3740375181803225e-05, "loss": 0.6193, "num_input_tokens_seen": 379709792, "step": 2103 }, { "epoch": 0.23032923725334575, "grad_norm": 1.164639374823897, "learning_rate": 4.373468286791792e-05, "loss": 0.6533, "num_input_tokens_seen": 379883616, "step": 2104 }, { "epoch": 0.23043870932428365, "grad_norm": 1.3682854303825618, "learning_rate": 4.3728988337755426e-05, "loss": 0.8281, "num_input_tokens_seen": 380048704, "step": 2105 }, { "epoch": 0.23054818139522154, "grad_norm": 1.1054973800779964, "learning_rate": 4.372329159198943e-05, "loss": 0.6358, "num_input_tokens_seen": 380246720, "step": 2106 }, { "epoch": 0.23065765346615943, "grad_norm": 1.3341391879906812, "learning_rate": 4.371759263129382e-05, "loss": 0.7766, "num_input_tokens_seen": 380412256, "step": 2107 }, { "epoch": 0.23076712553709736, "grad_norm": 1.1824244902134164, "learning_rate": 4.371189145634279e-05, "loss": 0.7225, "num_input_tokens_seen": 380597952, "step": 2108 }, { "epoch": 0.23087659760803525, "grad_norm": 1.2739528245201166, "learning_rate": 4.3706188067810766e-05, "loss": 0.6908, "num_input_tokens_seen": 380758336, "step": 2109 }, { "epoch": 0.23098606967897314, "grad_norm": 1.258279701799449, "learning_rate": 4.370048246637246e-05, "loss": 0.6135, "num_input_tokens_seen": 380951648, "step": 2110 }, { "epoch": 0.23109554174991107, "grad_norm": 1.2639886615250662, "learning_rate": 4.369477465270282e-05, "loss": 0.7887, "num_input_tokens_seen": 381150112, "step": 2111 }, { "epoch": 0.23120501382084896, "grad_norm": 1.3439656461628013, "learning_rate": 4.3689064627477084e-05, "loss": 0.7214, "num_input_tokens_seen": 381359104, "step": 2112 }, { "epoch": 0.23131448589178685, "grad_norm": 1.3883839669597884, "learning_rate": 4.368335239137073e-05, "loss": 0.658, "num_input_tokens_seen": 381538080, "step": 2113 }, { "epoch": 0.23142395796272475, "grad_norm": 1.2865559107213524, "learning_rate": 4.36776379450595e-05, "loss": 0.6878, "num_input_tokens_seen": 381725344, "step": 2114 }, { "epoch": 0.23153343003366267, "grad_norm": 1.4873677651748494, "learning_rate": 4.3671921289219415e-05, "loss": 0.8352, "num_input_tokens_seen": 381894240, "step": 2115 }, { "epoch": 0.23164290210460056, "grad_norm": 1.1778544654161416, "learning_rate": 4.3666202424526724e-05, "loss": 0.6355, "num_input_tokens_seen": 382082176, "step": 2116 }, { "epoch": 0.23175237417553846, "grad_norm": 1.3476113132227256, "learning_rate": 4.366048135165798e-05, "loss": 0.8084, "num_input_tokens_seen": 382264736, "step": 2117 }, { "epoch": 0.23186184624647638, "grad_norm": 1.2980387877706852, "learning_rate": 4.365475807128996e-05, "loss": 0.727, "num_input_tokens_seen": 382443264, "step": 2118 }, { "epoch": 0.23197131831741427, "grad_norm": 1.3755773719262017, "learning_rate": 4.364903258409973e-05, "loss": 0.7689, "num_input_tokens_seen": 382629632, "step": 2119 }, { "epoch": 0.23208079038835217, "grad_norm": 1.2006278938445245, "learning_rate": 4.364330489076458e-05, "loss": 0.6362, "num_input_tokens_seen": 382795840, "step": 2120 }, { "epoch": 0.2321902624592901, "grad_norm": 1.3537594354077032, "learning_rate": 4.3637574991962113e-05, "loss": 0.7211, "num_input_tokens_seen": 382955104, "step": 2121 }, { "epoch": 0.23229973453022798, "grad_norm": 1.2728784656359475, "learning_rate": 4.3631842888370154e-05, "loss": 0.8299, "num_input_tokens_seen": 383152896, "step": 2122 }, { "epoch": 0.23240920660116587, "grad_norm": 1.3198680693990599, "learning_rate": 4.362610858066679e-05, "loss": 0.6905, "num_input_tokens_seen": 383333888, "step": 2123 }, { "epoch": 0.23251867867210377, "grad_norm": 1.1019682441331424, "learning_rate": 4.3620372069530404e-05, "loss": 0.7238, "num_input_tokens_seen": 383508832, "step": 2124 }, { "epoch": 0.2326281507430417, "grad_norm": 1.2409151866926986, "learning_rate": 4.361463335563959e-05, "loss": 0.6629, "num_input_tokens_seen": 383673472, "step": 2125 }, { "epoch": 0.23273762281397958, "grad_norm": 1.067269559193553, "learning_rate": 4.3608892439673234e-05, "loss": 0.5215, "num_input_tokens_seen": 383874624, "step": 2126 }, { "epoch": 0.23284709488491748, "grad_norm": 1.1820123261262463, "learning_rate": 4.360314932231048e-05, "loss": 0.5942, "num_input_tokens_seen": 384067936, "step": 2127 }, { "epoch": 0.2329565669558554, "grad_norm": 1.1466870104478422, "learning_rate": 4.3597404004230714e-05, "loss": 0.5175, "num_input_tokens_seen": 384252064, "step": 2128 }, { "epoch": 0.2330660390267933, "grad_norm": 1.2581553495397297, "learning_rate": 4.3591656486113616e-05, "loss": 0.7117, "num_input_tokens_seen": 384441120, "step": 2129 }, { "epoch": 0.2331755110977312, "grad_norm": 1.1881479703959306, "learning_rate": 4.3585906768639095e-05, "loss": 0.6636, "num_input_tokens_seen": 384623232, "step": 2130 }, { "epoch": 0.23328498316866908, "grad_norm": 1.3121650359194772, "learning_rate": 4.358015485248733e-05, "loss": 0.7299, "num_input_tokens_seen": 384808928, "step": 2131 }, { "epoch": 0.233394455239607, "grad_norm": 1.2697039207994636, "learning_rate": 4.357440073833877e-05, "loss": 0.6303, "num_input_tokens_seen": 384949376, "step": 2132 }, { "epoch": 0.2335039273105449, "grad_norm": 1.6021667829126138, "learning_rate": 4.356864442687411e-05, "loss": 0.8497, "num_input_tokens_seen": 385141120, "step": 2133 }, { "epoch": 0.2336133993814828, "grad_norm": 1.221684176088377, "learning_rate": 4.356288591877431e-05, "loss": 0.6641, "num_input_tokens_seen": 385357280, "step": 2134 }, { "epoch": 0.2337228714524207, "grad_norm": 1.1785214958512515, "learning_rate": 4.355712521472059e-05, "loss": 0.6397, "num_input_tokens_seen": 385529984, "step": 2135 }, { "epoch": 0.2338323435233586, "grad_norm": 1.3176823891157594, "learning_rate": 4.355136231539443e-05, "loss": 0.8616, "num_input_tokens_seen": 385716352, "step": 2136 }, { "epoch": 0.2339418155942965, "grad_norm": 1.2672635997494759, "learning_rate": 4.3545597221477585e-05, "loss": 0.7354, "num_input_tokens_seen": 385882112, "step": 2137 }, { "epoch": 0.23405128766523442, "grad_norm": 1.204869430005136, "learning_rate": 4.353982993365203e-05, "loss": 0.6767, "num_input_tokens_seen": 386033312, "step": 2138 }, { "epoch": 0.2341607597361723, "grad_norm": 1.145104638944575, "learning_rate": 4.3534060452600046e-05, "loss": 0.5562, "num_input_tokens_seen": 386233792, "step": 2139 }, { "epoch": 0.2342702318071102, "grad_norm": 1.3397783980822024, "learning_rate": 4.3528288779004135e-05, "loss": 0.7539, "num_input_tokens_seen": 386407616, "step": 2140 }, { "epoch": 0.2343797038780481, "grad_norm": 1.2033643608284688, "learning_rate": 4.352251491354708e-05, "loss": 0.6199, "num_input_tokens_seen": 386567776, "step": 2141 }, { "epoch": 0.23448917594898602, "grad_norm": 1.2804312066840429, "learning_rate": 4.351673885691192e-05, "loss": 0.6844, "num_input_tokens_seen": 386748544, "step": 2142 }, { "epoch": 0.23459864801992392, "grad_norm": 1.254087916253771, "learning_rate": 4.3510960609781954e-05, "loss": 0.8967, "num_input_tokens_seen": 386940736, "step": 2143 }, { "epoch": 0.2347081200908618, "grad_norm": 0.9997064636473308, "learning_rate": 4.350518017284073e-05, "loss": 0.5102, "num_input_tokens_seen": 387100896, "step": 2144 }, { "epoch": 0.23481759216179973, "grad_norm": 1.2526136729724977, "learning_rate": 4.349939754677208e-05, "loss": 0.6685, "num_input_tokens_seen": 387310112, "step": 2145 }, { "epoch": 0.23492706423273763, "grad_norm": 1.3612472094182326, "learning_rate": 4.349361273226005e-05, "loss": 0.6578, "num_input_tokens_seen": 387446528, "step": 2146 }, { "epoch": 0.23503653630367552, "grad_norm": 1.3835404629733932, "learning_rate": 4.3487825729988995e-05, "loss": 0.9187, "num_input_tokens_seen": 387650816, "step": 2147 }, { "epoch": 0.2351460083746134, "grad_norm": 1.218519286790812, "learning_rate": 4.34820365406435e-05, "loss": 0.6088, "num_input_tokens_seen": 387808288, "step": 2148 }, { "epoch": 0.23525548044555133, "grad_norm": 1.3053045127282719, "learning_rate": 4.347624516490841e-05, "loss": 0.7736, "num_input_tokens_seen": 388017056, "step": 2149 }, { "epoch": 0.23536495251648923, "grad_norm": 1.3815311594516526, "learning_rate": 4.3470451603468836e-05, "loss": 0.7928, "num_input_tokens_seen": 388217760, "step": 2150 }, { "epoch": 0.23547442458742712, "grad_norm": 1.2591593858334595, "learning_rate": 4.346465585701015e-05, "loss": 0.7631, "num_input_tokens_seen": 388377248, "step": 2151 }, { "epoch": 0.23558389665836504, "grad_norm": 1.296656670633994, "learning_rate": 4.345885792621798e-05, "loss": 0.7203, "num_input_tokens_seen": 388578848, "step": 2152 }, { "epoch": 0.23569336872930294, "grad_norm": 1.3092771652926054, "learning_rate": 4.34530578117782e-05, "loss": 0.7063, "num_input_tokens_seen": 388758720, "step": 2153 }, { "epoch": 0.23580284080024083, "grad_norm": 1.1401862254207518, "learning_rate": 4.344725551437695e-05, "loss": 0.6471, "num_input_tokens_seen": 388932544, "step": 2154 }, { "epoch": 0.23591231287117875, "grad_norm": 1.1616560877387163, "learning_rate": 4.344145103470065e-05, "loss": 0.699, "num_input_tokens_seen": 389117792, "step": 2155 }, { "epoch": 0.23602178494211665, "grad_norm": 1.1706618069118464, "learning_rate": 4.343564437343594e-05, "loss": 0.8703, "num_input_tokens_seen": 389288256, "step": 2156 }, { "epoch": 0.23613125701305454, "grad_norm": 1.1953627631895336, "learning_rate": 4.342983553126974e-05, "loss": 0.5769, "num_input_tokens_seen": 389463200, "step": 2157 }, { "epoch": 0.23624072908399243, "grad_norm": 1.118670512950576, "learning_rate": 4.342402450888924e-05, "loss": 0.8318, "num_input_tokens_seen": 389688320, "step": 2158 }, { "epoch": 0.23635020115493036, "grad_norm": 1.2620111368779128, "learning_rate": 4.341821130698185e-05, "loss": 0.6685, "num_input_tokens_seen": 389885664, "step": 2159 }, { "epoch": 0.23645967322586825, "grad_norm": 1.4668809076815188, "learning_rate": 4.341239592623527e-05, "loss": 1.1616, "num_input_tokens_seen": 390085696, "step": 2160 }, { "epoch": 0.23656914529680614, "grad_norm": 1.1714382629375764, "learning_rate": 4.3406578367337466e-05, "loss": 0.6276, "num_input_tokens_seen": 390266688, "step": 2161 }, { "epoch": 0.23667861736774407, "grad_norm": 1.2419705951299653, "learning_rate": 4.340075863097662e-05, "loss": 0.7528, "num_input_tokens_seen": 390484640, "step": 2162 }, { "epoch": 0.23678808943868196, "grad_norm": 1.4222695777794134, "learning_rate": 4.33949367178412e-05, "loss": 0.7423, "num_input_tokens_seen": 390656000, "step": 2163 }, { "epoch": 0.23689756150961985, "grad_norm": 1.1772898883814025, "learning_rate": 4.338911262861993e-05, "loss": 0.5792, "num_input_tokens_seen": 390824000, "step": 2164 }, { "epoch": 0.23700703358055775, "grad_norm": 1.2807853600563, "learning_rate": 4.3383286364001794e-05, "loss": 0.5947, "num_input_tokens_seen": 391000064, "step": 2165 }, { "epoch": 0.23711650565149567, "grad_norm": 1.3992987596564002, "learning_rate": 4.337745792467604e-05, "loss": 0.677, "num_input_tokens_seen": 391172544, "step": 2166 }, { "epoch": 0.23722597772243356, "grad_norm": 1.3216128745482976, "learning_rate": 4.337162731133212e-05, "loss": 0.6756, "num_input_tokens_seen": 391335392, "step": 2167 }, { "epoch": 0.23733544979337146, "grad_norm": 1.2115918760204265, "learning_rate": 4.336579452465982e-05, "loss": 0.6934, "num_input_tokens_seen": 391483232, "step": 2168 }, { "epoch": 0.23744492186430938, "grad_norm": 1.3212814387223468, "learning_rate": 4.335995956534914e-05, "loss": 0.6566, "num_input_tokens_seen": 391667136, "step": 2169 }, { "epoch": 0.23755439393524727, "grad_norm": 1.2893338929214235, "learning_rate": 4.335412243409034e-05, "loss": 0.7516, "num_input_tokens_seen": 391818784, "step": 2170 }, { "epoch": 0.23766386600618516, "grad_norm": 1.1411325553470928, "learning_rate": 4.3348283131573944e-05, "loss": 0.5706, "num_input_tokens_seen": 391975360, "step": 2171 }, { "epoch": 0.2377733380771231, "grad_norm": 1.325941152826079, "learning_rate": 4.3342441658490724e-05, "loss": 0.7973, "num_input_tokens_seen": 392146048, "step": 2172 }, { "epoch": 0.23788281014806098, "grad_norm": 1.2665456555100787, "learning_rate": 4.333659801553173e-05, "loss": 0.7025, "num_input_tokens_seen": 392308672, "step": 2173 }, { "epoch": 0.23799228221899887, "grad_norm": 1.2362720060945238, "learning_rate": 4.3330752203388234e-05, "loss": 0.8281, "num_input_tokens_seen": 392503104, "step": 2174 }, { "epoch": 0.23810175428993677, "grad_norm": 1.2493078699821467, "learning_rate": 4.3324904222751796e-05, "loss": 0.8117, "num_input_tokens_seen": 392684096, "step": 2175 }, { "epoch": 0.2382112263608747, "grad_norm": 1.3133701891173368, "learning_rate": 4.331905407431422e-05, "loss": 0.6373, "num_input_tokens_seen": 392870912, "step": 2176 }, { "epoch": 0.23832069843181258, "grad_norm": 1.3035974574616445, "learning_rate": 4.3313201758767574e-05, "loss": 0.6628, "num_input_tokens_seen": 393057952, "step": 2177 }, { "epoch": 0.23843017050275048, "grad_norm": 1.2915494106763805, "learning_rate": 4.330734727680417e-05, "loss": 0.8184, "num_input_tokens_seen": 393260896, "step": 2178 }, { "epoch": 0.2385396425736884, "grad_norm": 1.315679966736526, "learning_rate": 4.330149062911657e-05, "loss": 0.7815, "num_input_tokens_seen": 393437408, "step": 2179 }, { "epoch": 0.2386491146446263, "grad_norm": 1.2650901700477482, "learning_rate": 4.3295631816397626e-05, "loss": 0.8418, "num_input_tokens_seen": 393630272, "step": 2180 }, { "epoch": 0.23875858671556419, "grad_norm": 1.4008853735363962, "learning_rate": 4.32897708393404e-05, "loss": 0.743, "num_input_tokens_seen": 393832320, "step": 2181 }, { "epoch": 0.23886805878650208, "grad_norm": 1.259266457246225, "learning_rate": 4.328390769863826e-05, "loss": 0.6785, "num_input_tokens_seen": 394022048, "step": 2182 }, { "epoch": 0.23897753085744, "grad_norm": 1.295122272065545, "learning_rate": 4.327804239498479e-05, "loss": 0.6435, "num_input_tokens_seen": 394197216, "step": 2183 }, { "epoch": 0.2390870029283779, "grad_norm": 1.4129031576539952, "learning_rate": 4.3272174929073846e-05, "loss": 0.7714, "num_input_tokens_seen": 394355584, "step": 2184 }, { "epoch": 0.2391964749993158, "grad_norm": 1.2240482973624112, "learning_rate": 4.326630530159954e-05, "loss": 0.6722, "num_input_tokens_seen": 394539488, "step": 2185 }, { "epoch": 0.2393059470702537, "grad_norm": 1.2437833216051783, "learning_rate": 4.3260433513256227e-05, "loss": 0.8328, "num_input_tokens_seen": 394732800, "step": 2186 }, { "epoch": 0.2394154191411916, "grad_norm": 1.1364404396632128, "learning_rate": 4.325455956473854e-05, "loss": 0.6082, "num_input_tokens_seen": 394925888, "step": 2187 }, { "epoch": 0.2395248912121295, "grad_norm": 1.4275065813063181, "learning_rate": 4.324868345674136e-05, "loss": 0.7465, "num_input_tokens_seen": 395087168, "step": 2188 }, { "epoch": 0.23963436328306742, "grad_norm": 1.3596978078323, "learning_rate": 4.324280518995981e-05, "loss": 0.6871, "num_input_tokens_seen": 395254720, "step": 2189 }, { "epoch": 0.2397438353540053, "grad_norm": 1.6859984605178049, "learning_rate": 4.3236924765089284e-05, "loss": 0.8124, "num_input_tokens_seen": 395418016, "step": 2190 }, { "epoch": 0.2398533074249432, "grad_norm": 1.2791805767748534, "learning_rate": 4.323104218282542e-05, "loss": 0.7596, "num_input_tokens_seen": 395592064, "step": 2191 }, { "epoch": 0.2399627794958811, "grad_norm": 1.292081591914452, "learning_rate": 4.322515744386411e-05, "loss": 0.8633, "num_input_tokens_seen": 395768576, "step": 2192 }, { "epoch": 0.24007225156681902, "grad_norm": 1.1867886972814217, "learning_rate": 4.321927054890153e-05, "loss": 0.7009, "num_input_tokens_seen": 395962336, "step": 2193 }, { "epoch": 0.24018172363775692, "grad_norm": 1.347956173458839, "learning_rate": 4.3213381498634056e-05, "loss": 0.871, "num_input_tokens_seen": 396158784, "step": 2194 }, { "epoch": 0.2402911957086948, "grad_norm": 1.2958978395669072, "learning_rate": 4.3207490293758374e-05, "loss": 0.8203, "num_input_tokens_seen": 396340672, "step": 2195 }, { "epoch": 0.24040066777963273, "grad_norm": 1.076061651604967, "learning_rate": 4.32015969349714e-05, "loss": 0.6214, "num_input_tokens_seen": 396516736, "step": 2196 }, { "epoch": 0.24051013985057063, "grad_norm": 1.2704160453383662, "learning_rate": 4.31957014229703e-05, "loss": 0.6514, "num_input_tokens_seen": 396698848, "step": 2197 }, { "epoch": 0.24061961192150852, "grad_norm": 1.1698457568948681, "learning_rate": 4.3189803758452504e-05, "loss": 0.7347, "num_input_tokens_seen": 396894400, "step": 2198 }, { "epoch": 0.24072908399244644, "grad_norm": 1.3069298485402467, "learning_rate": 4.318390394211571e-05, "loss": 0.8095, "num_input_tokens_seen": 397070912, "step": 2199 }, { "epoch": 0.24083855606338433, "grad_norm": 1.156775638593815, "learning_rate": 4.3178001974657836e-05, "loss": 0.6231, "num_input_tokens_seen": 397223904, "step": 2200 }, { "epoch": 0.24094802813432223, "grad_norm": 1.2401012439311498, "learning_rate": 4.317209785677707e-05, "loss": 0.8237, "num_input_tokens_seen": 397432224, "step": 2201 }, { "epoch": 0.24105750020526012, "grad_norm": 1.251357247694325, "learning_rate": 4.3166191589171875e-05, "loss": 0.767, "num_input_tokens_seen": 397635616, "step": 2202 }, { "epoch": 0.24116697227619804, "grad_norm": 1.1810140666786104, "learning_rate": 4.316028317254094e-05, "loss": 0.7334, "num_input_tokens_seen": 397838112, "step": 2203 }, { "epoch": 0.24127644434713594, "grad_norm": 1.0436995442695642, "learning_rate": 4.315437260758322e-05, "loss": 0.6005, "num_input_tokens_seen": 398018432, "step": 2204 }, { "epoch": 0.24138591641807383, "grad_norm": 1.4071511242439017, "learning_rate": 4.3148459894997926e-05, "loss": 1.0094, "num_input_tokens_seen": 398202112, "step": 2205 }, { "epoch": 0.24149538848901175, "grad_norm": 1.3419075688397193, "learning_rate": 4.3142545035484526e-05, "loss": 0.9189, "num_input_tokens_seen": 398374592, "step": 2206 }, { "epoch": 0.24160486055994965, "grad_norm": 1.4143766612718698, "learning_rate": 4.3136628029742735e-05, "loss": 0.9183, "num_input_tokens_seen": 398574848, "step": 2207 }, { "epoch": 0.24171433263088754, "grad_norm": 1.2334311051320126, "learning_rate": 4.3130708878472505e-05, "loss": 0.6346, "num_input_tokens_seen": 398737472, "step": 2208 }, { "epoch": 0.24182380470182543, "grad_norm": 1.2853970287601455, "learning_rate": 4.312478758237408e-05, "loss": 0.6345, "num_input_tokens_seen": 398908160, "step": 2209 }, { "epoch": 0.24193327677276336, "grad_norm": 1.2611077781013291, "learning_rate": 4.3118864142147944e-05, "loss": 0.6804, "num_input_tokens_seen": 399111552, "step": 2210 }, { "epoch": 0.24204274884370125, "grad_norm": 1.4186151885794938, "learning_rate": 4.31129385584948e-05, "loss": 0.9816, "num_input_tokens_seen": 399280000, "step": 2211 }, { "epoch": 0.24215222091463914, "grad_norm": 1.2884729597831481, "learning_rate": 4.310701083211566e-05, "loss": 0.7062, "num_input_tokens_seen": 399433664, "step": 2212 }, { "epoch": 0.24226169298557707, "grad_norm": 1.208352905421857, "learning_rate": 4.310108096371175e-05, "loss": 0.6091, "num_input_tokens_seen": 399615328, "step": 2213 }, { "epoch": 0.24237116505651496, "grad_norm": 1.3674366751171048, "learning_rate": 4.309514895398456e-05, "loss": 0.803, "num_input_tokens_seen": 399776160, "step": 2214 }, { "epoch": 0.24248063712745285, "grad_norm": 1.3571799961544047, "learning_rate": 4.308921480363586e-05, "loss": 0.7244, "num_input_tokens_seen": 399945728, "step": 2215 }, { "epoch": 0.24259010919839077, "grad_norm": 1.3714084101157178, "learning_rate": 4.308327851336762e-05, "loss": 0.6973, "num_input_tokens_seen": 400110368, "step": 2216 }, { "epoch": 0.24269958126932867, "grad_norm": 1.3674439979816237, "learning_rate": 4.307734008388209e-05, "loss": 0.8851, "num_input_tokens_seen": 400268288, "step": 2217 }, { "epoch": 0.24280905334026656, "grad_norm": 1.177973885798868, "learning_rate": 4.307139951588179e-05, "loss": 0.658, "num_input_tokens_seen": 400423520, "step": 2218 }, { "epoch": 0.24291852541120446, "grad_norm": 1.1218615836613726, "learning_rate": 4.306545681006949e-05, "loss": 0.593, "num_input_tokens_seen": 400591072, "step": 2219 }, { "epoch": 0.24302799748214238, "grad_norm": 1.5370068449266316, "learning_rate": 4.305951196714817e-05, "loss": 0.8656, "num_input_tokens_seen": 400777440, "step": 2220 }, { "epoch": 0.24313746955308027, "grad_norm": 1.232717584357471, "learning_rate": 4.305356498782112e-05, "loss": 0.6704, "num_input_tokens_seen": 400961568, "step": 2221 }, { "epoch": 0.24324694162401816, "grad_norm": 1.271000830238125, "learning_rate": 4.304761587279183e-05, "loss": 0.6563, "num_input_tokens_seen": 401110976, "step": 2222 }, { "epoch": 0.2433564136949561, "grad_norm": 1.1922077422217168, "learning_rate": 4.304166462276409e-05, "loss": 0.6782, "num_input_tokens_seen": 401308096, "step": 2223 }, { "epoch": 0.24346588576589398, "grad_norm": 1.2628071817287498, "learning_rate": 4.303571123844191e-05, "loss": 0.8364, "num_input_tokens_seen": 401477664, "step": 2224 }, { "epoch": 0.24357535783683187, "grad_norm": 1.2380293340181174, "learning_rate": 4.3029755720529576e-05, "loss": 0.5803, "num_input_tokens_seen": 401613632, "step": 2225 }, { "epoch": 0.24368482990776977, "grad_norm": 1.340849340559436, "learning_rate": 4.30237980697316e-05, "loss": 0.692, "num_input_tokens_seen": 401804704, "step": 2226 }, { "epoch": 0.2437943019787077, "grad_norm": 1.3132989813012133, "learning_rate": 4.3017838286752776e-05, "loss": 0.8151, "num_input_tokens_seen": 401953216, "step": 2227 }, { "epoch": 0.24390377404964558, "grad_norm": 1.14848228938157, "learning_rate": 4.301187637229812e-05, "loss": 0.5109, "num_input_tokens_seen": 402080672, "step": 2228 }, { "epoch": 0.24401324612058348, "grad_norm": 1.1447875481283554, "learning_rate": 4.300591232707293e-05, "loss": 0.659, "num_input_tokens_seen": 402278688, "step": 2229 }, { "epoch": 0.2441227181915214, "grad_norm": 1.3528104103655592, "learning_rate": 4.2999946151782735e-05, "loss": 0.7858, "num_input_tokens_seen": 402450720, "step": 2230 }, { "epoch": 0.2442321902624593, "grad_norm": 1.2731244223082574, "learning_rate": 4.29939778471333e-05, "loss": 0.7344, "num_input_tokens_seen": 402622976, "step": 2231 }, { "epoch": 0.24434166233339719, "grad_norm": 1.278819453720234, "learning_rate": 4.298800741383071e-05, "loss": 0.6002, "num_input_tokens_seen": 402821440, "step": 2232 }, { "epoch": 0.2444511344043351, "grad_norm": 1.0930308021016408, "learning_rate": 4.298203485258122e-05, "loss": 0.5028, "num_input_tokens_seen": 403008480, "step": 2233 }, { "epoch": 0.244560606475273, "grad_norm": 1.2580163920749152, "learning_rate": 4.2976060164091384e-05, "loss": 0.7309, "num_input_tokens_seen": 403154080, "step": 2234 }, { "epoch": 0.2446700785462109, "grad_norm": 1.2071520797348168, "learning_rate": 4.297008334906798e-05, "loss": 0.5743, "num_input_tokens_seen": 403335296, "step": 2235 }, { "epoch": 0.2447795506171488, "grad_norm": 1.3947900697538975, "learning_rate": 4.2964104408218085e-05, "loss": 0.76, "num_input_tokens_seen": 403505984, "step": 2236 }, { "epoch": 0.2448890226880867, "grad_norm": 1.216437076871346, "learning_rate": 4.295812334224898e-05, "loss": 0.5893, "num_input_tokens_seen": 403695712, "step": 2237 }, { "epoch": 0.2449984947590246, "grad_norm": 1.1414351003396355, "learning_rate": 4.2952140151868204e-05, "loss": 0.6426, "num_input_tokens_seen": 403891264, "step": 2238 }, { "epoch": 0.2451079668299625, "grad_norm": 1.1741009742620356, "learning_rate": 4.294615483778358e-05, "loss": 0.5291, "num_input_tokens_seen": 404044480, "step": 2239 }, { "epoch": 0.24521743890090042, "grad_norm": 1.2182649478601995, "learning_rate": 4.2940167400703134e-05, "loss": 0.7297, "num_input_tokens_seen": 404213824, "step": 2240 }, { "epoch": 0.2453269109718383, "grad_norm": 1.4716240379377659, "learning_rate": 4.293417784133519e-05, "loss": 0.9402, "num_input_tokens_seen": 404383840, "step": 2241 }, { "epoch": 0.2454363830427762, "grad_norm": 1.3382026698285256, "learning_rate": 4.2928186160388286e-05, "loss": 0.6448, "num_input_tokens_seen": 404550720, "step": 2242 }, { "epoch": 0.2455458551137141, "grad_norm": 1.22563418107239, "learning_rate": 4.292219235857123e-05, "loss": 0.5925, "num_input_tokens_seen": 404729472, "step": 2243 }, { "epoch": 0.24565532718465202, "grad_norm": 1.1834256899526188, "learning_rate": 4.291619643659308e-05, "loss": 0.6788, "num_input_tokens_seen": 404905760, "step": 2244 }, { "epoch": 0.24576479925558992, "grad_norm": 1.1104890307142754, "learning_rate": 4.291019839516314e-05, "loss": 0.5877, "num_input_tokens_seen": 405104672, "step": 2245 }, { "epoch": 0.2458742713265278, "grad_norm": 1.3656490280167461, "learning_rate": 4.290419823499098e-05, "loss": 0.8504, "num_input_tokens_seen": 405281184, "step": 2246 }, { "epoch": 0.24598374339746573, "grad_norm": 1.4035259141340473, "learning_rate": 4.289819595678638e-05, "loss": 0.8372, "num_input_tokens_seen": 405455680, "step": 2247 }, { "epoch": 0.24609321546840363, "grad_norm": 1.2512585979398732, "learning_rate": 4.289219156125942e-05, "loss": 0.7548, "num_input_tokens_seen": 405636672, "step": 2248 }, { "epoch": 0.24620268753934152, "grad_norm": 1.2087938593074157, "learning_rate": 4.288618504912041e-05, "loss": 0.7107, "num_input_tokens_seen": 405825952, "step": 2249 }, { "epoch": 0.24631215961027944, "grad_norm": 1.237933088870126, "learning_rate": 4.2880176421079896e-05, "loss": 0.5628, "num_input_tokens_seen": 405968864, "step": 2250 }, { "epoch": 0.24642163168121733, "grad_norm": 1.3306878438706098, "learning_rate": 4.287416567784869e-05, "loss": 0.6264, "num_input_tokens_seen": 406159264, "step": 2251 }, { "epoch": 0.24653110375215523, "grad_norm": 1.29712679153676, "learning_rate": 4.2868152820137855e-05, "loss": 0.7524, "num_input_tokens_seen": 406335328, "step": 2252 }, { "epoch": 0.24664057582309312, "grad_norm": 1.3255309858351783, "learning_rate": 4.28621378486587e-05, "loss": 0.6801, "num_input_tokens_seen": 406509152, "step": 2253 }, { "epoch": 0.24675004789403104, "grad_norm": 1.2643743858769614, "learning_rate": 4.285612076412279e-05, "loss": 0.8152, "num_input_tokens_seen": 406694176, "step": 2254 }, { "epoch": 0.24685951996496894, "grad_norm": 1.0493381616920796, "learning_rate": 4.285010156724192e-05, "loss": 0.5984, "num_input_tokens_seen": 406907200, "step": 2255 }, { "epoch": 0.24696899203590683, "grad_norm": 1.2890008160253388, "learning_rate": 4.2844080258728156e-05, "loss": 0.7221, "num_input_tokens_seen": 407088864, "step": 2256 }, { "epoch": 0.24707846410684475, "grad_norm": 1.1976573110477453, "learning_rate": 4.2838056839293816e-05, "loss": 0.7122, "num_input_tokens_seen": 407253728, "step": 2257 }, { "epoch": 0.24718793617778265, "grad_norm": 1.4156315986530035, "learning_rate": 4.283203130965145e-05, "loss": 0.8215, "num_input_tokens_seen": 407422624, "step": 2258 }, { "epoch": 0.24729740824872054, "grad_norm": 1.3268691464482338, "learning_rate": 4.282600367051387e-05, "loss": 0.7149, "num_input_tokens_seen": 407548960, "step": 2259 }, { "epoch": 0.24740688031965843, "grad_norm": 1.1891034995190113, "learning_rate": 4.2819973922594134e-05, "loss": 0.6445, "num_input_tokens_seen": 407739584, "step": 2260 }, { "epoch": 0.24751635239059636, "grad_norm": 1.1679583392747765, "learning_rate": 4.281394206660555e-05, "loss": 0.7869, "num_input_tokens_seen": 407932448, "step": 2261 }, { "epoch": 0.24762582446153425, "grad_norm": 1.204358051143178, "learning_rate": 4.2807908103261674e-05, "loss": 0.6026, "num_input_tokens_seen": 408093056, "step": 2262 }, { "epoch": 0.24773529653247214, "grad_norm": 1.308966694538556, "learning_rate": 4.280187203327631e-05, "loss": 0.8459, "num_input_tokens_seen": 408274048, "step": 2263 }, { "epoch": 0.24784476860341006, "grad_norm": 1.2323544056072475, "learning_rate": 4.2795833857363515e-05, "loss": 0.6728, "num_input_tokens_seen": 408434432, "step": 2264 }, { "epoch": 0.24795424067434796, "grad_norm": 1.1670916271539225, "learning_rate": 4.2789793576237594e-05, "loss": 0.6107, "num_input_tokens_seen": 408621024, "step": 2265 }, { "epoch": 0.24806371274528585, "grad_norm": 1.3685717741012706, "learning_rate": 4.278375119061311e-05, "loss": 0.6844, "num_input_tokens_seen": 408799776, "step": 2266 }, { "epoch": 0.24817318481622377, "grad_norm": 1.238579699052938, "learning_rate": 4.2777706701204846e-05, "loss": 0.7267, "num_input_tokens_seen": 408999360, "step": 2267 }, { "epoch": 0.24828265688716167, "grad_norm": 1.2141715394766448, "learning_rate": 4.277166010872787e-05, "loss": 0.7287, "num_input_tokens_seen": 409167584, "step": 2268 }, { "epoch": 0.24839212895809956, "grad_norm": 2.493470438725189, "learning_rate": 4.276561141389748e-05, "loss": 0.8652, "num_input_tokens_seen": 409370080, "step": 2269 }, { "epoch": 0.24850160102903746, "grad_norm": 1.3037691824828568, "learning_rate": 4.275956061742921e-05, "loss": 0.5777, "num_input_tokens_seen": 409536064, "step": 2270 }, { "epoch": 0.24861107309997538, "grad_norm": 1.4918198057591354, "learning_rate": 4.275350772003888e-05, "loss": 0.7637, "num_input_tokens_seen": 409689952, "step": 2271 }, { "epoch": 0.24872054517091327, "grad_norm": 1.4829121659673161, "learning_rate": 4.2747452722442524e-05, "loss": 0.9454, "num_input_tokens_seen": 409857728, "step": 2272 }, { "epoch": 0.24883001724185116, "grad_norm": 1.19746613807028, "learning_rate": 4.274139562535643e-05, "loss": 0.6154, "num_input_tokens_seen": 410054624, "step": 2273 }, { "epoch": 0.2489394893127891, "grad_norm": 1.2549380041668865, "learning_rate": 4.2735336429497166e-05, "loss": 0.8847, "num_input_tokens_seen": 410273696, "step": 2274 }, { "epoch": 0.24904896138372698, "grad_norm": 1.2801712891589125, "learning_rate": 4.272927513558149e-05, "loss": 0.7859, "num_input_tokens_seen": 410477760, "step": 2275 }, { "epoch": 0.24915843345466487, "grad_norm": 1.351834793445274, "learning_rate": 4.272321174432646e-05, "loss": 0.8957, "num_input_tokens_seen": 410657856, "step": 2276 }, { "epoch": 0.24926790552560277, "grad_norm": 1.3297953704219834, "learning_rate": 4.271714625644937e-05, "loss": 0.7914, "num_input_tokens_seen": 410850720, "step": 2277 }, { "epoch": 0.2493773775965407, "grad_norm": 1.2411642598545067, "learning_rate": 4.271107867266775e-05, "loss": 0.8169, "num_input_tokens_seen": 411024320, "step": 2278 }, { "epoch": 0.24948684966747858, "grad_norm": 1.444068662146947, "learning_rate": 4.270500899369937e-05, "loss": 0.8569, "num_input_tokens_seen": 411218080, "step": 2279 }, { "epoch": 0.24959632173841648, "grad_norm": 1.1905821404540007, "learning_rate": 4.269893722026228e-05, "loss": 0.6996, "num_input_tokens_seen": 411384512, "step": 2280 }, { "epoch": 0.2497057938093544, "grad_norm": 1.3365376334723487, "learning_rate": 4.2692863353074745e-05, "loss": 0.6996, "num_input_tokens_seen": 411570880, "step": 2281 }, { "epoch": 0.2498152658802923, "grad_norm": 1.2141981463953986, "learning_rate": 4.26867873928553e-05, "loss": 0.6311, "num_input_tokens_seen": 411710208, "step": 2282 }, { "epoch": 0.24992473795123019, "grad_norm": 1.1650472994031171, "learning_rate": 4.2680709340322725e-05, "loss": 0.6481, "num_input_tokens_seen": 411864096, "step": 2283 }, { "epoch": 0.2500342100221681, "grad_norm": 1.3111557207133524, "learning_rate": 4.2674629196196025e-05, "loss": 0.9176, "num_input_tokens_seen": 412073312, "step": 2284 }, { "epoch": 0.250143682093106, "grad_norm": 1.1905250667574967, "learning_rate": 4.266854696119449e-05, "loss": 0.5857, "num_input_tokens_seen": 412282528, "step": 2285 }, { "epoch": 0.2502531541640439, "grad_norm": 1.4145614839569651, "learning_rate": 4.266246263603761e-05, "loss": 0.9749, "num_input_tokens_seen": 412494880, "step": 2286 }, { "epoch": 0.2503626262349818, "grad_norm": 1.4019027909250885, "learning_rate": 4.2656376221445185e-05, "loss": 0.8228, "num_input_tokens_seen": 412664000, "step": 2287 }, { "epoch": 0.2504720983059197, "grad_norm": 1.4022743824243227, "learning_rate": 4.265028771813719e-05, "loss": 0.7435, "num_input_tokens_seen": 412849920, "step": 2288 }, { "epoch": 0.25058157037685763, "grad_norm": 1.326928845470652, "learning_rate": 4.2644197126833906e-05, "loss": 0.7253, "num_input_tokens_seen": 413002912, "step": 2289 }, { "epoch": 0.2506910424477955, "grad_norm": 1.3517041435330022, "learning_rate": 4.263810444825583e-05, "loss": 0.8337, "num_input_tokens_seen": 413161056, "step": 2290 }, { "epoch": 0.2508005145187334, "grad_norm": 1.2084303466174777, "learning_rate": 4.2632009683123716e-05, "loss": 0.7558, "num_input_tokens_seen": 413329952, "step": 2291 }, { "epoch": 0.2509099865896713, "grad_norm": 1.1784177097792248, "learning_rate": 4.262591283215857e-05, "loss": 0.5995, "num_input_tokens_seen": 413518560, "step": 2292 }, { "epoch": 0.2510194586606092, "grad_norm": 1.303086080358741, "learning_rate": 4.261981389608162e-05, "loss": 0.6912, "num_input_tokens_seen": 413678720, "step": 2293 }, { "epoch": 0.25112893073154713, "grad_norm": 1.4017635834867315, "learning_rate": 4.2613712875614374e-05, "loss": 0.6114, "num_input_tokens_seen": 413848064, "step": 2294 }, { "epoch": 0.251238402802485, "grad_norm": 1.3374187890469602, "learning_rate": 4.260760977147858e-05, "loss": 0.7621, "num_input_tokens_seen": 414013152, "step": 2295 }, { "epoch": 0.2513478748734229, "grad_norm": 1.2058008336079344, "learning_rate": 4.260150458439619e-05, "loss": 0.7083, "num_input_tokens_seen": 414172640, "step": 2296 }, { "epoch": 0.25145734694436084, "grad_norm": 1.3132871356494098, "learning_rate": 4.259539731508947e-05, "loss": 0.7597, "num_input_tokens_seen": 414339968, "step": 2297 }, { "epoch": 0.2515668190152987, "grad_norm": 1.2624898154726074, "learning_rate": 4.2589287964280874e-05, "loss": 0.6881, "num_input_tokens_seen": 414521856, "step": 2298 }, { "epoch": 0.2516762910862366, "grad_norm": 1.3079258822897946, "learning_rate": 4.2583176532693136e-05, "loss": 0.7224, "num_input_tokens_seen": 414725248, "step": 2299 }, { "epoch": 0.25178576315717455, "grad_norm": 1.3403070388437208, "learning_rate": 4.257706302104924e-05, "loss": 0.7737, "num_input_tokens_seen": 414920800, "step": 2300 }, { "epoch": 0.2518952352281124, "grad_norm": 1.1973811680361806, "learning_rate": 4.2570947430072384e-05, "loss": 0.6697, "num_input_tokens_seen": 415101792, "step": 2301 }, { "epoch": 0.25200470729905033, "grad_norm": 1.2049761316301921, "learning_rate": 4.256482976048603e-05, "loss": 0.6812, "num_input_tokens_seen": 415268448, "step": 2302 }, { "epoch": 0.25211417936998826, "grad_norm": 1.3120316169232173, "learning_rate": 4.2558710013013906e-05, "loss": 0.521, "num_input_tokens_seen": 415420320, "step": 2303 }, { "epoch": 0.2522236514409261, "grad_norm": 1.3369594034067478, "learning_rate": 4.255258818837994e-05, "loss": 0.9001, "num_input_tokens_seen": 415593024, "step": 2304 }, { "epoch": 0.25233312351186404, "grad_norm": 1.13225499608173, "learning_rate": 4.254646428730835e-05, "loss": 0.5922, "num_input_tokens_seen": 415806272, "step": 2305 }, { "epoch": 0.25244259558280197, "grad_norm": 1.1647483290254814, "learning_rate": 4.254033831052359e-05, "loss": 0.5669, "num_input_tokens_seen": 415979200, "step": 2306 }, { "epoch": 0.25255206765373983, "grad_norm": 1.1992642769885173, "learning_rate": 4.253421025875033e-05, "loss": 0.6982, "num_input_tokens_seen": 416184384, "step": 2307 }, { "epoch": 0.25266153972467775, "grad_norm": 1.494033310969504, "learning_rate": 4.252808013271351e-05, "loss": 0.71, "num_input_tokens_seen": 416343648, "step": 2308 }, { "epoch": 0.2527710117956156, "grad_norm": 1.3583662740929638, "learning_rate": 4.252194793313833e-05, "loss": 0.8952, "num_input_tokens_seen": 416525984, "step": 2309 }, { "epoch": 0.25288048386655354, "grad_norm": 1.3855949468799145, "learning_rate": 4.25158136607502e-05, "loss": 0.6391, "num_input_tokens_seen": 416680544, "step": 2310 }, { "epoch": 0.25298995593749146, "grad_norm": 1.1563963926276855, "learning_rate": 4.25096773162748e-05, "loss": 0.609, "num_input_tokens_seen": 416866240, "step": 2311 }, { "epoch": 0.25309942800842933, "grad_norm": 1.2764094957900436, "learning_rate": 4.2503538900438044e-05, "loss": 0.7208, "num_input_tokens_seen": 417051712, "step": 2312 }, { "epoch": 0.25320890007936725, "grad_norm": 1.1209064287272044, "learning_rate": 4.2497398413966094e-05, "loss": 0.5881, "num_input_tokens_seen": 417230688, "step": 2313 }, { "epoch": 0.25331837215030517, "grad_norm": 1.2442277192153348, "learning_rate": 4.249125585758537e-05, "loss": 0.6289, "num_input_tokens_seen": 417422880, "step": 2314 }, { "epoch": 0.25342784422124304, "grad_norm": 1.3349243694841806, "learning_rate": 4.248511123202251e-05, "loss": 0.6618, "num_input_tokens_seen": 417602976, "step": 2315 }, { "epoch": 0.25353731629218096, "grad_norm": 1.234151409517641, "learning_rate": 4.247896453800442e-05, "loss": 0.7165, "num_input_tokens_seen": 417800544, "step": 2316 }, { "epoch": 0.2536467883631189, "grad_norm": 1.3639631779222796, "learning_rate": 4.247281577625824e-05, "loss": 0.6589, "num_input_tokens_seen": 417988256, "step": 2317 }, { "epoch": 0.25375626043405675, "grad_norm": 1.436147136142105, "learning_rate": 4.2466664947511356e-05, "loss": 0.8643, "num_input_tokens_seen": 418140800, "step": 2318 }, { "epoch": 0.25386573250499467, "grad_norm": 1.3004939131368816, "learning_rate": 4.246051205249139e-05, "loss": 0.764, "num_input_tokens_seen": 418354720, "step": 2319 }, { "epoch": 0.2539752045759326, "grad_norm": 1.2467067729309143, "learning_rate": 4.2454357091926236e-05, "loss": 0.676, "num_input_tokens_seen": 418517792, "step": 2320 }, { "epoch": 0.25408467664687046, "grad_norm": 1.1789025733238208, "learning_rate": 4.244820006654401e-05, "loss": 0.5762, "num_input_tokens_seen": 418690944, "step": 2321 }, { "epoch": 0.2541941487178084, "grad_norm": 1.1725802524650795, "learning_rate": 4.244204097707306e-05, "loss": 0.7981, "num_input_tokens_seen": 418912480, "step": 2322 }, { "epoch": 0.2543036207887463, "grad_norm": 1.7746157324255347, "learning_rate": 4.243587982424201e-05, "loss": 0.701, "num_input_tokens_seen": 419089888, "step": 2323 }, { "epoch": 0.25441309285968416, "grad_norm": 1.2247528527517242, "learning_rate": 4.242971660877971e-05, "loss": 0.6268, "num_input_tokens_seen": 419246240, "step": 2324 }, { "epoch": 0.2545225649306221, "grad_norm": 1.1627402252232586, "learning_rate": 4.242355133141525e-05, "loss": 0.6317, "num_input_tokens_seen": 419442688, "step": 2325 }, { "epoch": 0.25463203700155995, "grad_norm": 1.299591253284433, "learning_rate": 4.241738399287798e-05, "loss": 0.6422, "num_input_tokens_seen": 419623680, "step": 2326 }, { "epoch": 0.2547415090724979, "grad_norm": 1.2376068396638833, "learning_rate": 4.2411214593897486e-05, "loss": 0.7002, "num_input_tokens_seen": 419819008, "step": 2327 }, { "epoch": 0.2548509811434358, "grad_norm": 1.374378974426546, "learning_rate": 4.2405043135203584e-05, "loss": 0.9622, "num_input_tokens_seen": 420017696, "step": 2328 }, { "epoch": 0.25496045321437366, "grad_norm": 1.3429622559004835, "learning_rate": 4.239886961752635e-05, "loss": 0.7019, "num_input_tokens_seen": 420210784, "step": 2329 }, { "epoch": 0.2550699252853116, "grad_norm": 1.3405364000613809, "learning_rate": 4.23926940415961e-05, "loss": 0.6885, "num_input_tokens_seen": 420407904, "step": 2330 }, { "epoch": 0.2551793973562495, "grad_norm": 1.3000967613302523, "learning_rate": 4.2386516408143404e-05, "loss": 0.5546, "num_input_tokens_seen": 420589120, "step": 2331 }, { "epoch": 0.25528886942718737, "grad_norm": 1.363385303100592, "learning_rate": 4.2380336717899044e-05, "loss": 0.6421, "num_input_tokens_seen": 420744352, "step": 2332 }, { "epoch": 0.2553983414981253, "grad_norm": 1.2822787643928024, "learning_rate": 4.237415497159408e-05, "loss": 0.6979, "num_input_tokens_seen": 420938112, "step": 2333 }, { "epoch": 0.2555078135690632, "grad_norm": 1.353006330400588, "learning_rate": 4.2367971169959796e-05, "loss": 0.6976, "num_input_tokens_seen": 421084160, "step": 2334 }, { "epoch": 0.2556172856400011, "grad_norm": 1.100682224776786, "learning_rate": 4.2361785313727726e-05, "loss": 0.6831, "num_input_tokens_seen": 421281728, "step": 2335 }, { "epoch": 0.255726757710939, "grad_norm": 1.2725357888515674, "learning_rate": 4.235559740362964e-05, "loss": 0.8004, "num_input_tokens_seen": 421481984, "step": 2336 }, { "epoch": 0.2558362297818769, "grad_norm": 1.2939242148928616, "learning_rate": 4.2349407440397566e-05, "loss": 0.7721, "num_input_tokens_seen": 421669696, "step": 2337 }, { "epoch": 0.2559457018528148, "grad_norm": 1.1614352282405564, "learning_rate": 4.234321542476375e-05, "loss": 0.6656, "num_input_tokens_seen": 421873312, "step": 2338 }, { "epoch": 0.2560551739237527, "grad_norm": 1.0998466059629655, "learning_rate": 4.2337021357460706e-05, "loss": 0.6814, "num_input_tokens_seen": 422064832, "step": 2339 }, { "epoch": 0.25616464599469063, "grad_norm": 1.2425171516662268, "learning_rate": 4.2330825239221186e-05, "loss": 0.8215, "num_input_tokens_seen": 422230144, "step": 2340 }, { "epoch": 0.2562741180656285, "grad_norm": 1.3450512906047876, "learning_rate": 4.232462707077816e-05, "loss": 0.8495, "num_input_tokens_seen": 422457280, "step": 2341 }, { "epoch": 0.2563835901365664, "grad_norm": 1.3525998759455655, "learning_rate": 4.231842685286488e-05, "loss": 0.9702, "num_input_tokens_seen": 422669856, "step": 2342 }, { "epoch": 0.2564930622075043, "grad_norm": 1.247067779323362, "learning_rate": 4.23122245862148e-05, "loss": 0.5781, "num_input_tokens_seen": 422831136, "step": 2343 }, { "epoch": 0.2566025342784422, "grad_norm": 1.2774083765695645, "learning_rate": 4.2306020271561656e-05, "loss": 0.8055, "num_input_tokens_seen": 423015488, "step": 2344 }, { "epoch": 0.25671200634938013, "grad_norm": 1.2178483708996242, "learning_rate": 4.2299813909639395e-05, "loss": 0.6582, "num_input_tokens_seen": 423209024, "step": 2345 }, { "epoch": 0.256821478420318, "grad_norm": 1.3047323162125246, "learning_rate": 4.229360550118222e-05, "loss": 0.7001, "num_input_tokens_seen": 423365824, "step": 2346 }, { "epoch": 0.2569309504912559, "grad_norm": 1.3187918850723994, "learning_rate": 4.228739504692457e-05, "loss": 0.6909, "num_input_tokens_seen": 423554880, "step": 2347 }, { "epoch": 0.25704042256219384, "grad_norm": 1.209218386186564, "learning_rate": 4.228118254760114e-05, "loss": 0.5869, "num_input_tokens_seen": 423735872, "step": 2348 }, { "epoch": 0.2571498946331317, "grad_norm": 1.3227982455539287, "learning_rate": 4.2274968003946845e-05, "loss": 0.7899, "num_input_tokens_seen": 423918880, "step": 2349 }, { "epoch": 0.2572593667040696, "grad_norm": 1.2755625026257469, "learning_rate": 4.226875141669686e-05, "loss": 0.6313, "num_input_tokens_seen": 424086656, "step": 2350 }, { "epoch": 0.25736883877500755, "grad_norm": 1.2869407531777197, "learning_rate": 4.22625327865866e-05, "loss": 0.6408, "num_input_tokens_seen": 424259136, "step": 2351 }, { "epoch": 0.2574783108459454, "grad_norm": 1.332449659931827, "learning_rate": 4.22563121143517e-05, "loss": 0.6255, "num_input_tokens_seen": 424428704, "step": 2352 }, { "epoch": 0.25758778291688333, "grad_norm": 1.3073035413144496, "learning_rate": 4.225008940072808e-05, "loss": 0.8997, "num_input_tokens_seen": 424625824, "step": 2353 }, { "epoch": 0.25769725498782126, "grad_norm": 1.3787019054086187, "learning_rate": 4.224386464645186e-05, "loss": 0.8799, "num_input_tokens_seen": 424826304, "step": 2354 }, { "epoch": 0.2578067270587591, "grad_norm": 1.1793700037265669, "learning_rate": 4.22376378522594e-05, "loss": 0.6415, "num_input_tokens_seen": 424968320, "step": 2355 }, { "epoch": 0.25791619912969704, "grad_norm": 1.1141035289084318, "learning_rate": 4.223140901888736e-05, "loss": 0.6528, "num_input_tokens_seen": 425156480, "step": 2356 }, { "epoch": 0.25802567120063497, "grad_norm": 1.3475456745283945, "learning_rate": 4.222517814707255e-05, "loss": 0.959, "num_input_tokens_seen": 425366368, "step": 2357 }, { "epoch": 0.25813514327157283, "grad_norm": 1.2805017286688931, "learning_rate": 4.221894523755211e-05, "loss": 0.6475, "num_input_tokens_seen": 425564608, "step": 2358 }, { "epoch": 0.25824461534251075, "grad_norm": 1.2488171703155262, "learning_rate": 4.2212710291063354e-05, "loss": 0.5597, "num_input_tokens_seen": 425725888, "step": 2359 }, { "epoch": 0.2583540874134486, "grad_norm": 1.2852410285605425, "learning_rate": 4.220647330834389e-05, "loss": 0.7329, "num_input_tokens_seen": 425882688, "step": 2360 }, { "epoch": 0.25846355948438654, "grad_norm": 1.3134404400818234, "learning_rate": 4.2200234290131515e-05, "loss": 0.679, "num_input_tokens_seen": 426037024, "step": 2361 }, { "epoch": 0.25857303155532446, "grad_norm": 1.1855524556313002, "learning_rate": 4.219399323716431e-05, "loss": 0.6232, "num_input_tokens_seen": 426195392, "step": 2362 }, { "epoch": 0.25868250362626233, "grad_norm": 1.2722987600807265, "learning_rate": 4.2187750150180574e-05, "loss": 0.7653, "num_input_tokens_seen": 426348160, "step": 2363 }, { "epoch": 0.25879197569720025, "grad_norm": 1.3772795161839038, "learning_rate": 4.2181505029918847e-05, "loss": 0.797, "num_input_tokens_seen": 426523328, "step": 2364 }, { "epoch": 0.25890144776813817, "grad_norm": 1.4562845643488913, "learning_rate": 4.217525787711792e-05, "loss": 0.9774, "num_input_tokens_seen": 426702304, "step": 2365 }, { "epoch": 0.25901091983907604, "grad_norm": 1.3340778457713331, "learning_rate": 4.216900869251683e-05, "loss": 0.7212, "num_input_tokens_seen": 426879936, "step": 2366 }, { "epoch": 0.25912039191001396, "grad_norm": 1.1478582984683832, "learning_rate": 4.216275747685482e-05, "loss": 0.6235, "num_input_tokens_seen": 427081760, "step": 2367 }, { "epoch": 0.2592298639809519, "grad_norm": 1.233628252355038, "learning_rate": 4.215650423087142e-05, "loss": 0.6489, "num_input_tokens_seen": 427258944, "step": 2368 }, { "epoch": 0.25933933605188975, "grad_norm": 1.326555019757464, "learning_rate": 4.215024895530636e-05, "loss": 0.6584, "num_input_tokens_seen": 427439488, "step": 2369 }, { "epoch": 0.25944880812282767, "grad_norm": 1.184604729186238, "learning_rate": 4.2143991650899646e-05, "loss": 0.6371, "num_input_tokens_seen": 427616896, "step": 2370 }, { "epoch": 0.2595582801937656, "grad_norm": 1.21539416830595, "learning_rate": 4.213773231839149e-05, "loss": 0.6665, "num_input_tokens_seen": 427818944, "step": 2371 }, { "epoch": 0.25966775226470346, "grad_norm": 1.3238199809508548, "learning_rate": 4.213147095852235e-05, "loss": 0.7532, "num_input_tokens_seen": 428003520, "step": 2372 }, { "epoch": 0.2597772243356414, "grad_norm": 1.2433234245239533, "learning_rate": 4.2125207572032954e-05, "loss": 0.7031, "num_input_tokens_seen": 428199296, "step": 2373 }, { "epoch": 0.2598866964065793, "grad_norm": 1.24933950139996, "learning_rate": 4.211894215966424e-05, "loss": 0.6883, "num_input_tokens_seen": 428368192, "step": 2374 }, { "epoch": 0.25999616847751716, "grad_norm": 1.3910499095039315, "learning_rate": 4.21126747221574e-05, "loss": 0.7356, "num_input_tokens_seen": 428549408, "step": 2375 }, { "epoch": 0.2601056405484551, "grad_norm": 1.143442202149445, "learning_rate": 4.210640526025384e-05, "loss": 0.6502, "num_input_tokens_seen": 428732192, "step": 2376 }, { "epoch": 0.26021511261939295, "grad_norm": 1.2730939904969807, "learning_rate": 4.210013377469525e-05, "loss": 0.7482, "num_input_tokens_seen": 428913184, "step": 2377 }, { "epoch": 0.2603245846903309, "grad_norm": 1.237801636434149, "learning_rate": 4.209386026622353e-05, "loss": 0.584, "num_input_tokens_seen": 429097984, "step": 2378 }, { "epoch": 0.2604340567612688, "grad_norm": 1.2637303617406122, "learning_rate": 4.208758473558081e-05, "loss": 0.5518, "num_input_tokens_seen": 429265760, "step": 2379 }, { "epoch": 0.26054352883220666, "grad_norm": 1.3330901738807424, "learning_rate": 4.208130718350948e-05, "loss": 0.7819, "num_input_tokens_seen": 429449888, "step": 2380 }, { "epoch": 0.2606530009031446, "grad_norm": 1.297881689360643, "learning_rate": 4.207502761075217e-05, "loss": 0.7541, "num_input_tokens_seen": 429643648, "step": 2381 }, { "epoch": 0.2607624729740825, "grad_norm": 1.2356832307824093, "learning_rate": 4.206874601805173e-05, "loss": 0.5879, "num_input_tokens_seen": 429833824, "step": 2382 }, { "epoch": 0.26087194504502037, "grad_norm": 1.28486096546544, "learning_rate": 4.206246240615127e-05, "loss": 0.8328, "num_input_tokens_seen": 429999136, "step": 2383 }, { "epoch": 0.2609814171159583, "grad_norm": 1.2937498931147582, "learning_rate": 4.205617677579413e-05, "loss": 0.8094, "num_input_tokens_seen": 430203200, "step": 2384 }, { "epoch": 0.2610908891868962, "grad_norm": 1.1989027967717725, "learning_rate": 4.2049889127723876e-05, "loss": 0.6311, "num_input_tokens_seen": 430389792, "step": 2385 }, { "epoch": 0.2612003612578341, "grad_norm": 1.1384310590387465, "learning_rate": 4.204359946268432e-05, "loss": 0.5677, "num_input_tokens_seen": 430579520, "step": 2386 }, { "epoch": 0.261309833328772, "grad_norm": 1.353841987224983, "learning_rate": 4.203730778141955e-05, "loss": 0.8018, "num_input_tokens_seen": 430761184, "step": 2387 }, { "epoch": 0.2614193053997099, "grad_norm": 1.1278072934811372, "learning_rate": 4.203101408467383e-05, "loss": 0.5371, "num_input_tokens_seen": 430934112, "step": 2388 }, { "epoch": 0.2615287774706478, "grad_norm": 1.3312572120865152, "learning_rate": 4.2024718373191705e-05, "loss": 0.9066, "num_input_tokens_seen": 431126304, "step": 2389 }, { "epoch": 0.2616382495415857, "grad_norm": 1.3275409985262017, "learning_rate": 4.201842064771794e-05, "loss": 0.7265, "num_input_tokens_seen": 431313344, "step": 2390 }, { "epoch": 0.26174772161252363, "grad_norm": 1.1992745650532775, "learning_rate": 4.2012120908997546e-05, "loss": 0.7215, "num_input_tokens_seen": 431490752, "step": 2391 }, { "epoch": 0.2618571936834615, "grad_norm": 1.2627846670729703, "learning_rate": 4.200581915777577e-05, "loss": 0.7194, "num_input_tokens_seen": 431678688, "step": 2392 }, { "epoch": 0.2619666657543994, "grad_norm": 1.1432753302335006, "learning_rate": 4.19995153947981e-05, "loss": 0.5698, "num_input_tokens_seen": 431856768, "step": 2393 }, { "epoch": 0.2620761378253373, "grad_norm": 1.2780607585105093, "learning_rate": 4.1993209620810255e-05, "loss": 0.6913, "num_input_tokens_seen": 432067328, "step": 2394 }, { "epoch": 0.2621856098962752, "grad_norm": 1.2190360687214599, "learning_rate": 4.19869018365582e-05, "loss": 0.7507, "num_input_tokens_seen": 432260640, "step": 2395 }, { "epoch": 0.26229508196721313, "grad_norm": 1.1709727056141812, "learning_rate": 4.198059204278813e-05, "loss": 0.5902, "num_input_tokens_seen": 432423264, "step": 2396 }, { "epoch": 0.262404554038151, "grad_norm": 1.23118570421296, "learning_rate": 4.1974280240246477e-05, "loss": 0.6058, "num_input_tokens_seen": 432619264, "step": 2397 }, { "epoch": 0.2625140261090889, "grad_norm": 1.2196452447372776, "learning_rate": 4.196796642967992e-05, "loss": 0.6659, "num_input_tokens_seen": 432791744, "step": 2398 }, { "epoch": 0.26262349818002684, "grad_norm": 1.4836063201677112, "learning_rate": 4.1961650611835376e-05, "loss": 1.0284, "num_input_tokens_seen": 432996704, "step": 2399 }, { "epoch": 0.2627329702509647, "grad_norm": 1.241499704404712, "learning_rate": 4.195533278745999e-05, "loss": 0.6089, "num_input_tokens_seen": 433174336, "step": 2400 }, { "epoch": 0.2628424423219026, "grad_norm": 1.285455083453299, "learning_rate": 4.194901295730115e-05, "loss": 0.6598, "num_input_tokens_seen": 433387584, "step": 2401 }, { "epoch": 0.26295191439284055, "grad_norm": 1.178975528327178, "learning_rate": 4.1942691122106484e-05, "loss": 0.6033, "num_input_tokens_seen": 433560960, "step": 2402 }, { "epoch": 0.2630613864637784, "grad_norm": 1.251809479421965, "learning_rate": 4.1936367282623836e-05, "loss": 0.6755, "num_input_tokens_seen": 433741952, "step": 2403 }, { "epoch": 0.26317085853471633, "grad_norm": 1.3002209072221207, "learning_rate": 4.1930041439601316e-05, "loss": 0.8841, "num_input_tokens_seen": 433956096, "step": 2404 }, { "epoch": 0.26328033060565426, "grad_norm": 1.251091575844587, "learning_rate": 4.192371359378726e-05, "loss": 0.6934, "num_input_tokens_seen": 434138208, "step": 2405 }, { "epoch": 0.2633898026765921, "grad_norm": 1.189635915757652, "learning_rate": 4.191738374593024e-05, "loss": 0.6385, "num_input_tokens_seen": 434327264, "step": 2406 }, { "epoch": 0.26349927474753004, "grad_norm": 1.217258812672477, "learning_rate": 4.191105189677906e-05, "loss": 0.5218, "num_input_tokens_seen": 434476224, "step": 2407 }, { "epoch": 0.26360874681846796, "grad_norm": 1.2854442331747313, "learning_rate": 4.190471804708278e-05, "loss": 0.861, "num_input_tokens_seen": 434660800, "step": 2408 }, { "epoch": 0.26371821888940583, "grad_norm": 1.1461597613441483, "learning_rate": 4.189838219759066e-05, "loss": 0.5958, "num_input_tokens_seen": 434808192, "step": 2409 }, { "epoch": 0.26382769096034375, "grad_norm": 1.3584017722707988, "learning_rate": 4.1892044349052234e-05, "loss": 0.8483, "num_input_tokens_seen": 434996576, "step": 2410 }, { "epoch": 0.2639371630312816, "grad_norm": 1.3342416719030565, "learning_rate": 4.1885704502217255e-05, "loss": 0.9089, "num_input_tokens_seen": 435221248, "step": 2411 }, { "epoch": 0.26404663510221954, "grad_norm": 1.398078190340092, "learning_rate": 4.187936265783571e-05, "loss": 0.6613, "num_input_tokens_seen": 435386112, "step": 2412 }, { "epoch": 0.26415610717315746, "grad_norm": 1.2768086937237906, "learning_rate": 4.187301881665783e-05, "loss": 0.7273, "num_input_tokens_seen": 435560832, "step": 2413 }, { "epoch": 0.2642655792440953, "grad_norm": 1.3420850508184536, "learning_rate": 4.1866672979434084e-05, "loss": 0.8307, "num_input_tokens_seen": 435733312, "step": 2414 }, { "epoch": 0.26437505131503325, "grad_norm": 1.180928114777798, "learning_rate": 4.1860325146915166e-05, "loss": 0.7134, "num_input_tokens_seen": 435938048, "step": 2415 }, { "epoch": 0.26448452338597117, "grad_norm": 1.3389608070814043, "learning_rate": 4.1853975319852015e-05, "loss": 0.7758, "num_input_tokens_seen": 436129568, "step": 2416 }, { "epoch": 0.26459399545690904, "grad_norm": 1.2289815483135929, "learning_rate": 4.18476234989958e-05, "loss": 0.6861, "num_input_tokens_seen": 436316608, "step": 2417 }, { "epoch": 0.26470346752784696, "grad_norm": 1.213633925351378, "learning_rate": 4.184126968509794e-05, "loss": 0.6837, "num_input_tokens_seen": 436530304, "step": 2418 }, { "epoch": 0.2648129395987849, "grad_norm": 1.3016797419358193, "learning_rate": 4.183491387891007e-05, "loss": 0.7014, "num_input_tokens_seen": 436719808, "step": 2419 }, { "epoch": 0.26492241166972275, "grad_norm": 1.1456228525294578, "learning_rate": 4.1828556081184064e-05, "loss": 0.8221, "num_input_tokens_seen": 436912672, "step": 2420 }, { "epoch": 0.26503188374066067, "grad_norm": 1.1635479397833737, "learning_rate": 4.1822196292672045e-05, "loss": 0.5469, "num_input_tokens_seen": 437094336, "step": 2421 }, { "epoch": 0.2651413558115986, "grad_norm": 1.1599572521587302, "learning_rate": 4.1815834514126366e-05, "loss": 0.5706, "num_input_tokens_seen": 437259200, "step": 2422 }, { "epoch": 0.26525082788253646, "grad_norm": 1.3746469217751511, "learning_rate": 4.180947074629961e-05, "loss": 0.8705, "num_input_tokens_seen": 437459904, "step": 2423 }, { "epoch": 0.2653602999534744, "grad_norm": 1.3427182605850918, "learning_rate": 4.1803104989944594e-05, "loss": 0.7482, "num_input_tokens_seen": 437634176, "step": 2424 }, { "epoch": 0.2654697720244123, "grad_norm": 1.37733906416764, "learning_rate": 4.1796737245814396e-05, "loss": 0.8687, "num_input_tokens_seen": 437843616, "step": 2425 }, { "epoch": 0.26557924409535016, "grad_norm": 1.381497377363457, "learning_rate": 4.1790367514662276e-05, "loss": 0.8499, "num_input_tokens_seen": 438044096, "step": 2426 }, { "epoch": 0.2656887161662881, "grad_norm": 1.3989086103467352, "learning_rate": 4.178399579724178e-05, "loss": 0.6385, "num_input_tokens_seen": 438193280, "step": 2427 }, { "epoch": 0.26579818823722595, "grad_norm": 1.3239935849486695, "learning_rate": 4.177762209430667e-05, "loss": 0.6204, "num_input_tokens_seen": 438355904, "step": 2428 }, { "epoch": 0.2659076603081639, "grad_norm": 1.2559583752471233, "learning_rate": 4.177124640661094e-05, "loss": 0.5386, "num_input_tokens_seen": 438549440, "step": 2429 }, { "epoch": 0.2660171323791018, "grad_norm": 1.3594982047537385, "learning_rate": 4.176486873490882e-05, "loss": 0.7279, "num_input_tokens_seen": 438728864, "step": 2430 }, { "epoch": 0.26612660445003966, "grad_norm": 1.3828578014819775, "learning_rate": 4.1758489079954774e-05, "loss": 0.8481, "num_input_tokens_seen": 438902464, "step": 2431 }, { "epoch": 0.2662360765209776, "grad_norm": 1.3671875435965395, "learning_rate": 4.1752107442503505e-05, "loss": 0.6231, "num_input_tokens_seen": 439053888, "step": 2432 }, { "epoch": 0.2663455485919155, "grad_norm": 1.204151755815913, "learning_rate": 4.174572382330996e-05, "loss": 0.7068, "num_input_tokens_seen": 439234432, "step": 2433 }, { "epoch": 0.26645502066285337, "grad_norm": 1.1656192891898893, "learning_rate": 4.1739338223129294e-05, "loss": 0.7253, "num_input_tokens_seen": 439432896, "step": 2434 }, { "epoch": 0.2665644927337913, "grad_norm": 1.2700523832736803, "learning_rate": 4.1732950642716916e-05, "loss": 0.7188, "num_input_tokens_seen": 439590816, "step": 2435 }, { "epoch": 0.2666739648047292, "grad_norm": 1.323453158631631, "learning_rate": 4.1726561082828466e-05, "loss": 0.9017, "num_input_tokens_seen": 439791072, "step": 2436 }, { "epoch": 0.2667834368756671, "grad_norm": 1.239023653939162, "learning_rate": 4.172016954421981e-05, "loss": 0.7513, "num_input_tokens_seen": 439971840, "step": 2437 }, { "epoch": 0.266892908946605, "grad_norm": 1.2451577333318697, "learning_rate": 4.171377602764707e-05, "loss": 0.6477, "num_input_tokens_seen": 440125280, "step": 2438 }, { "epoch": 0.2670023810175429, "grad_norm": 1.3225307452372435, "learning_rate": 4.170738053386657e-05, "loss": 0.6648, "num_input_tokens_seen": 440295296, "step": 2439 }, { "epoch": 0.2671118530884808, "grad_norm": 1.1969200245329128, "learning_rate": 4.170098306363489e-05, "loss": 0.7852, "num_input_tokens_seen": 440485472, "step": 2440 }, { "epoch": 0.2672213251594187, "grad_norm": 1.2513902086022113, "learning_rate": 4.169458361770885e-05, "loss": 0.6549, "num_input_tokens_seen": 440642944, "step": 2441 }, { "epoch": 0.26733079723035663, "grad_norm": 1.2774618485011267, "learning_rate": 4.168818219684548e-05, "loss": 0.6812, "num_input_tokens_seen": 440801760, "step": 2442 }, { "epoch": 0.2674402693012945, "grad_norm": 1.2361603392514982, "learning_rate": 4.168177880180205e-05, "loss": 0.6429, "num_input_tokens_seen": 440986336, "step": 2443 }, { "epoch": 0.2675497413722324, "grad_norm": 1.4298071472465301, "learning_rate": 4.167537343333608e-05, "loss": 0.7478, "num_input_tokens_seen": 441152320, "step": 2444 }, { "epoch": 0.2676592134431703, "grad_norm": 1.3127752651209028, "learning_rate": 4.166896609220532e-05, "loss": 0.7742, "num_input_tokens_seen": 441335328, "step": 2445 }, { "epoch": 0.2677686855141082, "grad_norm": 1.2283176571912386, "learning_rate": 4.1662556779167735e-05, "loss": 0.5799, "num_input_tokens_seen": 441495488, "step": 2446 }, { "epoch": 0.26787815758504613, "grad_norm": 1.2641536495885564, "learning_rate": 4.165614549498152e-05, "loss": 0.6634, "num_input_tokens_seen": 441693280, "step": 2447 }, { "epoch": 0.267987629655984, "grad_norm": 1.322244167858121, "learning_rate": 4.164973224040516e-05, "loss": 0.7197, "num_input_tokens_seen": 441866656, "step": 2448 }, { "epoch": 0.2680971017269219, "grad_norm": 1.2032502839884451, "learning_rate": 4.164331701619729e-05, "loss": 0.7235, "num_input_tokens_seen": 442048768, "step": 2449 }, { "epoch": 0.26820657379785984, "grad_norm": 1.331309909099366, "learning_rate": 4.1636899823116835e-05, "loss": 0.6798, "num_input_tokens_seen": 442212960, "step": 2450 }, { "epoch": 0.2683160458687977, "grad_norm": 1.2649978685172598, "learning_rate": 4.1630480661922935e-05, "loss": 0.7217, "num_input_tokens_seen": 442365728, "step": 2451 }, { "epoch": 0.2684255179397356, "grad_norm": 1.2223565950124715, "learning_rate": 4.162405953337497e-05, "loss": 0.7866, "num_input_tokens_seen": 442572704, "step": 2452 }, { "epoch": 0.26853499001067355, "grad_norm": 1.1163705893521136, "learning_rate": 4.161763643823253e-05, "loss": 0.5574, "num_input_tokens_seen": 442731072, "step": 2453 }, { "epoch": 0.2686444620816114, "grad_norm": 1.260043044187605, "learning_rate": 4.1611211377255473e-05, "loss": 0.6538, "num_input_tokens_seen": 442924160, "step": 2454 }, { "epoch": 0.26875393415254933, "grad_norm": 1.1808769343386463, "learning_rate": 4.1604784351203876e-05, "loss": 0.7989, "num_input_tokens_seen": 443128896, "step": 2455 }, { "epoch": 0.26886340622348726, "grad_norm": 1.2676130151869545, "learning_rate": 4.1598355360838016e-05, "loss": 0.6935, "num_input_tokens_seen": 443291296, "step": 2456 }, { "epoch": 0.2689728782944251, "grad_norm": 1.3491904992976642, "learning_rate": 4.159192440691846e-05, "loss": 1.0139, "num_input_tokens_seen": 443492672, "step": 2457 }, { "epoch": 0.26908235036536304, "grad_norm": 1.276954101487443, "learning_rate": 4.1585491490205965e-05, "loss": 0.6998, "num_input_tokens_seen": 443674560, "step": 2458 }, { "epoch": 0.26919182243630096, "grad_norm": 1.2458734108252698, "learning_rate": 4.157905661146152e-05, "loss": 0.7053, "num_input_tokens_seen": 443854432, "step": 2459 }, { "epoch": 0.26930129450723883, "grad_norm": 1.1763196453917024, "learning_rate": 4.157261977144638e-05, "loss": 0.7108, "num_input_tokens_seen": 444051104, "step": 2460 }, { "epoch": 0.26941076657817675, "grad_norm": 1.1914093517825914, "learning_rate": 4.1566180970922006e-05, "loss": 0.6096, "num_input_tokens_seen": 444240608, "step": 2461 }, { "epoch": 0.2695202386491146, "grad_norm": 1.2053957726076114, "learning_rate": 4.155974021065009e-05, "loss": 0.7337, "num_input_tokens_seen": 444428096, "step": 2462 }, { "epoch": 0.26962971072005254, "grad_norm": 1.1964467470215951, "learning_rate": 4.1553297491392564e-05, "loss": 0.71, "num_input_tokens_seen": 444596992, "step": 2463 }, { "epoch": 0.26973918279099046, "grad_norm": 1.0350054361136125, "learning_rate": 4.154685281391158e-05, "loss": 0.6077, "num_input_tokens_seen": 444800608, "step": 2464 }, { "epoch": 0.2698486548619283, "grad_norm": 1.3693672895889113, "learning_rate": 4.1540406178969553e-05, "loss": 0.8101, "num_input_tokens_seen": 444963456, "step": 2465 }, { "epoch": 0.26995812693286625, "grad_norm": 1.1739236853066515, "learning_rate": 4.153395758732909e-05, "loss": 0.5022, "num_input_tokens_seen": 445115104, "step": 2466 }, { "epoch": 0.27006759900380417, "grad_norm": 1.4147636403533264, "learning_rate": 4.152750703975305e-05, "loss": 0.8127, "num_input_tokens_seen": 445315136, "step": 2467 }, { "epoch": 0.27017707107474204, "grad_norm": 1.3984980649992698, "learning_rate": 4.152105453700452e-05, "loss": 0.8784, "num_input_tokens_seen": 445531072, "step": 2468 }, { "epoch": 0.27028654314567996, "grad_norm": 1.301257805118367, "learning_rate": 4.151460007984683e-05, "loss": 0.7166, "num_input_tokens_seen": 445703104, "step": 2469 }, { "epoch": 0.2703960152166179, "grad_norm": 1.2534462153213313, "learning_rate": 4.150814366904352e-05, "loss": 0.7559, "num_input_tokens_seen": 445850720, "step": 2470 }, { "epoch": 0.27050548728755575, "grad_norm": 1.2186531126195839, "learning_rate": 4.150168530535837e-05, "loss": 0.7502, "num_input_tokens_seen": 446035744, "step": 2471 }, { "epoch": 0.27061495935849367, "grad_norm": 1.3173589230598954, "learning_rate": 4.149522498955539e-05, "loss": 0.6764, "num_input_tokens_seen": 446230624, "step": 2472 }, { "epoch": 0.2707244314294316, "grad_norm": 1.1964982079613253, "learning_rate": 4.148876272239883e-05, "loss": 0.7967, "num_input_tokens_seen": 446413856, "step": 2473 }, { "epoch": 0.27083390350036946, "grad_norm": 1.1073035659700925, "learning_rate": 4.148229850465316e-05, "loss": 0.6252, "num_input_tokens_seen": 446599104, "step": 2474 }, { "epoch": 0.2709433755713074, "grad_norm": 1.1851419326028056, "learning_rate": 4.1475832337083085e-05, "loss": 0.6244, "num_input_tokens_seen": 446778304, "step": 2475 }, { "epoch": 0.2710528476422453, "grad_norm": 1.2167068864541126, "learning_rate": 4.1469364220453546e-05, "loss": 0.714, "num_input_tokens_seen": 446964224, "step": 2476 }, { "epoch": 0.27116231971318316, "grad_norm": 1.4496891872480886, "learning_rate": 4.1462894155529706e-05, "loss": 0.9179, "num_input_tokens_seen": 447130880, "step": 2477 }, { "epoch": 0.2712717917841211, "grad_norm": 1.1741601153954209, "learning_rate": 4.145642214307695e-05, "loss": 0.6579, "num_input_tokens_seen": 447331136, "step": 2478 }, { "epoch": 0.27138126385505895, "grad_norm": 1.112847601774286, "learning_rate": 4.144994818386092e-05, "loss": 0.5765, "num_input_tokens_seen": 447512576, "step": 2479 }, { "epoch": 0.2714907359259969, "grad_norm": 1.3255679029140306, "learning_rate": 4.144347227864747e-05, "loss": 0.6908, "num_input_tokens_seen": 447699616, "step": 2480 }, { "epoch": 0.2716002079969348, "grad_norm": 1.3556861812983827, "learning_rate": 4.1436994428202667e-05, "loss": 0.7807, "num_input_tokens_seen": 447861120, "step": 2481 }, { "epoch": 0.27170968006787266, "grad_norm": 1.4238239387691374, "learning_rate": 4.143051463329286e-05, "loss": 0.6498, "num_input_tokens_seen": 448017696, "step": 2482 }, { "epoch": 0.2718191521388106, "grad_norm": 1.3153236533335813, "learning_rate": 4.1424032894684584e-05, "loss": 0.6461, "num_input_tokens_seen": 448163520, "step": 2483 }, { "epoch": 0.2719286242097485, "grad_norm": 1.2198454995116526, "learning_rate": 4.141754921314461e-05, "loss": 0.5423, "num_input_tokens_seen": 448348768, "step": 2484 }, { "epoch": 0.27203809628068637, "grad_norm": 1.316756114255298, "learning_rate": 4.141106358943995e-05, "loss": 0.8305, "num_input_tokens_seen": 448559104, "step": 2485 }, { "epoch": 0.2721475683516243, "grad_norm": 1.4778736892845383, "learning_rate": 4.140457602433784e-05, "loss": 0.7723, "num_input_tokens_seen": 448746816, "step": 2486 }, { "epoch": 0.2722570404225622, "grad_norm": 1.3042607780275903, "learning_rate": 4.139808651860574e-05, "loss": 0.7453, "num_input_tokens_seen": 448911232, "step": 2487 }, { "epoch": 0.2723665124935001, "grad_norm": 1.3333336065212607, "learning_rate": 4.139159507301136e-05, "loss": 0.5576, "num_input_tokens_seen": 449090880, "step": 2488 }, { "epoch": 0.272475984564438, "grad_norm": 1.3687791986704172, "learning_rate": 4.138510168832261e-05, "loss": 0.8987, "num_input_tokens_seen": 449277472, "step": 2489 }, { "epoch": 0.2725854566353759, "grad_norm": 1.3153049378688633, "learning_rate": 4.137860636530766e-05, "loss": 0.7678, "num_input_tokens_seen": 449475936, "step": 2490 }, { "epoch": 0.2726949287063138, "grad_norm": 1.2423795635562918, "learning_rate": 4.1372109104734886e-05, "loss": 0.7396, "num_input_tokens_seen": 449685152, "step": 2491 }, { "epoch": 0.2728044007772517, "grad_norm": 1.3225476458643597, "learning_rate": 4.1365609907372905e-05, "loss": 0.6865, "num_input_tokens_seen": 449833664, "step": 2492 }, { "epoch": 0.27291387284818963, "grad_norm": 1.2001055690899674, "learning_rate": 4.135910877399055e-05, "loss": 0.6291, "num_input_tokens_seen": 450011968, "step": 2493 }, { "epoch": 0.2730233449191275, "grad_norm": 1.159578584137454, "learning_rate": 4.13526057053569e-05, "loss": 0.6542, "num_input_tokens_seen": 450195424, "step": 2494 }, { "epoch": 0.2731328169900654, "grad_norm": 1.2427573186658505, "learning_rate": 4.134610070224127e-05, "loss": 0.7338, "num_input_tokens_seen": 450359616, "step": 2495 }, { "epoch": 0.2732422890610033, "grad_norm": 1.186360565435354, "learning_rate": 4.133959376541317e-05, "loss": 0.6666, "num_input_tokens_seen": 450537248, "step": 2496 }, { "epoch": 0.2733517611319412, "grad_norm": 1.306901982602183, "learning_rate": 4.133308489564236e-05, "loss": 0.6672, "num_input_tokens_seen": 450717120, "step": 2497 }, { "epoch": 0.27346123320287913, "grad_norm": 1.4294161513008916, "learning_rate": 4.132657409369883e-05, "loss": 0.8104, "num_input_tokens_seen": 450876832, "step": 2498 }, { "epoch": 0.273570705273817, "grad_norm": 1.3005441902129724, "learning_rate": 4.1320061360352804e-05, "loss": 0.8793, "num_input_tokens_seen": 451063872, "step": 2499 }, { "epoch": 0.2736801773447549, "grad_norm": 1.2514019733403032, "learning_rate": 4.13135466963747e-05, "loss": 0.7204, "num_input_tokens_seen": 451229856, "step": 2500 }, { "epoch": 0.27378964941569284, "grad_norm": 1.2478355742245422, "learning_rate": 4.130703010253523e-05, "loss": 0.6425, "num_input_tokens_seen": 451394272, "step": 2501 }, { "epoch": 0.2738991214866307, "grad_norm": 1.4361552707384817, "learning_rate": 4.130051157960526e-05, "loss": 0.6298, "num_input_tokens_seen": 451577952, "step": 2502 }, { "epoch": 0.2740085935575686, "grad_norm": 1.1573880754904975, "learning_rate": 4.1293991128355934e-05, "loss": 0.5696, "num_input_tokens_seen": 451759392, "step": 2503 }, { "epoch": 0.27411806562850655, "grad_norm": 1.102175116127384, "learning_rate": 4.12874687495586e-05, "loss": 0.6699, "num_input_tokens_seen": 451978240, "step": 2504 }, { "epoch": 0.2742275376994444, "grad_norm": 1.3124934605026817, "learning_rate": 4.128094444398486e-05, "loss": 0.7837, "num_input_tokens_seen": 452204480, "step": 2505 }, { "epoch": 0.27433700977038233, "grad_norm": 1.2028574955790545, "learning_rate": 4.127441821240651e-05, "loss": 0.7288, "num_input_tokens_seen": 452376960, "step": 2506 }, { "epoch": 0.27444648184132026, "grad_norm": 1.2872387556191336, "learning_rate": 4.12678900555956e-05, "loss": 0.6966, "num_input_tokens_seen": 452565344, "step": 2507 }, { "epoch": 0.2745559539122581, "grad_norm": 1.2360039115646342, "learning_rate": 4.1261359974324387e-05, "loss": 0.5866, "num_input_tokens_seen": 452757984, "step": 2508 }, { "epoch": 0.27466542598319604, "grad_norm": 1.3033474190350016, "learning_rate": 4.1254827969365387e-05, "loss": 0.913, "num_input_tokens_seen": 452954880, "step": 2509 }, { "epoch": 0.27477489805413396, "grad_norm": 1.2435680371312816, "learning_rate": 4.1248294041491306e-05, "loss": 0.6852, "num_input_tokens_seen": 453153792, "step": 2510 }, { "epoch": 0.27488437012507183, "grad_norm": 1.363807205133276, "learning_rate": 4.12417581914751e-05, "loss": 0.7437, "num_input_tokens_seen": 453320224, "step": 2511 }, { "epoch": 0.27499384219600975, "grad_norm": 1.2664166082373947, "learning_rate": 4.123522042008996e-05, "loss": 0.7121, "num_input_tokens_seen": 453485760, "step": 2512 }, { "epoch": 0.2751033142669476, "grad_norm": 1.268287354478459, "learning_rate": 4.122868072810927e-05, "loss": 0.7123, "num_input_tokens_seen": 453683776, "step": 2513 }, { "epoch": 0.27521278633788554, "grad_norm": 1.1581405442620822, "learning_rate": 4.122213911630667e-05, "loss": 0.644, "num_input_tokens_seen": 453868576, "step": 2514 }, { "epoch": 0.27532225840882346, "grad_norm": 1.2472209555646152, "learning_rate": 4.121559558545603e-05, "loss": 0.7475, "num_input_tokens_seen": 454073760, "step": 2515 }, { "epoch": 0.2754317304797613, "grad_norm": 1.3036038586936605, "learning_rate": 4.120905013633143e-05, "loss": 0.6714, "num_input_tokens_seen": 454247136, "step": 2516 }, { "epoch": 0.27554120255069925, "grad_norm": 1.4265687204916833, "learning_rate": 4.1202502769707184e-05, "loss": 0.7585, "num_input_tokens_seen": 454406400, "step": 2517 }, { "epoch": 0.27565067462163717, "grad_norm": 1.363947314875372, "learning_rate": 4.119595348635784e-05, "loss": 0.6338, "num_input_tokens_seen": 454582912, "step": 2518 }, { "epoch": 0.27576014669257504, "grad_norm": 1.2913532133109924, "learning_rate": 4.118940228705815e-05, "loss": 0.8511, "num_input_tokens_seen": 454765472, "step": 2519 }, { "epoch": 0.27586961876351296, "grad_norm": 1.304282531019626, "learning_rate": 4.1182849172583135e-05, "loss": 0.9503, "num_input_tokens_seen": 454977152, "step": 2520 }, { "epoch": 0.2759790908344509, "grad_norm": 1.1863411217794975, "learning_rate": 4.117629414370799e-05, "loss": 0.6124, "num_input_tokens_seen": 455160832, "step": 2521 }, { "epoch": 0.27608856290538875, "grad_norm": 1.206934310822745, "learning_rate": 4.116973720120817e-05, "loss": 0.7638, "num_input_tokens_seen": 455349440, "step": 2522 }, { "epoch": 0.27619803497632667, "grad_norm": 1.1275258319712191, "learning_rate": 4.116317834585935e-05, "loss": 0.5594, "num_input_tokens_seen": 455513184, "step": 2523 }, { "epoch": 0.2763075070472646, "grad_norm": 1.2043046988778454, "learning_rate": 4.115661757843743e-05, "loss": 0.7322, "num_input_tokens_seen": 455706272, "step": 2524 }, { "epoch": 0.27641697911820245, "grad_norm": 1.1137199742029953, "learning_rate": 4.115005489971854e-05, "loss": 0.7606, "num_input_tokens_seen": 455899136, "step": 2525 }, { "epoch": 0.2765264511891404, "grad_norm": 1.317037368852524, "learning_rate": 4.114349031047903e-05, "loss": 0.8265, "num_input_tokens_seen": 456079680, "step": 2526 }, { "epoch": 0.2766359232600783, "grad_norm": 1.294302169246487, "learning_rate": 4.1136923811495475e-05, "loss": 0.7435, "num_input_tokens_seen": 456276128, "step": 2527 }, { "epoch": 0.27674539533101616, "grad_norm": 1.195436957368714, "learning_rate": 4.1130355403544675e-05, "loss": 0.5567, "num_input_tokens_seen": 456467872, "step": 2528 }, { "epoch": 0.2768548674019541, "grad_norm": 1.3138493459144263, "learning_rate": 4.1123785087403676e-05, "loss": 0.6926, "num_input_tokens_seen": 456671264, "step": 2529 }, { "epoch": 0.27696433947289195, "grad_norm": 1.269290091277573, "learning_rate": 4.111721286384972e-05, "loss": 0.6822, "num_input_tokens_seen": 456855616, "step": 2530 }, { "epoch": 0.2770738115438299, "grad_norm": 1.2820203372693115, "learning_rate": 4.1110638733660294e-05, "loss": 0.6777, "num_input_tokens_seen": 457012864, "step": 2531 }, { "epoch": 0.2771832836147678, "grad_norm": 1.3307243491407212, "learning_rate": 4.110406269761311e-05, "loss": 1.0981, "num_input_tokens_seen": 457236416, "step": 2532 }, { "epoch": 0.27729275568570566, "grad_norm": 1.3141107212431369, "learning_rate": 4.109748475648609e-05, "loss": 0.6851, "num_input_tokens_seen": 457399040, "step": 2533 }, { "epoch": 0.2774022277566436, "grad_norm": 1.2742077890716392, "learning_rate": 4.109090491105739e-05, "loss": 0.6021, "num_input_tokens_seen": 457568160, "step": 2534 }, { "epoch": 0.2775116998275815, "grad_norm": 1.2649927797224532, "learning_rate": 4.108432316210541e-05, "loss": 0.6856, "num_input_tokens_seen": 457742208, "step": 2535 }, { "epoch": 0.27762117189851937, "grad_norm": 1.4717533166561918, "learning_rate": 4.107773951040874e-05, "loss": 0.9781, "num_input_tokens_seen": 457935744, "step": 2536 }, { "epoch": 0.2777306439694573, "grad_norm": 1.4577740368772465, "learning_rate": 4.107115395674623e-05, "loss": 0.9058, "num_input_tokens_seen": 458108000, "step": 2537 }, { "epoch": 0.2778401160403952, "grad_norm": 1.3659075603987199, "learning_rate": 4.1064566501896925e-05, "loss": 0.7031, "num_input_tokens_seen": 458277344, "step": 2538 }, { "epoch": 0.2779495881113331, "grad_norm": 1.3295348818343053, "learning_rate": 4.1057977146640115e-05, "loss": 0.7866, "num_input_tokens_seen": 458465504, "step": 2539 }, { "epoch": 0.278059060182271, "grad_norm": 1.1275320698137845, "learning_rate": 4.10513858917553e-05, "loss": 0.6235, "num_input_tokens_seen": 458659712, "step": 2540 }, { "epoch": 0.2781685322532089, "grad_norm": 1.1045636357167932, "learning_rate": 4.104479273802222e-05, "loss": 0.5307, "num_input_tokens_seen": 458844288, "step": 2541 }, { "epoch": 0.2782780043241468, "grad_norm": 1.275885026101736, "learning_rate": 4.1038197686220837e-05, "loss": 0.6158, "num_input_tokens_seen": 459038272, "step": 2542 }, { "epoch": 0.2783874763950847, "grad_norm": 1.3072968846352486, "learning_rate": 4.1031600737131326e-05, "loss": 0.8242, "num_input_tokens_seen": 459245696, "step": 2543 }, { "epoch": 0.27849694846602263, "grad_norm": 1.2761064889233922, "learning_rate": 4.102500189153409e-05, "loss": 0.7983, "num_input_tokens_seen": 459439008, "step": 2544 }, { "epoch": 0.2786064205369605, "grad_norm": 1.1219396290984434, "learning_rate": 4.1018401150209776e-05, "loss": 0.5546, "num_input_tokens_seen": 459610816, "step": 2545 }, { "epoch": 0.2787158926078984, "grad_norm": 1.2173000905528184, "learning_rate": 4.101179851393921e-05, "loss": 0.5729, "num_input_tokens_seen": 459783072, "step": 2546 }, { "epoch": 0.2788253646788363, "grad_norm": 1.2488531096449693, "learning_rate": 4.100519398350351e-05, "loss": 0.5532, "num_input_tokens_seen": 459951296, "step": 2547 }, { "epoch": 0.2789348367497742, "grad_norm": 1.195225382104733, "learning_rate": 4.099858755968394e-05, "loss": 0.8026, "num_input_tokens_seen": 460122880, "step": 2548 }, { "epoch": 0.27904430882071213, "grad_norm": 1.274344980991053, "learning_rate": 4.0991979243262054e-05, "loss": 0.7547, "num_input_tokens_seen": 460291552, "step": 2549 }, { "epoch": 0.27915378089165, "grad_norm": 1.4047568977703921, "learning_rate": 4.09853690350196e-05, "loss": 0.8274, "num_input_tokens_seen": 460471648, "step": 2550 }, { "epoch": 0.2792632529625879, "grad_norm": 1.2093545719757572, "learning_rate": 4.097875693573854e-05, "loss": 0.659, "num_input_tokens_seen": 460651520, "step": 2551 }, { "epoch": 0.27937272503352584, "grad_norm": 1.1800983547180153, "learning_rate": 4.0972142946201086e-05, "loss": 0.6332, "num_input_tokens_seen": 460831392, "step": 2552 }, { "epoch": 0.2794821971044637, "grad_norm": 1.1504354937729633, "learning_rate": 4.0965527067189655e-05, "loss": 0.6982, "num_input_tokens_seen": 461026272, "step": 2553 }, { "epoch": 0.2795916691754016, "grad_norm": 1.2720842250283984, "learning_rate": 4.095890929948689e-05, "loss": 0.8168, "num_input_tokens_seen": 461203904, "step": 2554 }, { "epoch": 0.27970114124633955, "grad_norm": 1.3108522880785953, "learning_rate": 4.095228964387566e-05, "loss": 0.6863, "num_input_tokens_seen": 461378624, "step": 2555 }, { "epoch": 0.2798106133172774, "grad_norm": 1.2875664092038306, "learning_rate": 4.094566810113907e-05, "loss": 0.822, "num_input_tokens_seen": 461555360, "step": 2556 }, { "epoch": 0.27992008538821533, "grad_norm": 1.2428913161867932, "learning_rate": 4.0939044672060426e-05, "loss": 0.7704, "num_input_tokens_seen": 461741056, "step": 2557 }, { "epoch": 0.28002955745915326, "grad_norm": 1.159790340994083, "learning_rate": 4.093241935742326e-05, "loss": 0.647, "num_input_tokens_seen": 461936832, "step": 2558 }, { "epoch": 0.2801390295300911, "grad_norm": 1.1216689172885606, "learning_rate": 4.0925792158011345e-05, "loss": 0.6284, "num_input_tokens_seen": 462098112, "step": 2559 }, { "epoch": 0.28024850160102904, "grad_norm": 1.295952940242453, "learning_rate": 4.091916307460866e-05, "loss": 0.679, "num_input_tokens_seen": 462267680, "step": 2560 }, { "epoch": 0.28035797367196696, "grad_norm": 1.321394166903571, "learning_rate": 4.091253210799942e-05, "loss": 0.7662, "num_input_tokens_seen": 462473760, "step": 2561 }, { "epoch": 0.28046744574290483, "grad_norm": 1.382769869562135, "learning_rate": 4.0905899258968046e-05, "loss": 0.8217, "num_input_tokens_seen": 462670432, "step": 2562 }, { "epoch": 0.28057691781384275, "grad_norm": 1.2205215685297068, "learning_rate": 4.08992645282992e-05, "loss": 0.7336, "num_input_tokens_seen": 462876736, "step": 2563 }, { "epoch": 0.2806863898847806, "grad_norm": 1.379044998580003, "learning_rate": 4.089262791677775e-05, "loss": 0.7736, "num_input_tokens_seen": 463031744, "step": 2564 }, { "epoch": 0.28079586195571854, "grad_norm": 1.1094428565535508, "learning_rate": 4.0885989425188806e-05, "loss": 0.56, "num_input_tokens_seen": 463221024, "step": 2565 }, { "epoch": 0.28090533402665646, "grad_norm": 1.2500052451977355, "learning_rate": 4.087934905431768e-05, "loss": 0.7438, "num_input_tokens_seen": 463410528, "step": 2566 }, { "epoch": 0.2810148060975943, "grad_norm": 1.344394085906895, "learning_rate": 4.087270680494992e-05, "loss": 0.7131, "num_input_tokens_seen": 463609440, "step": 2567 }, { "epoch": 0.28112427816853225, "grad_norm": 1.2417825482631895, "learning_rate": 4.086606267787128e-05, "loss": 0.8143, "num_input_tokens_seen": 463799616, "step": 2568 }, { "epoch": 0.28123375023947017, "grad_norm": 1.2793355921353795, "learning_rate": 4.0859416673867755e-05, "loss": 0.6703, "num_input_tokens_seen": 463951040, "step": 2569 }, { "epoch": 0.28134322231040804, "grad_norm": 1.2772061801754901, "learning_rate": 4.085276879372557e-05, "loss": 0.8107, "num_input_tokens_seen": 464143008, "step": 2570 }, { "epoch": 0.28145269438134596, "grad_norm": 1.331442604996697, "learning_rate": 4.084611903823113e-05, "loss": 0.842, "num_input_tokens_seen": 464292192, "step": 2571 }, { "epoch": 0.2815621664522839, "grad_norm": 1.4299674671980733, "learning_rate": 4.083946740817111e-05, "loss": 0.7741, "num_input_tokens_seen": 464448992, "step": 2572 }, { "epoch": 0.28167163852322175, "grad_norm": 1.2336053017866, "learning_rate": 4.083281390433236e-05, "loss": 0.9608, "num_input_tokens_seen": 464658208, "step": 2573 }, { "epoch": 0.28178111059415967, "grad_norm": 1.3089306753465437, "learning_rate": 4.0826158527502e-05, "loss": 0.7138, "num_input_tokens_seen": 464808512, "step": 2574 }, { "epoch": 0.2818905826650976, "grad_norm": 1.203687462058627, "learning_rate": 4.081950127846735e-05, "loss": 0.6454, "num_input_tokens_seen": 464975392, "step": 2575 }, { "epoch": 0.28200005473603545, "grad_norm": 1.2551518608253007, "learning_rate": 4.081284215801593e-05, "loss": 0.679, "num_input_tokens_seen": 465122560, "step": 2576 }, { "epoch": 0.2821095268069734, "grad_norm": 1.4038497361498228, "learning_rate": 4.080618116693551e-05, "loss": 0.8124, "num_input_tokens_seen": 465326848, "step": 2577 }, { "epoch": 0.2822189988779113, "grad_norm": 1.4887576804935845, "learning_rate": 4.079951830601408e-05, "loss": 0.7779, "num_input_tokens_seen": 465508736, "step": 2578 }, { "epoch": 0.28232847094884916, "grad_norm": 1.277763787598431, "learning_rate": 4.079285357603984e-05, "loss": 0.8101, "num_input_tokens_seen": 465688832, "step": 2579 }, { "epoch": 0.2824379430197871, "grad_norm": 1.163381031437584, "learning_rate": 4.078618697780121e-05, "loss": 0.68, "num_input_tokens_seen": 465867360, "step": 2580 }, { "epoch": 0.28254741509072495, "grad_norm": 1.1615769136164178, "learning_rate": 4.0779518512086834e-05, "loss": 0.8635, "num_input_tokens_seen": 466077248, "step": 2581 }, { "epoch": 0.2826568871616629, "grad_norm": 1.0891125047716268, "learning_rate": 4.077284817968559e-05, "loss": 0.5325, "num_input_tokens_seen": 466269216, "step": 2582 }, { "epoch": 0.2827663592326008, "grad_norm": 1.3672198046546162, "learning_rate": 4.0766175981386556e-05, "loss": 0.6551, "num_input_tokens_seen": 466404736, "step": 2583 }, { "epoch": 0.28287583130353866, "grad_norm": 1.3255631365767564, "learning_rate": 4.0759501917979035e-05, "loss": 0.8677, "num_input_tokens_seen": 466550336, "step": 2584 }, { "epoch": 0.2829853033744766, "grad_norm": 1.2747830729977547, "learning_rate": 4.0752825990252574e-05, "loss": 0.8391, "num_input_tokens_seen": 466731328, "step": 2585 }, { "epoch": 0.2830947754454145, "grad_norm": 1.2365762904441533, "learning_rate": 4.074614819899692e-05, "loss": 0.6522, "num_input_tokens_seen": 466886112, "step": 2586 }, { "epoch": 0.28320424751635237, "grad_norm": 1.3299001162889714, "learning_rate": 4.073946854500202e-05, "loss": 0.7409, "num_input_tokens_seen": 467059488, "step": 2587 }, { "epoch": 0.2833137195872903, "grad_norm": 1.2388211586508493, "learning_rate": 4.073278702905809e-05, "loss": 0.6733, "num_input_tokens_seen": 467238240, "step": 2588 }, { "epoch": 0.2834231916582282, "grad_norm": 1.3294205125228495, "learning_rate": 4.0726103651955525e-05, "loss": 0.4938, "num_input_tokens_seen": 467406464, "step": 2589 }, { "epoch": 0.2835326637291661, "grad_norm": 1.6321502510683195, "learning_rate": 4.071941841448496e-05, "loss": 0.9921, "num_input_tokens_seen": 467623072, "step": 2590 }, { "epoch": 0.283642135800104, "grad_norm": 1.191985892951618, "learning_rate": 4.071273131743725e-05, "loss": 0.4916, "num_input_tokens_seen": 467781664, "step": 2591 }, { "epoch": 0.2837516078710419, "grad_norm": 1.3495392843283123, "learning_rate": 4.070604236160347e-05, "loss": 0.7407, "num_input_tokens_seen": 467980352, "step": 2592 }, { "epoch": 0.2838610799419798, "grad_norm": 1.3912603234304786, "learning_rate": 4.06993515477749e-05, "loss": 0.9193, "num_input_tokens_seen": 468190464, "step": 2593 }, { "epoch": 0.2839705520129177, "grad_norm": 1.3967360049738575, "learning_rate": 4.069265887674304e-05, "loss": 0.6743, "num_input_tokens_seen": 468334048, "step": 2594 }, { "epoch": 0.28408002408385563, "grad_norm": 1.2434931676789664, "learning_rate": 4.068596434929965e-05, "loss": 0.7248, "num_input_tokens_seen": 468519968, "step": 2595 }, { "epoch": 0.2841894961547935, "grad_norm": 1.4198448023808448, "learning_rate": 4.067926796623666e-05, "loss": 0.7753, "num_input_tokens_seen": 468702304, "step": 2596 }, { "epoch": 0.2842989682257314, "grad_norm": 1.3572992283143759, "learning_rate": 4.067256972834623e-05, "loss": 0.8119, "num_input_tokens_seen": 468868064, "step": 2597 }, { "epoch": 0.2844084402966693, "grad_norm": 1.1248930244447233, "learning_rate": 4.066586963642078e-05, "loss": 0.692, "num_input_tokens_seen": 469047264, "step": 2598 }, { "epoch": 0.2845179123676072, "grad_norm": 1.3042677244006105, "learning_rate": 4.06591676912529e-05, "loss": 0.6408, "num_input_tokens_seen": 469197120, "step": 2599 }, { "epoch": 0.28462738443854513, "grad_norm": 1.320642328541304, "learning_rate": 4.065246389363541e-05, "loss": 0.8267, "num_input_tokens_seen": 469388864, "step": 2600 }, { "epoch": 0.284736856509483, "grad_norm": 1.1327637760601836, "learning_rate": 4.064575824436136e-05, "loss": 0.4828, "num_input_tokens_seen": 469549920, "step": 2601 }, { "epoch": 0.2848463285804209, "grad_norm": 1.140434510511054, "learning_rate": 4.063905074422403e-05, "loss": 0.6579, "num_input_tokens_seen": 469704928, "step": 2602 }, { "epoch": 0.28495580065135884, "grad_norm": 1.3682781120212908, "learning_rate": 4.0632341394016884e-05, "loss": 0.7735, "num_input_tokens_seen": 469904288, "step": 2603 }, { "epoch": 0.2850652727222967, "grad_norm": 1.3017786000016445, "learning_rate": 4.062563019453364e-05, "loss": 0.7966, "num_input_tokens_seen": 470104768, "step": 2604 }, { "epoch": 0.2851747447932346, "grad_norm": 1.3097970468976825, "learning_rate": 4.0618917146568214e-05, "loss": 0.6812, "num_input_tokens_seen": 470258432, "step": 2605 }, { "epoch": 0.28528421686417255, "grad_norm": 1.3659467462153856, "learning_rate": 4.061220225091474e-05, "loss": 0.6137, "num_input_tokens_seen": 470419712, "step": 2606 }, { "epoch": 0.2853936889351104, "grad_norm": 1.3496834807745692, "learning_rate": 4.06054855083676e-05, "loss": 0.6157, "num_input_tokens_seen": 470585024, "step": 2607 }, { "epoch": 0.28550316100604833, "grad_norm": 1.0963568547715, "learning_rate": 4.059876691972135e-05, "loss": 0.572, "num_input_tokens_seen": 470768256, "step": 2608 }, { "epoch": 0.28561263307698626, "grad_norm": 1.2556263661507694, "learning_rate": 4.05920464857708e-05, "loss": 0.7178, "num_input_tokens_seen": 470948576, "step": 2609 }, { "epoch": 0.2857221051479241, "grad_norm": 1.333397262749295, "learning_rate": 4.0585324207310946e-05, "loss": 0.7708, "num_input_tokens_seen": 471140992, "step": 2610 }, { "epoch": 0.28583157721886204, "grad_norm": 1.3183991150694487, "learning_rate": 4.057860008513703e-05, "loss": 0.6023, "num_input_tokens_seen": 471283456, "step": 2611 }, { "epoch": 0.28594104928979996, "grad_norm": 1.2017175442951578, "learning_rate": 4.057187412004452e-05, "loss": 0.7144, "num_input_tokens_seen": 471489088, "step": 2612 }, { "epoch": 0.28605052136073783, "grad_norm": 1.1855227422379047, "learning_rate": 4.056514631282906e-05, "loss": 0.5612, "num_input_tokens_seen": 471686432, "step": 2613 }, { "epoch": 0.28615999343167575, "grad_norm": 1.2782092414086152, "learning_rate": 4.055841666428655e-05, "loss": 0.8474, "num_input_tokens_seen": 471893856, "step": 2614 }, { "epoch": 0.2862694655026136, "grad_norm": 1.2329318144756092, "learning_rate": 4.0551685175213094e-05, "loss": 0.5951, "num_input_tokens_seen": 472048864, "step": 2615 }, { "epoch": 0.28637893757355154, "grad_norm": 1.2270690940889988, "learning_rate": 4.0544951846405e-05, "loss": 0.6453, "num_input_tokens_seen": 472209920, "step": 2616 }, { "epoch": 0.28648840964448946, "grad_norm": 1.3084497628777318, "learning_rate": 4.053821667865883e-05, "loss": 0.6488, "num_input_tokens_seen": 472371648, "step": 2617 }, { "epoch": 0.2865978817154273, "grad_norm": 1.3395717333602366, "learning_rate": 4.053147967277133e-05, "loss": 0.7562, "num_input_tokens_seen": 472534944, "step": 2618 }, { "epoch": 0.28670735378636525, "grad_norm": 1.413470613309752, "learning_rate": 4.052474082953948e-05, "loss": 0.6095, "num_input_tokens_seen": 472691520, "step": 2619 }, { "epoch": 0.28681682585730317, "grad_norm": 1.2038624967809657, "learning_rate": 4.051800014976046e-05, "loss": 0.6513, "num_input_tokens_seen": 472858400, "step": 2620 }, { "epoch": 0.28692629792824104, "grad_norm": 1.2344491670756434, "learning_rate": 4.051125763423169e-05, "loss": 0.6595, "num_input_tokens_seen": 473011616, "step": 2621 }, { "epoch": 0.28703576999917896, "grad_norm": 1.1291797662986285, "learning_rate": 4.050451328375079e-05, "loss": 0.6963, "num_input_tokens_seen": 473207616, "step": 2622 }, { "epoch": 0.2871452420701169, "grad_norm": 1.2264574819909293, "learning_rate": 4.0497767099115615e-05, "loss": 0.8038, "num_input_tokens_seen": 473411904, "step": 2623 }, { "epoch": 0.28725471414105475, "grad_norm": 1.2441122629714638, "learning_rate": 4.0491019081124216e-05, "loss": 0.6942, "num_input_tokens_seen": 473600288, "step": 2624 }, { "epoch": 0.28736418621199267, "grad_norm": 1.1786496479120474, "learning_rate": 4.048426923057488e-05, "loss": 0.886, "num_input_tokens_seen": 473797408, "step": 2625 }, { "epoch": 0.2874736582829306, "grad_norm": 1.1648927326508496, "learning_rate": 4.047751754826608e-05, "loss": 0.6483, "num_input_tokens_seen": 473976608, "step": 2626 }, { "epoch": 0.28758313035386845, "grad_norm": 1.1641118083981827, "learning_rate": 4.0470764034996556e-05, "loss": 0.6745, "num_input_tokens_seen": 474164992, "step": 2627 }, { "epoch": 0.2876926024248064, "grad_norm": 1.2340264129965057, "learning_rate": 4.046400869156521e-05, "loss": 0.7982, "num_input_tokens_seen": 474351584, "step": 2628 }, { "epoch": 0.2878020744957443, "grad_norm": 1.3730556873022464, "learning_rate": 4.045725151877121e-05, "loss": 0.7603, "num_input_tokens_seen": 474533696, "step": 2629 }, { "epoch": 0.28791154656668216, "grad_norm": 1.6898154867897652, "learning_rate": 4.04504925174139e-05, "loss": 0.9596, "num_input_tokens_seen": 474712896, "step": 2630 }, { "epoch": 0.2880210186376201, "grad_norm": 1.2778639827664928, "learning_rate": 4.0443731688292866e-05, "loss": 0.8781, "num_input_tokens_seen": 474922560, "step": 2631 }, { "epoch": 0.28813049070855795, "grad_norm": 1.26688600050785, "learning_rate": 4.043696903220788e-05, "loss": 0.5924, "num_input_tokens_seen": 475078912, "step": 2632 }, { "epoch": 0.2882399627794959, "grad_norm": 1.1718011451336665, "learning_rate": 4.0430204549958986e-05, "loss": 0.551, "num_input_tokens_seen": 475252288, "step": 2633 }, { "epoch": 0.2883494348504338, "grad_norm": 1.1919257860410717, "learning_rate": 4.0423438242346386e-05, "loss": 0.783, "num_input_tokens_seen": 475461056, "step": 2634 }, { "epoch": 0.28845890692137166, "grad_norm": 1.2912683744498783, "learning_rate": 4.0416670110170526e-05, "loss": 0.7863, "num_input_tokens_seen": 475655488, "step": 2635 }, { "epoch": 0.2885683789923096, "grad_norm": 1.4153176317145095, "learning_rate": 4.040990015423206e-05, "loss": 0.7313, "num_input_tokens_seen": 475814528, "step": 2636 }, { "epoch": 0.2886778510632475, "grad_norm": 1.2275510072695994, "learning_rate": 4.040312837533187e-05, "loss": 0.7351, "num_input_tokens_seen": 475992608, "step": 2637 }, { "epoch": 0.28878732313418537, "grad_norm": 1.2488533482824784, "learning_rate": 4.039635477427103e-05, "loss": 1.0363, "num_input_tokens_seen": 476207648, "step": 2638 }, { "epoch": 0.2888967952051233, "grad_norm": 1.3372036026147576, "learning_rate": 4.038957935185086e-05, "loss": 0.7408, "num_input_tokens_seen": 476387520, "step": 2639 }, { "epoch": 0.2890062672760612, "grad_norm": 1.2503674443916748, "learning_rate": 4.038280210887287e-05, "loss": 0.8477, "num_input_tokens_seen": 476592256, "step": 2640 }, { "epoch": 0.2891157393469991, "grad_norm": 1.17326536986003, "learning_rate": 4.0376023046138803e-05, "loss": 0.5161, "num_input_tokens_seen": 476774144, "step": 2641 }, { "epoch": 0.289225211417937, "grad_norm": 1.3398074932030641, "learning_rate": 4.036924216445061e-05, "loss": 0.7096, "num_input_tokens_seen": 476962304, "step": 2642 }, { "epoch": 0.2893346834888749, "grad_norm": 1.290342354478756, "learning_rate": 4.036245946461043e-05, "loss": 0.7925, "num_input_tokens_seen": 477129632, "step": 2643 }, { "epoch": 0.2894441555598128, "grad_norm": 1.218751320471415, "learning_rate": 4.0355674947420676e-05, "loss": 0.6097, "num_input_tokens_seen": 477313984, "step": 2644 }, { "epoch": 0.2895536276307507, "grad_norm": 1.4389506773935774, "learning_rate": 4.0348888613683925e-05, "loss": 0.9934, "num_input_tokens_seen": 477486688, "step": 2645 }, { "epoch": 0.28966309970168863, "grad_norm": 1.3376948526643153, "learning_rate": 4.0342100464203e-05, "loss": 0.7809, "num_input_tokens_seen": 477656480, "step": 2646 }, { "epoch": 0.2897725717726265, "grad_norm": 1.3960902371439743, "learning_rate": 4.033531049978091e-05, "loss": 0.7938, "num_input_tokens_seen": 477843744, "step": 2647 }, { "epoch": 0.2898820438435644, "grad_norm": 1.2724645911725654, "learning_rate": 4.032851872122091e-05, "loss": 0.6259, "num_input_tokens_seen": 477998528, "step": 2648 }, { "epoch": 0.28999151591450234, "grad_norm": 1.275343375623269, "learning_rate": 4.0321725129326446e-05, "loss": 0.6605, "num_input_tokens_seen": 478177056, "step": 2649 }, { "epoch": 0.2901009879854402, "grad_norm": 1.189594830370036, "learning_rate": 4.031492972490119e-05, "loss": 0.5733, "num_input_tokens_seen": 478352224, "step": 2650 }, { "epoch": 0.29021046005637813, "grad_norm": 1.516431456272345, "learning_rate": 4.030813250874903e-05, "loss": 0.6928, "num_input_tokens_seen": 478516640, "step": 2651 }, { "epoch": 0.290319932127316, "grad_norm": 1.2793305137812594, "learning_rate": 4.030133348167405e-05, "loss": 0.7149, "num_input_tokens_seen": 478701440, "step": 2652 }, { "epoch": 0.2904294041982539, "grad_norm": 1.2326735830424158, "learning_rate": 4.0294532644480576e-05, "loss": 0.6239, "num_input_tokens_seen": 478897440, "step": 2653 }, { "epoch": 0.29053887626919184, "grad_norm": 1.3595629595018042, "learning_rate": 4.028772999797313e-05, "loss": 0.8451, "num_input_tokens_seen": 479069920, "step": 2654 }, { "epoch": 0.2906483483401297, "grad_norm": 1.3240746613826995, "learning_rate": 4.028092554295645e-05, "loss": 0.8026, "num_input_tokens_seen": 479253824, "step": 2655 }, { "epoch": 0.2907578204110676, "grad_norm": 1.3899965473516382, "learning_rate": 4.027411928023549e-05, "loss": 0.7605, "num_input_tokens_seen": 479444448, "step": 2656 }, { "epoch": 0.29086729248200555, "grad_norm": 1.2539301124274898, "learning_rate": 4.026731121061541e-05, "loss": 0.7468, "num_input_tokens_seen": 479627680, "step": 2657 }, { "epoch": 0.2909767645529434, "grad_norm": 1.2217961903007986, "learning_rate": 4.026050133490161e-05, "loss": 0.8872, "num_input_tokens_seen": 479837344, "step": 2658 }, { "epoch": 0.29108623662388133, "grad_norm": 1.6074327767467116, "learning_rate": 4.025368965389967e-05, "loss": 1.1086, "num_input_tokens_seen": 480029312, "step": 2659 }, { "epoch": 0.29119570869481926, "grad_norm": 1.2947502655423826, "learning_rate": 4.02468761684154e-05, "loss": 0.5807, "num_input_tokens_seen": 480184768, "step": 2660 }, { "epoch": 0.2913051807657571, "grad_norm": 1.3684750842612723, "learning_rate": 4.024006087925484e-05, "loss": 0.8739, "num_input_tokens_seen": 480375840, "step": 2661 }, { "epoch": 0.29141465283669504, "grad_norm": 1.261377199788945, "learning_rate": 4.02332437872242e-05, "loss": 0.7637, "num_input_tokens_seen": 480583040, "step": 2662 }, { "epoch": 0.29152412490763296, "grad_norm": 1.3352192068686437, "learning_rate": 4.022642489312994e-05, "loss": 0.8802, "num_input_tokens_seen": 480779936, "step": 2663 }, { "epoch": 0.29163359697857083, "grad_norm": 1.2614540318427023, "learning_rate": 4.0219604197778725e-05, "loss": 0.8596, "num_input_tokens_seen": 480967200, "step": 2664 }, { "epoch": 0.29174306904950875, "grad_norm": 1.2052214551171223, "learning_rate": 4.0212781701977434e-05, "loss": 0.6464, "num_input_tokens_seen": 481154912, "step": 2665 }, { "epoch": 0.2918525411204467, "grad_norm": 1.2601497094208702, "learning_rate": 4.020595740653315e-05, "loss": 0.803, "num_input_tokens_seen": 481326496, "step": 2666 }, { "epoch": 0.29196201319138454, "grad_norm": 1.29620964723201, "learning_rate": 4.019913131225318e-05, "loss": 0.7658, "num_input_tokens_seen": 481483296, "step": 2667 }, { "epoch": 0.29207148526232246, "grad_norm": 1.1934935249309584, "learning_rate": 4.019230341994501e-05, "loss": 0.6103, "num_input_tokens_seen": 481626432, "step": 2668 }, { "epoch": 0.2921809573332603, "grad_norm": 1.1210273962288804, "learning_rate": 4.018547373041641e-05, "loss": 0.7102, "num_input_tokens_seen": 481790176, "step": 2669 }, { "epoch": 0.29229042940419825, "grad_norm": 1.2506875531415473, "learning_rate": 4.017864224447528e-05, "loss": 0.6012, "num_input_tokens_seen": 481959968, "step": 2670 }, { "epoch": 0.29239990147513617, "grad_norm": 1.4488006564509033, "learning_rate": 4.01718089629298e-05, "loss": 0.9035, "num_input_tokens_seen": 482166272, "step": 2671 }, { "epoch": 0.29250937354607404, "grad_norm": 1.3670126230930941, "learning_rate": 4.016497388658832e-05, "loss": 0.6891, "num_input_tokens_seen": 482344128, "step": 2672 }, { "epoch": 0.29261884561701196, "grad_norm": 1.2084605325418132, "learning_rate": 4.015813701625942e-05, "loss": 0.569, "num_input_tokens_seen": 482541696, "step": 2673 }, { "epoch": 0.2927283176879499, "grad_norm": 1.1908557057575972, "learning_rate": 4.015129835275189e-05, "loss": 0.7128, "num_input_tokens_seen": 482749120, "step": 2674 }, { "epoch": 0.29283778975888775, "grad_norm": 1.4138269807885016, "learning_rate": 4.014445789687472e-05, "loss": 0.7743, "num_input_tokens_seen": 482941312, "step": 2675 }, { "epoch": 0.29294726182982567, "grad_norm": 1.1041493984287625, "learning_rate": 4.013761564943714e-05, "loss": 0.5479, "num_input_tokens_seen": 483154560, "step": 2676 }, { "epoch": 0.2930567339007636, "grad_norm": 1.198789588107467, "learning_rate": 4.013077161124857e-05, "loss": 0.6463, "num_input_tokens_seen": 483349664, "step": 2677 }, { "epoch": 0.29316620597170145, "grad_norm": 1.3632016746228637, "learning_rate": 4.012392578311864e-05, "loss": 0.7593, "num_input_tokens_seen": 483523488, "step": 2678 }, { "epoch": 0.2932756780426394, "grad_norm": 1.220364894548076, "learning_rate": 4.0117078165857205e-05, "loss": 0.5259, "num_input_tokens_seen": 483681632, "step": 2679 }, { "epoch": 0.2933851501135773, "grad_norm": 1.3278077588139237, "learning_rate": 4.0110228760274314e-05, "loss": 0.7735, "num_input_tokens_seen": 483855008, "step": 2680 }, { "epoch": 0.29349462218451516, "grad_norm": 1.2902307476254877, "learning_rate": 4.010337756718026e-05, "loss": 0.629, "num_input_tokens_seen": 484040256, "step": 2681 }, { "epoch": 0.2936040942554531, "grad_norm": 1.409322899238147, "learning_rate": 4.00965245873855e-05, "loss": 0.905, "num_input_tokens_seen": 484216320, "step": 2682 }, { "epoch": 0.293713566326391, "grad_norm": 1.1930677491525756, "learning_rate": 4.008966982170074e-05, "loss": 0.7703, "num_input_tokens_seen": 484379616, "step": 2683 }, { "epoch": 0.2938230383973289, "grad_norm": 1.3438230095479837, "learning_rate": 4.008281327093689e-05, "loss": 0.875, "num_input_tokens_seen": 484580096, "step": 2684 }, { "epoch": 0.2939325104682668, "grad_norm": 1.3292476621402416, "learning_rate": 4.007595493590506e-05, "loss": 0.6374, "num_input_tokens_seen": 484784160, "step": 2685 }, { "epoch": 0.29404198253920466, "grad_norm": 1.2374008851866503, "learning_rate": 4.006909481741659e-05, "loss": 0.7606, "num_input_tokens_seen": 485000096, "step": 2686 }, { "epoch": 0.2941514546101426, "grad_norm": 1.2284923848438971, "learning_rate": 4.006223291628301e-05, "loss": 0.6953, "num_input_tokens_seen": 485183776, "step": 2687 }, { "epoch": 0.2942609266810805, "grad_norm": 1.249527269619184, "learning_rate": 4.0055369233316063e-05, "loss": 0.6848, "num_input_tokens_seen": 485366560, "step": 2688 }, { "epoch": 0.29437039875201837, "grad_norm": 1.2452674924597926, "learning_rate": 4.004850376932772e-05, "loss": 0.605, "num_input_tokens_seen": 485536800, "step": 2689 }, { "epoch": 0.2944798708229563, "grad_norm": 1.362261590310123, "learning_rate": 4.0041636525130156e-05, "loss": 0.7007, "num_input_tokens_seen": 485678144, "step": 2690 }, { "epoch": 0.2945893428938942, "grad_norm": 1.443791346762984, "learning_rate": 4.003476750153573e-05, "loss": 0.8169, "num_input_tokens_seen": 485838304, "step": 2691 }, { "epoch": 0.2946988149648321, "grad_norm": 1.2441949039039066, "learning_rate": 4.002789669935706e-05, "loss": 0.6483, "num_input_tokens_seen": 486014144, "step": 2692 }, { "epoch": 0.29480828703577, "grad_norm": 1.2220282846289718, "learning_rate": 4.002102411940694e-05, "loss": 0.7284, "num_input_tokens_seen": 486202304, "step": 2693 }, { "epoch": 0.2949177591067079, "grad_norm": 1.3199493089248175, "learning_rate": 4.001414976249839e-05, "loss": 0.8499, "num_input_tokens_seen": 486386656, "step": 2694 }, { "epoch": 0.2950272311776458, "grad_norm": 1.1433617667748888, "learning_rate": 4.000727362944461e-05, "loss": 0.7277, "num_input_tokens_seen": 486580192, "step": 2695 }, { "epoch": 0.2951367032485837, "grad_norm": 1.1980730977405352, "learning_rate": 4.0000395721059053e-05, "loss": 0.8296, "num_input_tokens_seen": 486748416, "step": 2696 }, { "epoch": 0.29524617531952163, "grad_norm": 1.3190947113767428, "learning_rate": 3.999351603815536e-05, "loss": 0.6923, "num_input_tokens_seen": 486920224, "step": 2697 }, { "epoch": 0.2953556473904595, "grad_norm": 1.2536391214345046, "learning_rate": 3.998663458154738e-05, "loss": 0.6927, "num_input_tokens_seen": 487101664, "step": 2698 }, { "epoch": 0.2954651194613974, "grad_norm": 1.1736528770393637, "learning_rate": 3.997975135204918e-05, "loss": 0.6878, "num_input_tokens_seen": 487281088, "step": 2699 }, { "epoch": 0.29557459153233534, "grad_norm": 1.230765751916297, "learning_rate": 3.997286635047503e-05, "loss": 0.7752, "num_input_tokens_seen": 487471040, "step": 2700 }, { "epoch": 0.2956840636032732, "grad_norm": 1.3694438081670646, "learning_rate": 3.9965979577639416e-05, "loss": 0.808, "num_input_tokens_seen": 487660992, "step": 2701 }, { "epoch": 0.2957935356742111, "grad_norm": 1.3214162693408396, "learning_rate": 3.9959091034357036e-05, "loss": 0.6672, "num_input_tokens_seen": 487835264, "step": 2702 }, { "epoch": 0.295903007745149, "grad_norm": 1.4842878617258428, "learning_rate": 3.995220072144277e-05, "loss": 0.8301, "num_input_tokens_seen": 488037984, "step": 2703 }, { "epoch": 0.2960124798160869, "grad_norm": 1.2772443073539337, "learning_rate": 3.994530863971175e-05, "loss": 0.701, "num_input_tokens_seen": 488238688, "step": 2704 }, { "epoch": 0.29612195188702484, "grad_norm": 1.2658734960674227, "learning_rate": 3.993841478997928e-05, "loss": 0.6376, "num_input_tokens_seen": 488438048, "step": 2705 }, { "epoch": 0.2962314239579627, "grad_norm": 1.150922202330488, "learning_rate": 3.993151917306091e-05, "loss": 0.6891, "num_input_tokens_seen": 488614560, "step": 2706 }, { "epoch": 0.2963408960289006, "grad_norm": 1.181720522866993, "learning_rate": 3.992462178977235e-05, "loss": 0.5963, "num_input_tokens_seen": 488818624, "step": 2707 }, { "epoch": 0.29645036809983855, "grad_norm": 1.1831912121026646, "learning_rate": 3.9917722640929576e-05, "loss": 0.6143, "num_input_tokens_seen": 489001408, "step": 2708 }, { "epoch": 0.2965598401707764, "grad_norm": 1.3329772722148236, "learning_rate": 3.991082172734874e-05, "loss": 0.6866, "num_input_tokens_seen": 489194272, "step": 2709 }, { "epoch": 0.29666931224171433, "grad_norm": 1.2853141154347032, "learning_rate": 3.990391904984618e-05, "loss": 0.828, "num_input_tokens_seen": 489374592, "step": 2710 }, { "epoch": 0.29677878431265226, "grad_norm": 1.2448709640889495, "learning_rate": 3.9897014609238496e-05, "loss": 0.6197, "num_input_tokens_seen": 489537664, "step": 2711 }, { "epoch": 0.2968882563835901, "grad_norm": 1.2160153642330018, "learning_rate": 3.9890108406342455e-05, "loss": 0.7954, "num_input_tokens_seen": 489692224, "step": 2712 }, { "epoch": 0.29699772845452804, "grad_norm": 1.1961847252012583, "learning_rate": 3.988320044197507e-05, "loss": 0.5368, "num_input_tokens_seen": 489853728, "step": 2713 }, { "epoch": 0.29710720052546596, "grad_norm": 1.1531537328929198, "learning_rate": 3.987629071695351e-05, "loss": 0.4457, "num_input_tokens_seen": 490021952, "step": 2714 }, { "epoch": 0.29721667259640383, "grad_norm": 1.1474593272096414, "learning_rate": 3.9869379232095204e-05, "loss": 0.6428, "num_input_tokens_seen": 490218400, "step": 2715 }, { "epoch": 0.29732614466734175, "grad_norm": 1.3803587009884315, "learning_rate": 3.986246598821776e-05, "loss": 0.778, "num_input_tokens_seen": 490405664, "step": 2716 }, { "epoch": 0.2974356167382797, "grad_norm": 1.296375040407615, "learning_rate": 3.9855550986139e-05, "loss": 0.8182, "num_input_tokens_seen": 490602336, "step": 2717 }, { "epoch": 0.29754508880921754, "grad_norm": 1.1979701099360667, "learning_rate": 3.984863422667695e-05, "loss": 0.6533, "num_input_tokens_seen": 490776384, "step": 2718 }, { "epoch": 0.29765456088015546, "grad_norm": 1.4206412695487187, "learning_rate": 3.9841715710649865e-05, "loss": 0.7878, "num_input_tokens_seen": 490936768, "step": 2719 }, { "epoch": 0.2977640329510933, "grad_norm": 1.2309120469610413, "learning_rate": 3.983479543887618e-05, "loss": 0.698, "num_input_tokens_seen": 491087520, "step": 2720 }, { "epoch": 0.29787350502203125, "grad_norm": 1.2522714956932388, "learning_rate": 3.9827873412174565e-05, "loss": 0.6897, "num_input_tokens_seen": 491289120, "step": 2721 }, { "epoch": 0.29798297709296917, "grad_norm": 1.1859839951720257, "learning_rate": 3.9820949631363855e-05, "loss": 0.6004, "num_input_tokens_seen": 491491616, "step": 2722 }, { "epoch": 0.29809244916390704, "grad_norm": 1.3091455463395005, "learning_rate": 3.9814024097263154e-05, "loss": 0.6376, "num_input_tokens_seen": 491645056, "step": 2723 }, { "epoch": 0.29820192123484496, "grad_norm": 1.3302347816381082, "learning_rate": 3.980709681069171e-05, "loss": 0.7212, "num_input_tokens_seen": 491822016, "step": 2724 }, { "epoch": 0.2983113933057829, "grad_norm": 1.2948306412583948, "learning_rate": 3.980016777246902e-05, "loss": 0.763, "num_input_tokens_seen": 492016000, "step": 2725 }, { "epoch": 0.29842086537672075, "grad_norm": 1.1739292196450821, "learning_rate": 3.979323698341478e-05, "loss": 0.6382, "num_input_tokens_seen": 492198784, "step": 2726 }, { "epoch": 0.29853033744765867, "grad_norm": 1.6334536813007825, "learning_rate": 3.978630444434888e-05, "loss": 0.8562, "num_input_tokens_seen": 492373056, "step": 2727 }, { "epoch": 0.2986398095185966, "grad_norm": 1.2402473028515442, "learning_rate": 3.977937015609143e-05, "loss": 0.6055, "num_input_tokens_seen": 492550464, "step": 2728 }, { "epoch": 0.29874928158953445, "grad_norm": 1.3729729883491701, "learning_rate": 3.9772434119462754e-05, "loss": 0.8492, "num_input_tokens_seen": 492757888, "step": 2729 }, { "epoch": 0.2988587536604724, "grad_norm": 1.2658253263948975, "learning_rate": 3.976549633528336e-05, "loss": 0.7463, "num_input_tokens_seen": 492923200, "step": 2730 }, { "epoch": 0.2989682257314103, "grad_norm": 1.339466453345761, "learning_rate": 3.975855680437397e-05, "loss": 0.6969, "num_input_tokens_seen": 493085824, "step": 2731 }, { "epoch": 0.29907769780234816, "grad_norm": 1.2097833362991277, "learning_rate": 3.975161552755552e-05, "loss": 0.8239, "num_input_tokens_seen": 493272864, "step": 2732 }, { "epoch": 0.2991871698732861, "grad_norm": 1.16071221487678, "learning_rate": 3.974467250564916e-05, "loss": 0.575, "num_input_tokens_seen": 493435936, "step": 2733 }, { "epoch": 0.299296641944224, "grad_norm": 1.2506998487166676, "learning_rate": 3.973772773947623e-05, "loss": 0.7081, "num_input_tokens_seen": 493616032, "step": 2734 }, { "epoch": 0.2994061140151619, "grad_norm": 1.3313387863567718, "learning_rate": 3.9730781229858284e-05, "loss": 0.732, "num_input_tokens_seen": 493755136, "step": 2735 }, { "epoch": 0.2995155860860998, "grad_norm": 1.1910324761089086, "learning_rate": 3.972383297761707e-05, "loss": 0.5326, "num_input_tokens_seen": 493895360, "step": 2736 }, { "epoch": 0.29962505815703766, "grad_norm": 1.1272779290856743, "learning_rate": 3.971688298357457e-05, "loss": 0.6188, "num_input_tokens_seen": 494047008, "step": 2737 }, { "epoch": 0.2997345302279756, "grad_norm": 1.1742380350017831, "learning_rate": 3.9709931248552944e-05, "loss": 0.5434, "num_input_tokens_seen": 494216352, "step": 2738 }, { "epoch": 0.2998440022989135, "grad_norm": 1.436026771933404, "learning_rate": 3.9702977773374576e-05, "loss": 0.9036, "num_input_tokens_seen": 494405408, "step": 2739 }, { "epoch": 0.29995347436985137, "grad_norm": 1.378359980888733, "learning_rate": 3.969602255886203e-05, "loss": 0.8111, "num_input_tokens_seen": 494597600, "step": 2740 }, { "epoch": 0.3000629464407893, "grad_norm": 1.4721903987460019, "learning_rate": 3.968906560583813e-05, "loss": 0.8526, "num_input_tokens_seen": 494785536, "step": 2741 }, { "epoch": 0.3001724185117272, "grad_norm": 1.207244706029101, "learning_rate": 3.968210691512584e-05, "loss": 0.6901, "num_input_tokens_seen": 494995872, "step": 2742 }, { "epoch": 0.3002818905826651, "grad_norm": 1.3745635813927932, "learning_rate": 3.9675146487548364e-05, "loss": 0.8486, "num_input_tokens_seen": 495192096, "step": 2743 }, { "epoch": 0.300391362653603, "grad_norm": 1.2972471323736978, "learning_rate": 3.966818432392912e-05, "loss": 0.7542, "num_input_tokens_seen": 495364352, "step": 2744 }, { "epoch": 0.3005008347245409, "grad_norm": 1.248344087507699, "learning_rate": 3.9661220425091705e-05, "loss": 0.7337, "num_input_tokens_seen": 495536608, "step": 2745 }, { "epoch": 0.3006103067954788, "grad_norm": 1.2713438738706209, "learning_rate": 3.9654254791859943e-05, "loss": 0.6002, "num_input_tokens_seen": 495678176, "step": 2746 }, { "epoch": 0.3007197788664167, "grad_norm": 1.1425584057793567, "learning_rate": 3.9647287425057864e-05, "loss": 0.5132, "num_input_tokens_seen": 495846400, "step": 2747 }, { "epoch": 0.30082925093735463, "grad_norm": 1.2473124699885756, "learning_rate": 3.9640318325509676e-05, "loss": 0.6548, "num_input_tokens_seen": 496030304, "step": 2748 }, { "epoch": 0.3009387230082925, "grad_norm": 1.1660371455725762, "learning_rate": 3.9633347494039814e-05, "loss": 0.6137, "num_input_tokens_seen": 496235040, "step": 2749 }, { "epoch": 0.3010481950792304, "grad_norm": 1.0308651061366714, "learning_rate": 3.962637493147292e-05, "loss": 0.461, "num_input_tokens_seen": 496402816, "step": 2750 }, { "epoch": 0.30115766715016834, "grad_norm": 1.316228656505185, "learning_rate": 3.961940063863383e-05, "loss": 0.7966, "num_input_tokens_seen": 496564768, "step": 2751 }, { "epoch": 0.3012671392211062, "grad_norm": 1.1944550075113218, "learning_rate": 3.9612424616347596e-05, "loss": 0.6718, "num_input_tokens_seen": 496741504, "step": 2752 }, { "epoch": 0.3013766112920441, "grad_norm": 1.1562641246036032, "learning_rate": 3.9605446865439466e-05, "loss": 0.5075, "num_input_tokens_seen": 496932128, "step": 2753 }, { "epoch": 0.301486083362982, "grad_norm": 1.3434612939878854, "learning_rate": 3.959846738673488e-05, "loss": 0.7756, "num_input_tokens_seen": 497142688, "step": 2754 }, { "epoch": 0.3015955554339199, "grad_norm": 1.4076266861541245, "learning_rate": 3.9591486181059524e-05, "loss": 0.7264, "num_input_tokens_seen": 497320320, "step": 2755 }, { "epoch": 0.30170502750485784, "grad_norm": 1.431875170637343, "learning_rate": 3.958450324923924e-05, "loss": 0.8953, "num_input_tokens_seen": 497532224, "step": 2756 }, { "epoch": 0.3018144995757957, "grad_norm": 1.3143778945230709, "learning_rate": 3.9577518592100114e-05, "loss": 0.7377, "num_input_tokens_seen": 497718592, "step": 2757 }, { "epoch": 0.3019239716467336, "grad_norm": 1.1588343529311098, "learning_rate": 3.957053221046839e-05, "loss": 0.5535, "num_input_tokens_seen": 497867776, "step": 2758 }, { "epoch": 0.30203344371767155, "grad_norm": 1.157923441113594, "learning_rate": 3.956354410517057e-05, "loss": 0.6142, "num_input_tokens_seen": 498040928, "step": 2759 }, { "epoch": 0.3021429157886094, "grad_norm": 1.223469765190188, "learning_rate": 3.955655427703332e-05, "loss": 0.7489, "num_input_tokens_seen": 498231104, "step": 2760 }, { "epoch": 0.30225238785954733, "grad_norm": 1.2304451107600296, "learning_rate": 3.954956272688353e-05, "loss": 0.5415, "num_input_tokens_seen": 498414560, "step": 2761 }, { "epoch": 0.30236185993048525, "grad_norm": 1.1628901575489277, "learning_rate": 3.954256945554827e-05, "loss": 0.6429, "num_input_tokens_seen": 498616832, "step": 2762 }, { "epoch": 0.3024713320014231, "grad_norm": 1.2435258577125543, "learning_rate": 3.9535574463854856e-05, "loss": 0.6342, "num_input_tokens_seen": 498804544, "step": 2763 }, { "epoch": 0.30258080407236104, "grad_norm": 1.1399340692494973, "learning_rate": 3.952857775263077e-05, "loss": 0.5288, "num_input_tokens_seen": 498985088, "step": 2764 }, { "epoch": 0.30269027614329896, "grad_norm": 1.2431259449091676, "learning_rate": 3.9521579322703704e-05, "loss": 0.6922, "num_input_tokens_seen": 499167200, "step": 2765 }, { "epoch": 0.30279974821423683, "grad_norm": 1.1430425759576786, "learning_rate": 3.951457917490157e-05, "loss": 0.7798, "num_input_tokens_seen": 499329824, "step": 2766 }, { "epoch": 0.30290922028517475, "grad_norm": 1.2129155400615053, "learning_rate": 3.950757731005247e-05, "loss": 0.7547, "num_input_tokens_seen": 499481696, "step": 2767 }, { "epoch": 0.3030186923561127, "grad_norm": 1.2308485141421466, "learning_rate": 3.95005737289847e-05, "loss": 0.686, "num_input_tokens_seen": 499690912, "step": 2768 }, { "epoch": 0.30312816442705054, "grad_norm": 1.203205749972534, "learning_rate": 3.9493568432526787e-05, "loss": 0.7246, "num_input_tokens_seen": 499900576, "step": 2769 }, { "epoch": 0.30323763649798846, "grad_norm": 1.4052395899383154, "learning_rate": 3.948656142150742e-05, "loss": 0.8593, "num_input_tokens_seen": 500088288, "step": 2770 }, { "epoch": 0.3033471085689263, "grad_norm": 1.3050970017347647, "learning_rate": 3.947955269675554e-05, "loss": 0.6, "num_input_tokens_seen": 500239712, "step": 2771 }, { "epoch": 0.30345658063986425, "grad_norm": 1.4001421634836881, "learning_rate": 3.9472542259100264e-05, "loss": 0.7396, "num_input_tokens_seen": 500411744, "step": 2772 }, { "epoch": 0.30356605271080217, "grad_norm": 1.3268583203784354, "learning_rate": 3.94655301093709e-05, "loss": 0.7097, "num_input_tokens_seen": 500605280, "step": 2773 }, { "epoch": 0.30367552478174004, "grad_norm": 1.1802162854745175, "learning_rate": 3.945851624839697e-05, "loss": 0.6901, "num_input_tokens_seen": 500793440, "step": 2774 }, { "epoch": 0.30378499685267796, "grad_norm": 1.2840595609808139, "learning_rate": 3.9451500677008213e-05, "loss": 0.6398, "num_input_tokens_seen": 500969280, "step": 2775 }, { "epoch": 0.3038944689236159, "grad_norm": 1.3958125705977753, "learning_rate": 3.944448339603455e-05, "loss": 0.7351, "num_input_tokens_seen": 501124064, "step": 2776 }, { "epoch": 0.30400394099455375, "grad_norm": 1.2827156336984864, "learning_rate": 3.9437464406306124e-05, "loss": 0.9323, "num_input_tokens_seen": 501310880, "step": 2777 }, { "epoch": 0.30411341306549167, "grad_norm": 1.2308813462979467, "learning_rate": 3.9430443708653255e-05, "loss": 0.7429, "num_input_tokens_seen": 501473280, "step": 2778 }, { "epoch": 0.3042228851364296, "grad_norm": 1.3252107398667914, "learning_rate": 3.9423421303906474e-05, "loss": 0.7402, "num_input_tokens_seen": 501661664, "step": 2779 }, { "epoch": 0.30433235720736745, "grad_norm": 1.3374086277491333, "learning_rate": 3.9416397192896523e-05, "loss": 0.7956, "num_input_tokens_seen": 501830784, "step": 2780 }, { "epoch": 0.3044418292783054, "grad_norm": 1.4002030378658805, "learning_rate": 3.940937137645435e-05, "loss": 0.881, "num_input_tokens_seen": 502003712, "step": 2781 }, { "epoch": 0.3045513013492433, "grad_norm": 1.3412467795099594, "learning_rate": 3.94023438554111e-05, "loss": 0.8933, "num_input_tokens_seen": 502203296, "step": 2782 }, { "epoch": 0.30466077342018116, "grad_norm": 1.1696791357240222, "learning_rate": 3.939531463059809e-05, "loss": 0.8568, "num_input_tokens_seen": 502424384, "step": 2783 }, { "epoch": 0.3047702454911191, "grad_norm": 1.5579897826548978, "learning_rate": 3.9388283702846876e-05, "loss": 0.9976, "num_input_tokens_seen": 502628672, "step": 2784 }, { "epoch": 0.304879717562057, "grad_norm": 1.157035457750251, "learning_rate": 3.9381251072989216e-05, "loss": 0.6313, "num_input_tokens_seen": 502828480, "step": 2785 }, { "epoch": 0.3049891896329949, "grad_norm": 1.2130680169504795, "learning_rate": 3.937421674185704e-05, "loss": 0.7279, "num_input_tokens_seen": 503030752, "step": 2786 }, { "epoch": 0.3050986617039328, "grad_norm": 1.2109145623772635, "learning_rate": 3.9367180710282504e-05, "loss": 0.7397, "num_input_tokens_seen": 503205472, "step": 2787 }, { "epoch": 0.30520813377487066, "grad_norm": 1.2886757935287783, "learning_rate": 3.936014297909796e-05, "loss": 0.839, "num_input_tokens_seen": 503388032, "step": 2788 }, { "epoch": 0.3053176058458086, "grad_norm": 1.266629326838865, "learning_rate": 3.935310354913595e-05, "loss": 0.6767, "num_input_tokens_seen": 503568352, "step": 2789 }, { "epoch": 0.3054270779167465, "grad_norm": 1.1967355572219673, "learning_rate": 3.934606242122922e-05, "loss": 0.6014, "num_input_tokens_seen": 503736576, "step": 2790 }, { "epoch": 0.30553654998768437, "grad_norm": 1.1582419274763311, "learning_rate": 3.9339019596210746e-05, "loss": 0.6478, "num_input_tokens_seen": 503934592, "step": 2791 }, { "epoch": 0.3056460220586223, "grad_norm": 1.1806742097404361, "learning_rate": 3.933197507491366e-05, "loss": 0.5422, "num_input_tokens_seen": 504105280, "step": 2792 }, { "epoch": 0.3057554941295602, "grad_norm": 1.5822059817424212, "learning_rate": 3.932492885817132e-05, "loss": 0.7855, "num_input_tokens_seen": 504296800, "step": 2793 }, { "epoch": 0.3058649662004981, "grad_norm": 1.3235000865779167, "learning_rate": 3.9317880946817274e-05, "loss": 0.7483, "num_input_tokens_seen": 504493696, "step": 2794 }, { "epoch": 0.305974438271436, "grad_norm": 1.2874384967224717, "learning_rate": 3.931083134168529e-05, "loss": 0.7795, "num_input_tokens_seen": 504699552, "step": 2795 }, { "epoch": 0.3060839103423739, "grad_norm": 1.1721555246920785, "learning_rate": 3.9303780043609315e-05, "loss": 0.596, "num_input_tokens_seen": 504885024, "step": 2796 }, { "epoch": 0.3061933824133118, "grad_norm": 1.2458564747646057, "learning_rate": 3.9296727053423506e-05, "loss": 0.7177, "num_input_tokens_seen": 505080128, "step": 2797 }, { "epoch": 0.3063028544842497, "grad_norm": 1.265672047058503, "learning_rate": 3.9289672371962214e-05, "loss": 0.6644, "num_input_tokens_seen": 505268288, "step": 2798 }, { "epoch": 0.30641232655518763, "grad_norm": 1.0769697994010887, "learning_rate": 3.928261600006e-05, "loss": 0.6082, "num_input_tokens_seen": 505439200, "step": 2799 }, { "epoch": 0.3065217986261255, "grad_norm": 1.2494738902141835, "learning_rate": 3.9275557938551614e-05, "loss": 0.8565, "num_input_tokens_seen": 505626016, "step": 2800 }, { "epoch": 0.3066312706970634, "grad_norm": 1.1779555181126227, "learning_rate": 3.926849818827202e-05, "loss": 0.5678, "num_input_tokens_seen": 505794688, "step": 2801 }, { "epoch": 0.30674074276800134, "grad_norm": 1.0817450409950333, "learning_rate": 3.9261436750056364e-05, "loss": 0.5778, "num_input_tokens_seen": 505969408, "step": 2802 }, { "epoch": 0.3068502148389392, "grad_norm": 1.2392583415674354, "learning_rate": 3.925437362474001e-05, "loss": 0.7556, "num_input_tokens_seen": 506154880, "step": 2803 }, { "epoch": 0.3069596869098771, "grad_norm": 1.2336917869647606, "learning_rate": 3.924730881315849e-05, "loss": 0.7718, "num_input_tokens_seen": 506317280, "step": 2804 }, { "epoch": 0.307069158980815, "grad_norm": 1.2292679028389513, "learning_rate": 3.9240242316147586e-05, "loss": 0.6171, "num_input_tokens_seen": 506506112, "step": 2805 }, { "epoch": 0.3071786310517529, "grad_norm": 1.2992760366356375, "learning_rate": 3.923317413454324e-05, "loss": 0.7287, "num_input_tokens_seen": 506709504, "step": 2806 }, { "epoch": 0.30728810312269084, "grad_norm": 1.263126969220317, "learning_rate": 3.922610426918159e-05, "loss": 0.7014, "num_input_tokens_seen": 506902816, "step": 2807 }, { "epoch": 0.3073975751936287, "grad_norm": 1.2778081953648366, "learning_rate": 3.921903272089901e-05, "loss": 0.6937, "num_input_tokens_seen": 507052000, "step": 2808 }, { "epoch": 0.3075070472645666, "grad_norm": 1.3727182182171862, "learning_rate": 3.9211959490532044e-05, "loss": 0.8297, "num_input_tokens_seen": 507236800, "step": 2809 }, { "epoch": 0.30761651933550455, "grad_norm": 1.1163472570445327, "learning_rate": 3.920488457891743e-05, "loss": 0.4767, "num_input_tokens_seen": 507393824, "step": 2810 }, { "epoch": 0.3077259914064424, "grad_norm": 1.4131663733882955, "learning_rate": 3.919780798689213e-05, "loss": 0.8102, "num_input_tokens_seen": 507573024, "step": 2811 }, { "epoch": 0.30783546347738033, "grad_norm": 1.202328604143609, "learning_rate": 3.919072971529329e-05, "loss": 0.7186, "num_input_tokens_seen": 507764320, "step": 2812 }, { "epoch": 0.30794493554831825, "grad_norm": 1.4669761497363425, "learning_rate": 3.918364976495825e-05, "loss": 0.6168, "num_input_tokens_seen": 507919552, "step": 2813 }, { "epoch": 0.3080544076192561, "grad_norm": 1.3330441648116689, "learning_rate": 3.917656813672456e-05, "loss": 0.7816, "num_input_tokens_seen": 508092256, "step": 2814 }, { "epoch": 0.30816387969019404, "grad_norm": 1.2826859870629974, "learning_rate": 3.916948483142996e-05, "loss": 0.6203, "num_input_tokens_seen": 508259360, "step": 2815 }, { "epoch": 0.30827335176113196, "grad_norm": 1.5309028231915265, "learning_rate": 3.916239984991239e-05, "loss": 0.8996, "num_input_tokens_seen": 508426912, "step": 2816 }, { "epoch": 0.30838282383206983, "grad_norm": 1.2914862301631833, "learning_rate": 3.915531319300999e-05, "loss": 0.7073, "num_input_tokens_seen": 508605888, "step": 2817 }, { "epoch": 0.30849229590300775, "grad_norm": 1.2094850258469958, "learning_rate": 3.9148224861561105e-05, "loss": 0.6224, "num_input_tokens_seen": 508782176, "step": 2818 }, { "epoch": 0.3086017679739457, "grad_norm": 1.1037355457020452, "learning_rate": 3.914113485640426e-05, "loss": 0.6324, "num_input_tokens_seen": 508975040, "step": 2819 }, { "epoch": 0.30871124004488354, "grad_norm": 1.3335807938217346, "learning_rate": 3.91340431783782e-05, "loss": 0.7823, "num_input_tokens_seen": 509184928, "step": 2820 }, { "epoch": 0.30882071211582146, "grad_norm": 1.2758773178867362, "learning_rate": 3.912694982832185e-05, "loss": 0.7183, "num_input_tokens_seen": 509351360, "step": 2821 }, { "epoch": 0.3089301841867593, "grad_norm": 1.2216314829243806, "learning_rate": 3.9119854807074336e-05, "loss": 0.7218, "num_input_tokens_seen": 509547584, "step": 2822 }, { "epoch": 0.30903965625769725, "grad_norm": 1.1282545274254412, "learning_rate": 3.911275811547499e-05, "loss": 0.4951, "num_input_tokens_seen": 509699680, "step": 2823 }, { "epoch": 0.30914912832863517, "grad_norm": 1.3541722322007275, "learning_rate": 3.910565975436335e-05, "loss": 0.8541, "num_input_tokens_seen": 509892992, "step": 2824 }, { "epoch": 0.30925860039957304, "grad_norm": 1.2681112946154651, "learning_rate": 3.909855972457912e-05, "loss": 0.8201, "num_input_tokens_seen": 510073984, "step": 2825 }, { "epoch": 0.30936807247051096, "grad_norm": 1.233781454580069, "learning_rate": 3.9091458026962226e-05, "loss": 0.6367, "num_input_tokens_seen": 510246464, "step": 2826 }, { "epoch": 0.3094775445414489, "grad_norm": 1.260625785104926, "learning_rate": 3.9084354662352784e-05, "loss": 0.7428, "num_input_tokens_seen": 510419616, "step": 2827 }, { "epoch": 0.30958701661238675, "grad_norm": 1.2801370206152476, "learning_rate": 3.9077249631591106e-05, "loss": 0.6168, "num_input_tokens_seen": 510577984, "step": 2828 }, { "epoch": 0.30969648868332467, "grad_norm": 1.3069115145529298, "learning_rate": 3.9070142935517714e-05, "loss": 0.9965, "num_input_tokens_seen": 510769056, "step": 2829 }, { "epoch": 0.3098059607542626, "grad_norm": 1.3047073271381269, "learning_rate": 3.906303457497331e-05, "loss": 0.7856, "num_input_tokens_seen": 510964832, "step": 2830 }, { "epoch": 0.30991543282520045, "grad_norm": 1.1555509774003263, "learning_rate": 3.9055924550798806e-05, "loss": 0.6376, "num_input_tokens_seen": 511126784, "step": 2831 }, { "epoch": 0.3100249048961384, "grad_norm": 1.3687031655159105, "learning_rate": 3.904881286383529e-05, "loss": 0.6484, "num_input_tokens_seen": 511289184, "step": 2832 }, { "epoch": 0.3101343769670763, "grad_norm": 1.3479401220357263, "learning_rate": 3.904169951492407e-05, "loss": 0.8467, "num_input_tokens_seen": 511478464, "step": 2833 }, { "epoch": 0.31024384903801416, "grad_norm": 1.1664516841269457, "learning_rate": 3.903458450490664e-05, "loss": 0.6678, "num_input_tokens_seen": 511657664, "step": 2834 }, { "epoch": 0.3103533211089521, "grad_norm": 1.4522732935616853, "learning_rate": 3.9027467834624696e-05, "loss": 0.7997, "num_input_tokens_seen": 511820288, "step": 2835 }, { "epoch": 0.31046279317989, "grad_norm": 1.3214080148003773, "learning_rate": 3.902034950492012e-05, "loss": 0.7553, "num_input_tokens_seen": 511995680, "step": 2836 }, { "epoch": 0.3105722652508279, "grad_norm": 1.7139304820488324, "learning_rate": 3.9013229516635e-05, "loss": 0.7594, "num_input_tokens_seen": 512193472, "step": 2837 }, { "epoch": 0.3106817373217658, "grad_norm": 1.2462670376147742, "learning_rate": 3.900610787061162e-05, "loss": 0.8091, "num_input_tokens_seen": 512401344, "step": 2838 }, { "epoch": 0.31079120939270366, "grad_norm": 1.4507880338593413, "learning_rate": 3.899898456769245e-05, "loss": 0.6557, "num_input_tokens_seen": 512578752, "step": 2839 }, { "epoch": 0.3109006814636416, "grad_norm": 1.2575955411135515, "learning_rate": 3.899185960872016e-05, "loss": 0.826, "num_input_tokens_seen": 512783488, "step": 2840 }, { "epoch": 0.3110101535345795, "grad_norm": 1.283605596635124, "learning_rate": 3.8984732994537644e-05, "loss": 0.7326, "num_input_tokens_seen": 512990688, "step": 2841 }, { "epoch": 0.31111962560551737, "grad_norm": 1.28340498034804, "learning_rate": 3.8977604725987936e-05, "loss": 0.6607, "num_input_tokens_seen": 513169216, "step": 2842 }, { "epoch": 0.3112290976764553, "grad_norm": 1.2908211473579725, "learning_rate": 3.897047480391431e-05, "loss": 0.6077, "num_input_tokens_seen": 513366112, "step": 2843 }, { "epoch": 0.3113385697473932, "grad_norm": 1.2161637276987791, "learning_rate": 3.8963343229160235e-05, "loss": 0.5833, "num_input_tokens_seen": 513538144, "step": 2844 }, { "epoch": 0.3114480418183311, "grad_norm": 1.2594979408401794, "learning_rate": 3.8956210002569334e-05, "loss": 0.5917, "num_input_tokens_seen": 513708608, "step": 2845 }, { "epoch": 0.311557513889269, "grad_norm": 1.1642063231234394, "learning_rate": 3.894907512498548e-05, "loss": 0.6332, "num_input_tokens_seen": 513868096, "step": 2846 }, { "epoch": 0.3116669859602069, "grad_norm": 1.1964110269459862, "learning_rate": 3.89419385972527e-05, "loss": 0.6904, "num_input_tokens_seen": 514042368, "step": 2847 }, { "epoch": 0.3117764580311448, "grad_norm": 1.155159358553559, "learning_rate": 3.893480042021523e-05, "loss": 0.6416, "num_input_tokens_seen": 514250016, "step": 2848 }, { "epoch": 0.3118859301020827, "grad_norm": 1.4287003407896923, "learning_rate": 3.892766059471752e-05, "loss": 0.7677, "num_input_tokens_seen": 514442656, "step": 2849 }, { "epoch": 0.31199540217302063, "grad_norm": 1.433286878291355, "learning_rate": 3.892051912160418e-05, "loss": 0.7286, "num_input_tokens_seen": 514614912, "step": 2850 }, { "epoch": 0.3121048742439585, "grad_norm": 1.3482319362426942, "learning_rate": 3.8913376001720046e-05, "loss": 0.8638, "num_input_tokens_seen": 514799488, "step": 2851 }, { "epoch": 0.3122143463148964, "grad_norm": 1.3229317952089519, "learning_rate": 3.890623123591013e-05, "loss": 0.7756, "num_input_tokens_seen": 514996832, "step": 2852 }, { "epoch": 0.31232381838583434, "grad_norm": 1.2188738246517326, "learning_rate": 3.889908482501963e-05, "loss": 0.5609, "num_input_tokens_seen": 515172896, "step": 2853 }, { "epoch": 0.3124332904567722, "grad_norm": 1.3931576671851975, "learning_rate": 3.889193676989398e-05, "loss": 0.8509, "num_input_tokens_seen": 515345152, "step": 2854 }, { "epoch": 0.3125427625277101, "grad_norm": 1.318655882093791, "learning_rate": 3.888478707137875e-05, "loss": 0.8009, "num_input_tokens_seen": 515557952, "step": 2855 }, { "epoch": 0.312652234598648, "grad_norm": 1.2231749875356148, "learning_rate": 3.8877635730319774e-05, "loss": 0.6578, "num_input_tokens_seen": 515750592, "step": 2856 }, { "epoch": 0.3127617066695859, "grad_norm": 1.4184856050801002, "learning_rate": 3.8870482747563006e-05, "loss": 0.8557, "num_input_tokens_seen": 515917248, "step": 2857 }, { "epoch": 0.31287117874052384, "grad_norm": 1.3347437471913866, "learning_rate": 3.886332812395465e-05, "loss": 0.9126, "num_input_tokens_seen": 516117952, "step": 2858 }, { "epoch": 0.3129806508114617, "grad_norm": 1.2627497850839706, "learning_rate": 3.885617186034107e-05, "loss": 0.7197, "num_input_tokens_seen": 516309248, "step": 2859 }, { "epoch": 0.3130901228823996, "grad_norm": 1.200368870228628, "learning_rate": 3.884901395756886e-05, "loss": 0.8931, "num_input_tokens_seen": 516502112, "step": 2860 }, { "epoch": 0.31319959495333755, "grad_norm": 1.373580546736005, "learning_rate": 3.884185441648477e-05, "loss": 1.0365, "num_input_tokens_seen": 516731488, "step": 2861 }, { "epoch": 0.3133090670242754, "grad_norm": 1.1777058308496795, "learning_rate": 3.883469323793576e-05, "loss": 0.7319, "num_input_tokens_seen": 516924352, "step": 2862 }, { "epoch": 0.31341853909521333, "grad_norm": 1.3714683433992967, "learning_rate": 3.882753042276899e-05, "loss": 0.9392, "num_input_tokens_seen": 517138496, "step": 2863 }, { "epoch": 0.31352801116615125, "grad_norm": 1.232597084709581, "learning_rate": 3.882036597183181e-05, "loss": 0.8536, "num_input_tokens_seen": 517339872, "step": 2864 }, { "epoch": 0.3136374832370891, "grad_norm": 1.1441749304528332, "learning_rate": 3.881319988597174e-05, "loss": 0.8045, "num_input_tokens_seen": 517532960, "step": 2865 }, { "epoch": 0.31374695530802704, "grad_norm": 1.193613328150183, "learning_rate": 3.8806032166036545e-05, "loss": 0.8623, "num_input_tokens_seen": 517729408, "step": 2866 }, { "epoch": 0.31385642737896496, "grad_norm": 1.244072832928473, "learning_rate": 3.8798862812874136e-05, "loss": 0.7046, "num_input_tokens_seen": 517910624, "step": 2867 }, { "epoch": 0.31396589944990283, "grad_norm": 1.4205299135500553, "learning_rate": 3.8791691827332627e-05, "loss": 0.6361, "num_input_tokens_seen": 518083328, "step": 2868 }, { "epoch": 0.31407537152084075, "grad_norm": 1.296264090907466, "learning_rate": 3.8784519210260343e-05, "loss": 0.6417, "num_input_tokens_seen": 518250880, "step": 2869 }, { "epoch": 0.3141848435917787, "grad_norm": 1.4433241079768309, "learning_rate": 3.877734496250579e-05, "loss": 0.7183, "num_input_tokens_seen": 518419776, "step": 2870 }, { "epoch": 0.31429431566271654, "grad_norm": 1.2492161200270842, "learning_rate": 3.877016908491767e-05, "loss": 0.6618, "num_input_tokens_seen": 518585088, "step": 2871 }, { "epoch": 0.31440378773365446, "grad_norm": 1.4161940047935724, "learning_rate": 3.8762991578344864e-05, "loss": 0.8113, "num_input_tokens_seen": 518769888, "step": 2872 }, { "epoch": 0.3145132598045923, "grad_norm": 1.8311604787147309, "learning_rate": 3.8755812443636466e-05, "loss": 0.6016, "num_input_tokens_seen": 518951328, "step": 2873 }, { "epoch": 0.31462273187553025, "grad_norm": 1.3035620215014943, "learning_rate": 3.8748631681641757e-05, "loss": 0.7797, "num_input_tokens_seen": 519157408, "step": 2874 }, { "epoch": 0.31473220394646817, "grad_norm": 1.1980076741464734, "learning_rate": 3.8741449293210194e-05, "loss": 0.6789, "num_input_tokens_seen": 519308608, "step": 2875 }, { "epoch": 0.31484167601740604, "grad_norm": 1.3566318326050608, "learning_rate": 3.8734265279191455e-05, "loss": 0.7589, "num_input_tokens_seen": 519516032, "step": 2876 }, { "epoch": 0.31495114808834396, "grad_norm": 1.176058866354245, "learning_rate": 3.872707964043539e-05, "loss": 0.5528, "num_input_tokens_seen": 519686944, "step": 2877 }, { "epoch": 0.3150606201592819, "grad_norm": 1.3308301415800272, "learning_rate": 3.871989237779204e-05, "loss": 0.6317, "num_input_tokens_seen": 519847776, "step": 2878 }, { "epoch": 0.31517009223021974, "grad_norm": 1.242963632396854, "learning_rate": 3.8712703492111656e-05, "loss": 0.5936, "num_input_tokens_seen": 520044224, "step": 2879 }, { "epoch": 0.31527956430115767, "grad_norm": 1.1353541307405528, "learning_rate": 3.8705512984244665e-05, "loss": 0.5583, "num_input_tokens_seen": 520232160, "step": 2880 }, { "epoch": 0.3153890363720956, "grad_norm": 1.3244633631551577, "learning_rate": 3.869832085504168e-05, "loss": 0.6576, "num_input_tokens_seen": 520425472, "step": 2881 }, { "epoch": 0.31549850844303345, "grad_norm": 1.139419702687726, "learning_rate": 3.869112710535353e-05, "loss": 0.6297, "num_input_tokens_seen": 520643872, "step": 2882 }, { "epoch": 0.3156079805139714, "grad_norm": 1.240326260744834, "learning_rate": 3.868393173603122e-05, "loss": 0.5198, "num_input_tokens_seen": 520814336, "step": 2883 }, { "epoch": 0.3157174525849093, "grad_norm": 1.3332804530306877, "learning_rate": 3.867673474792593e-05, "loss": 0.6114, "num_input_tokens_seen": 520967552, "step": 2884 }, { "epoch": 0.31582692465584716, "grad_norm": 1.2995248861685655, "learning_rate": 3.866953614188908e-05, "loss": 0.6074, "num_input_tokens_seen": 521151008, "step": 2885 }, { "epoch": 0.3159363967267851, "grad_norm": 1.1994337951497984, "learning_rate": 3.866233591877223e-05, "loss": 0.5478, "num_input_tokens_seen": 521344768, "step": 2886 }, { "epoch": 0.316045868797723, "grad_norm": 1.1975163606390136, "learning_rate": 3.865513407942716e-05, "loss": 0.4603, "num_input_tokens_seen": 521489472, "step": 2887 }, { "epoch": 0.3161553408686609, "grad_norm": 1.3290706185264014, "learning_rate": 3.864793062470583e-05, "loss": 0.792, "num_input_tokens_seen": 521684352, "step": 2888 }, { "epoch": 0.3162648129395988, "grad_norm": 1.2928291545087474, "learning_rate": 3.864072555546041e-05, "loss": 0.8604, "num_input_tokens_seen": 521887296, "step": 2889 }, { "epoch": 0.31637428501053666, "grad_norm": 1.2311920946321018, "learning_rate": 3.863351887254322e-05, "loss": 0.787, "num_input_tokens_seen": 522084640, "step": 2890 }, { "epoch": 0.3164837570814746, "grad_norm": 1.2910149324677842, "learning_rate": 3.862631057680681e-05, "loss": 0.691, "num_input_tokens_seen": 522277504, "step": 2891 }, { "epoch": 0.3165932291524125, "grad_norm": 1.3461548776413037, "learning_rate": 3.8619100669103916e-05, "loss": 0.7287, "num_input_tokens_seen": 522471488, "step": 2892 }, { "epoch": 0.31670270122335037, "grad_norm": 1.3088271203404676, "learning_rate": 3.861188915028744e-05, "loss": 0.7116, "num_input_tokens_seen": 522693024, "step": 2893 }, { "epoch": 0.3168121732942883, "grad_norm": 1.3464778004688591, "learning_rate": 3.8604676021210506e-05, "loss": 0.7532, "num_input_tokens_seen": 522905600, "step": 2894 }, { "epoch": 0.3169216453652262, "grad_norm": 1.2650966718924679, "learning_rate": 3.85974612827264e-05, "loss": 0.5196, "num_input_tokens_seen": 523067552, "step": 2895 }, { "epoch": 0.3170311174361641, "grad_norm": 1.2260143731816466, "learning_rate": 3.859024493568862e-05, "loss": 0.8541, "num_input_tokens_seen": 523273408, "step": 2896 }, { "epoch": 0.317140589507102, "grad_norm": 1.3672553345336749, "learning_rate": 3.8583026980950846e-05, "loss": 0.7167, "num_input_tokens_seen": 523433344, "step": 2897 }, { "epoch": 0.3172500615780399, "grad_norm": 1.216689299455663, "learning_rate": 3.857580741936695e-05, "loss": 0.5849, "num_input_tokens_seen": 523590816, "step": 2898 }, { "epoch": 0.3173595336489778, "grad_norm": 1.3296616584248075, "learning_rate": 3.856858625179098e-05, "loss": 0.7044, "num_input_tokens_seen": 523795776, "step": 2899 }, { "epoch": 0.3174690057199157, "grad_norm": 1.2090137582884295, "learning_rate": 3.85613634790772e-05, "loss": 0.7911, "num_input_tokens_seen": 523971840, "step": 2900 }, { "epoch": 0.31757847779085363, "grad_norm": 1.305775993531043, "learning_rate": 3.8554139102080044e-05, "loss": 0.7645, "num_input_tokens_seen": 524167840, "step": 2901 }, { "epoch": 0.3176879498617915, "grad_norm": 1.4520550962719194, "learning_rate": 3.854691312165414e-05, "loss": 0.6904, "num_input_tokens_seen": 524330240, "step": 2902 }, { "epoch": 0.3177974219327294, "grad_norm": 1.2667359550665633, "learning_rate": 3.8539685538654325e-05, "loss": 0.7055, "num_input_tokens_seen": 524507200, "step": 2903 }, { "epoch": 0.31790689400366734, "grad_norm": 1.32829198348534, "learning_rate": 3.853245635393558e-05, "loss": 0.7334, "num_input_tokens_seen": 524675648, "step": 2904 }, { "epoch": 0.3180163660746052, "grad_norm": 1.2265060800587502, "learning_rate": 3.852522556835313e-05, "loss": 0.6949, "num_input_tokens_seen": 524858432, "step": 2905 }, { "epoch": 0.3181258381455431, "grad_norm": 1.1655427878179225, "learning_rate": 3.8517993182762334e-05, "loss": 0.6758, "num_input_tokens_seen": 525032480, "step": 2906 }, { "epoch": 0.318235310216481, "grad_norm": 1.1949752631081145, "learning_rate": 3.8510759198018805e-05, "loss": 0.6856, "num_input_tokens_seen": 525216832, "step": 2907 }, { "epoch": 0.3183447822874189, "grad_norm": 1.2822012742651019, "learning_rate": 3.8503523614978274e-05, "loss": 0.6203, "num_input_tokens_seen": 525407008, "step": 2908 }, { "epoch": 0.31845425435835684, "grad_norm": 1.358295011662404, "learning_rate": 3.849628643449673e-05, "loss": 0.7456, "num_input_tokens_seen": 525571648, "step": 2909 }, { "epoch": 0.3185637264292947, "grad_norm": 1.1868348015299945, "learning_rate": 3.8489047657430286e-05, "loss": 0.5976, "num_input_tokens_seen": 525740096, "step": 2910 }, { "epoch": 0.3186731985002326, "grad_norm": 1.3253449458832625, "learning_rate": 3.84818072846353e-05, "loss": 0.8062, "num_input_tokens_seen": 525909216, "step": 2911 }, { "epoch": 0.31878267057117055, "grad_norm": 1.4743212285733298, "learning_rate": 3.8474565316968284e-05, "loss": 0.7807, "num_input_tokens_seen": 526065344, "step": 2912 }, { "epoch": 0.3188921426421084, "grad_norm": 1.470736519686431, "learning_rate": 3.846732175528595e-05, "loss": 0.8759, "num_input_tokens_seen": 526248576, "step": 2913 }, { "epoch": 0.31900161471304633, "grad_norm": 1.4531413251718641, "learning_rate": 3.84600766004452e-05, "loss": 0.7472, "num_input_tokens_seen": 526432704, "step": 2914 }, { "epoch": 0.31911108678398425, "grad_norm": 1.376690605515789, "learning_rate": 3.845282985330311e-05, "loss": 0.7634, "num_input_tokens_seen": 526586368, "step": 2915 }, { "epoch": 0.3192205588549221, "grad_norm": 1.3395585626869928, "learning_rate": 3.8445581514716977e-05, "loss": 0.601, "num_input_tokens_seen": 526721216, "step": 2916 }, { "epoch": 0.31933003092586004, "grad_norm": 1.4855759430498192, "learning_rate": 3.843833158554425e-05, "loss": 0.8711, "num_input_tokens_seen": 526950592, "step": 2917 }, { "epoch": 0.31943950299679796, "grad_norm": 1.2665185952641846, "learning_rate": 3.843108006664259e-05, "loss": 0.7113, "num_input_tokens_seen": 527112096, "step": 2918 }, { "epoch": 0.31954897506773583, "grad_norm": 1.1016411178346455, "learning_rate": 3.8423826958869825e-05, "loss": 0.7535, "num_input_tokens_seen": 527303616, "step": 2919 }, { "epoch": 0.31965844713867375, "grad_norm": 1.13504970231652, "learning_rate": 3.841657226308399e-05, "loss": 0.8099, "num_input_tokens_seen": 527494688, "step": 2920 }, { "epoch": 0.3197679192096117, "grad_norm": 1.4004289736212172, "learning_rate": 3.840931598014332e-05, "loss": 0.5972, "num_input_tokens_seen": 527643872, "step": 2921 }, { "epoch": 0.31987739128054954, "grad_norm": 1.1545392345173537, "learning_rate": 3.840205811090619e-05, "loss": 0.5544, "num_input_tokens_seen": 527814336, "step": 2922 }, { "epoch": 0.31998686335148746, "grad_norm": 1.2966626004855641, "learning_rate": 3.8394798656231215e-05, "loss": 0.8348, "num_input_tokens_seen": 528016160, "step": 2923 }, { "epoch": 0.3200963354224253, "grad_norm": 1.2900420195367321, "learning_rate": 3.8387537616977165e-05, "loss": 0.9048, "num_input_tokens_seen": 528228064, "step": 2924 }, { "epoch": 0.32020580749336325, "grad_norm": 1.0958800691470185, "learning_rate": 3.8380274994003e-05, "loss": 0.562, "num_input_tokens_seen": 528374112, "step": 2925 }, { "epoch": 0.32031527956430117, "grad_norm": 1.1362078590258247, "learning_rate": 3.837301078816789e-05, "loss": 0.7073, "num_input_tokens_seen": 528534944, "step": 2926 }, { "epoch": 0.32042475163523904, "grad_norm": 1.3234615805640855, "learning_rate": 3.8365745000331164e-05, "loss": 0.6107, "num_input_tokens_seen": 528705856, "step": 2927 }, { "epoch": 0.32053422370617696, "grad_norm": 1.2809912257433766, "learning_rate": 3.8358477631352364e-05, "loss": 0.5801, "num_input_tokens_seen": 528861312, "step": 2928 }, { "epoch": 0.3206436957771149, "grad_norm": 1.415527006888111, "learning_rate": 3.8351208682091185e-05, "loss": 0.8095, "num_input_tokens_seen": 529051040, "step": 2929 }, { "epoch": 0.32075316784805274, "grad_norm": 1.2436952377608494, "learning_rate": 3.834393815340754e-05, "loss": 0.6655, "num_input_tokens_seen": 529252416, "step": 2930 }, { "epoch": 0.32086263991899067, "grad_norm": 1.2363545443588135, "learning_rate": 3.833666604616153e-05, "loss": 0.6824, "num_input_tokens_seen": 529439904, "step": 2931 }, { "epoch": 0.3209721119899286, "grad_norm": 1.3598062116334892, "learning_rate": 3.832939236121342e-05, "loss": 0.7335, "num_input_tokens_seen": 529600512, "step": 2932 }, { "epoch": 0.32108158406086645, "grad_norm": 1.3167237032032926, "learning_rate": 3.8322117099423674e-05, "loss": 0.7757, "num_input_tokens_seen": 529796064, "step": 2933 }, { "epoch": 0.3211910561318044, "grad_norm": 1.3149878673936566, "learning_rate": 3.8314840261652954e-05, "loss": 0.7436, "num_input_tokens_seen": 529973024, "step": 2934 }, { "epoch": 0.3213005282027423, "grad_norm": 1.2284560439273244, "learning_rate": 3.8307561848762066e-05, "loss": 0.6174, "num_input_tokens_seen": 530156928, "step": 2935 }, { "epoch": 0.32141000027368016, "grad_norm": 1.2419657479291593, "learning_rate": 3.8300281861612056e-05, "loss": 0.8203, "num_input_tokens_seen": 530369056, "step": 2936 }, { "epoch": 0.3215194723446181, "grad_norm": 1.1790565167438787, "learning_rate": 3.829300030106413e-05, "loss": 0.69, "num_input_tokens_seen": 530553408, "step": 2937 }, { "epoch": 0.321628944415556, "grad_norm": 1.281832399818227, "learning_rate": 3.828571716797968e-05, "loss": 0.836, "num_input_tokens_seen": 530759040, "step": 2938 }, { "epoch": 0.3217384164864939, "grad_norm": 1.3514176660453145, "learning_rate": 3.827843246322029e-05, "loss": 0.668, "num_input_tokens_seen": 530961536, "step": 2939 }, { "epoch": 0.3218478885574318, "grad_norm": 1.3320927759543233, "learning_rate": 3.827114618764772e-05, "loss": 0.9635, "num_input_tokens_seen": 531151712, "step": 2940 }, { "epoch": 0.32195736062836966, "grad_norm": 1.2956389546137401, "learning_rate": 3.8263858342123936e-05, "loss": 0.8707, "num_input_tokens_seen": 531353312, "step": 2941 }, { "epoch": 0.3220668326993076, "grad_norm": 1.1021546199556906, "learning_rate": 3.8256568927511047e-05, "loss": 0.7634, "num_input_tokens_seen": 531543488, "step": 2942 }, { "epoch": 0.3221763047702455, "grad_norm": 1.314936193433898, "learning_rate": 3.8249277944671415e-05, "loss": 0.7338, "num_input_tokens_seen": 531733216, "step": 2943 }, { "epoch": 0.32228577684118337, "grad_norm": 1.272810003220591, "learning_rate": 3.824198539446752e-05, "loss": 0.7396, "num_input_tokens_seen": 531911520, "step": 2944 }, { "epoch": 0.3223952489121213, "grad_norm": 1.1597907521348356, "learning_rate": 3.823469127776208e-05, "loss": 0.6152, "num_input_tokens_seen": 532068096, "step": 2945 }, { "epoch": 0.3225047209830592, "grad_norm": 1.19287803919578, "learning_rate": 3.822739559541795e-05, "loss": 0.5899, "num_input_tokens_seen": 532240576, "step": 2946 }, { "epoch": 0.3226141930539971, "grad_norm": 1.2761925225387196, "learning_rate": 3.8220098348298204e-05, "loss": 0.6232, "num_input_tokens_seen": 532431648, "step": 2947 }, { "epoch": 0.322723665124935, "grad_norm": 1.3478789656887744, "learning_rate": 3.8212799537266105e-05, "loss": 0.7026, "num_input_tokens_seen": 532594944, "step": 2948 }, { "epoch": 0.3228331371958729, "grad_norm": 1.264189624406083, "learning_rate": 3.8205499163185074e-05, "loss": 0.796, "num_input_tokens_seen": 532771008, "step": 2949 }, { "epoch": 0.3229426092668108, "grad_norm": 1.2969363094683026, "learning_rate": 3.819819722691874e-05, "loss": 0.7151, "num_input_tokens_seen": 532947072, "step": 2950 }, { "epoch": 0.3230520813377487, "grad_norm": 1.3675286439619005, "learning_rate": 3.8190893729330904e-05, "loss": 0.8587, "num_input_tokens_seen": 533166592, "step": 2951 }, { "epoch": 0.32316155340868663, "grad_norm": 1.4200734893157372, "learning_rate": 3.8183588671285556e-05, "loss": 0.7172, "num_input_tokens_seen": 533376480, "step": 2952 }, { "epoch": 0.3232710254796245, "grad_norm": 1.1347139382860874, "learning_rate": 3.817628205364687e-05, "loss": 0.6343, "num_input_tokens_seen": 533547168, "step": 2953 }, { "epoch": 0.3233804975505624, "grad_norm": 1.4725837993254465, "learning_rate": 3.816897387727921e-05, "loss": 0.8268, "num_input_tokens_seen": 533713376, "step": 2954 }, { "epoch": 0.32348996962150034, "grad_norm": 1.2255378379349975, "learning_rate": 3.816166414304711e-05, "loss": 0.8066, "num_input_tokens_seen": 533893696, "step": 2955 }, { "epoch": 0.3235994416924382, "grad_norm": 1.2983554524964347, "learning_rate": 3.81543528518153e-05, "loss": 0.7821, "num_input_tokens_seen": 534101792, "step": 2956 }, { "epoch": 0.3237089137633761, "grad_norm": 1.2428433108617107, "learning_rate": 3.81470400044487e-05, "loss": 0.8894, "num_input_tokens_seen": 534293536, "step": 2957 }, { "epoch": 0.323818385834314, "grad_norm": 1.2166727899806642, "learning_rate": 3.81397256018124e-05, "loss": 0.7484, "num_input_tokens_seen": 534464896, "step": 2958 }, { "epoch": 0.3239278579052519, "grad_norm": 1.1077354565773943, "learning_rate": 3.8132409644771683e-05, "loss": 0.605, "num_input_tokens_seen": 534665600, "step": 2959 }, { "epoch": 0.32403732997618984, "grad_norm": 1.2910258744333596, "learning_rate": 3.812509213419201e-05, "loss": 0.6027, "num_input_tokens_seen": 534832928, "step": 2960 }, { "epoch": 0.3241468020471277, "grad_norm": 1.2203163938179364, "learning_rate": 3.8117773070939025e-05, "loss": 0.8371, "num_input_tokens_seen": 535006080, "step": 2961 }, { "epoch": 0.3242562741180656, "grad_norm": 1.224210926802615, "learning_rate": 3.811045245587856e-05, "loss": 0.7262, "num_input_tokens_seen": 535208576, "step": 2962 }, { "epoch": 0.32436574618900355, "grad_norm": 1.2472136436786645, "learning_rate": 3.810313028987663e-05, "loss": 0.7924, "num_input_tokens_seen": 535373664, "step": 2963 }, { "epoch": 0.3244752182599414, "grad_norm": 1.2765977646848916, "learning_rate": 3.809580657379944e-05, "loss": 0.6628, "num_input_tokens_seen": 535570560, "step": 2964 }, { "epoch": 0.32458469033087933, "grad_norm": 1.290839756054268, "learning_rate": 3.8088481308513375e-05, "loss": 0.5767, "num_input_tokens_seen": 535761408, "step": 2965 }, { "epoch": 0.32469416240181725, "grad_norm": 1.300753939688437, "learning_rate": 3.808115449488499e-05, "loss": 0.7851, "num_input_tokens_seen": 535956512, "step": 2966 }, { "epoch": 0.3248036344727551, "grad_norm": 1.3184769641569443, "learning_rate": 3.8073826133781026e-05, "loss": 0.7067, "num_input_tokens_seen": 536103008, "step": 2967 }, { "epoch": 0.32491310654369304, "grad_norm": 1.1722877792710644, "learning_rate": 3.8066496226068426e-05, "loss": 0.6918, "num_input_tokens_seen": 536293856, "step": 2968 }, { "epoch": 0.32502257861463096, "grad_norm": 1.4037618453493304, "learning_rate": 3.8059164772614304e-05, "loss": 0.7358, "num_input_tokens_seen": 536477984, "step": 2969 }, { "epoch": 0.32513205068556883, "grad_norm": 1.3629262292507616, "learning_rate": 3.805183177428595e-05, "loss": 0.7322, "num_input_tokens_seen": 536648224, "step": 2970 }, { "epoch": 0.32524152275650675, "grad_norm": 1.3682907884436726, "learning_rate": 3.8044497231950855e-05, "loss": 0.5611, "num_input_tokens_seen": 536829440, "step": 2971 }, { "epoch": 0.3253509948274447, "grad_norm": 1.2639627726179141, "learning_rate": 3.803716114647667e-05, "loss": 0.6641, "num_input_tokens_seen": 537007520, "step": 2972 }, { "epoch": 0.32546046689838254, "grad_norm": 1.1744343085790458, "learning_rate": 3.8029823518731247e-05, "loss": 0.7223, "num_input_tokens_seen": 537198816, "step": 2973 }, { "epoch": 0.32556993896932046, "grad_norm": 1.3153051191337213, "learning_rate": 3.802248434958261e-05, "loss": 0.6333, "num_input_tokens_seen": 537365248, "step": 2974 }, { "epoch": 0.3256794110402583, "grad_norm": 1.2819846303438056, "learning_rate": 3.801514363989897e-05, "loss": 0.6795, "num_input_tokens_seen": 537539296, "step": 2975 }, { "epoch": 0.32578888311119625, "grad_norm": 1.2296939893306527, "learning_rate": 3.8007801390548706e-05, "loss": 0.5947, "num_input_tokens_seen": 537716480, "step": 2976 }, { "epoch": 0.32589835518213417, "grad_norm": 1.2404629714433373, "learning_rate": 3.800045760240042e-05, "loss": 0.6689, "num_input_tokens_seen": 537888512, "step": 2977 }, { "epoch": 0.32600782725307204, "grad_norm": 1.2306066042259907, "learning_rate": 3.799311227632284e-05, "loss": 0.6666, "num_input_tokens_seen": 538080928, "step": 2978 }, { "epoch": 0.32611729932400996, "grad_norm": 1.2619796341493597, "learning_rate": 3.7985765413184924e-05, "loss": 0.7422, "num_input_tokens_seen": 538241536, "step": 2979 }, { "epoch": 0.3262267713949479, "grad_norm": 1.1835993398402191, "learning_rate": 3.797841701385578e-05, "loss": 0.6312, "num_input_tokens_seen": 538422976, "step": 2980 }, { "epoch": 0.32633624346588574, "grad_norm": 1.363173078819859, "learning_rate": 3.7971067079204726e-05, "loss": 0.7662, "num_input_tokens_seen": 538588288, "step": 2981 }, { "epoch": 0.32644571553682367, "grad_norm": 1.330155335368547, "learning_rate": 3.7963715610101215e-05, "loss": 0.7762, "num_input_tokens_seen": 538786752, "step": 2982 }, { "epoch": 0.3265551876077616, "grad_norm": 1.3661652585315107, "learning_rate": 3.795636260741494e-05, "loss": 1.0204, "num_input_tokens_seen": 538953632, "step": 2983 }, { "epoch": 0.32666465967869945, "grad_norm": 1.4263161687856005, "learning_rate": 3.794900807201574e-05, "loss": 0.7219, "num_input_tokens_seen": 539126112, "step": 2984 }, { "epoch": 0.3267741317496374, "grad_norm": 1.2270199839760803, "learning_rate": 3.794165200477363e-05, "loss": 0.4956, "num_input_tokens_seen": 539273504, "step": 2985 }, { "epoch": 0.3268836038205753, "grad_norm": 1.3353075918245356, "learning_rate": 3.793429440655884e-05, "loss": 0.7768, "num_input_tokens_seen": 539477120, "step": 2986 }, { "epoch": 0.32699307589151316, "grad_norm": 1.3905685499070737, "learning_rate": 3.792693527824174e-05, "loss": 0.7814, "num_input_tokens_seen": 539651616, "step": 2987 }, { "epoch": 0.3271025479624511, "grad_norm": 1.348177513263475, "learning_rate": 3.791957462069291e-05, "loss": 0.7395, "num_input_tokens_seen": 539804160, "step": 2988 }, { "epoch": 0.327212020033389, "grad_norm": 1.157597040677193, "learning_rate": 3.7912212434783095e-05, "loss": 0.5808, "num_input_tokens_seen": 539979776, "step": 2989 }, { "epoch": 0.32732149210432687, "grad_norm": 1.3791451828004546, "learning_rate": 3.7904848721383234e-05, "loss": 0.6056, "num_input_tokens_seen": 540129184, "step": 2990 }, { "epoch": 0.3274309641752648, "grad_norm": 1.3298303314489368, "learning_rate": 3.789748348136444e-05, "loss": 0.7609, "num_input_tokens_seen": 540339072, "step": 2991 }, { "epoch": 0.3275404362462027, "grad_norm": 1.2856086931691966, "learning_rate": 3.7890116715598013e-05, "loss": 0.5684, "num_input_tokens_seen": 540545376, "step": 2992 }, { "epoch": 0.3276499083171406, "grad_norm": 1.360067257059586, "learning_rate": 3.7882748424955414e-05, "loss": 0.9684, "num_input_tokens_seen": 540744736, "step": 2993 }, { "epoch": 0.3277593803880785, "grad_norm": 1.29288908836923, "learning_rate": 3.7875378610308306e-05, "loss": 0.7161, "num_input_tokens_seen": 540930208, "step": 2994 }, { "epoch": 0.32786885245901637, "grad_norm": 1.4165827595414702, "learning_rate": 3.7868007272528524e-05, "loss": 0.8304, "num_input_tokens_seen": 541080960, "step": 2995 }, { "epoch": 0.3279783245299543, "grad_norm": 1.3488757821332586, "learning_rate": 3.786063441248808e-05, "loss": 0.6485, "num_input_tokens_seen": 541271136, "step": 2996 }, { "epoch": 0.3280877966008922, "grad_norm": 1.505996796785674, "learning_rate": 3.785326003105916e-05, "loss": 0.8814, "num_input_tokens_seen": 541427488, "step": 2997 }, { "epoch": 0.3281972686718301, "grad_norm": 1.3098395950449886, "learning_rate": 3.784588412911416e-05, "loss": 0.5283, "num_input_tokens_seen": 541617888, "step": 2998 }, { "epoch": 0.328306740742768, "grad_norm": 1.266598409562021, "learning_rate": 3.783850670752563e-05, "loss": 0.6947, "num_input_tokens_seen": 541816352, "step": 2999 }, { "epoch": 0.3284162128137059, "grad_norm": 1.1955609530869105, "learning_rate": 3.783112776716629e-05, "loss": 0.821, "num_input_tokens_seen": 541993760, "step": 3000 }, { "epoch": 0.3285256848846438, "grad_norm": 1.4135991807489445, "learning_rate": 3.782374730890908e-05, "loss": 1.0811, "num_input_tokens_seen": 542173632, "step": 3001 }, { "epoch": 0.3286351569555817, "grad_norm": 1.372382533704484, "learning_rate": 3.781636533362706e-05, "loss": 0.7887, "num_input_tokens_seen": 542372320, "step": 3002 }, { "epoch": 0.32874462902651963, "grad_norm": 1.3186081489095949, "learning_rate": 3.780898184219352e-05, "loss": 0.7109, "num_input_tokens_seen": 542521728, "step": 3003 }, { "epoch": 0.3288541010974575, "grad_norm": 1.2984063632504785, "learning_rate": 3.780159683548192e-05, "loss": 0.904, "num_input_tokens_seen": 542699584, "step": 3004 }, { "epoch": 0.3289635731683954, "grad_norm": 1.2534254346564997, "learning_rate": 3.779421031436588e-05, "loss": 0.8152, "num_input_tokens_seen": 542902752, "step": 3005 }, { "epoch": 0.32907304523933334, "grad_norm": 1.1773944319844636, "learning_rate": 3.7786822279719237e-05, "loss": 0.5504, "num_input_tokens_seen": 543088224, "step": 3006 }, { "epoch": 0.3291825173102712, "grad_norm": 1.1792762999684065, "learning_rate": 3.777943273241595e-05, "loss": 0.7772, "num_input_tokens_seen": 543258016, "step": 3007 }, { "epoch": 0.3292919893812091, "grad_norm": 1.2296330596234675, "learning_rate": 3.77720416733302e-05, "loss": 0.6468, "num_input_tokens_seen": 543426912, "step": 3008 }, { "epoch": 0.32940146145214705, "grad_norm": 1.0441333683445342, "learning_rate": 3.776464910333635e-05, "loss": 0.5287, "num_input_tokens_seen": 543617312, "step": 3009 }, { "epoch": 0.3295109335230849, "grad_norm": 1.3469843419558987, "learning_rate": 3.77572550233089e-05, "loss": 0.6494, "num_input_tokens_seen": 543810848, "step": 3010 }, { "epoch": 0.32962040559402284, "grad_norm": 1.2435501589841667, "learning_rate": 3.774985943412257e-05, "loss": 0.6514, "num_input_tokens_seen": 543986912, "step": 3011 }, { "epoch": 0.3297298776649607, "grad_norm": 1.454436499947368, "learning_rate": 3.774246233665224e-05, "loss": 0.9398, "num_input_tokens_seen": 544188960, "step": 3012 }, { "epoch": 0.3298393497358986, "grad_norm": 1.1999305327494494, "learning_rate": 3.773506373177298e-05, "loss": 0.5982, "num_input_tokens_seen": 544376896, "step": 3013 }, { "epoch": 0.32994882180683655, "grad_norm": 1.1085783018470048, "learning_rate": 3.7727663620360026e-05, "loss": 0.5543, "num_input_tokens_seen": 544550944, "step": 3014 }, { "epoch": 0.3300582938777744, "grad_norm": 1.222677651117487, "learning_rate": 3.772026200328879e-05, "loss": 0.5163, "num_input_tokens_seen": 544731936, "step": 3015 }, { "epoch": 0.33016776594871233, "grad_norm": 1.4411647356524118, "learning_rate": 3.771285888143489e-05, "loss": 0.7702, "num_input_tokens_seen": 544918080, "step": 3016 }, { "epoch": 0.33027723801965025, "grad_norm": 1.3100066798527594, "learning_rate": 3.7705454255674064e-05, "loss": 0.5687, "num_input_tokens_seen": 545073984, "step": 3017 }, { "epoch": 0.3303867100905881, "grad_norm": 1.3314797610593703, "learning_rate": 3.769804812688231e-05, "loss": 0.8614, "num_input_tokens_seen": 545245120, "step": 3018 }, { "epoch": 0.33049618216152604, "grad_norm": 1.417891991589572, "learning_rate": 3.769064049593573e-05, "loss": 0.748, "num_input_tokens_seen": 545417600, "step": 3019 }, { "epoch": 0.33060565423246396, "grad_norm": 1.3555000147662977, "learning_rate": 3.768323136371064e-05, "loss": 0.6956, "num_input_tokens_seen": 545569024, "step": 3020 }, { "epoch": 0.33071512630340183, "grad_norm": 1.3814476303324918, "learning_rate": 3.7675820731083526e-05, "loss": 0.6547, "num_input_tokens_seen": 545751360, "step": 3021 }, { "epoch": 0.33082459837433975, "grad_norm": 1.248306271336528, "learning_rate": 3.766840859893105e-05, "loss": 0.8081, "num_input_tokens_seen": 545925184, "step": 3022 }, { "epoch": 0.3309340704452777, "grad_norm": 1.3107980865441269, "learning_rate": 3.766099496813006e-05, "loss": 0.6221, "num_input_tokens_seen": 546068544, "step": 3023 }, { "epoch": 0.33104354251621554, "grad_norm": 1.1257799941406021, "learning_rate": 3.765357983955756e-05, "loss": 0.5686, "num_input_tokens_seen": 546239680, "step": 3024 }, { "epoch": 0.33115301458715346, "grad_norm": 1.1937277766850392, "learning_rate": 3.764616321409076e-05, "loss": 0.6519, "num_input_tokens_seen": 546415072, "step": 3025 }, { "epoch": 0.3312624866580914, "grad_norm": 1.3998666205902655, "learning_rate": 3.763874509260702e-05, "loss": 0.7772, "num_input_tokens_seen": 546616672, "step": 3026 }, { "epoch": 0.33137195872902925, "grad_norm": 1.3478037877799587, "learning_rate": 3.7631325475983905e-05, "loss": 0.6952, "num_input_tokens_seen": 546797664, "step": 3027 }, { "epoch": 0.33148143079996717, "grad_norm": 1.3093793190677758, "learning_rate": 3.7623904365099134e-05, "loss": 0.7839, "num_input_tokens_seen": 546989632, "step": 3028 }, { "epoch": 0.33159090287090504, "grad_norm": 1.5965498471911361, "learning_rate": 3.761648176083061e-05, "loss": 0.8631, "num_input_tokens_seen": 547180032, "step": 3029 }, { "epoch": 0.33170037494184296, "grad_norm": 1.2560177906319285, "learning_rate": 3.76090576640564e-05, "loss": 0.6256, "num_input_tokens_seen": 547363712, "step": 3030 }, { "epoch": 0.3318098470127809, "grad_norm": 1.254824198381831, "learning_rate": 3.7601632075654784e-05, "loss": 0.7486, "num_input_tokens_seen": 547529248, "step": 3031 }, { "epoch": 0.33191931908371874, "grad_norm": 1.3009987497492301, "learning_rate": 3.759420499650419e-05, "loss": 0.6361, "num_input_tokens_seen": 547691872, "step": 3032 }, { "epoch": 0.33202879115465667, "grad_norm": 1.1641421194817674, "learning_rate": 3.758677642748321e-05, "loss": 0.7569, "num_input_tokens_seen": 547911392, "step": 3033 }, { "epoch": 0.3321382632255946, "grad_norm": 1.287779614723621, "learning_rate": 3.757934636947064e-05, "loss": 0.6862, "num_input_tokens_seen": 548075808, "step": 3034 }, { "epoch": 0.33224773529653245, "grad_norm": 1.2612909117188946, "learning_rate": 3.7571914823345444e-05, "loss": 0.7132, "num_input_tokens_seen": 548250080, "step": 3035 }, { "epoch": 0.3323572073674704, "grad_norm": 1.3036503124071213, "learning_rate": 3.756448178998676e-05, "loss": 0.701, "num_input_tokens_seen": 548462432, "step": 3036 }, { "epoch": 0.3324666794384083, "grad_norm": 1.2554619666096107, "learning_rate": 3.755704727027389e-05, "loss": 0.6908, "num_input_tokens_seen": 548632000, "step": 3037 }, { "epoch": 0.33257615150934616, "grad_norm": 1.2434316198876747, "learning_rate": 3.754961126508634e-05, "loss": 0.5264, "num_input_tokens_seen": 548783648, "step": 3038 }, { "epoch": 0.3326856235802841, "grad_norm": 1.1857355963684482, "learning_rate": 3.754217377530377e-05, "loss": 0.5652, "num_input_tokens_seen": 548983680, "step": 3039 }, { "epoch": 0.332795095651222, "grad_norm": 1.2645679816498314, "learning_rate": 3.753473480180603e-05, "loss": 0.701, "num_input_tokens_seen": 549152576, "step": 3040 }, { "epoch": 0.33290456772215987, "grad_norm": 1.268724672412347, "learning_rate": 3.752729434547311e-05, "loss": 0.6705, "num_input_tokens_seen": 549351040, "step": 3041 }, { "epoch": 0.3330140397930978, "grad_norm": 1.3276784707196803, "learning_rate": 3.751985240718522e-05, "loss": 0.6188, "num_input_tokens_seen": 549493728, "step": 3042 }, { "epoch": 0.3331235118640357, "grad_norm": 1.507629620623974, "learning_rate": 3.7512408987822724e-05, "loss": 0.8571, "num_input_tokens_seen": 549695552, "step": 3043 }, { "epoch": 0.3332329839349736, "grad_norm": 1.2011331195316681, "learning_rate": 3.750496408826616e-05, "loss": 0.6649, "num_input_tokens_seen": 549878560, "step": 3044 }, { "epoch": 0.3333424560059115, "grad_norm": 1.4110567804992713, "learning_rate": 3.749751770939626e-05, "loss": 0.6378, "num_input_tokens_seen": 550043200, "step": 3045 }, { "epoch": 0.33345192807684937, "grad_norm": 1.394534541107483, "learning_rate": 3.74900698520939e-05, "loss": 0.7189, "num_input_tokens_seen": 550253536, "step": 3046 }, { "epoch": 0.3335614001477873, "grad_norm": 1.2107377410526414, "learning_rate": 3.7482620517240155e-05, "loss": 0.5551, "num_input_tokens_seen": 550438336, "step": 3047 }, { "epoch": 0.3336708722187252, "grad_norm": 1.430023737490314, "learning_rate": 3.747516970571626e-05, "loss": 0.8268, "num_input_tokens_seen": 550610368, "step": 3048 }, { "epoch": 0.3337803442896631, "grad_norm": 1.1498193018382856, "learning_rate": 3.746771741840365e-05, "loss": 0.5868, "num_input_tokens_seen": 550766944, "step": 3049 }, { "epoch": 0.333889816360601, "grad_norm": 1.347053238123199, "learning_rate": 3.746026365618389e-05, "loss": 0.6358, "num_input_tokens_seen": 550944128, "step": 3050 }, { "epoch": 0.3339992884315389, "grad_norm": 1.3687770213765105, "learning_rate": 3.745280841993876e-05, "loss": 0.7253, "num_input_tokens_seen": 551118624, "step": 3051 }, { "epoch": 0.3341087605024768, "grad_norm": 1.2358845039898139, "learning_rate": 3.744535171055021e-05, "loss": 0.6385, "num_input_tokens_seen": 551301856, "step": 3052 }, { "epoch": 0.3342182325734147, "grad_norm": 1.2631648135623172, "learning_rate": 3.743789352890034e-05, "loss": 0.7595, "num_input_tokens_seen": 551468736, "step": 3053 }, { "epoch": 0.33432770464435263, "grad_norm": 1.4208818257727416, "learning_rate": 3.743043387587144e-05, "loss": 0.8359, "num_input_tokens_seen": 551643456, "step": 3054 }, { "epoch": 0.3344371767152905, "grad_norm": 1.1922618857759342, "learning_rate": 3.742297275234598e-05, "loss": 0.6378, "num_input_tokens_seen": 551815712, "step": 3055 }, { "epoch": 0.3345466487862284, "grad_norm": 1.2720851621459517, "learning_rate": 3.7415510159206593e-05, "loss": 0.6353, "num_input_tokens_seen": 551983936, "step": 3056 }, { "epoch": 0.33465612085716634, "grad_norm": 1.244469759296663, "learning_rate": 3.740804609733608e-05, "loss": 0.6878, "num_input_tokens_seen": 552176800, "step": 3057 }, { "epoch": 0.3347655929281042, "grad_norm": 1.2607596327492918, "learning_rate": 3.740058056761743e-05, "loss": 0.7228, "num_input_tokens_seen": 552345696, "step": 3058 }, { "epoch": 0.3348750649990421, "grad_norm": 1.2697500656527483, "learning_rate": 3.739311357093382e-05, "loss": 0.5975, "num_input_tokens_seen": 552532960, "step": 3059 }, { "epoch": 0.33498453706998005, "grad_norm": 1.3088845911928728, "learning_rate": 3.738564510816856e-05, "loss": 0.8951, "num_input_tokens_seen": 552701632, "step": 3060 }, { "epoch": 0.3350940091409179, "grad_norm": 1.311539480149579, "learning_rate": 3.737817518020516e-05, "loss": 0.7841, "num_input_tokens_seen": 552892480, "step": 3061 }, { "epoch": 0.33520348121185584, "grad_norm": 1.2451659667987889, "learning_rate": 3.73707037879273e-05, "loss": 0.642, "num_input_tokens_seen": 553065856, "step": 3062 }, { "epoch": 0.3353129532827937, "grad_norm": 1.4573889899141457, "learning_rate": 3.736323093221884e-05, "loss": 0.9782, "num_input_tokens_seen": 553250656, "step": 3063 }, { "epoch": 0.3354224253537316, "grad_norm": 1.3135325139831266, "learning_rate": 3.735575661396378e-05, "loss": 0.6574, "num_input_tokens_seen": 553420000, "step": 3064 }, { "epoch": 0.33553189742466955, "grad_norm": 1.2633846375122426, "learning_rate": 3.7348280834046334e-05, "loss": 0.7347, "num_input_tokens_seen": 553614656, "step": 3065 }, { "epoch": 0.3356413694956074, "grad_norm": 1.1616191439073065, "learning_rate": 3.7340803593350884e-05, "loss": 0.838, "num_input_tokens_seen": 553818048, "step": 3066 }, { "epoch": 0.33575084156654533, "grad_norm": 1.409073982318888, "learning_rate": 3.733332489276195e-05, "loss": 0.81, "num_input_tokens_seen": 553992096, "step": 3067 }, { "epoch": 0.33586031363748325, "grad_norm": 1.2032969958735542, "learning_rate": 3.7325844733164256e-05, "loss": 0.7872, "num_input_tokens_seen": 554184288, "step": 3068 }, { "epoch": 0.3359697857084211, "grad_norm": 1.255760224553047, "learning_rate": 3.73183631154427e-05, "loss": 0.6419, "num_input_tokens_seen": 554396192, "step": 3069 }, { "epoch": 0.33607925777935904, "grad_norm": 1.278951537134971, "learning_rate": 3.7310880040482335e-05, "loss": 0.7937, "num_input_tokens_seen": 554600704, "step": 3070 }, { "epoch": 0.33618872985029696, "grad_norm": 1.1030612786667922, "learning_rate": 3.730339550916839e-05, "loss": 0.7986, "num_input_tokens_seen": 554803648, "step": 3071 }, { "epoch": 0.33629820192123483, "grad_norm": 1.2355685198103346, "learning_rate": 3.729590952238628e-05, "loss": 0.6929, "num_input_tokens_seen": 554989792, "step": 3072 }, { "epoch": 0.33640767399217275, "grad_norm": 1.3097002504019823, "learning_rate": 3.728842208102158e-05, "loss": 0.7243, "num_input_tokens_seen": 555178176, "step": 3073 }, { "epoch": 0.3365171460631107, "grad_norm": 1.1617955914181757, "learning_rate": 3.728093318596004e-05, "loss": 0.5647, "num_input_tokens_seen": 555370368, "step": 3074 }, { "epoch": 0.33662661813404854, "grad_norm": 1.3089739802217315, "learning_rate": 3.7273442838087584e-05, "loss": 0.7666, "num_input_tokens_seen": 555529632, "step": 3075 }, { "epoch": 0.33673609020498646, "grad_norm": 1.328706860707769, "learning_rate": 3.7265951038290305e-05, "loss": 0.6633, "num_input_tokens_seen": 555668288, "step": 3076 }, { "epoch": 0.3368455622759244, "grad_norm": 1.2294094794150408, "learning_rate": 3.725845778745446e-05, "loss": 0.6464, "num_input_tokens_seen": 555896096, "step": 3077 }, { "epoch": 0.33695503434686225, "grad_norm": 1.1761083813179478, "learning_rate": 3.725096308646649e-05, "loss": 0.5667, "num_input_tokens_seen": 556085152, "step": 3078 }, { "epoch": 0.33706450641780017, "grad_norm": 1.286168172548632, "learning_rate": 3.724346693621301e-05, "loss": 0.7083, "num_input_tokens_seen": 556254944, "step": 3079 }, { "epoch": 0.33717397848873804, "grad_norm": 1.4485676991894896, "learning_rate": 3.72359693375808e-05, "loss": 0.804, "num_input_tokens_seen": 556428992, "step": 3080 }, { "epoch": 0.33728345055967596, "grad_norm": 1.2511245913924602, "learning_rate": 3.722847029145681e-05, "loss": 0.6773, "num_input_tokens_seen": 556602144, "step": 3081 }, { "epoch": 0.3373929226306139, "grad_norm": 1.322942788574559, "learning_rate": 3.722096979872815e-05, "loss": 0.7295, "num_input_tokens_seen": 556771264, "step": 3082 }, { "epoch": 0.33750239470155174, "grad_norm": 1.3647314190288853, "learning_rate": 3.7213467860282144e-05, "loss": 0.9204, "num_input_tokens_seen": 556961664, "step": 3083 }, { "epoch": 0.33761186677248967, "grad_norm": 1.294744511075786, "learning_rate": 3.720596447700623e-05, "loss": 0.6539, "num_input_tokens_seen": 557126528, "step": 3084 }, { "epoch": 0.3377213388434276, "grad_norm": 1.2538014305724823, "learning_rate": 3.7198459649788045e-05, "loss": 0.6929, "num_input_tokens_seen": 557306624, "step": 3085 }, { "epoch": 0.33783081091436545, "grad_norm": 1.204810460565377, "learning_rate": 3.7190953379515404e-05, "loss": 0.7993, "num_input_tokens_seen": 557520320, "step": 3086 }, { "epoch": 0.3379402829853034, "grad_norm": 1.1914101022126504, "learning_rate": 3.718344566707629e-05, "loss": 0.7108, "num_input_tokens_seen": 557704896, "step": 3087 }, { "epoch": 0.3380497550562413, "grad_norm": 1.2883962123046548, "learning_rate": 3.717593651335884e-05, "loss": 0.6535, "num_input_tokens_seen": 557868864, "step": 3088 }, { "epoch": 0.33815922712717916, "grad_norm": 1.2783641414253903, "learning_rate": 3.716842591925138e-05, "loss": 0.6744, "num_input_tokens_seen": 558042016, "step": 3089 }, { "epoch": 0.3382686991981171, "grad_norm": 1.2072723049205385, "learning_rate": 3.71609138856424e-05, "loss": 0.6626, "num_input_tokens_seen": 558191648, "step": 3090 }, { "epoch": 0.338378171269055, "grad_norm": 1.2022310378875114, "learning_rate": 3.715340041342055e-05, "loss": 0.737, "num_input_tokens_seen": 558379584, "step": 3091 }, { "epoch": 0.33848764333999287, "grad_norm": 1.2621471516260645, "learning_rate": 3.7145885503474654e-05, "loss": 0.7874, "num_input_tokens_seen": 558549152, "step": 3092 }, { "epoch": 0.3385971154109308, "grad_norm": 1.2066729856469651, "learning_rate": 3.713836915669373e-05, "loss": 0.6163, "num_input_tokens_seen": 558719168, "step": 3093 }, { "epoch": 0.3387065874818687, "grad_norm": 1.263997573976965, "learning_rate": 3.713085137396694e-05, "loss": 0.8031, "num_input_tokens_seen": 558892096, "step": 3094 }, { "epoch": 0.3388160595528066, "grad_norm": 1.3612145440814687, "learning_rate": 3.712333215618363e-05, "loss": 0.9104, "num_input_tokens_seen": 559080704, "step": 3095 }, { "epoch": 0.3389255316237445, "grad_norm": 1.185340222316872, "learning_rate": 3.71158115042333e-05, "loss": 0.6035, "num_input_tokens_seen": 559244224, "step": 3096 }, { "epoch": 0.33903500369468237, "grad_norm": 1.4044306960561348, "learning_rate": 3.7108289419005625e-05, "loss": 0.7184, "num_input_tokens_seen": 559429472, "step": 3097 }, { "epoch": 0.3391444757656203, "grad_norm": 1.3729236270726173, "learning_rate": 3.710076590139045e-05, "loss": 0.8382, "num_input_tokens_seen": 559622784, "step": 3098 }, { "epoch": 0.3392539478365582, "grad_norm": 1.1992469809126172, "learning_rate": 3.7093240952277816e-05, "loss": 0.8304, "num_input_tokens_seen": 559803104, "step": 3099 }, { "epoch": 0.3393634199074961, "grad_norm": 1.2260025592920372, "learning_rate": 3.708571457255789e-05, "loss": 0.7293, "num_input_tokens_seen": 559989024, "step": 3100 }, { "epoch": 0.339472891978434, "grad_norm": 1.3109605025591045, "learning_rate": 3.7078186763121034e-05, "loss": 0.8897, "num_input_tokens_seen": 560163072, "step": 3101 }, { "epoch": 0.3395823640493719, "grad_norm": 1.266474309336157, "learning_rate": 3.7070657524857786e-05, "loss": 0.523, "num_input_tokens_seen": 560325920, "step": 3102 }, { "epoch": 0.3396918361203098, "grad_norm": 1.42904031415577, "learning_rate": 3.706312685865881e-05, "loss": 0.7425, "num_input_tokens_seen": 560524832, "step": 3103 }, { "epoch": 0.3398013081912477, "grad_norm": 1.3000554531447979, "learning_rate": 3.7055594765415e-05, "loss": 0.7011, "num_input_tokens_seen": 560701344, "step": 3104 }, { "epoch": 0.33991078026218563, "grad_norm": 1.3251640074369833, "learning_rate": 3.704806124601736e-05, "loss": 0.6051, "num_input_tokens_seen": 560850976, "step": 3105 }, { "epoch": 0.3400202523331235, "grad_norm": 1.2284806433034974, "learning_rate": 3.704052630135713e-05, "loss": 0.6006, "num_input_tokens_seen": 561039808, "step": 3106 }, { "epoch": 0.3401297244040614, "grad_norm": 1.2587895360077175, "learning_rate": 3.7032989932325634e-05, "loss": 0.5989, "num_input_tokens_seen": 561188320, "step": 3107 }, { "epoch": 0.34023919647499934, "grad_norm": 1.2010213121783138, "learning_rate": 3.7025452139814445e-05, "loss": 0.7302, "num_input_tokens_seen": 561352288, "step": 3108 }, { "epoch": 0.3403486685459372, "grad_norm": 1.4643777741924062, "learning_rate": 3.7017912924715257e-05, "loss": 0.92, "num_input_tokens_seen": 561546496, "step": 3109 }, { "epoch": 0.3404581406168751, "grad_norm": 1.33482425997416, "learning_rate": 3.701037228791993e-05, "loss": 0.6253, "num_input_tokens_seen": 561729280, "step": 3110 }, { "epoch": 0.34056761268781305, "grad_norm": 1.2659248185305356, "learning_rate": 3.7002830230320537e-05, "loss": 0.6745, "num_input_tokens_seen": 561910496, "step": 3111 }, { "epoch": 0.3406770847587509, "grad_norm": 1.2839613812334234, "learning_rate": 3.699528675280926e-05, "loss": 0.9247, "num_input_tokens_seen": 562082080, "step": 3112 }, { "epoch": 0.34078655682968884, "grad_norm": 1.2609828539022203, "learning_rate": 3.69877418562785e-05, "loss": 0.6365, "num_input_tokens_seen": 562282560, "step": 3113 }, { "epoch": 0.3408960289006267, "grad_norm": 1.298703203432123, "learning_rate": 3.69801955416208e-05, "loss": 0.8286, "num_input_tokens_seen": 562476768, "step": 3114 }, { "epoch": 0.3410055009715646, "grad_norm": 1.2412828234033073, "learning_rate": 3.697264780972886e-05, "loss": 0.7051, "num_input_tokens_seen": 562667840, "step": 3115 }, { "epoch": 0.34111497304250255, "grad_norm": 1.1622735428455484, "learning_rate": 3.696509866149558e-05, "loss": 0.7157, "num_input_tokens_seen": 562840544, "step": 3116 }, { "epoch": 0.3412244451134404, "grad_norm": 1.5167272433385723, "learning_rate": 3.6957548097814e-05, "loss": 0.8251, "num_input_tokens_seen": 563028928, "step": 3117 }, { "epoch": 0.34133391718437833, "grad_norm": 1.3006866695626313, "learning_rate": 3.6949996119577335e-05, "loss": 0.7393, "num_input_tokens_seen": 563203424, "step": 3118 }, { "epoch": 0.34144338925531625, "grad_norm": 1.0770713522642532, "learning_rate": 3.694244272767897e-05, "loss": 0.4825, "num_input_tokens_seen": 563385088, "step": 3119 }, { "epoch": 0.3415528613262541, "grad_norm": 1.3545277432022174, "learning_rate": 3.693488792301247e-05, "loss": 0.7199, "num_input_tokens_seen": 563563392, "step": 3120 }, { "epoch": 0.34166233339719204, "grad_norm": 1.3178383482871434, "learning_rate": 3.6927331706471536e-05, "loss": 0.6731, "num_input_tokens_seen": 563747072, "step": 3121 }, { "epoch": 0.34177180546812996, "grad_norm": 1.6077178274739974, "learning_rate": 3.6919774078950065e-05, "loss": 0.9561, "num_input_tokens_seen": 563935232, "step": 3122 }, { "epoch": 0.34188127753906783, "grad_norm": 1.2583380128550306, "learning_rate": 3.691221504134211e-05, "loss": 0.7495, "num_input_tokens_seen": 564103680, "step": 3123 }, { "epoch": 0.34199074961000575, "grad_norm": 1.220512143263365, "learning_rate": 3.6904654594541885e-05, "loss": 0.5662, "num_input_tokens_seen": 564279072, "step": 3124 }, { "epoch": 0.3421002216809437, "grad_norm": 1.3835407214608353, "learning_rate": 3.689709273944378e-05, "loss": 0.9131, "num_input_tokens_seen": 564487616, "step": 3125 }, { "epoch": 0.34220969375188154, "grad_norm": 1.2656205730596284, "learning_rate": 3.6889529476942344e-05, "loss": 0.664, "num_input_tokens_seen": 564654944, "step": 3126 }, { "epoch": 0.34231916582281946, "grad_norm": 1.3068238544717938, "learning_rate": 3.6881964807932306e-05, "loss": 0.7431, "num_input_tokens_seen": 564873344, "step": 3127 }, { "epoch": 0.3424286378937574, "grad_norm": 1.3029582733417453, "learning_rate": 3.6874398733308544e-05, "loss": 0.7563, "num_input_tokens_seen": 565071584, "step": 3128 }, { "epoch": 0.34253810996469525, "grad_norm": 1.3783257318784496, "learning_rate": 3.686683125396611e-05, "loss": 0.739, "num_input_tokens_seen": 565204640, "step": 3129 }, { "epoch": 0.34264758203563317, "grad_norm": 1.3546718486510934, "learning_rate": 3.685926237080023e-05, "loss": 0.779, "num_input_tokens_seen": 565410048, "step": 3130 }, { "epoch": 0.34275705410657104, "grad_norm": 1.2544440901526495, "learning_rate": 3.6851692084706266e-05, "loss": 0.6577, "num_input_tokens_seen": 565572448, "step": 3131 }, { "epoch": 0.34286652617750896, "grad_norm": 1.2074855208861592, "learning_rate": 3.68441203965798e-05, "loss": 0.6851, "num_input_tokens_seen": 565735072, "step": 3132 }, { "epoch": 0.3429759982484469, "grad_norm": 1.1588698939709943, "learning_rate": 3.6836547307316524e-05, "loss": 0.5406, "num_input_tokens_seen": 565940032, "step": 3133 }, { "epoch": 0.34308547031938474, "grad_norm": 1.1752469188890802, "learning_rate": 3.682897281781234e-05, "loss": 0.6749, "num_input_tokens_seen": 566097280, "step": 3134 }, { "epoch": 0.34319494239032267, "grad_norm": 1.2570273276600357, "learning_rate": 3.682139692896328e-05, "loss": 0.6863, "num_input_tokens_seen": 566263040, "step": 3135 }, { "epoch": 0.3433044144612606, "grad_norm": 1.3747860568670556, "learning_rate": 3.681381964166556e-05, "loss": 0.7361, "num_input_tokens_seen": 566426784, "step": 3136 }, { "epoch": 0.34341388653219845, "grad_norm": 1.2946098492553424, "learning_rate": 3.680624095681557e-05, "loss": 0.605, "num_input_tokens_seen": 566561856, "step": 3137 }, { "epoch": 0.3435233586031364, "grad_norm": 1.3753868339159134, "learning_rate": 3.6798660875309836e-05, "loss": 0.6285, "num_input_tokens_seen": 566719776, "step": 3138 }, { "epoch": 0.3436328306740743, "grad_norm": 1.251579240741757, "learning_rate": 3.679107939804507e-05, "loss": 0.5439, "num_input_tokens_seen": 566885984, "step": 3139 }, { "epoch": 0.34374230274501216, "grad_norm": 1.1120179085455801, "learning_rate": 3.678349652591816e-05, "loss": 0.4707, "num_input_tokens_seen": 567065632, "step": 3140 }, { "epoch": 0.3438517748159501, "grad_norm": 1.2563745085852203, "learning_rate": 3.677591225982614e-05, "loss": 0.7488, "num_input_tokens_seen": 567249536, "step": 3141 }, { "epoch": 0.343961246886888, "grad_norm": 1.2915657475742406, "learning_rate": 3.67683266006662e-05, "loss": 0.7251, "num_input_tokens_seen": 567454272, "step": 3142 }, { "epoch": 0.34407071895782587, "grad_norm": 1.270221229267941, "learning_rate": 3.676073954933573e-05, "loss": 0.6472, "num_input_tokens_seen": 567641312, "step": 3143 }, { "epoch": 0.3441801910287638, "grad_norm": 1.1470458753136936, "learning_rate": 3.6753151106732255e-05, "loss": 0.7084, "num_input_tokens_seen": 567819168, "step": 3144 }, { "epoch": 0.3442896630997017, "grad_norm": 1.2027419335033942, "learning_rate": 3.674556127375347e-05, "loss": 0.6171, "num_input_tokens_seen": 568002400, "step": 3145 }, { "epoch": 0.3443991351706396, "grad_norm": 1.274803926313666, "learning_rate": 3.6737970051297234e-05, "loss": 0.8196, "num_input_tokens_seen": 568205792, "step": 3146 }, { "epoch": 0.3445086072415775, "grad_norm": 1.3307800681375557, "learning_rate": 3.673037744026159e-05, "loss": 0.7456, "num_input_tokens_seen": 568401120, "step": 3147 }, { "epoch": 0.34461807931251537, "grad_norm": 1.2517375790250405, "learning_rate": 3.672278344154471e-05, "loss": 0.5945, "num_input_tokens_seen": 568557696, "step": 3148 }, { "epoch": 0.3447275513834533, "grad_norm": 1.2734490610545712, "learning_rate": 3.671518805604496e-05, "loss": 0.7462, "num_input_tokens_seen": 568737792, "step": 3149 }, { "epoch": 0.3448370234543912, "grad_norm": 1.4044536137021673, "learning_rate": 3.670759128466087e-05, "loss": 0.7897, "num_input_tokens_seen": 568940960, "step": 3150 }, { "epoch": 0.3449464955253291, "grad_norm": 1.033635587991637, "learning_rate": 3.669999312829111e-05, "loss": 0.4074, "num_input_tokens_seen": 569113888, "step": 3151 }, { "epoch": 0.345055967596267, "grad_norm": 1.3764983596320863, "learning_rate": 3.669239358783452e-05, "loss": 0.7551, "num_input_tokens_seen": 569288160, "step": 3152 }, { "epoch": 0.3451654396672049, "grad_norm": 1.1414643426272684, "learning_rate": 3.668479266419012e-05, "loss": 0.5305, "num_input_tokens_seen": 569438016, "step": 3153 }, { "epoch": 0.3452749117381428, "grad_norm": 1.2941912723190974, "learning_rate": 3.6677190358257086e-05, "loss": 0.8747, "num_input_tokens_seen": 569623040, "step": 3154 }, { "epoch": 0.3453843838090807, "grad_norm": 1.1804898798507566, "learning_rate": 3.666958667093476e-05, "loss": 0.7497, "num_input_tokens_seen": 569811424, "step": 3155 }, { "epoch": 0.34549385588001863, "grad_norm": 1.0800301677411126, "learning_rate": 3.6661981603122645e-05, "loss": 0.5819, "num_input_tokens_seen": 569986368, "step": 3156 }, { "epoch": 0.3456033279509565, "grad_norm": 1.2936213480058998, "learning_rate": 3.665437515572039e-05, "loss": 0.6702, "num_input_tokens_seen": 570184160, "step": 3157 }, { "epoch": 0.3457128000218944, "grad_norm": 1.3578515559682323, "learning_rate": 3.664676732962784e-05, "loss": 0.8348, "num_input_tokens_seen": 570394720, "step": 3158 }, { "epoch": 0.34582227209283234, "grad_norm": 1.107290808508975, "learning_rate": 3.663915812574497e-05, "loss": 0.5655, "num_input_tokens_seen": 570571008, "step": 3159 }, { "epoch": 0.3459317441637702, "grad_norm": 1.3634569115036153, "learning_rate": 3.663154754497196e-05, "loss": 0.6114, "num_input_tokens_seen": 570739008, "step": 3160 }, { "epoch": 0.3460412162347081, "grad_norm": 1.3186663686982125, "learning_rate": 3.66239355882091e-05, "loss": 0.6208, "num_input_tokens_seen": 570895360, "step": 3161 }, { "epoch": 0.34615068830564605, "grad_norm": 1.2674843587418156, "learning_rate": 3.6616322256356884e-05, "loss": 0.7707, "num_input_tokens_seen": 571085536, "step": 3162 }, { "epoch": 0.3462601603765839, "grad_norm": 1.3791206344498057, "learning_rate": 3.6608707550315944e-05, "loss": 0.6948, "num_input_tokens_seen": 571273024, "step": 3163 }, { "epoch": 0.34636963244752184, "grad_norm": 1.3871303323652, "learning_rate": 3.660109147098711e-05, "loss": 0.7548, "num_input_tokens_seen": 571444160, "step": 3164 }, { "epoch": 0.3464791045184597, "grad_norm": 1.3379419024588337, "learning_rate": 3.659347401927131e-05, "loss": 0.7098, "num_input_tokens_seen": 571642400, "step": 3165 }, { "epoch": 0.3465885765893976, "grad_norm": 1.3376714596520292, "learning_rate": 3.6585855196069704e-05, "loss": 0.6282, "num_input_tokens_seen": 571820032, "step": 3166 }, { "epoch": 0.34669804866033554, "grad_norm": 1.395966419119393, "learning_rate": 3.657823500228359e-05, "loss": 0.7717, "num_input_tokens_seen": 571995648, "step": 3167 }, { "epoch": 0.3468075207312734, "grad_norm": 1.1743536104586028, "learning_rate": 3.65706134388144e-05, "loss": 0.5266, "num_input_tokens_seen": 572149536, "step": 3168 }, { "epoch": 0.34691699280221133, "grad_norm": 1.2680667352804493, "learning_rate": 3.656299050656376e-05, "loss": 0.6338, "num_input_tokens_seen": 572310816, "step": 3169 }, { "epoch": 0.34702646487314925, "grad_norm": 1.258934042241385, "learning_rate": 3.655536620643345e-05, "loss": 0.6667, "num_input_tokens_seen": 572476576, "step": 3170 }, { "epoch": 0.3471359369440871, "grad_norm": 1.3085950709094312, "learning_rate": 3.654774053932541e-05, "loss": 0.6294, "num_input_tokens_seen": 572661376, "step": 3171 }, { "epoch": 0.34724540901502504, "grad_norm": 1.3760749343134937, "learning_rate": 3.6540113506141734e-05, "loss": 0.8661, "num_input_tokens_seen": 572833184, "step": 3172 }, { "epoch": 0.34735488108596296, "grad_norm": 1.3268903041968627, "learning_rate": 3.653248510778469e-05, "loss": 0.7283, "num_input_tokens_seen": 573025600, "step": 3173 }, { "epoch": 0.34746435315690083, "grad_norm": 1.2735589092395156, "learning_rate": 3.652485534515671e-05, "loss": 0.7794, "num_input_tokens_seen": 573230336, "step": 3174 }, { "epoch": 0.34757382522783875, "grad_norm": 1.31667947622096, "learning_rate": 3.6517224219160365e-05, "loss": 0.7147, "num_input_tokens_seen": 573406176, "step": 3175 }, { "epoch": 0.3476832972987767, "grad_norm": 1.0706227542031912, "learning_rate": 3.6509591730698416e-05, "loss": 0.5735, "num_input_tokens_seen": 573601504, "step": 3176 }, { "epoch": 0.34779276936971454, "grad_norm": 1.2362241776626697, "learning_rate": 3.6501957880673775e-05, "loss": 0.6359, "num_input_tokens_seen": 573781376, "step": 3177 }, { "epoch": 0.34790224144065246, "grad_norm": 1.1716577455679318, "learning_rate": 3.64943226699895e-05, "loss": 0.6908, "num_input_tokens_seen": 573964160, "step": 3178 }, { "epoch": 0.3480117135115904, "grad_norm": 1.2568247453003023, "learning_rate": 3.648668609954883e-05, "loss": 0.7793, "num_input_tokens_seen": 574157920, "step": 3179 }, { "epoch": 0.34812118558252825, "grad_norm": 1.2315419194733754, "learning_rate": 3.647904817025514e-05, "loss": 0.5795, "num_input_tokens_seen": 574319648, "step": 3180 }, { "epoch": 0.34823065765346617, "grad_norm": 1.2212230338142502, "learning_rate": 3.6471408883012006e-05, "loss": 0.8677, "num_input_tokens_seen": 574479136, "step": 3181 }, { "epoch": 0.34834012972440404, "grad_norm": 1.316989033907549, "learning_rate": 3.646376823872313e-05, "loss": 1.0189, "num_input_tokens_seen": 574696864, "step": 3182 }, { "epoch": 0.34844960179534196, "grad_norm": 1.2464209817506158, "learning_rate": 3.6456126238292394e-05, "loss": 0.6874, "num_input_tokens_seen": 574873824, "step": 3183 }, { "epoch": 0.3485590738662799, "grad_norm": 1.3749404807646937, "learning_rate": 3.6448482882623814e-05, "loss": 0.8923, "num_input_tokens_seen": 575080576, "step": 3184 }, { "epoch": 0.34866854593721774, "grad_norm": 1.1755833253262638, "learning_rate": 3.64408381726216e-05, "loss": 0.7943, "num_input_tokens_seen": 575275232, "step": 3185 }, { "epoch": 0.34877801800815567, "grad_norm": 1.3631813865308713, "learning_rate": 3.6433192109190096e-05, "loss": 0.929, "num_input_tokens_seen": 575455776, "step": 3186 }, { "epoch": 0.3488874900790936, "grad_norm": 1.0801977605250168, "learning_rate": 3.642554469323382e-05, "loss": 0.5777, "num_input_tokens_seen": 575619296, "step": 3187 }, { "epoch": 0.34899696215003145, "grad_norm": 1.3033977232309815, "learning_rate": 3.641789592565746e-05, "loss": 0.7902, "num_input_tokens_seen": 575823136, "step": 3188 }, { "epoch": 0.3491064342209694, "grad_norm": 1.287989175327496, "learning_rate": 3.641024580736583e-05, "loss": 0.6671, "num_input_tokens_seen": 576006592, "step": 3189 }, { "epoch": 0.3492159062919073, "grad_norm": 1.4398247538000148, "learning_rate": 3.640259433926394e-05, "loss": 0.7917, "num_input_tokens_seen": 576158464, "step": 3190 }, { "epoch": 0.34932537836284516, "grad_norm": 1.3466101525427518, "learning_rate": 3.639494152225693e-05, "loss": 0.9047, "num_input_tokens_seen": 576366784, "step": 3191 }, { "epoch": 0.3494348504337831, "grad_norm": 1.2182979723594969, "learning_rate": 3.638728735725013e-05, "loss": 0.5282, "num_input_tokens_seen": 576524256, "step": 3192 }, { "epoch": 0.349544322504721, "grad_norm": 1.2194199799063137, "learning_rate": 3.6379631845148995e-05, "loss": 0.6554, "num_input_tokens_seen": 576708832, "step": 3193 }, { "epoch": 0.34965379457565887, "grad_norm": 1.2010074161742068, "learning_rate": 3.637197498685917e-05, "loss": 0.4771, "num_input_tokens_seen": 576884896, "step": 3194 }, { "epoch": 0.3497632666465968, "grad_norm": 1.3466932753886425, "learning_rate": 3.636431678328646e-05, "loss": 0.8125, "num_input_tokens_seen": 577090752, "step": 3195 }, { "epoch": 0.3498727387175347, "grad_norm": 1.3788636717017586, "learning_rate": 3.635665723533678e-05, "loss": 0.58, "num_input_tokens_seen": 577250688, "step": 3196 }, { "epoch": 0.3499822107884726, "grad_norm": 1.22983057336118, "learning_rate": 3.634899634391626e-05, "loss": 0.7901, "num_input_tokens_seen": 577427872, "step": 3197 }, { "epoch": 0.3500916828594105, "grad_norm": 1.2236262851793682, "learning_rate": 3.634133410993117e-05, "loss": 0.5697, "num_input_tokens_seen": 577572128, "step": 3198 }, { "epoch": 0.35020115493034837, "grad_norm": 1.1376279884687261, "learning_rate": 3.6333670534287945e-05, "loss": 0.6586, "num_input_tokens_seen": 577743936, "step": 3199 }, { "epoch": 0.3503106270012863, "grad_norm": 1.3907788223744268, "learning_rate": 3.632600561789315e-05, "loss": 0.6503, "num_input_tokens_seen": 577902528, "step": 3200 }, { "epoch": 0.3504200990722242, "grad_norm": 2.2042004280413203, "learning_rate": 3.6318339361653545e-05, "loss": 0.9012, "num_input_tokens_seen": 578095168, "step": 3201 }, { "epoch": 0.3505295711431621, "grad_norm": 1.223414420620259, "learning_rate": 3.631067176647603e-05, "loss": 0.6675, "num_input_tokens_seen": 578265184, "step": 3202 }, { "epoch": 0.3506390432141, "grad_norm": 1.2965393551268316, "learning_rate": 3.630300283326768e-05, "loss": 0.7568, "num_input_tokens_seen": 578458720, "step": 3203 }, { "epoch": 0.3507485152850379, "grad_norm": 1.2542674177305089, "learning_rate": 3.629533256293569e-05, "loss": 0.648, "num_input_tokens_seen": 578649344, "step": 3204 }, { "epoch": 0.3508579873559758, "grad_norm": 1.19035182632535, "learning_rate": 3.6287660956387454e-05, "loss": 0.5904, "num_input_tokens_seen": 578803008, "step": 3205 }, { "epoch": 0.3509674594269137, "grad_norm": 1.328911223225648, "learning_rate": 3.62799880145305e-05, "loss": 0.736, "num_input_tokens_seen": 578962496, "step": 3206 }, { "epoch": 0.35107693149785163, "grad_norm": 1.2824501371450299, "learning_rate": 3.627231373827253e-05, "loss": 0.8662, "num_input_tokens_seen": 579156256, "step": 3207 }, { "epoch": 0.3511864035687895, "grad_norm": 1.228110193449653, "learning_rate": 3.62646381285214e-05, "loss": 0.5092, "num_input_tokens_seen": 579323136, "step": 3208 }, { "epoch": 0.3512958756397274, "grad_norm": 1.2733704250901714, "learning_rate": 3.6256961186185115e-05, "loss": 0.6725, "num_input_tokens_seen": 579508608, "step": 3209 }, { "epoch": 0.35140534771066534, "grad_norm": 1.197211110303685, "learning_rate": 3.624928291217184e-05, "loss": 0.6277, "num_input_tokens_seen": 579673920, "step": 3210 }, { "epoch": 0.3515148197816032, "grad_norm": 1.1726969570182562, "learning_rate": 3.624160330738989e-05, "loss": 0.5944, "num_input_tokens_seen": 579851104, "step": 3211 }, { "epoch": 0.3516242918525411, "grad_norm": 1.3004683384544622, "learning_rate": 3.623392237274777e-05, "loss": 0.6536, "num_input_tokens_seen": 580018880, "step": 3212 }, { "epoch": 0.35173376392347905, "grad_norm": 1.238025626864827, "learning_rate": 3.6226240109154105e-05, "loss": 0.6492, "num_input_tokens_seen": 580183520, "step": 3213 }, { "epoch": 0.3518432359944169, "grad_norm": 1.2432507936439459, "learning_rate": 3.6218556517517695e-05, "loss": 0.8064, "num_input_tokens_seen": 580401472, "step": 3214 }, { "epoch": 0.35195270806535484, "grad_norm": 1.3762950866964079, "learning_rate": 3.6210871598747495e-05, "loss": 0.6504, "num_input_tokens_seen": 580597248, "step": 3215 }, { "epoch": 0.3520621801362927, "grad_norm": 1.449581954149842, "learning_rate": 3.620318535375262e-05, "loss": 0.9963, "num_input_tokens_seen": 580798624, "step": 3216 }, { "epoch": 0.3521716522072306, "grad_norm": 1.259911249304838, "learning_rate": 3.6195497783442336e-05, "loss": 0.6888, "num_input_tokens_seen": 581004032, "step": 3217 }, { "epoch": 0.35228112427816854, "grad_norm": 1.2933520529726108, "learning_rate": 3.618780888872606e-05, "loss": 0.8282, "num_input_tokens_seen": 581174048, "step": 3218 }, { "epoch": 0.3523905963491064, "grad_norm": 1.3872797301436213, "learning_rate": 3.618011867051339e-05, "loss": 0.69, "num_input_tokens_seen": 581343616, "step": 3219 }, { "epoch": 0.35250006842004433, "grad_norm": 1.2111294994115154, "learning_rate": 3.6172427129714036e-05, "loss": 0.8062, "num_input_tokens_seen": 581519680, "step": 3220 }, { "epoch": 0.35260954049098225, "grad_norm": 1.3394285398347963, "learning_rate": 3.616473426723792e-05, "loss": 0.6634, "num_input_tokens_seen": 581679168, "step": 3221 }, { "epoch": 0.3527190125619201, "grad_norm": 1.391736561434349, "learning_rate": 3.615704008399509e-05, "loss": 0.8617, "num_input_tokens_seen": 581851872, "step": 3222 }, { "epoch": 0.35282848463285804, "grad_norm": 1.253061931288234, "learning_rate": 3.614934458089575e-05, "loss": 0.6769, "num_input_tokens_seen": 582036672, "step": 3223 }, { "epoch": 0.35293795670379596, "grad_norm": 1.1598877258885318, "learning_rate": 3.614164775885025e-05, "loss": 0.8119, "num_input_tokens_seen": 582243424, "step": 3224 }, { "epoch": 0.35304742877473383, "grad_norm": 1.1607704463409252, "learning_rate": 3.613394961876912e-05, "loss": 0.5529, "num_input_tokens_seen": 582410752, "step": 3225 }, { "epoch": 0.35315690084567175, "grad_norm": 1.266546079057158, "learning_rate": 3.612625016156303e-05, "loss": 0.8328, "num_input_tokens_seen": 582577632, "step": 3226 }, { "epoch": 0.35326637291660967, "grad_norm": 1.1713041822670318, "learning_rate": 3.611854938814282e-05, "loss": 0.527, "num_input_tokens_seen": 582725248, "step": 3227 }, { "epoch": 0.35337584498754754, "grad_norm": 1.3869996122237298, "learning_rate": 3.6110847299419474e-05, "loss": 0.7511, "num_input_tokens_seen": 582890560, "step": 3228 }, { "epoch": 0.35348531705848546, "grad_norm": 1.3259241156404886, "learning_rate": 3.6103143896304136e-05, "loss": 0.6117, "num_input_tokens_seen": 583074016, "step": 3229 }, { "epoch": 0.3535947891294234, "grad_norm": 1.2417842762353872, "learning_rate": 3.6095439179708096e-05, "loss": 0.7153, "num_input_tokens_seen": 583249184, "step": 3230 }, { "epoch": 0.35370426120036125, "grad_norm": 1.2066831117549126, "learning_rate": 3.6087733150542814e-05, "loss": 0.6756, "num_input_tokens_seen": 583436448, "step": 3231 }, { "epoch": 0.35381373327129917, "grad_norm": 1.2268286252305656, "learning_rate": 3.6080025809719894e-05, "loss": 0.6668, "num_input_tokens_seen": 583600192, "step": 3232 }, { "epoch": 0.35392320534223703, "grad_norm": 1.096998730497921, "learning_rate": 3.607231715815111e-05, "loss": 0.6171, "num_input_tokens_seen": 583804032, "step": 3233 }, { "epoch": 0.35403267741317496, "grad_norm": 1.250192102929626, "learning_rate": 3.6064607196748365e-05, "loss": 0.6807, "num_input_tokens_seen": 583993760, "step": 3234 }, { "epoch": 0.3541421494841129, "grad_norm": 1.2996266140609951, "learning_rate": 3.6056895926423736e-05, "loss": 0.8425, "num_input_tokens_seen": 584178112, "step": 3235 }, { "epoch": 0.35425162155505074, "grad_norm": 1.1761846008929924, "learning_rate": 3.604918334808947e-05, "loss": 0.5065, "num_input_tokens_seen": 584347456, "step": 3236 }, { "epoch": 0.35436109362598867, "grad_norm": 1.376797151862617, "learning_rate": 3.6041469462657926e-05, "loss": 0.7108, "num_input_tokens_seen": 584520160, "step": 3237 }, { "epoch": 0.3544705656969266, "grad_norm": 1.2315251251179133, "learning_rate": 3.6033754271041654e-05, "loss": 0.5951, "num_input_tokens_seen": 584683008, "step": 3238 }, { "epoch": 0.35458003776786445, "grad_norm": 1.231986571997377, "learning_rate": 3.602603777415335e-05, "loss": 0.8605, "num_input_tokens_seen": 584884608, "step": 3239 }, { "epoch": 0.3546895098388024, "grad_norm": 1.3487780780715608, "learning_rate": 3.601831997290585e-05, "loss": 0.7763, "num_input_tokens_seen": 585055072, "step": 3240 }, { "epoch": 0.3547989819097403, "grad_norm": 1.282940423978197, "learning_rate": 3.6010600868212156e-05, "loss": 0.6473, "num_input_tokens_seen": 585220160, "step": 3241 }, { "epoch": 0.35490845398067816, "grad_norm": 1.3996925902055837, "learning_rate": 3.6002880460985436e-05, "loss": 0.6425, "num_input_tokens_seen": 585380320, "step": 3242 }, { "epoch": 0.3550179260516161, "grad_norm": 1.2709535114601347, "learning_rate": 3.599515875213899e-05, "loss": 0.6272, "num_input_tokens_seen": 585570944, "step": 3243 }, { "epoch": 0.355127398122554, "grad_norm": 1.2690695054970038, "learning_rate": 3.5987435742586286e-05, "loss": 0.6361, "num_input_tokens_seen": 585733792, "step": 3244 }, { "epoch": 0.35523687019349187, "grad_norm": 1.159171717422711, "learning_rate": 3.5979711433240934e-05, "loss": 0.606, "num_input_tokens_seen": 585858336, "step": 3245 }, { "epoch": 0.3553463422644298, "grad_norm": 1.2427454241290634, "learning_rate": 3.597198582501671e-05, "loss": 0.6926, "num_input_tokens_seen": 586074272, "step": 3246 }, { "epoch": 0.3554558143353677, "grad_norm": 1.2418700478334566, "learning_rate": 3.596425891882754e-05, "loss": 0.65, "num_input_tokens_seen": 586266912, "step": 3247 }, { "epoch": 0.3555652864063056, "grad_norm": 1.20051335837905, "learning_rate": 3.59565307155875e-05, "loss": 0.7093, "num_input_tokens_seen": 586441632, "step": 3248 }, { "epoch": 0.3556747584772435, "grad_norm": 1.4559704393540507, "learning_rate": 3.594880121621081e-05, "loss": 0.7655, "num_input_tokens_seen": 586600224, "step": 3249 }, { "epoch": 0.35578423054818137, "grad_norm": 1.1442934381575705, "learning_rate": 3.5941070421611874e-05, "loss": 0.6586, "num_input_tokens_seen": 586775616, "step": 3250 }, { "epoch": 0.3558937026191193, "grad_norm": 1.1515470692650167, "learning_rate": 3.5933338332705225e-05, "loss": 0.6395, "num_input_tokens_seen": 586973632, "step": 3251 }, { "epoch": 0.3560031746900572, "grad_norm": 1.1406966409001886, "learning_rate": 3.592560495040556e-05, "loss": 0.5088, "num_input_tokens_seen": 587128416, "step": 3252 }, { "epoch": 0.3561126467609951, "grad_norm": 1.2128364686039934, "learning_rate": 3.59178702756277e-05, "loss": 0.6844, "num_input_tokens_seen": 587306496, "step": 3253 }, { "epoch": 0.356222118831933, "grad_norm": 1.3656299983790776, "learning_rate": 3.591013430928666e-05, "loss": 0.746, "num_input_tokens_seen": 587474048, "step": 3254 }, { "epoch": 0.3563315909028709, "grad_norm": 1.2268422287670873, "learning_rate": 3.59023970522976e-05, "loss": 0.5208, "num_input_tokens_seen": 587648096, "step": 3255 }, { "epoch": 0.3564410629738088, "grad_norm": 1.5287724907656235, "learning_rate": 3.5894658505575805e-05, "loss": 0.7146, "num_input_tokens_seen": 587833568, "step": 3256 }, { "epoch": 0.3565505350447467, "grad_norm": 1.2504652111304162, "learning_rate": 3.588691867003673e-05, "loss": 0.6186, "num_input_tokens_seen": 587983648, "step": 3257 }, { "epoch": 0.35666000711568463, "grad_norm": 1.3084054384387354, "learning_rate": 3.5879177546595996e-05, "loss": 0.7749, "num_input_tokens_seen": 588159712, "step": 3258 }, { "epoch": 0.3567694791866225, "grad_norm": 1.10753597364707, "learning_rate": 3.5871435136169355e-05, "loss": 0.5357, "num_input_tokens_seen": 588341376, "step": 3259 }, { "epoch": 0.3568789512575604, "grad_norm": 1.368207540100359, "learning_rate": 3.5863691439672715e-05, "loss": 0.9149, "num_input_tokens_seen": 588528640, "step": 3260 }, { "epoch": 0.35698842332849834, "grad_norm": 1.2424368937870989, "learning_rate": 3.5855946458022145e-05, "loss": 0.6582, "num_input_tokens_seen": 588685888, "step": 3261 }, { "epoch": 0.3570978953994362, "grad_norm": 1.316941557126967, "learning_rate": 3.5848200192133866e-05, "loss": 0.6487, "num_input_tokens_seen": 588860832, "step": 3262 }, { "epoch": 0.3572073674703741, "grad_norm": 1.5081778760573494, "learning_rate": 3.5840452642924243e-05, "loss": 0.8504, "num_input_tokens_seen": 589020096, "step": 3263 }, { "epoch": 0.35731683954131205, "grad_norm": 1.197084198440326, "learning_rate": 3.5832703811309795e-05, "loss": 0.7016, "num_input_tokens_seen": 589203328, "step": 3264 }, { "epoch": 0.3574263116122499, "grad_norm": 1.448994333828733, "learning_rate": 3.58249536982072e-05, "loss": 1.0058, "num_input_tokens_seen": 589383424, "step": 3265 }, { "epoch": 0.35753578368318784, "grad_norm": 1.180877439087535, "learning_rate": 3.581720230453327e-05, "loss": 0.8033, "num_input_tokens_seen": 589581440, "step": 3266 }, { "epoch": 0.3576452557541257, "grad_norm": 1.0987780394464515, "learning_rate": 3.5809449631204985e-05, "loss": 0.682, "num_input_tokens_seen": 589745632, "step": 3267 }, { "epoch": 0.3577547278250636, "grad_norm": 1.2498075813966976, "learning_rate": 3.580169567913947e-05, "loss": 0.7716, "num_input_tokens_seen": 589922368, "step": 3268 }, { "epoch": 0.35786419989600154, "grad_norm": 1.205872111844835, "learning_rate": 3.5793940449254016e-05, "loss": 0.6651, "num_input_tokens_seen": 590061696, "step": 3269 }, { "epoch": 0.3579736719669394, "grad_norm": 1.1498279587969835, "learning_rate": 3.578618394246603e-05, "loss": 0.6599, "num_input_tokens_seen": 590231712, "step": 3270 }, { "epoch": 0.35808314403787733, "grad_norm": 1.3164881559071695, "learning_rate": 3.577842615969311e-05, "loss": 0.7798, "num_input_tokens_seen": 590412480, "step": 3271 }, { "epoch": 0.35819261610881525, "grad_norm": 1.3134791491174578, "learning_rate": 3.577066710185298e-05, "loss": 0.6977, "num_input_tokens_seen": 590569952, "step": 3272 }, { "epoch": 0.3583020881797531, "grad_norm": 1.3006069765395698, "learning_rate": 3.576290676986352e-05, "loss": 0.6471, "num_input_tokens_seen": 590773792, "step": 3273 }, { "epoch": 0.35841156025069104, "grad_norm": 1.277795274356708, "learning_rate": 3.575514516464277e-05, "loss": 0.7674, "num_input_tokens_seen": 590960160, "step": 3274 }, { "epoch": 0.35852103232162896, "grad_norm": 1.113893735282631, "learning_rate": 3.57473822871089e-05, "loss": 0.5837, "num_input_tokens_seen": 591161088, "step": 3275 }, { "epoch": 0.35863050439256683, "grad_norm": 1.2496478538396154, "learning_rate": 3.5739618138180254e-05, "loss": 0.708, "num_input_tokens_seen": 591330432, "step": 3276 }, { "epoch": 0.35873997646350475, "grad_norm": 1.4135823989130756, "learning_rate": 3.573185271877531e-05, "loss": 0.7754, "num_input_tokens_seen": 591480288, "step": 3277 }, { "epoch": 0.35884944853444267, "grad_norm": 1.5128190326766324, "learning_rate": 3.572408602981271e-05, "loss": 0.7743, "num_input_tokens_seen": 591699360, "step": 3278 }, { "epoch": 0.35895892060538054, "grad_norm": 1.274497965539062, "learning_rate": 3.571631807221123e-05, "loss": 0.6722, "num_input_tokens_seen": 591897600, "step": 3279 }, { "epoch": 0.35906839267631846, "grad_norm": 1.1976624371094466, "learning_rate": 3.570854884688981e-05, "loss": 0.6769, "num_input_tokens_seen": 592092032, "step": 3280 }, { "epoch": 0.3591778647472564, "grad_norm": 1.575117890926121, "learning_rate": 3.570077835476753e-05, "loss": 0.9675, "num_input_tokens_seen": 592286688, "step": 3281 }, { "epoch": 0.35928733681819425, "grad_norm": 1.287116136084528, "learning_rate": 3.569300659676363e-05, "loss": 0.6696, "num_input_tokens_seen": 592456704, "step": 3282 }, { "epoch": 0.35939680888913217, "grad_norm": 1.0957468061528972, "learning_rate": 3.568523357379749e-05, "loss": 0.6586, "num_input_tokens_seen": 592663904, "step": 3283 }, { "epoch": 0.35950628096007003, "grad_norm": 1.2120061281508823, "learning_rate": 3.5677459286788645e-05, "loss": 0.6057, "num_input_tokens_seen": 592863712, "step": 3284 }, { "epoch": 0.35961575303100796, "grad_norm": 1.0922399723835137, "learning_rate": 3.566968373665678e-05, "loss": 0.6455, "num_input_tokens_seen": 593078528, "step": 3285 }, { "epoch": 0.3597252251019459, "grad_norm": 1.485097046965908, "learning_rate": 3.5661906924321723e-05, "loss": 0.7128, "num_input_tokens_seen": 593238688, "step": 3286 }, { "epoch": 0.35983469717288374, "grad_norm": 1.240701422271319, "learning_rate": 3.5654128850703464e-05, "loss": 0.5496, "num_input_tokens_seen": 593433568, "step": 3287 }, { "epoch": 0.35994416924382167, "grad_norm": 1.107433123926692, "learning_rate": 3.564634951672212e-05, "loss": 0.5224, "num_input_tokens_seen": 593597984, "step": 3288 }, { "epoch": 0.3600536413147596, "grad_norm": 1.131863268702444, "learning_rate": 3.5638568923297985e-05, "loss": 0.5969, "num_input_tokens_seen": 593759040, "step": 3289 }, { "epoch": 0.36016311338569745, "grad_norm": 1.3528175039853123, "learning_rate": 3.563078707135149e-05, "loss": 0.7618, "num_input_tokens_seen": 593949888, "step": 3290 }, { "epoch": 0.3602725854566354, "grad_norm": 1.280854838771297, "learning_rate": 3.56230039618032e-05, "loss": 0.7381, "num_input_tokens_seen": 594138496, "step": 3291 }, { "epoch": 0.3603820575275733, "grad_norm": 1.2141036941039782, "learning_rate": 3.561521959557385e-05, "loss": 0.524, "num_input_tokens_seen": 594287904, "step": 3292 }, { "epoch": 0.36049152959851116, "grad_norm": 1.3388311268564173, "learning_rate": 3.5607433973584316e-05, "loss": 0.5885, "num_input_tokens_seen": 594470912, "step": 3293 }, { "epoch": 0.3606010016694491, "grad_norm": 1.2692826247681763, "learning_rate": 3.5599647096755624e-05, "loss": 0.7536, "num_input_tokens_seen": 594664448, "step": 3294 }, { "epoch": 0.360710473740387, "grad_norm": 1.3222249643224355, "learning_rate": 3.5591858966008935e-05, "loss": 0.7582, "num_input_tokens_seen": 594830208, "step": 3295 }, { "epoch": 0.36081994581132487, "grad_norm": 1.3137567044131508, "learning_rate": 3.558406958226559e-05, "loss": 0.7423, "num_input_tokens_seen": 594965952, "step": 3296 }, { "epoch": 0.3609294178822628, "grad_norm": 1.2908663988075266, "learning_rate": 3.5576278946447036e-05, "loss": 0.6987, "num_input_tokens_seen": 595152992, "step": 3297 }, { "epoch": 0.3610388899532007, "grad_norm": 1.45439264933488, "learning_rate": 3.556848705947491e-05, "loss": 0.7194, "num_input_tokens_seen": 595372512, "step": 3298 }, { "epoch": 0.3611483620241386, "grad_norm": 1.1607011741903714, "learning_rate": 3.556069392227096e-05, "loss": 0.6593, "num_input_tokens_seen": 595542752, "step": 3299 }, { "epoch": 0.3612578340950765, "grad_norm": 1.2201046872952745, "learning_rate": 3.5552899535757115e-05, "loss": 0.5969, "num_input_tokens_seen": 595731360, "step": 3300 }, { "epoch": 0.36136730616601437, "grad_norm": 1.2223691268016785, "learning_rate": 3.554510390085543e-05, "loss": 0.5062, "num_input_tokens_seen": 595918624, "step": 3301 }, { "epoch": 0.3614767782369523, "grad_norm": 1.1900997815653052, "learning_rate": 3.5537307018488095e-05, "loss": 0.6638, "num_input_tokens_seen": 596109472, "step": 3302 }, { "epoch": 0.3615862503078902, "grad_norm": 1.298603239165919, "learning_rate": 3.55295088895775e-05, "loss": 0.6893, "num_input_tokens_seen": 596267168, "step": 3303 }, { "epoch": 0.3616957223788281, "grad_norm": 1.2189917202418765, "learning_rate": 3.552170951504613e-05, "loss": 0.6731, "num_input_tokens_seen": 596422400, "step": 3304 }, { "epoch": 0.361805194449766, "grad_norm": 1.090333343245404, "learning_rate": 3.551390889581664e-05, "loss": 0.6372, "num_input_tokens_seen": 596632736, "step": 3305 }, { "epoch": 0.3619146665207039, "grad_norm": 1.2394994285246719, "learning_rate": 3.550610703281182e-05, "loss": 0.683, "num_input_tokens_seen": 596809024, "step": 3306 }, { "epoch": 0.3620241385916418, "grad_norm": 1.313820129613371, "learning_rate": 3.5498303926954626e-05, "loss": 0.5589, "num_input_tokens_seen": 596996288, "step": 3307 }, { "epoch": 0.3621336106625797, "grad_norm": 1.2594077378175697, "learning_rate": 3.549049957916815e-05, "loss": 0.5323, "num_input_tokens_seen": 597149504, "step": 3308 }, { "epoch": 0.36224308273351763, "grad_norm": 1.3482934744467834, "learning_rate": 3.548269399037562e-05, "loss": 0.7142, "num_input_tokens_seen": 597329824, "step": 3309 }, { "epoch": 0.3623525548044555, "grad_norm": 1.365436631748295, "learning_rate": 3.547488716150044e-05, "loss": 0.7663, "num_input_tokens_seen": 597491552, "step": 3310 }, { "epoch": 0.3624620268753934, "grad_norm": 1.1853229242053014, "learning_rate": 3.546707909346613e-05, "loss": 0.6858, "num_input_tokens_seen": 597678144, "step": 3311 }, { "epoch": 0.36257149894633134, "grad_norm": 1.3052270196346778, "learning_rate": 3.545926978719637e-05, "loss": 0.8191, "num_input_tokens_seen": 597886464, "step": 3312 }, { "epoch": 0.3626809710172692, "grad_norm": 1.240386713238866, "learning_rate": 3.545145924361499e-05, "loss": 0.9161, "num_input_tokens_seen": 598096576, "step": 3313 }, { "epoch": 0.3627904430882071, "grad_norm": 1.256764751513937, "learning_rate": 3.544364746364596e-05, "loss": 0.7122, "num_input_tokens_seen": 598273088, "step": 3314 }, { "epoch": 0.36289991515914505, "grad_norm": 1.2439842425265268, "learning_rate": 3.54358344482134e-05, "loss": 0.6235, "num_input_tokens_seen": 598450720, "step": 3315 }, { "epoch": 0.3630093872300829, "grad_norm": 1.2022858207285971, "learning_rate": 3.542802019824158e-05, "loss": 0.7491, "num_input_tokens_seen": 598638880, "step": 3316 }, { "epoch": 0.36311885930102084, "grad_norm": 1.182767623795866, "learning_rate": 3.5420204714654906e-05, "loss": 0.5544, "num_input_tokens_seen": 598834880, "step": 3317 }, { "epoch": 0.3632283313719587, "grad_norm": 1.1095656110317795, "learning_rate": 3.5412387998377926e-05, "loss": 0.6329, "num_input_tokens_seen": 599006240, "step": 3318 }, { "epoch": 0.3633378034428966, "grad_norm": 1.3782132922757029, "learning_rate": 3.5404570050335354e-05, "loss": 0.9257, "num_input_tokens_seen": 599208064, "step": 3319 }, { "epoch": 0.36344727551383454, "grad_norm": 1.17444055103277, "learning_rate": 3.5396750871452036e-05, "loss": 0.5907, "num_input_tokens_seen": 599379648, "step": 3320 }, { "epoch": 0.3635567475847724, "grad_norm": 1.2131340040671663, "learning_rate": 3.538893046265297e-05, "loss": 0.8463, "num_input_tokens_seen": 599581920, "step": 3321 }, { "epoch": 0.36366621965571033, "grad_norm": 1.1781892167052, "learning_rate": 3.5381108824863284e-05, "loss": 0.6833, "num_input_tokens_seen": 599766048, "step": 3322 }, { "epoch": 0.36377569172664825, "grad_norm": 1.1612018535804212, "learning_rate": 3.5373285959008265e-05, "loss": 0.6742, "num_input_tokens_seen": 599989376, "step": 3323 }, { "epoch": 0.3638851637975861, "grad_norm": 1.2323728330983734, "learning_rate": 3.536546186601336e-05, "loss": 0.6352, "num_input_tokens_seen": 600173952, "step": 3324 }, { "epoch": 0.36399463586852404, "grad_norm": 1.3142089163439863, "learning_rate": 3.5357636546804125e-05, "loss": 0.8862, "num_input_tokens_seen": 600350016, "step": 3325 }, { "epoch": 0.36410410793946196, "grad_norm": 1.189861509374681, "learning_rate": 3.534981000230629e-05, "loss": 0.6444, "num_input_tokens_seen": 600536608, "step": 3326 }, { "epoch": 0.36421358001039983, "grad_norm": 1.2859889987724171, "learning_rate": 3.5341982233445715e-05, "loss": 0.7013, "num_input_tokens_seen": 600719616, "step": 3327 }, { "epoch": 0.36432305208133775, "grad_norm": 1.3601282159588306, "learning_rate": 3.533415324114841e-05, "loss": 0.8, "num_input_tokens_seen": 600907552, "step": 3328 }, { "epoch": 0.36443252415227567, "grad_norm": 1.1420461606351757, "learning_rate": 3.532632302634053e-05, "loss": 0.5329, "num_input_tokens_seen": 601085856, "step": 3329 }, { "epoch": 0.36454199622321354, "grad_norm": 1.4271166256921775, "learning_rate": 3.531849158994839e-05, "loss": 0.6869, "num_input_tokens_seen": 601250272, "step": 3330 }, { "epoch": 0.36465146829415146, "grad_norm": 1.2350022376190795, "learning_rate": 3.531065893289841e-05, "loss": 0.6708, "num_input_tokens_seen": 601406624, "step": 3331 }, { "epoch": 0.3647609403650894, "grad_norm": 1.4459633676207355, "learning_rate": 3.530282505611719e-05, "loss": 0.8848, "num_input_tokens_seen": 601601728, "step": 3332 }, { "epoch": 0.36487041243602725, "grad_norm": 1.3855044784060524, "learning_rate": 3.5294989960531456e-05, "loss": 0.7992, "num_input_tokens_seen": 601800640, "step": 3333 }, { "epoch": 0.36497988450696517, "grad_norm": 1.399601286924108, "learning_rate": 3.52871536470681e-05, "loss": 0.684, "num_input_tokens_seen": 601942880, "step": 3334 }, { "epoch": 0.36508935657790303, "grad_norm": 1.2482786246986108, "learning_rate": 3.5279316116654126e-05, "loss": 0.665, "num_input_tokens_seen": 602105952, "step": 3335 }, { "epoch": 0.36519882864884096, "grad_norm": 1.2480743357423696, "learning_rate": 3.527147737021671e-05, "loss": 0.7021, "num_input_tokens_seen": 602279104, "step": 3336 }, { "epoch": 0.3653083007197789, "grad_norm": 1.4479139680174051, "learning_rate": 3.526363740868316e-05, "loss": 0.6554, "num_input_tokens_seen": 602478016, "step": 3337 }, { "epoch": 0.36541777279071674, "grad_norm": 1.2607761322344988, "learning_rate": 3.525579623298092e-05, "loss": 0.6294, "num_input_tokens_seen": 602686560, "step": 3338 }, { "epoch": 0.36552724486165467, "grad_norm": 1.2903397676742219, "learning_rate": 3.52479538440376e-05, "loss": 0.7855, "num_input_tokens_seen": 602887936, "step": 3339 }, { "epoch": 0.3656367169325926, "grad_norm": 1.2560414231266848, "learning_rate": 3.5240110242780916e-05, "loss": 0.9159, "num_input_tokens_seen": 603089088, "step": 3340 }, { "epoch": 0.36574618900353045, "grad_norm": 1.3248227972620348, "learning_rate": 3.5232265430138776e-05, "loss": 0.5979, "num_input_tokens_seen": 603250144, "step": 3341 }, { "epoch": 0.3658556610744684, "grad_norm": 1.3744514845122373, "learning_rate": 3.52244194070392e-05, "loss": 0.836, "num_input_tokens_seen": 603428000, "step": 3342 }, { "epoch": 0.3659651331454063, "grad_norm": 1.359371185297382, "learning_rate": 3.521657217441034e-05, "loss": 0.6995, "num_input_tokens_seen": 603585472, "step": 3343 }, { "epoch": 0.36607460521634416, "grad_norm": 1.1421124414081456, "learning_rate": 3.520872373318053e-05, "loss": 0.6596, "num_input_tokens_seen": 603764896, "step": 3344 }, { "epoch": 0.3661840772872821, "grad_norm": 1.2394047402773085, "learning_rate": 3.520087408427822e-05, "loss": 0.8945, "num_input_tokens_seen": 603971648, "step": 3345 }, { "epoch": 0.36629354935822, "grad_norm": 1.3492771739050429, "learning_rate": 3.5193023228632003e-05, "loss": 0.9512, "num_input_tokens_seen": 604164064, "step": 3346 }, { "epoch": 0.36640302142915787, "grad_norm": 1.2825936970680654, "learning_rate": 3.518517116717063e-05, "loss": 0.7553, "num_input_tokens_seen": 604334528, "step": 3347 }, { "epoch": 0.3665124935000958, "grad_norm": 1.2220219926150575, "learning_rate": 3.5177317900822974e-05, "loss": 0.6105, "num_input_tokens_seen": 604531424, "step": 3348 }, { "epoch": 0.3666219655710337, "grad_norm": 1.1605876801159831, "learning_rate": 3.516946343051806e-05, "loss": 0.6353, "num_input_tokens_seen": 604732800, "step": 3349 }, { "epoch": 0.3667314376419716, "grad_norm": 1.229327977904931, "learning_rate": 3.516160775718508e-05, "loss": 0.7061, "num_input_tokens_seen": 604938880, "step": 3350 }, { "epoch": 0.3668409097129095, "grad_norm": 1.2010754058060338, "learning_rate": 3.5153750881753314e-05, "loss": 0.6514, "num_input_tokens_seen": 605102624, "step": 3351 }, { "epoch": 0.3669503817838474, "grad_norm": 1.3081036461236837, "learning_rate": 3.514589280515223e-05, "loss": 0.7267, "num_input_tokens_seen": 605268608, "step": 3352 }, { "epoch": 0.3670598538547853, "grad_norm": 1.4000800092859116, "learning_rate": 3.513803352831143e-05, "loss": 0.9207, "num_input_tokens_seen": 605448256, "step": 3353 }, { "epoch": 0.3671693259257232, "grad_norm": 1.2479898024808747, "learning_rate": 3.5130173052160645e-05, "loss": 0.7838, "num_input_tokens_seen": 605639552, "step": 3354 }, { "epoch": 0.3672787979966611, "grad_norm": 1.1304073715155887, "learning_rate": 3.512231137762975e-05, "loss": 0.5323, "num_input_tokens_seen": 605824576, "step": 3355 }, { "epoch": 0.367388270067599, "grad_norm": 1.1945881362852193, "learning_rate": 3.5114448505648754e-05, "loss": 0.7502, "num_input_tokens_seen": 606016768, "step": 3356 }, { "epoch": 0.3674977421385369, "grad_norm": 1.3719206487407882, "learning_rate": 3.510658443714785e-05, "loss": 0.5687, "num_input_tokens_seen": 606185216, "step": 3357 }, { "epoch": 0.3676072142094748, "grad_norm": 1.3481333012650256, "learning_rate": 3.509871917305734e-05, "loss": 0.6255, "num_input_tokens_seen": 606374272, "step": 3358 }, { "epoch": 0.3677166862804127, "grad_norm": 1.2382608405997473, "learning_rate": 3.509085271430764e-05, "loss": 0.5482, "num_input_tokens_seen": 606539360, "step": 3359 }, { "epoch": 0.36782615835135063, "grad_norm": 1.2023990472145138, "learning_rate": 3.508298506182936e-05, "loss": 0.6908, "num_input_tokens_seen": 606727968, "step": 3360 }, { "epoch": 0.3679356304222885, "grad_norm": 1.1918125147810765, "learning_rate": 3.5075116216553225e-05, "loss": 0.5082, "num_input_tokens_seen": 606932928, "step": 3361 }, { "epoch": 0.3680451024932264, "grad_norm": 1.4191623844627932, "learning_rate": 3.50672461794101e-05, "loss": 0.9114, "num_input_tokens_seen": 607099584, "step": 3362 }, { "epoch": 0.36815457456416434, "grad_norm": 1.3298948276496352, "learning_rate": 3.5059374951330995e-05, "loss": 0.8327, "num_input_tokens_seen": 607264224, "step": 3363 }, { "epoch": 0.3682640466351022, "grad_norm": 1.1489962302097134, "learning_rate": 3.505150253324706e-05, "loss": 0.6214, "num_input_tokens_seen": 607459104, "step": 3364 }, { "epoch": 0.3683735187060401, "grad_norm": 1.2295237954510365, "learning_rate": 3.5043628926089596e-05, "loss": 0.7016, "num_input_tokens_seen": 607644800, "step": 3365 }, { "epoch": 0.36848299077697805, "grad_norm": 1.2691171293470174, "learning_rate": 3.503575413079003e-05, "loss": 0.6754, "num_input_tokens_seen": 607854016, "step": 3366 }, { "epoch": 0.3685924628479159, "grad_norm": 1.1954404974328614, "learning_rate": 3.502787814827994e-05, "loss": 0.6725, "num_input_tokens_seen": 608042400, "step": 3367 }, { "epoch": 0.36870193491885384, "grad_norm": 1.3171719551300443, "learning_rate": 3.5020000979491025e-05, "loss": 0.6151, "num_input_tokens_seen": 608211072, "step": 3368 }, { "epoch": 0.36881140698979176, "grad_norm": 1.1167837627184185, "learning_rate": 3.501212262535515e-05, "loss": 0.6844, "num_input_tokens_seen": 608418496, "step": 3369 }, { "epoch": 0.3689208790607296, "grad_norm": 1.3329158318053804, "learning_rate": 3.500424308680431e-05, "loss": 0.6813, "num_input_tokens_seen": 608574624, "step": 3370 }, { "epoch": 0.36903035113166754, "grad_norm": 1.1927904436702141, "learning_rate": 3.499636236477064e-05, "loss": 0.5584, "num_input_tokens_seen": 608753600, "step": 3371 }, { "epoch": 0.3691398232026054, "grad_norm": 1.2895398267680072, "learning_rate": 3.498848046018641e-05, "loss": 0.747, "num_input_tokens_seen": 608956768, "step": 3372 }, { "epoch": 0.36924929527354333, "grad_norm": 1.40639139100392, "learning_rate": 3.498059737398405e-05, "loss": 0.7515, "num_input_tokens_seen": 609147392, "step": 3373 }, { "epoch": 0.36935876734448125, "grad_norm": 1.3833041421652164, "learning_rate": 3.497271310709608e-05, "loss": 0.6842, "num_input_tokens_seen": 609293888, "step": 3374 }, { "epoch": 0.3694682394154191, "grad_norm": 1.273515055348399, "learning_rate": 3.4964827660455226e-05, "loss": 0.7378, "num_input_tokens_seen": 609474432, "step": 3375 }, { "epoch": 0.36957771148635704, "grad_norm": 1.1033459558273921, "learning_rate": 3.495694103499431e-05, "loss": 0.4927, "num_input_tokens_seen": 609666176, "step": 3376 }, { "epoch": 0.36968718355729496, "grad_norm": 1.2506358913422075, "learning_rate": 3.494905323164629e-05, "loss": 0.7194, "num_input_tokens_seen": 609844928, "step": 3377 }, { "epoch": 0.36979665562823283, "grad_norm": 1.195490705382984, "learning_rate": 3.4941164251344306e-05, "loss": 0.6797, "num_input_tokens_seen": 610026368, "step": 3378 }, { "epoch": 0.36990612769917075, "grad_norm": 1.249729890249883, "learning_rate": 3.493327409502159e-05, "loss": 0.8618, "num_input_tokens_seen": 610220576, "step": 3379 }, { "epoch": 0.37001559977010867, "grad_norm": 1.3312762405603202, "learning_rate": 3.492538276361154e-05, "loss": 0.7187, "num_input_tokens_seen": 610367296, "step": 3380 }, { "epoch": 0.37012507184104654, "grad_norm": 1.379505837599932, "learning_rate": 3.491749025804768e-05, "loss": 0.8269, "num_input_tokens_seen": 610542016, "step": 3381 }, { "epoch": 0.37023454391198446, "grad_norm": 1.301666599978658, "learning_rate": 3.4909596579263685e-05, "loss": 0.7192, "num_input_tokens_seen": 610747872, "step": 3382 }, { "epoch": 0.3703440159829224, "grad_norm": 1.3000626053774087, "learning_rate": 3.490170172819336e-05, "loss": 0.5798, "num_input_tokens_seen": 610901536, "step": 3383 }, { "epoch": 0.37045348805386025, "grad_norm": 1.2122103984309354, "learning_rate": 3.489380570577064e-05, "loss": 0.6032, "num_input_tokens_seen": 611066848, "step": 3384 }, { "epoch": 0.37056296012479817, "grad_norm": 1.2151252935507493, "learning_rate": 3.488590851292963e-05, "loss": 0.759, "num_input_tokens_seen": 611283680, "step": 3385 }, { "epoch": 0.3706724321957361, "grad_norm": 1.2063946572962068, "learning_rate": 3.487801015060453e-05, "loss": 0.6417, "num_input_tokens_seen": 611478560, "step": 3386 }, { "epoch": 0.37078190426667396, "grad_norm": 1.4167402846711903, "learning_rate": 3.487011061972972e-05, "loss": 1.0781, "num_input_tokens_seen": 611686432, "step": 3387 }, { "epoch": 0.3708913763376119, "grad_norm": 1.1996925735068251, "learning_rate": 3.48622099212397e-05, "loss": 0.6617, "num_input_tokens_seen": 611868768, "step": 3388 }, { "epoch": 0.37100084840854974, "grad_norm": 1.201016101195615, "learning_rate": 3.485430805606909e-05, "loss": 0.5927, "num_input_tokens_seen": 612009216, "step": 3389 }, { "epoch": 0.37111032047948767, "grad_norm": 1.3048786320281784, "learning_rate": 3.484640502515267e-05, "loss": 0.8317, "num_input_tokens_seen": 612200736, "step": 3390 }, { "epoch": 0.3712197925504256, "grad_norm": 1.1491209319163118, "learning_rate": 3.483850082942537e-05, "loss": 0.8839, "num_input_tokens_seen": 612423840, "step": 3391 }, { "epoch": 0.37132926462136345, "grad_norm": 1.3472097348205798, "learning_rate": 3.4830595469822224e-05, "loss": 0.7767, "num_input_tokens_seen": 612597888, "step": 3392 }, { "epoch": 0.3714387366923014, "grad_norm": 1.246924001165882, "learning_rate": 3.482268894727843e-05, "loss": 0.7403, "num_input_tokens_seen": 612759616, "step": 3393 }, { "epoch": 0.3715482087632393, "grad_norm": 1.3596200393287454, "learning_rate": 3.481478126272931e-05, "loss": 0.7046, "num_input_tokens_seen": 612924928, "step": 3394 }, { "epoch": 0.37165768083417716, "grad_norm": 1.1058421228936757, "learning_rate": 3.4806872417110333e-05, "loss": 0.6082, "num_input_tokens_seen": 613099424, "step": 3395 }, { "epoch": 0.3717671529051151, "grad_norm": 1.3519851376574352, "learning_rate": 3.479896241135709e-05, "loss": 0.8083, "num_input_tokens_seen": 613279744, "step": 3396 }, { "epoch": 0.371876624976053, "grad_norm": 1.2305196115790236, "learning_rate": 3.4791051246405326e-05, "loss": 0.5648, "num_input_tokens_seen": 613451552, "step": 3397 }, { "epoch": 0.37198609704699087, "grad_norm": 1.2332968047033595, "learning_rate": 3.478313892319092e-05, "loss": 0.6135, "num_input_tokens_seen": 613638592, "step": 3398 }, { "epoch": 0.3720955691179288, "grad_norm": 1.2023117983255265, "learning_rate": 3.477522544264988e-05, "loss": 0.5899, "num_input_tokens_seen": 613812192, "step": 3399 }, { "epoch": 0.3722050411888667, "grad_norm": 1.202172484416909, "learning_rate": 3.4767310805718355e-05, "loss": 0.6572, "num_input_tokens_seen": 614010208, "step": 3400 }, { "epoch": 0.3723145132598046, "grad_norm": 1.3128453663155928, "learning_rate": 3.475939501333264e-05, "loss": 0.8875, "num_input_tokens_seen": 614209120, "step": 3401 }, { "epoch": 0.3724239853307425, "grad_norm": 1.1838793520158186, "learning_rate": 3.4751478066429156e-05, "loss": 0.574, "num_input_tokens_seen": 614400640, "step": 3402 }, { "epoch": 0.3725334574016804, "grad_norm": 1.3008437041448215, "learning_rate": 3.474355996594445e-05, "loss": 0.79, "num_input_tokens_seen": 614602016, "step": 3403 }, { "epoch": 0.3726429294726183, "grad_norm": 1.2410240722983525, "learning_rate": 3.473564071281522e-05, "loss": 0.8379, "num_input_tokens_seen": 614802720, "step": 3404 }, { "epoch": 0.3727524015435562, "grad_norm": 1.3267048142729745, "learning_rate": 3.472772030797832e-05, "loss": 0.7605, "num_input_tokens_seen": 614975648, "step": 3405 }, { "epoch": 0.3728618736144941, "grad_norm": 1.1061699466512045, "learning_rate": 3.4719798752370694e-05, "loss": 0.6571, "num_input_tokens_seen": 615192704, "step": 3406 }, { "epoch": 0.372971345685432, "grad_norm": 1.256129495335379, "learning_rate": 3.471187604692945e-05, "loss": 0.7727, "num_input_tokens_seen": 615376160, "step": 3407 }, { "epoch": 0.3730808177563699, "grad_norm": 1.2763836253245269, "learning_rate": 3.470395219259185e-05, "loss": 0.6472, "num_input_tokens_seen": 615542592, "step": 3408 }, { "epoch": 0.3731902898273078, "grad_norm": 1.1247364901180472, "learning_rate": 3.469602719029526e-05, "loss": 0.486, "num_input_tokens_seen": 615714400, "step": 3409 }, { "epoch": 0.3732997618982457, "grad_norm": 1.259137847380401, "learning_rate": 3.4688101040977164e-05, "loss": 0.7045, "num_input_tokens_seen": 615856192, "step": 3410 }, { "epoch": 0.37340923396918363, "grad_norm": 1.302872588962702, "learning_rate": 3.468017374557526e-05, "loss": 0.8153, "num_input_tokens_seen": 616062048, "step": 3411 }, { "epoch": 0.3735187060401215, "grad_norm": 1.3744821440257438, "learning_rate": 3.46722453050273e-05, "loss": 0.8219, "num_input_tokens_seen": 616221312, "step": 3412 }, { "epoch": 0.3736281781110594, "grad_norm": 1.3826299428317768, "learning_rate": 3.466431572027121e-05, "loss": 0.7456, "num_input_tokens_seen": 616406336, "step": 3413 }, { "epoch": 0.37373765018199734, "grad_norm": 1.5277997285055895, "learning_rate": 3.465638499224504e-05, "loss": 0.7126, "num_input_tokens_seen": 616576352, "step": 3414 }, { "epoch": 0.3738471222529352, "grad_norm": 1.2919173561390773, "learning_rate": 3.4648453121886994e-05, "loss": 0.7554, "num_input_tokens_seen": 616733824, "step": 3415 }, { "epoch": 0.3739565943238731, "grad_norm": 1.216626983587658, "learning_rate": 3.464052011013539e-05, "loss": 0.75, "num_input_tokens_seen": 616903168, "step": 3416 }, { "epoch": 0.37406606639481105, "grad_norm": 1.1667234997303337, "learning_rate": 3.463258595792867e-05, "loss": 0.6599, "num_input_tokens_seen": 617118880, "step": 3417 }, { "epoch": 0.3741755384657489, "grad_norm": 1.3508646491953058, "learning_rate": 3.462465066620546e-05, "loss": 0.7821, "num_input_tokens_seen": 617292032, "step": 3418 }, { "epoch": 0.37428501053668684, "grad_norm": 1.273923769357481, "learning_rate": 3.461671423590447e-05, "loss": 0.7024, "num_input_tokens_seen": 617464288, "step": 3419 }, { "epoch": 0.37439448260762476, "grad_norm": 1.3575027112424412, "learning_rate": 3.460877666796457e-05, "loss": 0.7639, "num_input_tokens_seen": 617644384, "step": 3420 }, { "epoch": 0.3745039546785626, "grad_norm": 1.3675195345276638, "learning_rate": 3.460083796332476e-05, "loss": 0.6281, "num_input_tokens_seen": 617805664, "step": 3421 }, { "epoch": 0.37461342674950054, "grad_norm": 1.36240010901533, "learning_rate": 3.459289812292418e-05, "loss": 0.8818, "num_input_tokens_seen": 617983744, "step": 3422 }, { "epoch": 0.3747228988204384, "grad_norm": 1.2408320391110002, "learning_rate": 3.458495714770208e-05, "loss": 0.7425, "num_input_tokens_seen": 618167200, "step": 3423 }, { "epoch": 0.37483237089137633, "grad_norm": 1.1549947987889349, "learning_rate": 3.4577015038597874e-05, "loss": 0.5721, "num_input_tokens_seen": 618313248, "step": 3424 }, { "epoch": 0.37494184296231425, "grad_norm": 1.4026966979050681, "learning_rate": 3.45690717965511e-05, "loss": 0.7513, "num_input_tokens_seen": 618495136, "step": 3425 }, { "epoch": 0.3750513150332521, "grad_norm": 1.2799024240733845, "learning_rate": 3.456112742250143e-05, "loss": 0.5456, "num_input_tokens_seen": 618659104, "step": 3426 }, { "epoch": 0.37516078710419004, "grad_norm": 1.1670863157481872, "learning_rate": 3.4553181917388664e-05, "loss": 0.5851, "num_input_tokens_seen": 618840096, "step": 3427 }, { "epoch": 0.37527025917512796, "grad_norm": 1.4462054818966494, "learning_rate": 3.4545235282152724e-05, "loss": 0.9804, "num_input_tokens_seen": 619003392, "step": 3428 }, { "epoch": 0.37537973124606583, "grad_norm": 1.2110258993061558, "learning_rate": 3.4537287517733713e-05, "loss": 0.555, "num_input_tokens_seen": 619192000, "step": 3429 }, { "epoch": 0.37548920331700375, "grad_norm": 1.2866692260772734, "learning_rate": 3.452933862507182e-05, "loss": 0.7426, "num_input_tokens_seen": 619360224, "step": 3430 }, { "epoch": 0.37559867538794167, "grad_norm": 1.3082047524816551, "learning_rate": 3.452138860510737e-05, "loss": 0.7633, "num_input_tokens_seen": 619561824, "step": 3431 }, { "epoch": 0.37570814745887954, "grad_norm": 1.2631579714647487, "learning_rate": 3.451343745878086e-05, "loss": 0.779, "num_input_tokens_seen": 619759168, "step": 3432 }, { "epoch": 0.37581761952981746, "grad_norm": 1.271347015039858, "learning_rate": 3.4505485187032894e-05, "loss": 0.5906, "num_input_tokens_seen": 619975776, "step": 3433 }, { "epoch": 0.3759270916007554, "grad_norm": 1.1767232169609902, "learning_rate": 3.4497531790804194e-05, "loss": 0.6173, "num_input_tokens_seen": 620158336, "step": 3434 }, { "epoch": 0.37603656367169325, "grad_norm": 1.229456991187297, "learning_rate": 3.448957727103564e-05, "loss": 0.8563, "num_input_tokens_seen": 620345376, "step": 3435 }, { "epoch": 0.37614603574263117, "grad_norm": 1.0914433270261559, "learning_rate": 3.448162162866823e-05, "loss": 0.5848, "num_input_tokens_seen": 620533984, "step": 3436 }, { "epoch": 0.3762555078135691, "grad_norm": 1.1578352602885034, "learning_rate": 3.447366486464312e-05, "loss": 0.7495, "num_input_tokens_seen": 620738272, "step": 3437 }, { "epoch": 0.37636497988450696, "grad_norm": 1.3097884460882088, "learning_rate": 3.446570697990155e-05, "loss": 0.7797, "num_input_tokens_seen": 620902912, "step": 3438 }, { "epoch": 0.3764744519554449, "grad_norm": 1.372350480876314, "learning_rate": 3.445774797538495e-05, "loss": 0.8431, "num_input_tokens_seen": 621094432, "step": 3439 }, { "epoch": 0.37658392402638274, "grad_norm": 1.1764659218835851, "learning_rate": 3.444978785203484e-05, "loss": 0.5268, "num_input_tokens_seen": 621257504, "step": 3440 }, { "epoch": 0.37669339609732067, "grad_norm": 1.1265552155448328, "learning_rate": 3.44418266107929e-05, "loss": 0.5065, "num_input_tokens_seen": 621409600, "step": 3441 }, { "epoch": 0.3768028681682586, "grad_norm": 1.3244531024623913, "learning_rate": 3.4433864252600916e-05, "loss": 0.7604, "num_input_tokens_seen": 621602464, "step": 3442 }, { "epoch": 0.37691234023919645, "grad_norm": 1.2795030267611827, "learning_rate": 3.442590077840083e-05, "loss": 0.5679, "num_input_tokens_seen": 621746720, "step": 3443 }, { "epoch": 0.3770218123101344, "grad_norm": 1.4586079838389747, "learning_rate": 3.441793618913469e-05, "loss": 0.7557, "num_input_tokens_seen": 621880000, "step": 3444 }, { "epoch": 0.3771312843810723, "grad_norm": 1.2859533093432007, "learning_rate": 3.4409970485744714e-05, "loss": 0.5577, "num_input_tokens_seen": 622058528, "step": 3445 }, { "epoch": 0.37724075645201016, "grad_norm": 1.2018166401025188, "learning_rate": 3.440200366917321e-05, "loss": 0.604, "num_input_tokens_seen": 622214208, "step": 3446 }, { "epoch": 0.3773502285229481, "grad_norm": 1.3298522936753048, "learning_rate": 3.439403574036266e-05, "loss": 0.6832, "num_input_tokens_seen": 622395648, "step": 3447 }, { "epoch": 0.377459700593886, "grad_norm": 1.306070246032623, "learning_rate": 3.438606670025563e-05, "loss": 0.7479, "num_input_tokens_seen": 622581344, "step": 3448 }, { "epoch": 0.37756917266482387, "grad_norm": 1.2725812689680116, "learning_rate": 3.437809654979485e-05, "loss": 0.6944, "num_input_tokens_seen": 622775104, "step": 3449 }, { "epoch": 0.3776786447357618, "grad_norm": 1.3842695071151634, "learning_rate": 3.4370125289923176e-05, "loss": 0.7201, "num_input_tokens_seen": 622923392, "step": 3450 }, { "epoch": 0.3777881168066997, "grad_norm": 1.246609285193408, "learning_rate": 3.436215292158359e-05, "loss": 0.752, "num_input_tokens_seen": 623118272, "step": 3451 }, { "epoch": 0.3778975888776376, "grad_norm": 1.1892754683591216, "learning_rate": 3.435417944571922e-05, "loss": 0.6996, "num_input_tokens_seen": 623289632, "step": 3452 }, { "epoch": 0.3780070609485755, "grad_norm": 1.1973673293460085, "learning_rate": 3.4346204863273304e-05, "loss": 0.5636, "num_input_tokens_seen": 623467488, "step": 3453 }, { "epoch": 0.3781165330195134, "grad_norm": 1.1978669640001736, "learning_rate": 3.433822917518921e-05, "loss": 0.7555, "num_input_tokens_seen": 623661920, "step": 3454 }, { "epoch": 0.3782260050904513, "grad_norm": 1.300086079095129, "learning_rate": 3.433025238241047e-05, "loss": 0.6554, "num_input_tokens_seen": 623831040, "step": 3455 }, { "epoch": 0.3783354771613892, "grad_norm": 1.2668489730713364, "learning_rate": 3.43222744858807e-05, "loss": 0.631, "num_input_tokens_seen": 624018976, "step": 3456 }, { "epoch": 0.3784449492323271, "grad_norm": 1.3947027191829233, "learning_rate": 3.431429548654368e-05, "loss": 0.6552, "num_input_tokens_seen": 624187648, "step": 3457 }, { "epoch": 0.378554421303265, "grad_norm": 1.3333506831391357, "learning_rate": 3.4306315385343316e-05, "loss": 0.714, "num_input_tokens_seen": 624377600, "step": 3458 }, { "epoch": 0.3786638933742029, "grad_norm": 1.2157059336102114, "learning_rate": 3.4298334183223624e-05, "loss": 0.5447, "num_input_tokens_seen": 624547392, "step": 3459 }, { "epoch": 0.3787733654451408, "grad_norm": 1.293423160787777, "learning_rate": 3.4290351881128767e-05, "loss": 0.7613, "num_input_tokens_seen": 624742720, "step": 3460 }, { "epoch": 0.3788828375160787, "grad_norm": 1.4127072747661655, "learning_rate": 3.4282368480003056e-05, "loss": 0.6768, "num_input_tokens_seen": 624959776, "step": 3461 }, { "epoch": 0.37899230958701663, "grad_norm": 1.239462785175235, "learning_rate": 3.42743839807909e-05, "loss": 0.6703, "num_input_tokens_seen": 625115680, "step": 3462 }, { "epoch": 0.3791017816579545, "grad_norm": 1.3356679454819875, "learning_rate": 3.426639838443684e-05, "loss": 0.7018, "num_input_tokens_seen": 625303616, "step": 3463 }, { "epoch": 0.3792112537288924, "grad_norm": 1.1783747663771849, "learning_rate": 3.4258411691885575e-05, "loss": 0.5344, "num_input_tokens_seen": 625484384, "step": 3464 }, { "epoch": 0.37932072579983034, "grad_norm": 1.2146923301999188, "learning_rate": 3.425042390408189e-05, "loss": 0.6884, "num_input_tokens_seen": 625635360, "step": 3465 }, { "epoch": 0.3794301978707682, "grad_norm": 1.377323787931864, "learning_rate": 3.424243502197076e-05, "loss": 0.7606, "num_input_tokens_seen": 625817696, "step": 3466 }, { "epoch": 0.3795396699417061, "grad_norm": 1.5102588001052297, "learning_rate": 3.4234445046497225e-05, "loss": 0.723, "num_input_tokens_seen": 625996672, "step": 3467 }, { "epoch": 0.37964914201264405, "grad_norm": 1.2395391963933398, "learning_rate": 3.42264539786065e-05, "loss": 0.5086, "num_input_tokens_seen": 626164224, "step": 3468 }, { "epoch": 0.3797586140835819, "grad_norm": 1.274280386158485, "learning_rate": 3.421846181924391e-05, "loss": 0.8155, "num_input_tokens_seen": 626350816, "step": 3469 }, { "epoch": 0.37986808615451984, "grad_norm": 1.212565158784255, "learning_rate": 3.421046856935489e-05, "loss": 0.511, "num_input_tokens_seen": 626517248, "step": 3470 }, { "epoch": 0.37997755822545776, "grad_norm": 1.2676770564003004, "learning_rate": 3.420247422988506e-05, "loss": 0.5787, "num_input_tokens_seen": 626670464, "step": 3471 }, { "epoch": 0.3800870302963956, "grad_norm": 1.1293662958701651, "learning_rate": 3.4194478801780116e-05, "loss": 0.6905, "num_input_tokens_seen": 626848544, "step": 3472 }, { "epoch": 0.38019650236733354, "grad_norm": 1.1833158361905418, "learning_rate": 3.4186482285985915e-05, "loss": 0.645, "num_input_tokens_seen": 627019232, "step": 3473 }, { "epoch": 0.3803059744382714, "grad_norm": 1.1370651399007994, "learning_rate": 3.417848468344842e-05, "loss": 0.6214, "num_input_tokens_seen": 627213664, "step": 3474 }, { "epoch": 0.38041544650920933, "grad_norm": 1.268807495392936, "learning_rate": 3.417048599511373e-05, "loss": 0.726, "num_input_tokens_seen": 627378752, "step": 3475 }, { "epoch": 0.38052491858014725, "grad_norm": 1.1351062046894016, "learning_rate": 3.416248622192807e-05, "loss": 0.5725, "num_input_tokens_seen": 627549888, "step": 3476 }, { "epoch": 0.3806343906510851, "grad_norm": 1.2923411894874264, "learning_rate": 3.415448536483782e-05, "loss": 0.6593, "num_input_tokens_seen": 627709152, "step": 3477 }, { "epoch": 0.38074386272202304, "grad_norm": 1.4463999188661552, "learning_rate": 3.4146483424789445e-05, "loss": 0.8815, "num_input_tokens_seen": 627892160, "step": 3478 }, { "epoch": 0.38085333479296096, "grad_norm": 1.2039797335309914, "learning_rate": 3.4138480402729564e-05, "loss": 0.6754, "num_input_tokens_seen": 628064640, "step": 3479 }, { "epoch": 0.38096280686389883, "grad_norm": 1.4628139419727841, "learning_rate": 3.413047629960492e-05, "loss": 0.9209, "num_input_tokens_seen": 628253696, "step": 3480 }, { "epoch": 0.38107227893483675, "grad_norm": 1.3128160822691666, "learning_rate": 3.412247111636239e-05, "loss": 0.7456, "num_input_tokens_seen": 628425280, "step": 3481 }, { "epoch": 0.38118175100577467, "grad_norm": 1.3006723719213804, "learning_rate": 3.411446485394896e-05, "loss": 0.781, "num_input_tokens_seen": 628607616, "step": 3482 }, { "epoch": 0.38129122307671254, "grad_norm": 1.1195352600835096, "learning_rate": 3.410645751331176e-05, "loss": 0.5653, "num_input_tokens_seen": 628793536, "step": 3483 }, { "epoch": 0.38140069514765046, "grad_norm": 1.087208230790539, "learning_rate": 3.4098449095398054e-05, "loss": 0.5763, "num_input_tokens_seen": 628984160, "step": 3484 }, { "epoch": 0.3815101672185884, "grad_norm": 1.3657288535846608, "learning_rate": 3.409043960115521e-05, "loss": 0.6494, "num_input_tokens_seen": 629142304, "step": 3485 }, { "epoch": 0.38161963928952625, "grad_norm": 1.1636684185897446, "learning_rate": 3.408242903153074e-05, "loss": 0.7008, "num_input_tokens_seen": 629330016, "step": 3486 }, { "epoch": 0.38172911136046417, "grad_norm": 1.2375580032318567, "learning_rate": 3.4074417387472274e-05, "loss": 0.7423, "num_input_tokens_seen": 629520864, "step": 3487 }, { "epoch": 0.3818385834314021, "grad_norm": 1.2712890193656132, "learning_rate": 3.406640466992758e-05, "loss": 0.5757, "num_input_tokens_seen": 629708128, "step": 3488 }, { "epoch": 0.38194805550233996, "grad_norm": 1.144011812522564, "learning_rate": 3.405839087984455e-05, "loss": 0.5804, "num_input_tokens_seen": 629896288, "step": 3489 }, { "epoch": 0.3820575275732779, "grad_norm": 1.2694581935109253, "learning_rate": 3.405037601817119e-05, "loss": 0.607, "num_input_tokens_seen": 630104384, "step": 3490 }, { "epoch": 0.38216699964421574, "grad_norm": 1.2477808327003856, "learning_rate": 3.4042360085855654e-05, "loss": 0.6528, "num_input_tokens_seen": 630323456, "step": 3491 }, { "epoch": 0.38227647171515367, "grad_norm": 1.363995078256962, "learning_rate": 3.40343430838462e-05, "loss": 0.6488, "num_input_tokens_seen": 630471296, "step": 3492 }, { "epoch": 0.3823859437860916, "grad_norm": 1.234940797568262, "learning_rate": 3.4026325013091224e-05, "loss": 0.5574, "num_input_tokens_seen": 630644224, "step": 3493 }, { "epoch": 0.38249541585702945, "grad_norm": 1.166413308062856, "learning_rate": 3.4018305874539264e-05, "loss": 0.4858, "num_input_tokens_seen": 630816256, "step": 3494 }, { "epoch": 0.3826048879279674, "grad_norm": 1.2526390351050785, "learning_rate": 3.401028566913896e-05, "loss": 0.6602, "num_input_tokens_seen": 631001728, "step": 3495 }, { "epoch": 0.3827143599989053, "grad_norm": 1.320921808081457, "learning_rate": 3.400226439783908e-05, "loss": 0.7076, "num_input_tokens_seen": 631164800, "step": 3496 }, { "epoch": 0.38282383206984316, "grad_norm": 1.2641866068984684, "learning_rate": 3.399424206158855e-05, "loss": 0.7095, "num_input_tokens_seen": 631336384, "step": 3497 }, { "epoch": 0.3829333041407811, "grad_norm": 1.4617607530668426, "learning_rate": 3.3986218661336355e-05, "loss": 0.8616, "num_input_tokens_seen": 631544704, "step": 3498 }, { "epoch": 0.383042776211719, "grad_norm": 1.51884260483974, "learning_rate": 3.397819419803168e-05, "loss": 0.8851, "num_input_tokens_seen": 631728608, "step": 3499 }, { "epoch": 0.38315224828265687, "grad_norm": 1.2663136716087604, "learning_rate": 3.397016867262379e-05, "loss": 0.684, "num_input_tokens_seen": 631898624, "step": 3500 }, { "epoch": 0.3832617203535948, "grad_norm": 1.2730995619137504, "learning_rate": 3.39621420860621e-05, "loss": 0.8396, "num_input_tokens_seen": 632070656, "step": 3501 }, { "epoch": 0.3833711924245327, "grad_norm": 1.18770140897406, "learning_rate": 3.395411443929613e-05, "loss": 0.6993, "num_input_tokens_seen": 632255456, "step": 3502 }, { "epoch": 0.3834806644954706, "grad_norm": 1.2459947313531035, "learning_rate": 3.394608573327554e-05, "loss": 0.6201, "num_input_tokens_seen": 632425696, "step": 3503 }, { "epoch": 0.3835901365664085, "grad_norm": 1.2743177121743539, "learning_rate": 3.393805596895011e-05, "loss": 0.7592, "num_input_tokens_seen": 632585632, "step": 3504 }, { "epoch": 0.3836996086373464, "grad_norm": 1.3479655919735007, "learning_rate": 3.3930025147269746e-05, "loss": 0.9398, "num_input_tokens_seen": 632756544, "step": 3505 }, { "epoch": 0.3838090807082843, "grad_norm": 1.2916998038604603, "learning_rate": 3.3921993269184474e-05, "loss": 0.675, "num_input_tokens_seen": 632908416, "step": 3506 }, { "epoch": 0.3839185527792222, "grad_norm": 1.2344998646608991, "learning_rate": 3.391396033564446e-05, "loss": 0.7226, "num_input_tokens_seen": 633104640, "step": 3507 }, { "epoch": 0.3840280248501601, "grad_norm": 1.3065128007298932, "learning_rate": 3.390592634759998e-05, "loss": 0.726, "num_input_tokens_seen": 633298176, "step": 3508 }, { "epoch": 0.384137496921098, "grad_norm": 1.2880244844135265, "learning_rate": 3.389789130600144e-05, "loss": 0.583, "num_input_tokens_seen": 633474688, "step": 3509 }, { "epoch": 0.3842469689920359, "grad_norm": 1.2328191682081622, "learning_rate": 3.388985521179937e-05, "loss": 0.6932, "num_input_tokens_seen": 633633504, "step": 3510 }, { "epoch": 0.3843564410629738, "grad_norm": 1.2569267044548398, "learning_rate": 3.3881818065944416e-05, "loss": 0.7529, "num_input_tokens_seen": 633821664, "step": 3511 }, { "epoch": 0.3844659131339117, "grad_norm": 1.259847049890961, "learning_rate": 3.3873779869387356e-05, "loss": 0.5711, "num_input_tokens_seen": 633989440, "step": 3512 }, { "epoch": 0.38457538520484963, "grad_norm": 1.3115399346127146, "learning_rate": 3.3865740623079116e-05, "loss": 0.7372, "num_input_tokens_seen": 634195744, "step": 3513 }, { "epoch": 0.3846848572757875, "grad_norm": 1.1957323421583768, "learning_rate": 3.3857700327970696e-05, "loss": 0.5626, "num_input_tokens_seen": 634389056, "step": 3514 }, { "epoch": 0.3847943293467254, "grad_norm": 1.3787928202283624, "learning_rate": 3.384965898501327e-05, "loss": 0.7285, "num_input_tokens_seen": 634581920, "step": 3515 }, { "epoch": 0.38490380141766334, "grad_norm": 1.190104339176669, "learning_rate": 3.384161659515811e-05, "loss": 0.6266, "num_input_tokens_seen": 634760896, "step": 3516 }, { "epoch": 0.3850132734886012, "grad_norm": 1.4607525876773266, "learning_rate": 3.38335731593566e-05, "loss": 0.8768, "num_input_tokens_seen": 634932704, "step": 3517 }, { "epoch": 0.3851227455595391, "grad_norm": 1.273768937678592, "learning_rate": 3.382552867856027e-05, "loss": 0.5249, "num_input_tokens_seen": 635088160, "step": 3518 }, { "epoch": 0.38523221763047705, "grad_norm": 1.297616401992974, "learning_rate": 3.381748315372077e-05, "loss": 0.7001, "num_input_tokens_seen": 635257056, "step": 3519 }, { "epoch": 0.3853416897014149, "grad_norm": 1.1636977680420588, "learning_rate": 3.380943658578987e-05, "loss": 0.6903, "num_input_tokens_seen": 635440064, "step": 3520 }, { "epoch": 0.38545116177235283, "grad_norm": 1.255464245468253, "learning_rate": 3.380138897571946e-05, "loss": 0.691, "num_input_tokens_seen": 635608288, "step": 3521 }, { "epoch": 0.38556063384329076, "grad_norm": 1.2700866892656646, "learning_rate": 3.379334032446157e-05, "loss": 0.6702, "num_input_tokens_seen": 635775168, "step": 3522 }, { "epoch": 0.3856701059142286, "grad_norm": 1.4158263612739805, "learning_rate": 3.378529063296832e-05, "loss": 0.7594, "num_input_tokens_seen": 635963776, "step": 3523 }, { "epoch": 0.38577957798516654, "grad_norm": 1.0678225324392214, "learning_rate": 3.377723990219198e-05, "loss": 0.5231, "num_input_tokens_seen": 636139840, "step": 3524 }, { "epoch": 0.3858890500561044, "grad_norm": 1.3130078468553132, "learning_rate": 3.376918813308495e-05, "loss": 0.6448, "num_input_tokens_seen": 636298656, "step": 3525 }, { "epoch": 0.38599852212704233, "grad_norm": 1.1859038819384373, "learning_rate": 3.3761135326599716e-05, "loss": 0.5687, "num_input_tokens_seen": 636446496, "step": 3526 }, { "epoch": 0.38610799419798025, "grad_norm": 1.2476672339866572, "learning_rate": 3.375308148368893e-05, "loss": 0.648, "num_input_tokens_seen": 636618528, "step": 3527 }, { "epoch": 0.3862174662689181, "grad_norm": 1.2914732613920998, "learning_rate": 3.374502660530534e-05, "loss": 0.6723, "num_input_tokens_seen": 636798176, "step": 3528 }, { "epoch": 0.38632693833985604, "grad_norm": 1.3341156879544331, "learning_rate": 3.373697069240181e-05, "loss": 0.6954, "num_input_tokens_seen": 636998880, "step": 3529 }, { "epoch": 0.38643641041079396, "grad_norm": 1.3130666781024043, "learning_rate": 3.3728913745931356e-05, "loss": 0.6488, "num_input_tokens_seen": 637168224, "step": 3530 }, { "epoch": 0.38654588248173183, "grad_norm": 1.213110420104061, "learning_rate": 3.372085576684709e-05, "loss": 0.8162, "num_input_tokens_seen": 637372960, "step": 3531 }, { "epoch": 0.38665535455266975, "grad_norm": 1.287816225494054, "learning_rate": 3.371279675610226e-05, "loss": 0.6996, "num_input_tokens_seen": 637532224, "step": 3532 }, { "epoch": 0.38676482662360767, "grad_norm": 1.2499525061167753, "learning_rate": 3.370473671465022e-05, "loss": 0.6445, "num_input_tokens_seen": 637691264, "step": 3533 }, { "epoch": 0.38687429869454554, "grad_norm": 1.6889532683814816, "learning_rate": 3.369667564344449e-05, "loss": 0.838, "num_input_tokens_seen": 637835744, "step": 3534 }, { "epoch": 0.38698377076548346, "grad_norm": 1.351228661734808, "learning_rate": 3.368861354343863e-05, "loss": 0.8217, "num_input_tokens_seen": 638006432, "step": 3535 }, { "epoch": 0.3870932428364214, "grad_norm": 1.4494886121351884, "learning_rate": 3.3680550415586416e-05, "loss": 0.7862, "num_input_tokens_seen": 638203104, "step": 3536 }, { "epoch": 0.38720271490735925, "grad_norm": 1.14987067241748, "learning_rate": 3.367248626084168e-05, "loss": 0.6576, "num_input_tokens_seen": 638397088, "step": 3537 }, { "epoch": 0.38731218697829717, "grad_norm": 1.2514911817025853, "learning_rate": 3.3664421080158394e-05, "loss": 0.8003, "num_input_tokens_seen": 638563968, "step": 3538 }, { "epoch": 0.3874216590492351, "grad_norm": 1.2773435166851612, "learning_rate": 3.365635487449065e-05, "loss": 0.7296, "num_input_tokens_seen": 638744288, "step": 3539 }, { "epoch": 0.38753113112017296, "grad_norm": 1.2567058457372855, "learning_rate": 3.364828764479269e-05, "loss": 0.7647, "num_input_tokens_seen": 638930432, "step": 3540 }, { "epoch": 0.3876406031911109, "grad_norm": 1.1821872424196733, "learning_rate": 3.3640219392018824e-05, "loss": 0.9648, "num_input_tokens_seen": 639144352, "step": 3541 }, { "epoch": 0.38775007526204874, "grad_norm": 1.2598631355339376, "learning_rate": 3.3632150117123524e-05, "loss": 0.5847, "num_input_tokens_seen": 639315712, "step": 3542 }, { "epoch": 0.38785954733298666, "grad_norm": 1.2752892072700863, "learning_rate": 3.362407982106136e-05, "loss": 0.7576, "num_input_tokens_seen": 639479456, "step": 3543 }, { "epoch": 0.3879690194039246, "grad_norm": 1.1159820455453637, "learning_rate": 3.361600850478704e-05, "loss": 0.5505, "num_input_tokens_seen": 639658208, "step": 3544 }, { "epoch": 0.38807849147486245, "grad_norm": 1.2300956039180218, "learning_rate": 3.3607936169255396e-05, "loss": 0.7474, "num_input_tokens_seen": 639846368, "step": 3545 }, { "epoch": 0.3881879635458004, "grad_norm": 1.2062133190527207, "learning_rate": 3.359986281542135e-05, "loss": 0.6702, "num_input_tokens_seen": 640042368, "step": 3546 }, { "epoch": 0.3882974356167383, "grad_norm": 1.4262798116980147, "learning_rate": 3.359178844423998e-05, "loss": 0.7807, "num_input_tokens_seen": 640197824, "step": 3547 }, { "epoch": 0.38840690768767616, "grad_norm": 1.3033871595108721, "learning_rate": 3.3583713056666454e-05, "loss": 0.5957, "num_input_tokens_seen": 640378592, "step": 3548 }, { "epoch": 0.3885163797586141, "grad_norm": 1.2712409142363628, "learning_rate": 3.3575636653656094e-05, "loss": 0.8118, "num_input_tokens_seen": 640575936, "step": 3549 }, { "epoch": 0.388625851829552, "grad_norm": 1.3670515592293624, "learning_rate": 3.35675592361643e-05, "loss": 0.6425, "num_input_tokens_seen": 640761856, "step": 3550 }, { "epoch": 0.38873532390048987, "grad_norm": 1.4408881023728284, "learning_rate": 3.3559480805146634e-05, "loss": 0.9478, "num_input_tokens_seen": 640956512, "step": 3551 }, { "epoch": 0.3888447959714278, "grad_norm": 1.4417724144286173, "learning_rate": 3.355140136155875e-05, "loss": 0.6906, "num_input_tokens_seen": 641124288, "step": 3552 }, { "epoch": 0.3889542680423657, "grad_norm": 1.4904500222821624, "learning_rate": 3.354332090635643e-05, "loss": 0.6998, "num_input_tokens_seen": 641308416, "step": 3553 }, { "epoch": 0.3890637401133036, "grad_norm": 1.274303025125641, "learning_rate": 3.353523944049558e-05, "loss": 0.6442, "num_input_tokens_seen": 641491872, "step": 3554 }, { "epoch": 0.3891732121842415, "grad_norm": 1.325317242269165, "learning_rate": 3.352715696493222e-05, "loss": 0.7195, "num_input_tokens_seen": 641643520, "step": 3555 }, { "epoch": 0.3892826842551794, "grad_norm": 1.1855476794169428, "learning_rate": 3.3519073480622495e-05, "loss": 0.6733, "num_input_tokens_seen": 641850272, "step": 3556 }, { "epoch": 0.3893921563261173, "grad_norm": 1.2834406477576314, "learning_rate": 3.351098898852266e-05, "loss": 0.8, "num_input_tokens_seen": 642023200, "step": 3557 }, { "epoch": 0.3895016283970552, "grad_norm": 1.2212063415907466, "learning_rate": 3.35029034895891e-05, "loss": 0.7061, "num_input_tokens_seen": 642213600, "step": 3558 }, { "epoch": 0.3896111004679931, "grad_norm": 1.2301096558731714, "learning_rate": 3.349481698477831e-05, "loss": 0.6759, "num_input_tokens_seen": 642375328, "step": 3559 }, { "epoch": 0.389720572538931, "grad_norm": 1.375501064556657, "learning_rate": 3.348672947504691e-05, "loss": 0.7953, "num_input_tokens_seen": 642554976, "step": 3560 }, { "epoch": 0.3898300446098689, "grad_norm": 1.3399131912609306, "learning_rate": 3.3478640961351635e-05, "loss": 0.596, "num_input_tokens_seen": 642734176, "step": 3561 }, { "epoch": 0.3899395166808068, "grad_norm": 1.3753826302553607, "learning_rate": 3.3470551444649346e-05, "loss": 0.696, "num_input_tokens_seen": 642904864, "step": 3562 }, { "epoch": 0.3900489887517447, "grad_norm": 1.2634350231900258, "learning_rate": 3.346246092589702e-05, "loss": 0.7262, "num_input_tokens_seen": 643093696, "step": 3563 }, { "epoch": 0.39015846082268263, "grad_norm": 1.3173215949424226, "learning_rate": 3.3454369406051736e-05, "loss": 0.7172, "num_input_tokens_seen": 643247808, "step": 3564 }, { "epoch": 0.3902679328936205, "grad_norm": 1.248691589318223, "learning_rate": 3.344627688607071e-05, "loss": 0.755, "num_input_tokens_seen": 643417152, "step": 3565 }, { "epoch": 0.3903774049645584, "grad_norm": 1.1703389591337876, "learning_rate": 3.343818336691128e-05, "loss": 0.7633, "num_input_tokens_seen": 643612928, "step": 3566 }, { "epoch": 0.39048687703549634, "grad_norm": 1.3581597168475015, "learning_rate": 3.3430088849530886e-05, "loss": 0.8775, "num_input_tokens_seen": 643809600, "step": 3567 }, { "epoch": 0.3905963491064342, "grad_norm": 1.2805737362194176, "learning_rate": 3.34219933348871e-05, "loss": 0.8137, "num_input_tokens_seen": 643994400, "step": 3568 }, { "epoch": 0.3907058211773721, "grad_norm": 1.0562180328046171, "learning_rate": 3.34138968239376e-05, "loss": 0.5446, "num_input_tokens_seen": 644182784, "step": 3569 }, { "epoch": 0.39081529324831005, "grad_norm": 1.0796564255036636, "learning_rate": 3.3405799317640196e-05, "loss": 0.5222, "num_input_tokens_seen": 644384832, "step": 3570 }, { "epoch": 0.3909247653192479, "grad_norm": 1.2206941894204206, "learning_rate": 3.3397700816952795e-05, "loss": 0.7357, "num_input_tokens_seen": 644547008, "step": 3571 }, { "epoch": 0.39103423739018583, "grad_norm": 1.3159253065368237, "learning_rate": 3.3389601322833454e-05, "loss": 0.7655, "num_input_tokens_seen": 644707616, "step": 3572 }, { "epoch": 0.39114370946112376, "grad_norm": 1.2684138148601778, "learning_rate": 3.3381500836240296e-05, "loss": 0.6157, "num_input_tokens_seen": 644873152, "step": 3573 }, { "epoch": 0.3912531815320616, "grad_norm": 1.3135177208787245, "learning_rate": 3.337339935813163e-05, "loss": 0.6822, "num_input_tokens_seen": 645043168, "step": 3574 }, { "epoch": 0.39136265360299954, "grad_norm": 1.1272707380839386, "learning_rate": 3.3365296889465814e-05, "loss": 0.5988, "num_input_tokens_seen": 645246112, "step": 3575 }, { "epoch": 0.3914721256739374, "grad_norm": 1.094590708651246, "learning_rate": 3.3357193431201374e-05, "loss": 0.6227, "num_input_tokens_seen": 645443232, "step": 3576 }, { "epoch": 0.39158159774487533, "grad_norm": 1.3726788783284434, "learning_rate": 3.3349088984296916e-05, "loss": 0.7044, "num_input_tokens_seen": 645619296, "step": 3577 }, { "epoch": 0.39169106981581325, "grad_norm": 1.170473404990806, "learning_rate": 3.33409835497112e-05, "loss": 0.6025, "num_input_tokens_seen": 645810816, "step": 3578 }, { "epoch": 0.3918005418867511, "grad_norm": 1.1625564869108442, "learning_rate": 3.333287712840308e-05, "loss": 0.5436, "num_input_tokens_seen": 645967616, "step": 3579 }, { "epoch": 0.39191001395768904, "grad_norm": 1.363762144949005, "learning_rate": 3.3324769721331515e-05, "loss": 0.6143, "num_input_tokens_seen": 646097984, "step": 3580 }, { "epoch": 0.39201948602862696, "grad_norm": 1.228761487656813, "learning_rate": 3.331666132945562e-05, "loss": 0.7269, "num_input_tokens_seen": 646266656, "step": 3581 }, { "epoch": 0.39212895809956483, "grad_norm": 1.2940646134566307, "learning_rate": 3.3308551953734576e-05, "loss": 0.7231, "num_input_tokens_seen": 646443168, "step": 3582 }, { "epoch": 0.39223843017050275, "grad_norm": 1.303883653265525, "learning_rate": 3.330044159512773e-05, "loss": 0.6935, "num_input_tokens_seen": 646636928, "step": 3583 }, { "epoch": 0.39234790224144067, "grad_norm": 1.077965987268732, "learning_rate": 3.3292330254594504e-05, "loss": 0.6439, "num_input_tokens_seen": 646803360, "step": 3584 }, { "epoch": 0.39245737431237854, "grad_norm": 1.1884772144776612, "learning_rate": 3.3284217933094465e-05, "loss": 0.6084, "num_input_tokens_seen": 647002272, "step": 3585 }, { "epoch": 0.39256684638331646, "grad_norm": 1.186322984128483, "learning_rate": 3.3276104631587274e-05, "loss": 0.7016, "num_input_tokens_seen": 647190432, "step": 3586 }, { "epoch": 0.3926763184542544, "grad_norm": 1.190385625209975, "learning_rate": 3.326799035103273e-05, "loss": 0.7, "num_input_tokens_seen": 647349696, "step": 3587 }, { "epoch": 0.39278579052519225, "grad_norm": 1.2462223668224963, "learning_rate": 3.325987509239074e-05, "loss": 0.6261, "num_input_tokens_seen": 647548160, "step": 3588 }, { "epoch": 0.39289526259613017, "grad_norm": 1.3320126797900231, "learning_rate": 3.3251758856621303e-05, "loss": 0.6884, "num_input_tokens_seen": 647726016, "step": 3589 }, { "epoch": 0.3930047346670681, "grad_norm": 1.144188683185246, "learning_rate": 3.324364164468458e-05, "loss": 0.5285, "num_input_tokens_seen": 647920224, "step": 3590 }, { "epoch": 0.39311420673800596, "grad_norm": 1.27043124649191, "learning_rate": 3.3235523457540805e-05, "loss": 0.6309, "num_input_tokens_seen": 648098752, "step": 3591 }, { "epoch": 0.3932236788089439, "grad_norm": 1.340730068117669, "learning_rate": 3.322740429615035e-05, "loss": 0.6196, "num_input_tokens_seen": 648280864, "step": 3592 }, { "epoch": 0.39333315087988174, "grad_norm": 1.3109387694433383, "learning_rate": 3.32192841614737e-05, "loss": 0.8418, "num_input_tokens_seen": 648464768, "step": 3593 }, { "epoch": 0.39344262295081966, "grad_norm": 1.320735795825061, "learning_rate": 3.321116305447143e-05, "loss": 0.5551, "num_input_tokens_seen": 648630528, "step": 3594 }, { "epoch": 0.3935520950217576, "grad_norm": 1.1608416655096068, "learning_rate": 3.3203040976104285e-05, "loss": 0.6282, "num_input_tokens_seen": 648822048, "step": 3595 }, { "epoch": 0.39366156709269545, "grad_norm": 1.2073681308777233, "learning_rate": 3.319491792733307e-05, "loss": 0.7658, "num_input_tokens_seen": 648998336, "step": 3596 }, { "epoch": 0.3937710391636334, "grad_norm": 1.2663758017134827, "learning_rate": 3.318679390911873e-05, "loss": 0.9055, "num_input_tokens_seen": 649200608, "step": 3597 }, { "epoch": 0.3938805112345713, "grad_norm": 1.2470689264228545, "learning_rate": 3.317866892242231e-05, "loss": 0.6582, "num_input_tokens_seen": 649363008, "step": 3598 }, { "epoch": 0.39398998330550916, "grad_norm": 1.191243144893236, "learning_rate": 3.3170542968205e-05, "loss": 0.8219, "num_input_tokens_seen": 649561248, "step": 3599 }, { "epoch": 0.3940994553764471, "grad_norm": 1.2888305917843326, "learning_rate": 3.316241604742807e-05, "loss": 0.7225, "num_input_tokens_seen": 649715136, "step": 3600 }, { "epoch": 0.394208927447385, "grad_norm": 1.3329266981101362, "learning_rate": 3.3154288161052936e-05, "loss": 0.8177, "num_input_tokens_seen": 649896576, "step": 3601 }, { "epoch": 0.39431839951832287, "grad_norm": 1.2269979785103533, "learning_rate": 3.3146159310041095e-05, "loss": 0.6565, "num_input_tokens_seen": 650093024, "step": 3602 }, { "epoch": 0.3944278715892608, "grad_norm": 1.377782260894318, "learning_rate": 3.3138029495354184e-05, "loss": 0.9172, "num_input_tokens_seen": 650274912, "step": 3603 }, { "epoch": 0.3945373436601987, "grad_norm": 1.2502226154461102, "learning_rate": 3.3129898717953946e-05, "loss": 0.6353, "num_input_tokens_seen": 650463296, "step": 3604 }, { "epoch": 0.3946468157311366, "grad_norm": 1.26283469909625, "learning_rate": 3.312176697880222e-05, "loss": 0.661, "num_input_tokens_seen": 650634208, "step": 3605 }, { "epoch": 0.3947562878020745, "grad_norm": 1.2285668100146678, "learning_rate": 3.3113634278860994e-05, "loss": 0.6968, "num_input_tokens_seen": 650821024, "step": 3606 }, { "epoch": 0.3948657598730124, "grad_norm": 1.266769598032897, "learning_rate": 3.310550061909233e-05, "loss": 0.8367, "num_input_tokens_seen": 650989472, "step": 3607 }, { "epoch": 0.3949752319439503, "grad_norm": 1.310839920140568, "learning_rate": 3.3097366000458454e-05, "loss": 0.5892, "num_input_tokens_seen": 651161952, "step": 3608 }, { "epoch": 0.3950847040148882, "grad_norm": 1.2365080836548983, "learning_rate": 3.308923042392165e-05, "loss": 0.6177, "num_input_tokens_seen": 651331072, "step": 3609 }, { "epoch": 0.3951941760858261, "grad_norm": 1.2738391266283098, "learning_rate": 3.308109389044436e-05, "loss": 0.6569, "num_input_tokens_seen": 651499296, "step": 3610 }, { "epoch": 0.395303648156764, "grad_norm": 1.2677549641458778, "learning_rate": 3.3072956400989103e-05, "loss": 0.5741, "num_input_tokens_seen": 651671552, "step": 3611 }, { "epoch": 0.3954131202277019, "grad_norm": 1.3662905557377094, "learning_rate": 3.306481795651854e-05, "loss": 0.7297, "num_input_tokens_seen": 651842016, "step": 3612 }, { "epoch": 0.3955225922986398, "grad_norm": 1.4891330153909912, "learning_rate": 3.3056678557995434e-05, "loss": 0.5838, "num_input_tokens_seen": 652005536, "step": 3613 }, { "epoch": 0.3956320643695777, "grad_norm": 1.2231512560631004, "learning_rate": 3.3048538206382645e-05, "loss": 0.5869, "num_input_tokens_seen": 652154272, "step": 3614 }, { "epoch": 0.39574153644051563, "grad_norm": 1.2589303966458743, "learning_rate": 3.3040396902643186e-05, "loss": 0.6406, "num_input_tokens_seen": 652318240, "step": 3615 }, { "epoch": 0.3958510085114535, "grad_norm": 1.1263786451598032, "learning_rate": 3.3032254647740135e-05, "loss": 0.6463, "num_input_tokens_seen": 652484448, "step": 3616 }, { "epoch": 0.3959604805823914, "grad_norm": 1.2446715271672166, "learning_rate": 3.3024111442636716e-05, "loss": 0.63, "num_input_tokens_seen": 652647968, "step": 3617 }, { "epoch": 0.39606995265332934, "grad_norm": 1.2280848586338806, "learning_rate": 3.3015967288296256e-05, "loss": 0.7043, "num_input_tokens_seen": 652804992, "step": 3618 }, { "epoch": 0.3961794247242672, "grad_norm": 1.1692391825288329, "learning_rate": 3.300782218568218e-05, "loss": 0.6614, "num_input_tokens_seen": 653006368, "step": 3619 }, { "epoch": 0.3962888967952051, "grad_norm": 1.0807395413678265, "learning_rate": 3.299967613575806e-05, "loss": 0.5085, "num_input_tokens_seen": 653202144, "step": 3620 }, { "epoch": 0.39639836886614305, "grad_norm": 1.3740556681959624, "learning_rate": 3.299152913948754e-05, "loss": 0.7079, "num_input_tokens_seen": 653367904, "step": 3621 }, { "epoch": 0.3965078409370809, "grad_norm": 1.2717832340062187, "learning_rate": 3.298338119783439e-05, "loss": 0.6152, "num_input_tokens_seen": 653579584, "step": 3622 }, { "epoch": 0.39661731300801883, "grad_norm": 1.2752676608057942, "learning_rate": 3.297523231176253e-05, "loss": 0.6164, "num_input_tokens_seen": 653740192, "step": 3623 }, { "epoch": 0.39672678507895676, "grad_norm": 1.263821623948704, "learning_rate": 3.296708248223592e-05, "loss": 0.7469, "num_input_tokens_seen": 653919168, "step": 3624 }, { "epoch": 0.3968362571498946, "grad_norm": 1.2043484498894914, "learning_rate": 3.295893171021868e-05, "loss": 0.7527, "num_input_tokens_seen": 654093888, "step": 3625 }, { "epoch": 0.39694572922083254, "grad_norm": 1.2429253168387997, "learning_rate": 3.295077999667504e-05, "loss": 0.7998, "num_input_tokens_seen": 654303776, "step": 3626 }, { "epoch": 0.3970552012917704, "grad_norm": 1.2930717312003301, "learning_rate": 3.294262734256933e-05, "loss": 0.5605, "num_input_tokens_seen": 654472000, "step": 3627 }, { "epoch": 0.39716467336270833, "grad_norm": 1.3317386607895256, "learning_rate": 3.2934473748865976e-05, "loss": 0.5277, "num_input_tokens_seen": 654634400, "step": 3628 }, { "epoch": 0.39727414543364625, "grad_norm": 1.2690993293203154, "learning_rate": 3.292631921652955e-05, "loss": 0.7428, "num_input_tokens_seen": 654824576, "step": 3629 }, { "epoch": 0.3973836175045841, "grad_norm": 1.3802729417566013, "learning_rate": 3.2918163746524714e-05, "loss": 0.6288, "num_input_tokens_seen": 654989888, "step": 3630 }, { "epoch": 0.39749308957552204, "grad_norm": 1.164028397643034, "learning_rate": 3.291000733981624e-05, "loss": 0.7729, "num_input_tokens_seen": 655178272, "step": 3631 }, { "epoch": 0.39760256164645996, "grad_norm": 1.402317567543707, "learning_rate": 3.290184999736903e-05, "loss": 0.6577, "num_input_tokens_seen": 655374944, "step": 3632 }, { "epoch": 0.39771203371739783, "grad_norm": 1.3285255725626135, "learning_rate": 3.2893691720148064e-05, "loss": 0.8782, "num_input_tokens_seen": 655561312, "step": 3633 }, { "epoch": 0.39782150578833575, "grad_norm": 1.2818462101136592, "learning_rate": 3.2885532509118446e-05, "loss": 0.7927, "num_input_tokens_seen": 655758656, "step": 3634 }, { "epoch": 0.39793097785927367, "grad_norm": 1.2217396478247546, "learning_rate": 3.2877372365245426e-05, "loss": 0.6926, "num_input_tokens_seen": 655941888, "step": 3635 }, { "epoch": 0.39804044993021154, "grad_norm": 1.2455841265995515, "learning_rate": 3.28692112894943e-05, "loss": 0.7179, "num_input_tokens_seen": 656100032, "step": 3636 }, { "epoch": 0.39814992200114946, "grad_norm": 1.09391826970123, "learning_rate": 3.286104928283054e-05, "loss": 0.5905, "num_input_tokens_seen": 656296704, "step": 3637 }, { "epoch": 0.3982593940720874, "grad_norm": 1.2616370207118368, "learning_rate": 3.285288634621966e-05, "loss": 0.6305, "num_input_tokens_seen": 656470528, "step": 3638 }, { "epoch": 0.39836886614302525, "grad_norm": 1.2925541146643078, "learning_rate": 3.2844722480627346e-05, "loss": 0.8518, "num_input_tokens_seen": 656636960, "step": 3639 }, { "epoch": 0.39847833821396317, "grad_norm": 1.2377343158255998, "learning_rate": 3.2836557687019356e-05, "loss": 0.7705, "num_input_tokens_seen": 656814368, "step": 3640 }, { "epoch": 0.3985878102849011, "grad_norm": 1.308449261787635, "learning_rate": 3.2828391966361574e-05, "loss": 0.7442, "num_input_tokens_seen": 657002080, "step": 3641 }, { "epoch": 0.39869728235583896, "grad_norm": 1.3069297117203984, "learning_rate": 3.2820225319619985e-05, "loss": 0.7696, "num_input_tokens_seen": 657186656, "step": 3642 }, { "epoch": 0.3988067544267769, "grad_norm": 1.397152699425544, "learning_rate": 3.281205774776069e-05, "loss": 0.6857, "num_input_tokens_seen": 657345696, "step": 3643 }, { "epoch": 0.39891622649771474, "grad_norm": 1.2724797678540034, "learning_rate": 3.280388925174991e-05, "loss": 0.6552, "num_input_tokens_seen": 657547296, "step": 3644 }, { "epoch": 0.39902569856865266, "grad_norm": 1.173183067156921, "learning_rate": 3.279571983255394e-05, "loss": 0.4807, "num_input_tokens_seen": 657695584, "step": 3645 }, { "epoch": 0.3991351706395906, "grad_norm": 1.242954904801652, "learning_rate": 3.278754949113921e-05, "loss": 0.5602, "num_input_tokens_seen": 657857088, "step": 3646 }, { "epoch": 0.39924464271052845, "grad_norm": 1.131322999569468, "learning_rate": 3.277937822847228e-05, "loss": 0.6172, "num_input_tokens_seen": 658014784, "step": 3647 }, { "epoch": 0.3993541147814664, "grad_norm": 1.2692983090830543, "learning_rate": 3.277120604551976e-05, "loss": 0.5639, "num_input_tokens_seen": 658137984, "step": 3648 }, { "epoch": 0.3994635868524043, "grad_norm": 1.269377901645877, "learning_rate": 3.276303294324843e-05, "loss": 0.8239, "num_input_tokens_seen": 658332864, "step": 3649 }, { "epoch": 0.39957305892334216, "grad_norm": 1.3896857958320614, "learning_rate": 3.275485892262514e-05, "loss": 0.7238, "num_input_tokens_seen": 658513408, "step": 3650 }, { "epoch": 0.3996825309942801, "grad_norm": 1.3958951974321174, "learning_rate": 3.274668398461686e-05, "loss": 0.7913, "num_input_tokens_seen": 658691712, "step": 3651 }, { "epoch": 0.399792003065218, "grad_norm": 1.4004636184205108, "learning_rate": 3.273850813019068e-05, "loss": 0.8298, "num_input_tokens_seen": 658890400, "step": 3652 }, { "epoch": 0.39990147513615587, "grad_norm": 1.3085576294426045, "learning_rate": 3.273033136031378e-05, "loss": 0.6915, "num_input_tokens_seen": 659069376, "step": 3653 }, { "epoch": 0.4000109472070938, "grad_norm": 1.3150618845371322, "learning_rate": 3.272215367595346e-05, "loss": 0.852, "num_input_tokens_seen": 659232000, "step": 3654 }, { "epoch": 0.4001204192780317, "grad_norm": 1.4104768172059068, "learning_rate": 3.271397507807712e-05, "loss": 0.7987, "num_input_tokens_seen": 659419264, "step": 3655 }, { "epoch": 0.4002298913489696, "grad_norm": 1.3544353316406197, "learning_rate": 3.2705795567652276e-05, "loss": 0.7644, "num_input_tokens_seen": 659593536, "step": 3656 }, { "epoch": 0.4003393634199075, "grad_norm": 1.3863142256310037, "learning_rate": 3.269761514564655e-05, "loss": 0.8496, "num_input_tokens_seen": 659793120, "step": 3657 }, { "epoch": 0.4004488354908454, "grad_norm": 1.1732675035570337, "learning_rate": 3.268943381302767e-05, "loss": 0.6602, "num_input_tokens_seen": 659992032, "step": 3658 }, { "epoch": 0.4005583075617833, "grad_norm": 1.2539254540609825, "learning_rate": 3.268125157076346e-05, "loss": 0.8245, "num_input_tokens_seen": 660171680, "step": 3659 }, { "epoch": 0.4006677796327212, "grad_norm": 1.389121281996713, "learning_rate": 3.267306841982188e-05, "loss": 0.8781, "num_input_tokens_seen": 660325792, "step": 3660 }, { "epoch": 0.4007772517036591, "grad_norm": 1.191549073710659, "learning_rate": 3.266488436117097e-05, "loss": 0.7475, "num_input_tokens_seen": 660498944, "step": 3661 }, { "epoch": 0.400886723774597, "grad_norm": 1.2752438237291768, "learning_rate": 3.265669939577889e-05, "loss": 0.7077, "num_input_tokens_seen": 660675680, "step": 3662 }, { "epoch": 0.4009961958455349, "grad_norm": 1.0770621105196265, "learning_rate": 3.264851352461391e-05, "loss": 0.5512, "num_input_tokens_seen": 660871904, "step": 3663 }, { "epoch": 0.4011056679164728, "grad_norm": 1.2632799911728663, "learning_rate": 3.26403267486444e-05, "loss": 0.8656, "num_input_tokens_seen": 661040352, "step": 3664 }, { "epoch": 0.4012151399874107, "grad_norm": 1.0695110020974459, "learning_rate": 3.263213906883885e-05, "loss": 0.7723, "num_input_tokens_seen": 661238816, "step": 3665 }, { "epoch": 0.40132461205834863, "grad_norm": 1.2395630468850964, "learning_rate": 3.262395048616584e-05, "loss": 0.6963, "num_input_tokens_seen": 661412864, "step": 3666 }, { "epoch": 0.4014340841292865, "grad_norm": 1.2889644238702296, "learning_rate": 3.2615761001594055e-05, "loss": 0.7936, "num_input_tokens_seen": 661580864, "step": 3667 }, { "epoch": 0.4015435562002244, "grad_norm": 1.3078531475427257, "learning_rate": 3.26075706160923e-05, "loss": 0.8228, "num_input_tokens_seen": 661773952, "step": 3668 }, { "epoch": 0.40165302827116234, "grad_norm": 1.2653919994490215, "learning_rate": 3.259937933062949e-05, "loss": 0.7054, "num_input_tokens_seen": 661946656, "step": 3669 }, { "epoch": 0.4017625003421002, "grad_norm": 1.1054932284930643, "learning_rate": 3.2591187146174636e-05, "loss": 0.5957, "num_input_tokens_seen": 662106816, "step": 3670 }, { "epoch": 0.4018719724130381, "grad_norm": 1.3708221079512648, "learning_rate": 3.258299406369685e-05, "loss": 0.95, "num_input_tokens_seen": 662288032, "step": 3671 }, { "epoch": 0.40198144448397605, "grad_norm": 1.1613410012320147, "learning_rate": 3.257480008416536e-05, "loss": 0.7413, "num_input_tokens_seen": 662493888, "step": 3672 }, { "epoch": 0.4020909165549139, "grad_norm": 1.2296061565246283, "learning_rate": 3.25666052085495e-05, "loss": 0.7711, "num_input_tokens_seen": 662673088, "step": 3673 }, { "epoch": 0.40220038862585183, "grad_norm": 1.2701056956065053, "learning_rate": 3.2558409437818714e-05, "loss": 0.8008, "num_input_tokens_seen": 662841088, "step": 3674 }, { "epoch": 0.40230986069678976, "grad_norm": 1.212205481399557, "learning_rate": 3.255021277294253e-05, "loss": 0.6629, "num_input_tokens_seen": 663009088, "step": 3675 }, { "epoch": 0.4024193327677276, "grad_norm": 1.2206261694492846, "learning_rate": 3.254201521489062e-05, "loss": 0.801, "num_input_tokens_seen": 663183808, "step": 3676 }, { "epoch": 0.40252880483866554, "grad_norm": 1.4016140285107783, "learning_rate": 3.253381676463273e-05, "loss": 0.8856, "num_input_tokens_seen": 663379584, "step": 3677 }, { "epoch": 0.4026382769096034, "grad_norm": 1.2131598967653145, "learning_rate": 3.252561742313871e-05, "loss": 0.7237, "num_input_tokens_seen": 663546688, "step": 3678 }, { "epoch": 0.40274774898054133, "grad_norm": 1.6379427100282962, "learning_rate": 3.2517417191378544e-05, "loss": 0.7983, "num_input_tokens_seen": 663757248, "step": 3679 }, { "epoch": 0.40285722105147925, "grad_norm": 1.2255325366540977, "learning_rate": 3.250921607032229e-05, "loss": 0.7509, "num_input_tokens_seen": 663943168, "step": 3680 }, { "epoch": 0.4029666931224171, "grad_norm": 1.3817897398003052, "learning_rate": 3.2501014060940135e-05, "loss": 0.845, "num_input_tokens_seen": 664134912, "step": 3681 }, { "epoch": 0.40307616519335504, "grad_norm": 1.2974342151642344, "learning_rate": 3.249281116420234e-05, "loss": 0.661, "num_input_tokens_seen": 664336288, "step": 3682 }, { "epoch": 0.40318563726429296, "grad_norm": 1.3138004853686573, "learning_rate": 3.248460738107932e-05, "loss": 0.7089, "num_input_tokens_seen": 664490176, "step": 3683 }, { "epoch": 0.40329510933523083, "grad_norm": 1.242513218047603, "learning_rate": 3.2476402712541556e-05, "loss": 0.6887, "num_input_tokens_seen": 664677440, "step": 3684 }, { "epoch": 0.40340458140616875, "grad_norm": 1.2717386627231846, "learning_rate": 3.246819715955964e-05, "loss": 0.7156, "num_input_tokens_seen": 664863360, "step": 3685 }, { "epoch": 0.40351405347710667, "grad_norm": 1.286092863280707, "learning_rate": 3.2459990723104285e-05, "loss": 0.691, "num_input_tokens_seen": 665021280, "step": 3686 }, { "epoch": 0.40362352554804454, "grad_norm": 1.1662299440055666, "learning_rate": 3.245178340414628e-05, "loss": 0.6117, "num_input_tokens_seen": 665220864, "step": 3687 }, { "epoch": 0.40373299761898246, "grad_norm": 1.1990452743458149, "learning_rate": 3.244357520365654e-05, "loss": 0.6945, "num_input_tokens_seen": 665399840, "step": 3688 }, { "epoch": 0.4038424696899204, "grad_norm": 1.1704539520383377, "learning_rate": 3.243536612260609e-05, "loss": 0.6683, "num_input_tokens_seen": 665576128, "step": 3689 }, { "epoch": 0.40395194176085825, "grad_norm": 1.287552891728141, "learning_rate": 3.242715616196604e-05, "loss": 0.507, "num_input_tokens_seen": 665744576, "step": 3690 }, { "epoch": 0.40406141383179617, "grad_norm": 1.2030954728900063, "learning_rate": 3.241894532270762e-05, "loss": 0.6367, "num_input_tokens_seen": 665898240, "step": 3691 }, { "epoch": 0.4041708859027341, "grad_norm": 1.1396115647654064, "learning_rate": 3.2410733605802146e-05, "loss": 0.5868, "num_input_tokens_seen": 666071392, "step": 3692 }, { "epoch": 0.40428035797367196, "grad_norm": 1.546714928561888, "learning_rate": 3.240252101222105e-05, "loss": 0.9129, "num_input_tokens_seen": 666230208, "step": 3693 }, { "epoch": 0.4043898300446099, "grad_norm": 1.272651570254738, "learning_rate": 3.2394307542935876e-05, "loss": 0.8384, "num_input_tokens_seen": 666440768, "step": 3694 }, { "epoch": 0.4044993021155478, "grad_norm": 1.2800190758028767, "learning_rate": 3.2386093198918246e-05, "loss": 0.6633, "num_input_tokens_seen": 666601376, "step": 3695 }, { "epoch": 0.40460877418648566, "grad_norm": 1.267239612114587, "learning_rate": 3.237787798113992e-05, "loss": 0.7502, "num_input_tokens_seen": 666761760, "step": 3696 }, { "epoch": 0.4047182462574236, "grad_norm": 1.277364421525562, "learning_rate": 3.236966189057273e-05, "loss": 0.7542, "num_input_tokens_seen": 666946784, "step": 3697 }, { "epoch": 0.40482771832836145, "grad_norm": 1.3170148761259581, "learning_rate": 3.236144492818862e-05, "loss": 0.6308, "num_input_tokens_seen": 667107168, "step": 3698 }, { "epoch": 0.4049371903992994, "grad_norm": 1.2279859410733063, "learning_rate": 3.235322709495966e-05, "loss": 0.8963, "num_input_tokens_seen": 667283008, "step": 3699 }, { "epoch": 0.4050466624702373, "grad_norm": 1.3487147500396395, "learning_rate": 3.234500839185799e-05, "loss": 0.7218, "num_input_tokens_seen": 667429952, "step": 3700 }, { "epoch": 0.40515613454117516, "grad_norm": 1.3047888710706277, "learning_rate": 3.233678881985586e-05, "loss": 0.7665, "num_input_tokens_seen": 667631776, "step": 3701 }, { "epoch": 0.4052656066121131, "grad_norm": 1.228903171077104, "learning_rate": 3.232856837992564e-05, "loss": 0.9228, "num_input_tokens_seen": 667821728, "step": 3702 }, { "epoch": 0.405375078683051, "grad_norm": 1.3546740046162247, "learning_rate": 3.232034707303979e-05, "loss": 0.7327, "num_input_tokens_seen": 667997120, "step": 3703 }, { "epoch": 0.40548455075398887, "grad_norm": 1.2747899462135568, "learning_rate": 3.231212490017088e-05, "loss": 0.6943, "num_input_tokens_seen": 668197152, "step": 3704 }, { "epoch": 0.4055940228249268, "grad_norm": 1.258332897127257, "learning_rate": 3.230390186229157e-05, "loss": 0.6198, "num_input_tokens_seen": 668370976, "step": 3705 }, { "epoch": 0.4057034948958647, "grad_norm": 1.2346644484700517, "learning_rate": 3.229567796037463e-05, "loss": 0.7217, "num_input_tokens_seen": 668548832, "step": 3706 }, { "epoch": 0.4058129669668026, "grad_norm": 1.320327126687195, "learning_rate": 3.228745319539294e-05, "loss": 0.5537, "num_input_tokens_seen": 668709888, "step": 3707 }, { "epoch": 0.4059224390377405, "grad_norm": 1.3191903215377156, "learning_rate": 3.227922756831947e-05, "loss": 0.6726, "num_input_tokens_seen": 668870496, "step": 3708 }, { "epoch": 0.4060319111086784, "grad_norm": 1.3539414829924061, "learning_rate": 3.227100108012728e-05, "loss": 0.7782, "num_input_tokens_seen": 669060224, "step": 3709 }, { "epoch": 0.4061413831796163, "grad_norm": 1.1912747670147819, "learning_rate": 3.226277373178957e-05, "loss": 0.5244, "num_input_tokens_seen": 669217024, "step": 3710 }, { "epoch": 0.4062508552505542, "grad_norm": 1.394285249783988, "learning_rate": 3.2254545524279626e-05, "loss": 0.6058, "num_input_tokens_seen": 669393984, "step": 3711 }, { "epoch": 0.40636032732149213, "grad_norm": 1.1371531492172813, "learning_rate": 3.22463164585708e-05, "loss": 0.6531, "num_input_tokens_seen": 669539360, "step": 3712 }, { "epoch": 0.40646979939243, "grad_norm": 1.2974318722017992, "learning_rate": 3.223808653563659e-05, "loss": 0.6643, "num_input_tokens_seen": 669680928, "step": 3713 }, { "epoch": 0.4065792714633679, "grad_norm": 1.3872738868745347, "learning_rate": 3.222985575645058e-05, "loss": 0.7938, "num_input_tokens_seen": 669882976, "step": 3714 }, { "epoch": 0.4066887435343058, "grad_norm": 1.1899319892169435, "learning_rate": 3.222162412198646e-05, "loss": 0.7022, "num_input_tokens_seen": 670060608, "step": 3715 }, { "epoch": 0.4067982156052437, "grad_norm": 1.2406411292423316, "learning_rate": 3.221339163321801e-05, "loss": 0.6649, "num_input_tokens_seen": 670197920, "step": 3716 }, { "epoch": 0.40690768767618163, "grad_norm": 1.1157628292138513, "learning_rate": 3.220515829111911e-05, "loss": 0.6229, "num_input_tokens_seen": 670403104, "step": 3717 }, { "epoch": 0.4070171597471195, "grad_norm": 1.1632652368717944, "learning_rate": 3.219692409666377e-05, "loss": 0.8802, "num_input_tokens_seen": 670611200, "step": 3718 }, { "epoch": 0.4071266318180574, "grad_norm": 1.175329787054426, "learning_rate": 3.218868905082606e-05, "loss": 0.6061, "num_input_tokens_seen": 670811680, "step": 3719 }, { "epoch": 0.40723610388899534, "grad_norm": 1.2680709656643339, "learning_rate": 3.218045315458018e-05, "loss": 0.6817, "num_input_tokens_seen": 670949888, "step": 3720 }, { "epoch": 0.4073455759599332, "grad_norm": 1.2905873383206412, "learning_rate": 3.2172216408900426e-05, "loss": 0.6153, "num_input_tokens_seen": 671121024, "step": 3721 }, { "epoch": 0.4074550480308711, "grad_norm": 1.2963838681406983, "learning_rate": 3.2163978814761174e-05, "loss": 0.7762, "num_input_tokens_seen": 671307840, "step": 3722 }, { "epoch": 0.40756452010180905, "grad_norm": 1.2570227756060484, "learning_rate": 3.215574037313692e-05, "loss": 0.7713, "num_input_tokens_seen": 671491968, "step": 3723 }, { "epoch": 0.4076739921727469, "grad_norm": 1.2582187825708802, "learning_rate": 3.214750108500227e-05, "loss": 0.6724, "num_input_tokens_seen": 671687072, "step": 3724 }, { "epoch": 0.40778346424368483, "grad_norm": 1.2832533827974348, "learning_rate": 3.21392609513319e-05, "loss": 0.7907, "num_input_tokens_seen": 671864032, "step": 3725 }, { "epoch": 0.40789293631462276, "grad_norm": 1.2755488579401064, "learning_rate": 3.21310199731006e-05, "loss": 0.878, "num_input_tokens_seen": 672034720, "step": 3726 }, { "epoch": 0.4080024083855606, "grad_norm": 1.1215942541576578, "learning_rate": 3.212277815128328e-05, "loss": 0.5762, "num_input_tokens_seen": 672210336, "step": 3727 }, { "epoch": 0.40811188045649854, "grad_norm": 1.3624782438028582, "learning_rate": 3.2114535486854915e-05, "loss": 0.6817, "num_input_tokens_seen": 672370944, "step": 3728 }, { "epoch": 0.40822135252743647, "grad_norm": 1.5544024642278875, "learning_rate": 3.210629198079061e-05, "loss": 0.7638, "num_input_tokens_seen": 672532672, "step": 3729 }, { "epoch": 0.40833082459837433, "grad_norm": 1.3236214970362117, "learning_rate": 3.209804763406554e-05, "loss": 0.6333, "num_input_tokens_seen": 672665056, "step": 3730 }, { "epoch": 0.40844029666931225, "grad_norm": 1.2358532034077632, "learning_rate": 3.2089802447655006e-05, "loss": 0.7198, "num_input_tokens_seen": 672848512, "step": 3731 }, { "epoch": 0.4085497687402501, "grad_norm": 1.2264317728342489, "learning_rate": 3.20815564225344e-05, "loss": 0.7495, "num_input_tokens_seen": 673031072, "step": 3732 }, { "epoch": 0.40865924081118804, "grad_norm": 1.1294756446667837, "learning_rate": 3.20733095596792e-05, "loss": 0.6638, "num_input_tokens_seen": 673205568, "step": 3733 }, { "epoch": 0.40876871288212596, "grad_norm": 1.248930759883833, "learning_rate": 3.2065061860065016e-05, "loss": 0.809, "num_input_tokens_seen": 673403584, "step": 3734 }, { "epoch": 0.40887818495306383, "grad_norm": 1.341604027361958, "learning_rate": 3.20568133246675e-05, "loss": 0.7813, "num_input_tokens_seen": 673571808, "step": 3735 }, { "epoch": 0.40898765702400175, "grad_norm": 1.233231026470076, "learning_rate": 3.204856395446247e-05, "loss": 0.6224, "num_input_tokens_seen": 673741152, "step": 3736 }, { "epoch": 0.40909712909493967, "grad_norm": 1.303223662449265, "learning_rate": 3.204031375042579e-05, "loss": 0.6876, "num_input_tokens_seen": 673935584, "step": 3737 }, { "epoch": 0.40920660116587754, "grad_norm": 1.2418519532551822, "learning_rate": 3.2032062713533464e-05, "loss": 0.6662, "num_input_tokens_seen": 674134944, "step": 3738 }, { "epoch": 0.40931607323681546, "grad_norm": 1.173462669620729, "learning_rate": 3.2023810844761554e-05, "loss": 0.7518, "num_input_tokens_seen": 674304736, "step": 3739 }, { "epoch": 0.4094255453077534, "grad_norm": 1.1609401430739124, "learning_rate": 3.201555814508626e-05, "loss": 0.7462, "num_input_tokens_seen": 674494016, "step": 3740 }, { "epoch": 0.40953501737869125, "grad_norm": 1.21380624902034, "learning_rate": 3.200730461548384e-05, "loss": 0.7647, "num_input_tokens_seen": 674651488, "step": 3741 }, { "epoch": 0.40964448944962917, "grad_norm": 1.3235299448560751, "learning_rate": 3.199905025693067e-05, "loss": 0.6252, "num_input_tokens_seen": 674807616, "step": 3742 }, { "epoch": 0.4097539615205671, "grad_norm": 1.2536075509595557, "learning_rate": 3.199079507040324e-05, "loss": 0.8545, "num_input_tokens_seen": 674978080, "step": 3743 }, { "epoch": 0.40986343359150496, "grad_norm": 1.2971235749427192, "learning_rate": 3.198253905687813e-05, "loss": 0.6787, "num_input_tokens_seen": 675174528, "step": 3744 }, { "epoch": 0.4099729056624429, "grad_norm": 1.2107974555958887, "learning_rate": 3.1974282217331985e-05, "loss": 0.8089, "num_input_tokens_seen": 675353280, "step": 3745 }, { "epoch": 0.4100823777333808, "grad_norm": 1.3148607639383787, "learning_rate": 3.1966024552741586e-05, "loss": 0.8599, "num_input_tokens_seen": 675527552, "step": 3746 }, { "epoch": 0.41019184980431866, "grad_norm": 1.1335742888301648, "learning_rate": 3.1957766064083804e-05, "loss": 0.5155, "num_input_tokens_seen": 675685248, "step": 3747 }, { "epoch": 0.4103013218752566, "grad_norm": 1.3238335783512087, "learning_rate": 3.19495067523356e-05, "loss": 0.7574, "num_input_tokens_seen": 675873856, "step": 3748 }, { "epoch": 0.41041079394619445, "grad_norm": 1.3268699101095733, "learning_rate": 3.194124661847403e-05, "loss": 0.7892, "num_input_tokens_seen": 676078144, "step": 3749 }, { "epoch": 0.4105202660171324, "grad_norm": 1.2688358691242079, "learning_rate": 3.193298566347625e-05, "loss": 0.6695, "num_input_tokens_seen": 676262272, "step": 3750 }, { "epoch": 0.4106297380880703, "grad_norm": 1.403869903552485, "learning_rate": 3.192472388831953e-05, "loss": 0.7248, "num_input_tokens_seen": 676450880, "step": 3751 }, { "epoch": 0.41073921015900816, "grad_norm": 1.3596078519785786, "learning_rate": 3.19164612939812e-05, "loss": 0.6391, "num_input_tokens_seen": 676593568, "step": 3752 }, { "epoch": 0.4108486822299461, "grad_norm": 1.4216867678623575, "learning_rate": 3.1908197881438727e-05, "loss": 0.7536, "num_input_tokens_seen": 676772096, "step": 3753 }, { "epoch": 0.410958154300884, "grad_norm": 1.1508172738522093, "learning_rate": 3.1899933651669656e-05, "loss": 0.5998, "num_input_tokens_seen": 676950848, "step": 3754 }, { "epoch": 0.41106762637182187, "grad_norm": 1.3081985560121252, "learning_rate": 3.1891668605651614e-05, "loss": 0.6238, "num_input_tokens_seen": 677095104, "step": 3755 }, { "epoch": 0.4111770984427598, "grad_norm": 1.2416495351868317, "learning_rate": 3.1883402744362355e-05, "loss": 0.6728, "num_input_tokens_seen": 677263328, "step": 3756 }, { "epoch": 0.4112865705136977, "grad_norm": 1.2541749375198394, "learning_rate": 3.1875136068779706e-05, "loss": 0.5951, "num_input_tokens_seen": 677422368, "step": 3757 }, { "epoch": 0.4113960425846356, "grad_norm": 1.2753282330193825, "learning_rate": 3.186686857988161e-05, "loss": 0.8697, "num_input_tokens_seen": 677618816, "step": 3758 }, { "epoch": 0.4115055146555735, "grad_norm": 1.2943971240972438, "learning_rate": 3.1858600278646084e-05, "loss": 0.698, "num_input_tokens_seen": 677778304, "step": 3759 }, { "epoch": 0.4116149867265114, "grad_norm": 1.373061287208767, "learning_rate": 3.185033116605126e-05, "loss": 0.7378, "num_input_tokens_seen": 677944736, "step": 3760 }, { "epoch": 0.4117244587974493, "grad_norm": 1.2180029707059665, "learning_rate": 3.1842061243075353e-05, "loss": 0.5998, "num_input_tokens_seen": 678141408, "step": 3761 }, { "epoch": 0.4118339308683872, "grad_norm": 1.3001079019369015, "learning_rate": 3.183379051069668e-05, "loss": 0.7681, "num_input_tokens_seen": 678320160, "step": 3762 }, { "epoch": 0.41194340293932513, "grad_norm": 1.3777353648524762, "learning_rate": 3.182551896989365e-05, "loss": 0.8954, "num_input_tokens_seen": 678518848, "step": 3763 }, { "epoch": 0.412052875010263, "grad_norm": 1.2650076691014316, "learning_rate": 3.181724662164478e-05, "loss": 0.845, "num_input_tokens_seen": 678717312, "step": 3764 }, { "epoch": 0.4121623470812009, "grad_norm": 1.1963249356866241, "learning_rate": 3.180897346692867e-05, "loss": 0.6463, "num_input_tokens_seen": 678915104, "step": 3765 }, { "epoch": 0.4122718191521388, "grad_norm": 1.2951604240792942, "learning_rate": 3.180069950672401e-05, "loss": 0.6726, "num_input_tokens_seen": 679072352, "step": 3766 }, { "epoch": 0.4123812912230767, "grad_norm": 1.4345772095807228, "learning_rate": 3.17924247420096e-05, "loss": 0.8163, "num_input_tokens_seen": 679246400, "step": 3767 }, { "epoch": 0.41249076329401463, "grad_norm": 1.4259837385644152, "learning_rate": 3.178414917376433e-05, "loss": 0.8238, "num_input_tokens_seen": 679406112, "step": 3768 }, { "epoch": 0.4126002353649525, "grad_norm": 1.178477089479149, "learning_rate": 3.1775872802967175e-05, "loss": 0.6491, "num_input_tokens_seen": 679558656, "step": 3769 }, { "epoch": 0.4127097074358904, "grad_norm": 1.2404213111148943, "learning_rate": 3.176759563059722e-05, "loss": 0.6192, "num_input_tokens_seen": 679711424, "step": 3770 }, { "epoch": 0.41281917950682834, "grad_norm": 1.308207714019226, "learning_rate": 3.175931765763365e-05, "loss": 0.8721, "num_input_tokens_seen": 679914368, "step": 3771 }, { "epoch": 0.4129286515777662, "grad_norm": 1.368821132675364, "learning_rate": 3.175103888505572e-05, "loss": 0.7846, "num_input_tokens_seen": 680093344, "step": 3772 }, { "epoch": 0.4130381236487041, "grad_norm": 1.2374501613252615, "learning_rate": 3.174275931384279e-05, "loss": 0.7348, "num_input_tokens_seen": 680287776, "step": 3773 }, { "epoch": 0.41314759571964205, "grad_norm": 1.4302662162801627, "learning_rate": 3.173447894497433e-05, "loss": 0.8077, "num_input_tokens_seen": 680461600, "step": 3774 }, { "epoch": 0.4132570677905799, "grad_norm": 1.2892069880108843, "learning_rate": 3.172619777942988e-05, "loss": 0.5736, "num_input_tokens_seen": 680650208, "step": 3775 }, { "epoch": 0.41336653986151783, "grad_norm": 1.297205365856764, "learning_rate": 3.1717915818189095e-05, "loss": 0.8898, "num_input_tokens_seen": 680848000, "step": 3776 }, { "epoch": 0.41347601193245576, "grad_norm": 1.4356139705833806, "learning_rate": 3.17096330622317e-05, "loss": 0.6552, "num_input_tokens_seen": 681023168, "step": 3777 }, { "epoch": 0.4135854840033936, "grad_norm": 1.1979643383768332, "learning_rate": 3.170134951253755e-05, "loss": 0.5491, "num_input_tokens_seen": 681224096, "step": 3778 }, { "epoch": 0.41369495607433154, "grad_norm": 1.3344274095163873, "learning_rate": 3.169306517008656e-05, "loss": 0.7226, "num_input_tokens_seen": 681381120, "step": 3779 }, { "epoch": 0.41380442814526947, "grad_norm": 1.2532811968862487, "learning_rate": 3.168478003585876e-05, "loss": 0.7341, "num_input_tokens_seen": 681590336, "step": 3780 }, { "epoch": 0.41391390021620733, "grad_norm": 1.3379898368637635, "learning_rate": 3.167649411083425e-05, "loss": 0.6995, "num_input_tokens_seen": 681790144, "step": 3781 }, { "epoch": 0.41402337228714525, "grad_norm": 1.1666173356843759, "learning_rate": 3.1668207395993265e-05, "loss": 0.7427, "num_input_tokens_seen": 681982112, "step": 3782 }, { "epoch": 0.4141328443580831, "grad_norm": 1.3434834770433088, "learning_rate": 3.1659919892316084e-05, "loss": 0.8465, "num_input_tokens_seen": 682182816, "step": 3783 }, { "epoch": 0.41424231642902104, "grad_norm": 1.278565828367422, "learning_rate": 3.1651631600783114e-05, "loss": 0.7725, "num_input_tokens_seen": 682361792, "step": 3784 }, { "epoch": 0.41435178849995896, "grad_norm": 1.2817793427202584, "learning_rate": 3.164334252237484e-05, "loss": 0.8713, "num_input_tokens_seen": 682547712, "step": 3785 }, { "epoch": 0.41446126057089683, "grad_norm": 1.2672581908126306, "learning_rate": 3.163505265807185e-05, "loss": 0.6638, "num_input_tokens_seen": 682734976, "step": 3786 }, { "epoch": 0.41457073264183475, "grad_norm": 1.232367367755732, "learning_rate": 3.162676200885481e-05, "loss": 0.584, "num_input_tokens_seen": 682907680, "step": 3787 }, { "epoch": 0.41468020471277267, "grad_norm": 1.222483321078053, "learning_rate": 3.161847057570449e-05, "loss": 0.7333, "num_input_tokens_seen": 683095392, "step": 3788 }, { "epoch": 0.41478967678371054, "grad_norm": 1.2031435779276642, "learning_rate": 3.161017835960176e-05, "loss": 0.6066, "num_input_tokens_seen": 683264736, "step": 3789 }, { "epoch": 0.41489914885464846, "grad_norm": 1.374412497806458, "learning_rate": 3.160188536152756e-05, "loss": 0.6242, "num_input_tokens_seen": 683442592, "step": 3790 }, { "epoch": 0.4150086209255864, "grad_norm": 1.2672925724442494, "learning_rate": 3.159359158246294e-05, "loss": 0.7106, "num_input_tokens_seen": 683619776, "step": 3791 }, { "epoch": 0.41511809299652425, "grad_norm": 1.2280992248004658, "learning_rate": 3.158529702338905e-05, "loss": 0.7449, "num_input_tokens_seen": 683815328, "step": 3792 }, { "epoch": 0.41522756506746217, "grad_norm": 1.353922112708214, "learning_rate": 3.157700168528711e-05, "loss": 0.6822, "num_input_tokens_seen": 683987136, "step": 3793 }, { "epoch": 0.4153370371384001, "grad_norm": 1.3544057146012667, "learning_rate": 3.156870556913844e-05, "loss": 0.7657, "num_input_tokens_seen": 684166560, "step": 3794 }, { "epoch": 0.41544650920933796, "grad_norm": 1.3371173935297884, "learning_rate": 3.156040867592446e-05, "loss": 0.7369, "num_input_tokens_seen": 684366368, "step": 3795 }, { "epoch": 0.4155559812802759, "grad_norm": 1.3591258818266474, "learning_rate": 3.155211100662668e-05, "loss": 0.7325, "num_input_tokens_seen": 684557888, "step": 3796 }, { "epoch": 0.4156654533512138, "grad_norm": 1.2405412909951854, "learning_rate": 3.1543812562226685e-05, "loss": 0.6887, "num_input_tokens_seen": 684743808, "step": 3797 }, { "epoch": 0.41577492542215166, "grad_norm": 1.2211601683454714, "learning_rate": 3.153551334370617e-05, "loss": 0.7659, "num_input_tokens_seen": 684955264, "step": 3798 }, { "epoch": 0.4158843974930896, "grad_norm": 1.2243563498633907, "learning_rate": 3.152721335204693e-05, "loss": 0.8113, "num_input_tokens_seen": 685160448, "step": 3799 }, { "epoch": 0.41599386956402745, "grad_norm": 1.2952961330507178, "learning_rate": 3.151891258823082e-05, "loss": 0.8624, "num_input_tokens_seen": 685354880, "step": 3800 }, { "epoch": 0.4161033416349654, "grad_norm": 1.2315551321626708, "learning_rate": 3.151061105323982e-05, "loss": 0.6928, "num_input_tokens_seen": 685527808, "step": 3801 }, { "epoch": 0.4162128137059033, "grad_norm": 1.1343309420182002, "learning_rate": 3.1502308748055975e-05, "loss": 0.4996, "num_input_tokens_seen": 685706112, "step": 3802 }, { "epoch": 0.41632228577684116, "grad_norm": 1.3676365578095673, "learning_rate": 3.1494005673661445e-05, "loss": 0.9452, "num_input_tokens_seen": 685876800, "step": 3803 }, { "epoch": 0.4164317578477791, "grad_norm": 1.268061940828319, "learning_rate": 3.1485701831038436e-05, "loss": 0.885, "num_input_tokens_seen": 686070112, "step": 3804 }, { "epoch": 0.416541229918717, "grad_norm": 1.311615964078412, "learning_rate": 3.147739722116932e-05, "loss": 0.6188, "num_input_tokens_seen": 686239456, "step": 3805 }, { "epoch": 0.41665070198965487, "grad_norm": 1.1701517277336169, "learning_rate": 3.1469091845036486e-05, "loss": 0.6699, "num_input_tokens_seen": 686429632, "step": 3806 }, { "epoch": 0.4167601740605928, "grad_norm": 1.2355745016380864, "learning_rate": 3.146078570362246e-05, "loss": 0.6324, "num_input_tokens_seen": 686583296, "step": 3807 }, { "epoch": 0.4168696461315307, "grad_norm": 1.3562656665371793, "learning_rate": 3.145247879790983e-05, "loss": 0.6235, "num_input_tokens_seen": 686768544, "step": 3808 }, { "epoch": 0.4169791182024686, "grad_norm": 1.3393884891857848, "learning_rate": 3.1444171128881294e-05, "loss": 0.6675, "num_input_tokens_seen": 686935424, "step": 3809 }, { "epoch": 0.4170885902734065, "grad_norm": 1.2700907721332393, "learning_rate": 3.1435862697519636e-05, "loss": 0.7779, "num_input_tokens_seen": 687126272, "step": 3810 }, { "epoch": 0.4171980623443444, "grad_norm": 1.466209730943331, "learning_rate": 3.142755350480772e-05, "loss": 0.949, "num_input_tokens_seen": 687296064, "step": 3811 }, { "epoch": 0.4173075344152823, "grad_norm": 1.1916737740755983, "learning_rate": 3.1419243551728513e-05, "loss": 0.5468, "num_input_tokens_seen": 687445472, "step": 3812 }, { "epoch": 0.4174170064862202, "grad_norm": 1.140336561965795, "learning_rate": 3.141093283926506e-05, "loss": 0.5563, "num_input_tokens_seen": 687612352, "step": 3813 }, { "epoch": 0.41752647855715813, "grad_norm": 1.1990870798001045, "learning_rate": 3.140262136840052e-05, "loss": 0.6637, "num_input_tokens_seen": 687798720, "step": 3814 }, { "epoch": 0.417635950628096, "grad_norm": 1.2610962457009458, "learning_rate": 3.1394309140118104e-05, "loss": 0.6875, "num_input_tokens_seen": 688012416, "step": 3815 }, { "epoch": 0.4177454226990339, "grad_norm": 1.443080271163599, "learning_rate": 3.138599615540114e-05, "loss": 0.783, "num_input_tokens_seen": 688170336, "step": 3816 }, { "epoch": 0.4178548947699718, "grad_norm": 1.3905374306527847, "learning_rate": 3.137768241523305e-05, "loss": 0.7636, "num_input_tokens_seen": 688346624, "step": 3817 }, { "epoch": 0.4179643668409097, "grad_norm": 1.3571593545326874, "learning_rate": 3.1369367920597306e-05, "loss": 0.7487, "num_input_tokens_seen": 688548000, "step": 3818 }, { "epoch": 0.41807383891184763, "grad_norm": 1.1211724336771742, "learning_rate": 3.136105267247752e-05, "loss": 0.6415, "num_input_tokens_seen": 688703232, "step": 3819 }, { "epoch": 0.4181833109827855, "grad_norm": 1.3394508786765302, "learning_rate": 3.1352736671857366e-05, "loss": 0.6731, "num_input_tokens_seen": 688902816, "step": 3820 }, { "epoch": 0.4182927830537234, "grad_norm": 1.2851878802920358, "learning_rate": 3.13444199197206e-05, "loss": 0.7449, "num_input_tokens_seen": 689065440, "step": 3821 }, { "epoch": 0.41840225512466134, "grad_norm": 1.1792893400959237, "learning_rate": 3.13361024170511e-05, "loss": 0.7958, "num_input_tokens_seen": 689287424, "step": 3822 }, { "epoch": 0.4185117271955992, "grad_norm": 1.3128896770570346, "learning_rate": 3.1327784164832786e-05, "loss": 0.7449, "num_input_tokens_seen": 689496864, "step": 3823 }, { "epoch": 0.4186211992665371, "grad_norm": 1.0455960104526916, "learning_rate": 3.13194651640497e-05, "loss": 0.4806, "num_input_tokens_seen": 689688160, "step": 3824 }, { "epoch": 0.41873067133747505, "grad_norm": 1.306634101794372, "learning_rate": 3.1311145415685975e-05, "loss": 0.7647, "num_input_tokens_seen": 689836672, "step": 3825 }, { "epoch": 0.4188401434084129, "grad_norm": 1.2205734794435803, "learning_rate": 3.13028249207258e-05, "loss": 0.6736, "num_input_tokens_seen": 690042080, "step": 3826 }, { "epoch": 0.41894961547935083, "grad_norm": 1.180235830033271, "learning_rate": 3.1294503680153496e-05, "loss": 0.6007, "num_input_tokens_seen": 690237408, "step": 3827 }, { "epoch": 0.41905908755028876, "grad_norm": 1.3371235451377905, "learning_rate": 3.128618169495344e-05, "loss": 0.7314, "num_input_tokens_seen": 690433408, "step": 3828 }, { "epoch": 0.4191685596212266, "grad_norm": 1.5299680364922623, "learning_rate": 3.1277858966110105e-05, "loss": 0.7765, "num_input_tokens_seen": 690605440, "step": 3829 }, { "epoch": 0.41927803169216454, "grad_norm": 1.1924908313358764, "learning_rate": 3.126953549460805e-05, "loss": 0.7089, "num_input_tokens_seen": 690805920, "step": 3830 }, { "epoch": 0.41938750376310246, "grad_norm": 1.2325115382832499, "learning_rate": 3.126121128143194e-05, "loss": 0.7027, "num_input_tokens_seen": 690992960, "step": 3831 }, { "epoch": 0.41949697583404033, "grad_norm": 1.3997969599356561, "learning_rate": 3.1252886327566494e-05, "loss": 0.9494, "num_input_tokens_seen": 691183136, "step": 3832 }, { "epoch": 0.41960644790497825, "grad_norm": 1.2079369070079573, "learning_rate": 3.124456063399656e-05, "loss": 0.5754, "num_input_tokens_seen": 691346656, "step": 3833 }, { "epoch": 0.4197159199759161, "grad_norm": 1.1382923573388612, "learning_rate": 3.123623420170703e-05, "loss": 0.7342, "num_input_tokens_seen": 691525184, "step": 3834 }, { "epoch": 0.41982539204685404, "grad_norm": 1.312186112790375, "learning_rate": 3.122790703168292e-05, "loss": 0.8273, "num_input_tokens_seen": 691719392, "step": 3835 }, { "epoch": 0.41993486411779196, "grad_norm": 1.2802455034853026, "learning_rate": 3.1219579124909324e-05, "loss": 0.6077, "num_input_tokens_seen": 691919424, "step": 3836 }, { "epoch": 0.4200443361887298, "grad_norm": 1.2586547685225138, "learning_rate": 3.121125048237139e-05, "loss": 0.6405, "num_input_tokens_seen": 692089664, "step": 3837 }, { "epoch": 0.42015380825966775, "grad_norm": 1.3402249430694508, "learning_rate": 3.120292110505441e-05, "loss": 0.7296, "num_input_tokens_seen": 692274464, "step": 3838 }, { "epoch": 0.42026328033060567, "grad_norm": 1.3637396361519276, "learning_rate": 3.119459099394372e-05, "loss": 0.8796, "num_input_tokens_seen": 692481216, "step": 3839 }, { "epoch": 0.42037275240154354, "grad_norm": 1.173645360749116, "learning_rate": 3.1186260150024755e-05, "loss": 0.5522, "num_input_tokens_seen": 692651456, "step": 3840 }, { "epoch": 0.42048222447248146, "grad_norm": 1.172178610572475, "learning_rate": 3.117792857428304e-05, "loss": 0.5941, "num_input_tokens_seen": 692840960, "step": 3841 }, { "epoch": 0.4205916965434194, "grad_norm": 1.342365971557708, "learning_rate": 3.116959626770418e-05, "loss": 0.5999, "num_input_tokens_seen": 693013664, "step": 3842 }, { "epoch": 0.42070116861435725, "grad_norm": 1.2732565815663928, "learning_rate": 3.1161263231273884e-05, "loss": 0.7621, "num_input_tokens_seen": 693204960, "step": 3843 }, { "epoch": 0.42081064068529517, "grad_norm": 1.2694574422655487, "learning_rate": 3.115292946597793e-05, "loss": 0.7009, "num_input_tokens_seen": 693371392, "step": 3844 }, { "epoch": 0.4209201127562331, "grad_norm": 1.242370256139509, "learning_rate": 3.1144594972802165e-05, "loss": 0.6409, "num_input_tokens_seen": 693544096, "step": 3845 }, { "epoch": 0.42102958482717096, "grad_norm": 1.308850755647869, "learning_rate": 3.1136259752732576e-05, "loss": 0.9093, "num_input_tokens_seen": 693732704, "step": 3846 }, { "epoch": 0.4211390568981089, "grad_norm": 1.4479777737170936, "learning_rate": 3.112792380675519e-05, "loss": 0.6878, "num_input_tokens_seen": 693868000, "step": 3847 }, { "epoch": 0.4212485289690468, "grad_norm": 1.1986026734641673, "learning_rate": 3.111958713585612e-05, "loss": 0.5781, "num_input_tokens_seen": 694052800, "step": 3848 }, { "epoch": 0.42135800103998466, "grad_norm": 1.2274633610340349, "learning_rate": 3.1111249741021606e-05, "loss": 0.6043, "num_input_tokens_seen": 694238272, "step": 3849 }, { "epoch": 0.4214674731109226, "grad_norm": 1.159104920878861, "learning_rate": 3.110291162323792e-05, "loss": 0.5407, "num_input_tokens_seen": 694416128, "step": 3850 }, { "epoch": 0.42157694518186045, "grad_norm": 1.1414796423113907, "learning_rate": 3.109457278349145e-05, "loss": 0.678, "num_input_tokens_seen": 694615040, "step": 3851 }, { "epoch": 0.4216864172527984, "grad_norm": 1.2230867840503652, "learning_rate": 3.108623322276868e-05, "loss": 0.6687, "num_input_tokens_seen": 694802080, "step": 3852 }, { "epoch": 0.4217958893237363, "grad_norm": 1.292391507005782, "learning_rate": 3.1077892942056153e-05, "loss": 0.657, "num_input_tokens_seen": 694962912, "step": 3853 }, { "epoch": 0.42190536139467416, "grad_norm": 1.2402713319573686, "learning_rate": 3.106955194234051e-05, "loss": 0.7479, "num_input_tokens_seen": 695140544, "step": 3854 }, { "epoch": 0.4220148334656121, "grad_norm": 1.191557977739342, "learning_rate": 3.106121022460847e-05, "loss": 0.6215, "num_input_tokens_seen": 695310336, "step": 3855 }, { "epoch": 0.42212430553655, "grad_norm": 1.2234410701227045, "learning_rate": 3.105286778984686e-05, "loss": 0.7586, "num_input_tokens_seen": 695505664, "step": 3856 }, { "epoch": 0.42223377760748787, "grad_norm": 1.3287449007124856, "learning_rate": 3.104452463904255e-05, "loss": 0.8825, "num_input_tokens_seen": 695699424, "step": 3857 }, { "epoch": 0.4223432496784258, "grad_norm": 1.258828315685132, "learning_rate": 3.1036180773182535e-05, "loss": 0.676, "num_input_tokens_seen": 695893632, "step": 3858 }, { "epoch": 0.4224527217493637, "grad_norm": 1.3428931498482986, "learning_rate": 3.1027836193253874e-05, "loss": 0.6713, "num_input_tokens_seen": 696073504, "step": 3859 }, { "epoch": 0.4225621938203016, "grad_norm": 1.4104294868836968, "learning_rate": 3.1019490900243716e-05, "loss": 0.7296, "num_input_tokens_seen": 696268832, "step": 3860 }, { "epoch": 0.4226716658912395, "grad_norm": 1.314871280810328, "learning_rate": 3.101114489513929e-05, "loss": 0.8261, "num_input_tokens_seen": 696430784, "step": 3861 }, { "epoch": 0.4227811379621774, "grad_norm": 1.3064779456898048, "learning_rate": 3.100279817892792e-05, "loss": 0.7216, "num_input_tokens_seen": 696604160, "step": 3862 }, { "epoch": 0.4228906100331153, "grad_norm": 1.3049590233694466, "learning_rate": 3.099445075259698e-05, "loss": 0.7855, "num_input_tokens_seen": 696777984, "step": 3863 }, { "epoch": 0.4230000821040532, "grad_norm": 1.3827774129607975, "learning_rate": 3.098610261713399e-05, "loss": 0.7956, "num_input_tokens_seen": 696976448, "step": 3864 }, { "epoch": 0.42310955417499113, "grad_norm": 1.3228366357019483, "learning_rate": 3.0977753773526505e-05, "loss": 0.7307, "num_input_tokens_seen": 697142880, "step": 3865 }, { "epoch": 0.423219026245929, "grad_norm": 1.4476937138361865, "learning_rate": 3.096940422276218e-05, "loss": 0.9007, "num_input_tokens_seen": 697370464, "step": 3866 }, { "epoch": 0.4233284983168669, "grad_norm": 1.2323842957294346, "learning_rate": 3.096105396582874e-05, "loss": 0.6706, "num_input_tokens_seen": 697506880, "step": 3867 }, { "epoch": 0.4234379703878048, "grad_norm": 1.361494888752769, "learning_rate": 3.095270300371401e-05, "loss": 0.8648, "num_input_tokens_seen": 697676672, "step": 3868 }, { "epoch": 0.4235474424587427, "grad_norm": 1.416646377567785, "learning_rate": 3.0944351337405906e-05, "loss": 0.899, "num_input_tokens_seen": 697852064, "step": 3869 }, { "epoch": 0.42365691452968063, "grad_norm": 1.3564654376938017, "learning_rate": 3.09359989678924e-05, "loss": 0.7229, "num_input_tokens_seen": 698024992, "step": 3870 }, { "epoch": 0.4237663866006185, "grad_norm": 1.4151870723845523, "learning_rate": 3.092764589616155e-05, "loss": 0.797, "num_input_tokens_seen": 698227264, "step": 3871 }, { "epoch": 0.4238758586715564, "grad_norm": 1.3073091493124018, "learning_rate": 3.0919292123201524e-05, "loss": 0.7225, "num_input_tokens_seen": 698429312, "step": 3872 }, { "epoch": 0.42398533074249434, "grad_norm": 1.3138804669118107, "learning_rate": 3.0910937650000565e-05, "loss": 0.9548, "num_input_tokens_seen": 698615904, "step": 3873 }, { "epoch": 0.4240948028134322, "grad_norm": 1.3220714612480466, "learning_rate": 3.090258247754698e-05, "loss": 0.7437, "num_input_tokens_seen": 698752992, "step": 3874 }, { "epoch": 0.4242042748843701, "grad_norm": 1.2446880483662026, "learning_rate": 3.0894226606829166e-05, "loss": 0.6731, "num_input_tokens_seen": 698941824, "step": 3875 }, { "epoch": 0.42431374695530805, "grad_norm": 1.3153223391813134, "learning_rate": 3.088587003883562e-05, "loss": 0.783, "num_input_tokens_seen": 699101088, "step": 3876 }, { "epoch": 0.4244232190262459, "grad_norm": 1.3044033540605964, "learning_rate": 3.08775127745549e-05, "loss": 0.6962, "num_input_tokens_seen": 699287680, "step": 3877 }, { "epoch": 0.42453269109718383, "grad_norm": 1.2418716796922766, "learning_rate": 3.086915481497565e-05, "loss": 0.7134, "num_input_tokens_seen": 699472480, "step": 3878 }, { "epoch": 0.42464216316812176, "grad_norm": 1.3093686670409665, "learning_rate": 3.08607961610866e-05, "loss": 0.9097, "num_input_tokens_seen": 699663776, "step": 3879 }, { "epoch": 0.4247516352390596, "grad_norm": 1.1960626380108235, "learning_rate": 3.0852436813876576e-05, "loss": 0.7816, "num_input_tokens_seen": 699862016, "step": 3880 }, { "epoch": 0.42486110730999754, "grad_norm": 1.217117193931393, "learning_rate": 3.084407677433447e-05, "loss": 0.5213, "num_input_tokens_seen": 700039872, "step": 3881 }, { "epoch": 0.42497057938093546, "grad_norm": 1.3225526033406803, "learning_rate": 3.083571604344925e-05, "loss": 0.8626, "num_input_tokens_seen": 700217952, "step": 3882 }, { "epoch": 0.42508005145187333, "grad_norm": 1.3837132510549357, "learning_rate": 3.0827354622209976e-05, "loss": 0.6128, "num_input_tokens_seen": 700375648, "step": 3883 }, { "epoch": 0.42518952352281125, "grad_norm": 1.3587993850085618, "learning_rate": 3.081899251160578e-05, "loss": 0.8852, "num_input_tokens_seen": 700551936, "step": 3884 }, { "epoch": 0.4252989955937491, "grad_norm": 1.3215726899020062, "learning_rate": 3.081062971262591e-05, "loss": 0.9995, "num_input_tokens_seen": 700739200, "step": 3885 }, { "epoch": 0.42540846766468704, "grad_norm": 1.203041519637667, "learning_rate": 3.080226622625964e-05, "loss": 0.765, "num_input_tokens_seen": 700926912, "step": 3886 }, { "epoch": 0.42551793973562496, "grad_norm": 1.1605860880387218, "learning_rate": 3.0793902053496374e-05, "loss": 0.5215, "num_input_tokens_seen": 701108576, "step": 3887 }, { "epoch": 0.4256274118065628, "grad_norm": 1.380093287825461, "learning_rate": 3.0785537195325574e-05, "loss": 0.8893, "num_input_tokens_seen": 701314208, "step": 3888 }, { "epoch": 0.42573688387750075, "grad_norm": 1.1941389918043195, "learning_rate": 3.0777171652736784e-05, "loss": 0.7126, "num_input_tokens_seen": 701495872, "step": 3889 }, { "epoch": 0.42584635594843867, "grad_norm": 1.084369652814626, "learning_rate": 3.076880542671963e-05, "loss": 0.5699, "num_input_tokens_seen": 701705088, "step": 3890 }, { "epoch": 0.42595582801937654, "grad_norm": 1.1709889431829674, "learning_rate": 3.0760438518263826e-05, "loss": 0.7596, "num_input_tokens_seen": 701872416, "step": 3891 }, { "epoch": 0.42606530009031446, "grad_norm": 1.240098840711944, "learning_rate": 3.0752070928359147e-05, "loss": 0.7499, "num_input_tokens_seen": 702043104, "step": 3892 }, { "epoch": 0.4261747721612524, "grad_norm": 1.1884910062289942, "learning_rate": 3.0743702657995475e-05, "loss": 0.7803, "num_input_tokens_seen": 702218272, "step": 3893 }, { "epoch": 0.42628424423219025, "grad_norm": 1.0305728423070255, "learning_rate": 3.0735333708162763e-05, "loss": 0.462, "num_input_tokens_seen": 702406432, "step": 3894 }, { "epoch": 0.42639371630312817, "grad_norm": 1.2433499349366985, "learning_rate": 3.0726964079851037e-05, "loss": 0.568, "num_input_tokens_seen": 702590112, "step": 3895 }, { "epoch": 0.4265031883740661, "grad_norm": 1.292951186138612, "learning_rate": 3.071859377405041e-05, "loss": 0.8496, "num_input_tokens_seen": 702793280, "step": 3896 }, { "epoch": 0.42661266044500396, "grad_norm": 1.30482687748171, "learning_rate": 3.071022279175107e-05, "loss": 0.8331, "num_input_tokens_seen": 702982560, "step": 3897 }, { "epoch": 0.4267221325159419, "grad_norm": 1.17940335925463, "learning_rate": 3.070185113394329e-05, "loss": 0.6637, "num_input_tokens_seen": 703158400, "step": 3898 }, { "epoch": 0.4268316045868798, "grad_norm": 1.1938862488170474, "learning_rate": 3.069347880161741e-05, "loss": 0.5959, "num_input_tokens_seen": 703340288, "step": 3899 }, { "epoch": 0.42694107665781766, "grad_norm": 1.1990373207055325, "learning_rate": 3.068510579576389e-05, "loss": 0.6411, "num_input_tokens_seen": 703516576, "step": 3900 }, { "epoch": 0.4270505487287556, "grad_norm": 1.274046910040627, "learning_rate": 3.067673211737321e-05, "loss": 0.6541, "num_input_tokens_seen": 703659488, "step": 3901 }, { "epoch": 0.42716002079969345, "grad_norm": 1.370556196163567, "learning_rate": 3.066835776743598e-05, "loss": 0.7273, "num_input_tokens_seen": 703854368, "step": 3902 }, { "epoch": 0.4272694928706314, "grad_norm": 1.2683866065003602, "learning_rate": 3.0659982746942864e-05, "loss": 0.5986, "num_input_tokens_seen": 704032672, "step": 3903 }, { "epoch": 0.4273789649415693, "grad_norm": 1.2345633061347028, "learning_rate": 3.065160705688461e-05, "loss": 0.7118, "num_input_tokens_seen": 704210080, "step": 3904 }, { "epoch": 0.42748843701250716, "grad_norm": 1.2798492403952346, "learning_rate": 3.064323069825203e-05, "loss": 0.7934, "num_input_tokens_seen": 704389728, "step": 3905 }, { "epoch": 0.4275979090834451, "grad_norm": 1.2411552794888476, "learning_rate": 3.0634853672036054e-05, "loss": 0.764, "num_input_tokens_seen": 704595360, "step": 3906 }, { "epoch": 0.427707381154383, "grad_norm": 1.2932542560595113, "learning_rate": 3.0626475979227665e-05, "loss": 0.7848, "num_input_tokens_seen": 704781504, "step": 3907 }, { "epoch": 0.42781685322532087, "grad_norm": 1.3517610701736817, "learning_rate": 3.061809762081792e-05, "loss": 0.923, "num_input_tokens_seen": 704993184, "step": 3908 }, { "epoch": 0.4279263252962588, "grad_norm": 1.1514581413031917, "learning_rate": 3.060971859779797e-05, "loss": 0.7028, "num_input_tokens_seen": 705187168, "step": 3909 }, { "epoch": 0.4280357973671967, "grad_norm": 1.1675227407654498, "learning_rate": 3.060133891115903e-05, "loss": 0.8451, "num_input_tokens_seen": 705384960, "step": 3910 }, { "epoch": 0.4281452694381346, "grad_norm": 1.1889117031245584, "learning_rate": 3.059295856189241e-05, "loss": 0.5343, "num_input_tokens_seen": 705570432, "step": 3911 }, { "epoch": 0.4282547415090725, "grad_norm": 1.3304009178910843, "learning_rate": 3.058457755098948e-05, "loss": 0.9712, "num_input_tokens_seen": 705762176, "step": 3912 }, { "epoch": 0.4283642135800104, "grad_norm": 1.2746928761399963, "learning_rate": 3.05761958794417e-05, "loss": 0.5657, "num_input_tokens_seen": 705937120, "step": 3913 }, { "epoch": 0.4284736856509483, "grad_norm": 1.1906548804190689, "learning_rate": 3.056781354824061e-05, "loss": 0.6916, "num_input_tokens_seen": 706093920, "step": 3914 }, { "epoch": 0.4285831577218862, "grad_norm": 1.2578004635063162, "learning_rate": 3.055943055837782e-05, "loss": 0.6702, "num_input_tokens_seen": 706283872, "step": 3915 }, { "epoch": 0.42869262979282413, "grad_norm": 1.3830000324566465, "learning_rate": 3.055104691084502e-05, "loss": 0.7949, "num_input_tokens_seen": 706478752, "step": 3916 }, { "epoch": 0.428802101863762, "grad_norm": 1.3795574692898094, "learning_rate": 3.054266260663399e-05, "loss": 0.7404, "num_input_tokens_seen": 706641152, "step": 3917 }, { "epoch": 0.4289115739346999, "grad_norm": 1.1760592718076537, "learning_rate": 3.0534277646736564e-05, "loss": 0.5111, "num_input_tokens_seen": 706818336, "step": 3918 }, { "epoch": 0.4290210460056378, "grad_norm": 1.3898727010849778, "learning_rate": 3.052589203214467e-05, "loss": 0.7979, "num_input_tokens_seen": 706994624, "step": 3919 }, { "epoch": 0.4291305180765757, "grad_norm": 1.4798766388902178, "learning_rate": 3.0517505763850318e-05, "loss": 0.7122, "num_input_tokens_seen": 707141568, "step": 3920 }, { "epoch": 0.42923999014751363, "grad_norm": 1.3202052326756917, "learning_rate": 3.050911884284558e-05, "loss": 0.5327, "num_input_tokens_seen": 707289632, "step": 3921 }, { "epoch": 0.4293494622184515, "grad_norm": 1.1790054572790416, "learning_rate": 3.050073127012261e-05, "loss": 0.4822, "num_input_tokens_seen": 707471968, "step": 3922 }, { "epoch": 0.4294589342893894, "grad_norm": 1.3675632941262599, "learning_rate": 3.0492343046673654e-05, "loss": 0.7077, "num_input_tokens_seen": 707663936, "step": 3923 }, { "epoch": 0.42956840636032734, "grad_norm": 1.3425297186490632, "learning_rate": 3.0483954173491015e-05, "loss": 0.6574, "num_input_tokens_seen": 707855904, "step": 3924 }, { "epoch": 0.4296778784312652, "grad_norm": 1.3368602042429405, "learning_rate": 3.047556465156708e-05, "loss": 0.6901, "num_input_tokens_seen": 708031744, "step": 3925 }, { "epoch": 0.4297873505022031, "grad_norm": 1.2286517578775606, "learning_rate": 3.04671744818943e-05, "loss": 0.612, "num_input_tokens_seen": 708167264, "step": 3926 }, { "epoch": 0.42989682257314105, "grad_norm": 1.3186673631132684, "learning_rate": 3.045878366546524e-05, "loss": 0.7541, "num_input_tokens_seen": 708370208, "step": 3927 }, { "epoch": 0.4300062946440789, "grad_norm": 1.3961021060191885, "learning_rate": 3.045039220327251e-05, "loss": 0.7001, "num_input_tokens_seen": 708533504, "step": 3928 }, { "epoch": 0.43011576671501683, "grad_norm": 1.3270857840395127, "learning_rate": 3.0442000096308802e-05, "loss": 0.7834, "num_input_tokens_seen": 708739360, "step": 3929 }, { "epoch": 0.43022523878595476, "grad_norm": 1.2565871720926527, "learning_rate": 3.043360734556689e-05, "loss": 0.6669, "num_input_tokens_seen": 708942304, "step": 3930 }, { "epoch": 0.4303347108568926, "grad_norm": 1.2872356069252457, "learning_rate": 3.0425213952039612e-05, "loss": 0.6135, "num_input_tokens_seen": 709099776, "step": 3931 }, { "epoch": 0.43044418292783054, "grad_norm": 1.3332777260072461, "learning_rate": 3.0416819916719895e-05, "loss": 0.8763, "num_input_tokens_seen": 709296672, "step": 3932 }, { "epoch": 0.43055365499876846, "grad_norm": 1.1299411029995607, "learning_rate": 3.040842524060073e-05, "loss": 0.5249, "num_input_tokens_seen": 709486176, "step": 3933 }, { "epoch": 0.43066312706970633, "grad_norm": 1.2767504792967899, "learning_rate": 3.0400029924675206e-05, "loss": 0.6727, "num_input_tokens_seen": 709683072, "step": 3934 }, { "epoch": 0.43077259914064425, "grad_norm": 1.1416916236562815, "learning_rate": 3.0391633969936468e-05, "loss": 0.5945, "num_input_tokens_seen": 709862048, "step": 3935 }, { "epoch": 0.4308820712115821, "grad_norm": 1.279601129358216, "learning_rate": 3.0383237377377734e-05, "loss": 0.6451, "num_input_tokens_seen": 710036320, "step": 3936 }, { "epoch": 0.43099154328252004, "grad_norm": 1.3504628023958702, "learning_rate": 3.03748401479923e-05, "loss": 0.8141, "num_input_tokens_seen": 710179232, "step": 3937 }, { "epoch": 0.43110101535345796, "grad_norm": 1.2109207152157044, "learning_rate": 3.0366442282773567e-05, "loss": 0.9026, "num_input_tokens_seen": 710389568, "step": 3938 }, { "epoch": 0.4312104874243958, "grad_norm": 1.2064863536709556, "learning_rate": 3.035804378271496e-05, "loss": 0.5485, "num_input_tokens_seen": 710545024, "step": 3939 }, { "epoch": 0.43131995949533375, "grad_norm": 1.2818808514458224, "learning_rate": 3.034964464881002e-05, "loss": 0.8002, "num_input_tokens_seen": 710726688, "step": 3940 }, { "epoch": 0.43142943156627167, "grad_norm": 1.2918622217529123, "learning_rate": 3.0341244882052346e-05, "loss": 0.656, "num_input_tokens_seen": 710879904, "step": 3941 }, { "epoch": 0.43153890363720954, "grad_norm": 1.2882012929445863, "learning_rate": 3.0332844483435614e-05, "loss": 0.7227, "num_input_tokens_seen": 711061344, "step": 3942 }, { "epoch": 0.43164837570814746, "grad_norm": 1.4125377379920974, "learning_rate": 3.0324443453953578e-05, "loss": 0.977, "num_input_tokens_seen": 711240096, "step": 3943 }, { "epoch": 0.4317578477790854, "grad_norm": 1.4793664054153581, "learning_rate": 3.0316041794600054e-05, "loss": 0.9692, "num_input_tokens_seen": 711425120, "step": 3944 }, { "epoch": 0.43186731985002325, "grad_norm": 1.2020494186315547, "learning_rate": 3.030763950636895e-05, "loss": 0.6132, "num_input_tokens_seen": 711595584, "step": 3945 }, { "epoch": 0.43197679192096117, "grad_norm": 1.3662041751893268, "learning_rate": 3.0299236590254236e-05, "loss": 0.8728, "num_input_tokens_seen": 711773216, "step": 3946 }, { "epoch": 0.4320862639918991, "grad_norm": 1.3623970902802178, "learning_rate": 3.0290833047249966e-05, "loss": 0.6938, "num_input_tokens_seen": 711964288, "step": 3947 }, { "epoch": 0.43219573606283695, "grad_norm": 1.0566541632104058, "learning_rate": 3.0282428878350256e-05, "loss": 0.4894, "num_input_tokens_seen": 712151776, "step": 3948 }, { "epoch": 0.4323052081337749, "grad_norm": 1.1303285924447781, "learning_rate": 3.0274024084549312e-05, "loss": 0.6961, "num_input_tokens_seen": 712328736, "step": 3949 }, { "epoch": 0.4324146802047128, "grad_norm": 1.3231160121318741, "learning_rate": 3.0265618666841405e-05, "loss": 0.8065, "num_input_tokens_seen": 712503008, "step": 3950 }, { "epoch": 0.43252415227565066, "grad_norm": 1.1544412951803307, "learning_rate": 3.0257212626220872e-05, "loss": 0.6454, "num_input_tokens_seen": 712692512, "step": 3951 }, { "epoch": 0.4326336243465886, "grad_norm": 1.1249527921308566, "learning_rate": 3.0248805963682135e-05, "loss": 0.5226, "num_input_tokens_seen": 712815488, "step": 3952 }, { "epoch": 0.43274309641752645, "grad_norm": 1.1741554451309884, "learning_rate": 3.0240398680219685e-05, "loss": 0.7462, "num_input_tokens_seen": 713035232, "step": 3953 }, { "epoch": 0.4328525684884644, "grad_norm": 1.2578686529858225, "learning_rate": 3.0231990776828096e-05, "loss": 0.5986, "num_input_tokens_seen": 713219808, "step": 3954 }, { "epoch": 0.4329620405594023, "grad_norm": 1.224789206215821, "learning_rate": 3.0223582254501993e-05, "loss": 0.5568, "num_input_tokens_seen": 713352192, "step": 3955 }, { "epoch": 0.43307151263034016, "grad_norm": 1.1844744537387681, "learning_rate": 3.02151731142361e-05, "loss": 0.704, "num_input_tokens_seen": 713520416, "step": 3956 }, { "epoch": 0.4331809847012781, "grad_norm": 1.2376359286937793, "learning_rate": 3.0206763357025196e-05, "loss": 0.7014, "num_input_tokens_seen": 713694240, "step": 3957 }, { "epoch": 0.433290456772216, "grad_norm": 1.334230190879838, "learning_rate": 3.0198352983864138e-05, "loss": 0.8019, "num_input_tokens_seen": 713876576, "step": 3958 }, { "epoch": 0.43339992884315387, "grad_norm": 1.4056140733347608, "learning_rate": 3.0189941995747863e-05, "loss": 0.7661, "num_input_tokens_seen": 714059360, "step": 3959 }, { "epoch": 0.4335094009140918, "grad_norm": 1.1768359652957425, "learning_rate": 3.0181530393671364e-05, "loss": 0.7001, "num_input_tokens_seen": 714263648, "step": 3960 }, { "epoch": 0.4336188729850297, "grad_norm": 1.2974557150925587, "learning_rate": 3.0173118178629728e-05, "loss": 0.7705, "num_input_tokens_seen": 714441952, "step": 3961 }, { "epoch": 0.4337283450559676, "grad_norm": 1.2088399625198722, "learning_rate": 3.0164705351618104e-05, "loss": 0.8372, "num_input_tokens_seen": 714646688, "step": 3962 }, { "epoch": 0.4338378171269055, "grad_norm": 1.2808441356649327, "learning_rate": 3.0156291913631712e-05, "loss": 0.9654, "num_input_tokens_seen": 714853216, "step": 3963 }, { "epoch": 0.4339472891978434, "grad_norm": 1.3362868895046496, "learning_rate": 3.0147877865665843e-05, "loss": 0.8553, "num_input_tokens_seen": 715050336, "step": 3964 }, { "epoch": 0.4340567612687813, "grad_norm": 1.0807315994909397, "learning_rate": 3.013946320871586e-05, "loss": 0.5535, "num_input_tokens_seen": 715235808, "step": 3965 }, { "epoch": 0.4341662333397192, "grad_norm": 1.261680248133646, "learning_rate": 3.0131047943777207e-05, "loss": 0.6924, "num_input_tokens_seen": 715426656, "step": 3966 }, { "epoch": 0.43427570541065713, "grad_norm": 1.173841479538088, "learning_rate": 3.012263207184539e-05, "loss": 0.6858, "num_input_tokens_seen": 715608320, "step": 3967 }, { "epoch": 0.434385177481595, "grad_norm": 1.1605100252868128, "learning_rate": 3.011421559391599e-05, "loss": 0.5542, "num_input_tokens_seen": 715787744, "step": 3968 }, { "epoch": 0.4344946495525329, "grad_norm": 1.3686608794997832, "learning_rate": 3.010579851098466e-05, "loss": 0.8148, "num_input_tokens_seen": 715988448, "step": 3969 }, { "epoch": 0.4346041216234708, "grad_norm": 1.1842946410282318, "learning_rate": 3.0097380824047132e-05, "loss": 0.5349, "num_input_tokens_seen": 716166528, "step": 3970 }, { "epoch": 0.4347135936944087, "grad_norm": 1.1873234065406781, "learning_rate": 3.0088962534099195e-05, "loss": 0.614, "num_input_tokens_seen": 716331840, "step": 3971 }, { "epoch": 0.43482306576534663, "grad_norm": 1.1611449784009678, "learning_rate": 3.0080543642136723e-05, "loss": 0.5818, "num_input_tokens_seen": 716518432, "step": 3972 }, { "epoch": 0.4349325378362845, "grad_norm": 1.357818984530892, "learning_rate": 3.007212414915565e-05, "loss": 0.6332, "num_input_tokens_seen": 716684416, "step": 3973 }, { "epoch": 0.4350420099072224, "grad_norm": 1.36567949234335, "learning_rate": 3.0063704056151975e-05, "loss": 0.7219, "num_input_tokens_seen": 716889824, "step": 3974 }, { "epoch": 0.43515148197816034, "grad_norm": 1.3164042577530362, "learning_rate": 3.00552833641218e-05, "loss": 0.6136, "num_input_tokens_seen": 717082016, "step": 3975 }, { "epoch": 0.4352609540490982, "grad_norm": 1.2027071933332352, "learning_rate": 3.0046862074061266e-05, "loss": 0.5752, "num_input_tokens_seen": 717262560, "step": 3976 }, { "epoch": 0.4353704261200361, "grad_norm": 1.3563478021472626, "learning_rate": 3.00384401869666e-05, "loss": 0.734, "num_input_tokens_seen": 717441984, "step": 3977 }, { "epoch": 0.43547989819097405, "grad_norm": 1.2492539563684144, "learning_rate": 3.003001770383409e-05, "loss": 0.5059, "num_input_tokens_seen": 717621856, "step": 3978 }, { "epoch": 0.4355893702619119, "grad_norm": 1.279508383934754, "learning_rate": 3.0021594625660095e-05, "loss": 0.7149, "num_input_tokens_seen": 717817408, "step": 3979 }, { "epoch": 0.43569884233284983, "grad_norm": 1.3201148884077802, "learning_rate": 3.0013170953441062e-05, "loss": 0.8263, "num_input_tokens_seen": 717993024, "step": 3980 }, { "epoch": 0.43580831440378776, "grad_norm": 1.3740046973668723, "learning_rate": 3.000474668817348e-05, "loss": 0.7118, "num_input_tokens_seen": 718182752, "step": 3981 }, { "epoch": 0.4359177864747256, "grad_norm": 1.2244005040330668, "learning_rate": 2.999632183085394e-05, "loss": 0.5441, "num_input_tokens_seen": 718364640, "step": 3982 }, { "epoch": 0.43602725854566354, "grad_norm": 1.161756958939208, "learning_rate": 2.998789638247908e-05, "loss": 0.659, "num_input_tokens_seen": 718579904, "step": 3983 }, { "epoch": 0.43613673061660146, "grad_norm": 1.2066480898788294, "learning_rate": 2.9979470344045614e-05, "loss": 0.5388, "num_input_tokens_seen": 718762912, "step": 3984 }, { "epoch": 0.43624620268753933, "grad_norm": 1.1850024109023152, "learning_rate": 2.9971043716550316e-05, "loss": 0.6563, "num_input_tokens_seen": 718926432, "step": 3985 }, { "epoch": 0.43635567475847725, "grad_norm": 1.1508672533185778, "learning_rate": 2.9962616500990058e-05, "loss": 0.7023, "num_input_tokens_seen": 719123552, "step": 3986 }, { "epoch": 0.4364651468294151, "grad_norm": 1.2562726943969522, "learning_rate": 2.995418869836175e-05, "loss": 0.8936, "num_input_tokens_seen": 719313952, "step": 3987 }, { "epoch": 0.43657461890035304, "grad_norm": 1.2810190969677069, "learning_rate": 2.9945760309662395e-05, "loss": 0.7088, "num_input_tokens_seen": 719485760, "step": 3988 }, { "epoch": 0.43668409097129096, "grad_norm": 1.3246780544195325, "learning_rate": 2.9937331335889045e-05, "loss": 0.8447, "num_input_tokens_seen": 719684000, "step": 3989 }, { "epoch": 0.4367935630422288, "grad_norm": 1.2332317514513567, "learning_rate": 2.9928901778038837e-05, "loss": 0.752, "num_input_tokens_seen": 719898368, "step": 3990 }, { "epoch": 0.43690303511316675, "grad_norm": 1.259681734352478, "learning_rate": 2.9920471637108977e-05, "loss": 0.8068, "num_input_tokens_seen": 720093472, "step": 3991 }, { "epoch": 0.43701250718410467, "grad_norm": 1.198716397018349, "learning_rate": 2.9912040914096724e-05, "loss": 0.9269, "num_input_tokens_seen": 720280064, "step": 3992 }, { "epoch": 0.43712197925504254, "grad_norm": 1.2925666575702741, "learning_rate": 2.990360960999942e-05, "loss": 0.7803, "num_input_tokens_seen": 720468896, "step": 3993 }, { "epoch": 0.43723145132598046, "grad_norm": 1.3139772276363884, "learning_rate": 2.989517772581447e-05, "loss": 0.7404, "num_input_tokens_seen": 720658400, "step": 3994 }, { "epoch": 0.4373409233969184, "grad_norm": 1.2955700847141107, "learning_rate": 2.9886745262539362e-05, "loss": 1.0339, "num_input_tokens_seen": 720862016, "step": 3995 }, { "epoch": 0.43745039546785625, "grad_norm": 1.2784724949998696, "learning_rate": 2.9878312221171627e-05, "loss": 0.9618, "num_input_tokens_seen": 721046592, "step": 3996 }, { "epoch": 0.43755986753879417, "grad_norm": 1.3215662855020585, "learning_rate": 2.9869878602708885e-05, "loss": 0.6566, "num_input_tokens_seen": 721203168, "step": 3997 }, { "epoch": 0.4376693396097321, "grad_norm": 1.214386685364751, "learning_rate": 2.9861444408148815e-05, "loss": 0.6509, "num_input_tokens_seen": 721359296, "step": 3998 }, { "epoch": 0.43777881168066995, "grad_norm": 1.4381156929706345, "learning_rate": 2.985300963848916e-05, "loss": 0.6289, "num_input_tokens_seen": 721518560, "step": 3999 }, { "epoch": 0.4378882837516079, "grad_norm": 1.3830991113914297, "learning_rate": 2.984457429472774e-05, "loss": 0.8155, "num_input_tokens_seen": 721687008, "step": 4000 }, { "epoch": 0.4379977558225458, "grad_norm": 1.193965177465918, "learning_rate": 2.9836138377862442e-05, "loss": 0.5141, "num_input_tokens_seen": 721846048, "step": 4001 }, { "epoch": 0.43810722789348366, "grad_norm": 1.1116826408778882, "learning_rate": 2.9827701888891223e-05, "loss": 0.5326, "num_input_tokens_seen": 722020096, "step": 4002 }, { "epoch": 0.4382166999644216, "grad_norm": 1.4322946259439127, "learning_rate": 2.98192648288121e-05, "loss": 0.6692, "num_input_tokens_seen": 722201984, "step": 4003 }, { "epoch": 0.43832617203535945, "grad_norm": 1.4367129617712386, "learning_rate": 2.9810827198623158e-05, "loss": 0.7519, "num_input_tokens_seen": 722371776, "step": 4004 }, { "epoch": 0.4384356441062974, "grad_norm": 1.3566686942033044, "learning_rate": 2.980238899932256e-05, "loss": 0.6478, "num_input_tokens_seen": 722530368, "step": 4005 }, { "epoch": 0.4385451161772353, "grad_norm": 1.39612349535608, "learning_rate": 2.9793950231908523e-05, "loss": 0.8284, "num_input_tokens_seen": 722701728, "step": 4006 }, { "epoch": 0.43865458824817316, "grad_norm": 1.2508975144722896, "learning_rate": 2.9785510897379337e-05, "loss": 0.7222, "num_input_tokens_seen": 722875776, "step": 4007 }, { "epoch": 0.4387640603191111, "grad_norm": 1.251478989156913, "learning_rate": 2.9777070996733354e-05, "loss": 0.8009, "num_input_tokens_seen": 723069312, "step": 4008 }, { "epoch": 0.438873532390049, "grad_norm": 1.3728280686699008, "learning_rate": 2.976863053096901e-05, "loss": 0.7669, "num_input_tokens_seen": 723256128, "step": 4009 }, { "epoch": 0.43898300446098687, "grad_norm": 1.3805724281535345, "learning_rate": 2.976018950108479e-05, "loss": 0.9514, "num_input_tokens_seen": 723463104, "step": 4010 }, { "epoch": 0.4390924765319248, "grad_norm": 1.2329994939786022, "learning_rate": 2.9751747908079246e-05, "loss": 0.7664, "num_input_tokens_seen": 723652832, "step": 4011 }, { "epoch": 0.4392019486028627, "grad_norm": 1.3060977646307015, "learning_rate": 2.9743305752951016e-05, "loss": 0.8466, "num_input_tokens_seen": 723823744, "step": 4012 }, { "epoch": 0.4393114206738006, "grad_norm": 1.2208550686686146, "learning_rate": 2.9734863036698784e-05, "loss": 0.6108, "num_input_tokens_seen": 724015040, "step": 4013 }, { "epoch": 0.4394208927447385, "grad_norm": 1.1856249511750914, "learning_rate": 2.97264197603213e-05, "loss": 0.6806, "num_input_tokens_seen": 724216192, "step": 4014 }, { "epoch": 0.4395303648156764, "grad_norm": 1.2287898644532234, "learning_rate": 2.97179759248174e-05, "loss": 0.8554, "num_input_tokens_seen": 724410176, "step": 4015 }, { "epoch": 0.4396398368866143, "grad_norm": 1.2653369340259168, "learning_rate": 2.9709531531185964e-05, "loss": 0.7449, "num_input_tokens_seen": 724615136, "step": 4016 }, { "epoch": 0.4397493089575522, "grad_norm": 1.2425827741011295, "learning_rate": 2.9701086580425954e-05, "loss": 0.7408, "num_input_tokens_seen": 724784704, "step": 4017 }, { "epoch": 0.43985878102849013, "grad_norm": 1.345311787568545, "learning_rate": 2.969264107353638e-05, "loss": 0.6664, "num_input_tokens_seen": 724959200, "step": 4018 }, { "epoch": 0.439968253099428, "grad_norm": 1.1464721055162252, "learning_rate": 2.9684195011516347e-05, "loss": 0.5817, "num_input_tokens_seen": 725153408, "step": 4019 }, { "epoch": 0.4400777251703659, "grad_norm": 1.3688614978404794, "learning_rate": 2.9675748395365e-05, "loss": 0.7197, "num_input_tokens_seen": 725315584, "step": 4020 }, { "epoch": 0.4401871972413038, "grad_norm": 1.3616624200039544, "learning_rate": 2.9667301226081546e-05, "loss": 0.6023, "num_input_tokens_seen": 725476864, "step": 4021 }, { "epoch": 0.4402966693122417, "grad_norm": 1.224400601394416, "learning_rate": 2.9658853504665286e-05, "loss": 0.5936, "num_input_tokens_seen": 725659648, "step": 4022 }, { "epoch": 0.44040614138317963, "grad_norm": 1.238125667966598, "learning_rate": 2.965040523211556e-05, "loss": 0.5967, "num_input_tokens_seen": 725884544, "step": 4023 }, { "epoch": 0.4405156134541175, "grad_norm": 1.3325804979153708, "learning_rate": 2.964195640943178e-05, "loss": 0.7454, "num_input_tokens_seen": 726042016, "step": 4024 }, { "epoch": 0.4406250855250554, "grad_norm": 1.2557793053335122, "learning_rate": 2.9633507037613446e-05, "loss": 0.8161, "num_input_tokens_seen": 726232416, "step": 4025 }, { "epoch": 0.44073455759599334, "grad_norm": 1.4501943260728969, "learning_rate": 2.9625057117660077e-05, "loss": 0.7472, "num_input_tokens_seen": 726436480, "step": 4026 }, { "epoch": 0.4408440296669312, "grad_norm": 1.324021046151403, "learning_rate": 2.9616606650571292e-05, "loss": 0.659, "num_input_tokens_seen": 726621504, "step": 4027 }, { "epoch": 0.4409535017378691, "grad_norm": 1.284470581062292, "learning_rate": 2.960815563734677e-05, "loss": 0.6183, "num_input_tokens_seen": 726790624, "step": 4028 }, { "epoch": 0.44106297380880705, "grad_norm": 1.3700487985760712, "learning_rate": 2.959970407898624e-05, "loss": 0.7642, "num_input_tokens_seen": 726954816, "step": 4029 }, { "epoch": 0.4411724458797449, "grad_norm": 1.2158911994583594, "learning_rate": 2.9591251976489514e-05, "loss": 0.6425, "num_input_tokens_seen": 727126176, "step": 4030 }, { "epoch": 0.44128191795068283, "grad_norm": 1.2297841423089255, "learning_rate": 2.9582799330856458e-05, "loss": 0.53, "num_input_tokens_seen": 727312544, "step": 4031 }, { "epoch": 0.44139139002162076, "grad_norm": 1.2446239257329734, "learning_rate": 2.9574346143086994e-05, "loss": 0.6328, "num_input_tokens_seen": 727469792, "step": 4032 }, { "epoch": 0.4415008620925586, "grad_norm": 1.4211209152502537, "learning_rate": 2.9565892414181133e-05, "loss": 0.8226, "num_input_tokens_seen": 727636224, "step": 4033 }, { "epoch": 0.44161033416349654, "grad_norm": 1.3541143505065374, "learning_rate": 2.9557438145138933e-05, "loss": 0.7232, "num_input_tokens_seen": 727848128, "step": 4034 }, { "epoch": 0.44171980623443446, "grad_norm": 1.303311244544896, "learning_rate": 2.9548983336960502e-05, "loss": 0.601, "num_input_tokens_seen": 728020608, "step": 4035 }, { "epoch": 0.44182927830537233, "grad_norm": 1.3180792164708213, "learning_rate": 2.9540527990646045e-05, "loss": 0.7301, "num_input_tokens_seen": 728214592, "step": 4036 }, { "epoch": 0.44193875037631025, "grad_norm": 1.3219207809853306, "learning_rate": 2.953207210719581e-05, "loss": 0.7351, "num_input_tokens_seen": 728406560, "step": 4037 }, { "epoch": 0.4420482224472481, "grad_norm": 1.4690681275318984, "learning_rate": 2.9523615687610102e-05, "loss": 0.8148, "num_input_tokens_seen": 728570080, "step": 4038 }, { "epoch": 0.44215769451818604, "grad_norm": 1.4644535613142022, "learning_rate": 2.9515158732889305e-05, "loss": 0.7182, "num_input_tokens_seen": 728728672, "step": 4039 }, { "epoch": 0.44226716658912396, "grad_norm": 1.2941687970329152, "learning_rate": 2.9506701244033864e-05, "loss": 0.6441, "num_input_tokens_seen": 728919296, "step": 4040 }, { "epoch": 0.4423766386600618, "grad_norm": 1.0650933536394158, "learning_rate": 2.9498243222044282e-05, "loss": 0.6377, "num_input_tokens_seen": 729089312, "step": 4041 }, { "epoch": 0.44248611073099975, "grad_norm": 1.1531474785823537, "learning_rate": 2.9489784667921122e-05, "loss": 0.619, "num_input_tokens_seen": 729277696, "step": 4042 }, { "epoch": 0.44259558280193767, "grad_norm": 1.358073345804802, "learning_rate": 2.9481325582665013e-05, "loss": 0.7955, "num_input_tokens_seen": 729449280, "step": 4043 }, { "epoch": 0.44270505487287554, "grad_norm": 1.0597346165272348, "learning_rate": 2.9472865967276668e-05, "loss": 0.5415, "num_input_tokens_seen": 729620416, "step": 4044 }, { "epoch": 0.44281452694381346, "grad_norm": 1.3720617979606806, "learning_rate": 2.9464405822756823e-05, "loss": 0.8005, "num_input_tokens_seen": 729805216, "step": 4045 }, { "epoch": 0.4429239990147514, "grad_norm": 1.2944686349610361, "learning_rate": 2.9455945150106314e-05, "loss": 1.1538, "num_input_tokens_seen": 730016896, "step": 4046 }, { "epoch": 0.44303347108568925, "grad_norm": 1.2469644404482803, "learning_rate": 2.9447483950326e-05, "loss": 0.7595, "num_input_tokens_seen": 730171456, "step": 4047 }, { "epoch": 0.44314294315662717, "grad_norm": 1.2260323611899973, "learning_rate": 2.9439022224416833e-05, "loss": 0.7009, "num_input_tokens_seen": 730342144, "step": 4048 }, { "epoch": 0.4432524152275651, "grad_norm": 1.2468246660135682, "learning_rate": 2.9430559973379834e-05, "loss": 0.5701, "num_input_tokens_seen": 730490656, "step": 4049 }, { "epoch": 0.44336188729850295, "grad_norm": 1.2386600143745365, "learning_rate": 2.942209719821606e-05, "loss": 0.642, "num_input_tokens_seen": 730642304, "step": 4050 }, { "epoch": 0.4434713593694409, "grad_norm": 1.1608298045093568, "learning_rate": 2.9413633899926634e-05, "loss": 0.6884, "num_input_tokens_seen": 730853312, "step": 4051 }, { "epoch": 0.4435808314403788, "grad_norm": 1.2473906939672093, "learning_rate": 2.940517007951276e-05, "loss": 0.619, "num_input_tokens_seen": 730983008, "step": 4052 }, { "epoch": 0.44369030351131666, "grad_norm": 1.187142217846443, "learning_rate": 2.9396705737975683e-05, "loss": 0.6903, "num_input_tokens_seen": 731190432, "step": 4053 }, { "epoch": 0.4437997755822546, "grad_norm": 1.1946770967573885, "learning_rate": 2.9388240876316727e-05, "loss": 0.4793, "num_input_tokens_seen": 731336480, "step": 4054 }, { "epoch": 0.4439092476531925, "grad_norm": 1.3235800674194196, "learning_rate": 2.9379775495537254e-05, "loss": 0.8048, "num_input_tokens_seen": 731510528, "step": 4055 }, { "epoch": 0.4440187197241304, "grad_norm": 1.165554447442243, "learning_rate": 2.9371309596638725e-05, "loss": 0.5982, "num_input_tokens_seen": 731703392, "step": 4056 }, { "epoch": 0.4441281917950683, "grad_norm": 1.2320602055583747, "learning_rate": 2.9362843180622624e-05, "loss": 0.9146, "num_input_tokens_seen": 731908576, "step": 4057 }, { "epoch": 0.44423766386600616, "grad_norm": 1.25465237769045, "learning_rate": 2.935437624849051e-05, "loss": 0.5755, "num_input_tokens_seen": 732071200, "step": 4058 }, { "epoch": 0.4443471359369441, "grad_norm": 1.182891234343626, "learning_rate": 2.9345908801244015e-05, "loss": 0.5786, "num_input_tokens_seen": 732257120, "step": 4059 }, { "epoch": 0.444456608007882, "grad_norm": 1.1735369278274679, "learning_rate": 2.9337440839884817e-05, "loss": 0.5651, "num_input_tokens_seen": 732438336, "step": 4060 }, { "epoch": 0.44456608007881987, "grad_norm": 1.2259250612427666, "learning_rate": 2.932897236541466e-05, "loss": 0.7945, "num_input_tokens_seen": 732654048, "step": 4061 }, { "epoch": 0.4446755521497578, "grad_norm": 1.2558756541002403, "learning_rate": 2.932050337883534e-05, "loss": 0.5544, "num_input_tokens_seen": 732783296, "step": 4062 }, { "epoch": 0.4447850242206957, "grad_norm": 1.4274767683714198, "learning_rate": 2.9312033881148738e-05, "loss": 0.9232, "num_input_tokens_seen": 732972352, "step": 4063 }, { "epoch": 0.4448944962916336, "grad_norm": 1.223572603953184, "learning_rate": 2.9303563873356767e-05, "loss": 0.6307, "num_input_tokens_seen": 733154240, "step": 4064 }, { "epoch": 0.4450039683625715, "grad_norm": 1.3803650916947023, "learning_rate": 2.9295093356461416e-05, "loss": 0.9087, "num_input_tokens_seen": 733333664, "step": 4065 }, { "epoch": 0.4451134404335094, "grad_norm": 1.4661535485326072, "learning_rate": 2.9286622331464736e-05, "loss": 0.8427, "num_input_tokens_seen": 733531456, "step": 4066 }, { "epoch": 0.4452229125044473, "grad_norm": 1.2373655766391212, "learning_rate": 2.9278150799368825e-05, "loss": 0.7133, "num_input_tokens_seen": 733751200, "step": 4067 }, { "epoch": 0.4453323845753852, "grad_norm": 1.1547552189565533, "learning_rate": 2.9269678761175857e-05, "loss": 0.7294, "num_input_tokens_seen": 733933088, "step": 4068 }, { "epoch": 0.44544185664632313, "grad_norm": 1.1725641385980905, "learning_rate": 2.9261206217888048e-05, "loss": 0.7485, "num_input_tokens_seen": 734102656, "step": 4069 }, { "epoch": 0.445551328717261, "grad_norm": 1.278460000305677, "learning_rate": 2.925273317050769e-05, "loss": 0.7672, "num_input_tokens_seen": 734280960, "step": 4070 }, { "epoch": 0.4456608007881989, "grad_norm": 1.2618105356000002, "learning_rate": 2.9244259620037135e-05, "loss": 0.772, "num_input_tokens_seen": 734458592, "step": 4071 }, { "epoch": 0.44577027285913684, "grad_norm": 1.2347700718115069, "learning_rate": 2.9235785567478774e-05, "loss": 0.7832, "num_input_tokens_seen": 734637792, "step": 4072 }, { "epoch": 0.4458797449300747, "grad_norm": 1.2862165071892402, "learning_rate": 2.9227311013835084e-05, "loss": 0.7481, "num_input_tokens_seen": 734814304, "step": 4073 }, { "epoch": 0.44598921700101263, "grad_norm": 1.2608668991801315, "learning_rate": 2.921883596010857e-05, "loss": 0.5067, "num_input_tokens_seen": 734980288, "step": 4074 }, { "epoch": 0.4460986890719505, "grad_norm": 1.2633542069863783, "learning_rate": 2.921036040730184e-05, "loss": 0.6887, "num_input_tokens_seen": 735175392, "step": 4075 }, { "epoch": 0.4462081611428884, "grad_norm": 1.2768462259165945, "learning_rate": 2.9201884356417514e-05, "loss": 0.6144, "num_input_tokens_seen": 735339584, "step": 4076 }, { "epoch": 0.44631763321382634, "grad_norm": 1.4016273389977245, "learning_rate": 2.9193407808458308e-05, "loss": 0.7013, "num_input_tokens_seen": 735512960, "step": 4077 }, { "epoch": 0.4464271052847642, "grad_norm": 1.471532661166251, "learning_rate": 2.918493076442697e-05, "loss": 0.8481, "num_input_tokens_seen": 735680288, "step": 4078 }, { "epoch": 0.4465365773557021, "grad_norm": 1.1817536607382582, "learning_rate": 2.9176453225326328e-05, "loss": 0.6095, "num_input_tokens_seen": 735845152, "step": 4079 }, { "epoch": 0.44664604942664005, "grad_norm": 1.3994284587154222, "learning_rate": 2.9167975192159247e-05, "loss": 0.7467, "num_input_tokens_seen": 736051232, "step": 4080 }, { "epoch": 0.4467555214975779, "grad_norm": 1.3940830739360235, "learning_rate": 2.9159496665928677e-05, "loss": 0.9203, "num_input_tokens_seen": 736245888, "step": 4081 }, { "epoch": 0.44686499356851583, "grad_norm": 1.5489792816175338, "learning_rate": 2.915101764763759e-05, "loss": 0.8306, "num_input_tokens_seen": 736385216, "step": 4082 }, { "epoch": 0.44697446563945376, "grad_norm": 1.2287368939031584, "learning_rate": 2.914253813828906e-05, "loss": 0.7493, "num_input_tokens_seen": 736585248, "step": 4083 }, { "epoch": 0.4470839377103916, "grad_norm": 1.22745937917278, "learning_rate": 2.9134058138886188e-05, "loss": 0.7806, "num_input_tokens_seen": 736779008, "step": 4084 }, { "epoch": 0.44719340978132954, "grad_norm": 1.1735412450205331, "learning_rate": 2.9125577650432133e-05, "loss": 0.5471, "num_input_tokens_seen": 736956416, "step": 4085 }, { "epoch": 0.44730288185226746, "grad_norm": 1.3759993042971825, "learning_rate": 2.9117096673930138e-05, "loss": 0.7088, "num_input_tokens_seen": 737128672, "step": 4086 }, { "epoch": 0.44741235392320533, "grad_norm": 1.261927489974003, "learning_rate": 2.910861521038347e-05, "loss": 0.6709, "num_input_tokens_seen": 737310560, "step": 4087 }, { "epoch": 0.44752182599414325, "grad_norm": 1.288659928779005, "learning_rate": 2.9100133260795488e-05, "loss": 0.6609, "num_input_tokens_seen": 737491776, "step": 4088 }, { "epoch": 0.4476312980650812, "grad_norm": 1.2724677764170114, "learning_rate": 2.9091650826169565e-05, "loss": 0.5555, "num_input_tokens_seen": 737637600, "step": 4089 }, { "epoch": 0.44774077013601904, "grad_norm": 1.2536590427558039, "learning_rate": 2.9083167907509178e-05, "loss": 0.6714, "num_input_tokens_seen": 737818592, "step": 4090 }, { "epoch": 0.44785024220695696, "grad_norm": 1.288997440430295, "learning_rate": 2.9074684505817835e-05, "loss": 0.7428, "num_input_tokens_seen": 738006528, "step": 4091 }, { "epoch": 0.4479597142778948, "grad_norm": 1.158843508324918, "learning_rate": 2.9066200622099106e-05, "loss": 0.8535, "num_input_tokens_seen": 738213728, "step": 4092 }, { "epoch": 0.44806918634883275, "grad_norm": 1.2840817026098286, "learning_rate": 2.9057716257356614e-05, "loss": 0.7829, "num_input_tokens_seen": 738416896, "step": 4093 }, { "epoch": 0.44817865841977067, "grad_norm": 1.2356151194121927, "learning_rate": 2.9049231412594046e-05, "loss": 0.6918, "num_input_tokens_seen": 738621408, "step": 4094 }, { "epoch": 0.44828813049070854, "grad_norm": 1.2169699752413936, "learning_rate": 2.9040746088815142e-05, "loss": 0.638, "num_input_tokens_seen": 738798368, "step": 4095 }, { "epoch": 0.44839760256164646, "grad_norm": 1.4399932303534435, "learning_rate": 2.9032260287023698e-05, "loss": 0.8713, "num_input_tokens_seen": 738984960, "step": 4096 }, { "epoch": 0.4485070746325844, "grad_norm": 1.160369236979566, "learning_rate": 2.902377400822357e-05, "loss": 0.7486, "num_input_tokens_seen": 739185216, "step": 4097 }, { "epoch": 0.44861654670352225, "grad_norm": 1.4759522645410927, "learning_rate": 2.9015287253418672e-05, "loss": 0.7816, "num_input_tokens_seen": 739343360, "step": 4098 }, { "epoch": 0.44872601877446017, "grad_norm": 1.398811162543762, "learning_rate": 2.900680002361297e-05, "loss": 0.6913, "num_input_tokens_seen": 739489856, "step": 4099 }, { "epoch": 0.4488354908453981, "grad_norm": 1.2779772292990483, "learning_rate": 2.8998312319810482e-05, "loss": 0.5013, "num_input_tokens_seen": 739663008, "step": 4100 }, { "epoch": 0.44894496291633595, "grad_norm": 1.1950622994850828, "learning_rate": 2.8989824143015286e-05, "loss": 0.7365, "num_input_tokens_seen": 739844672, "step": 4101 }, { "epoch": 0.4490544349872739, "grad_norm": 1.1050409358628106, "learning_rate": 2.8981335494231533e-05, "loss": 0.7275, "num_input_tokens_seen": 740044256, "step": 4102 }, { "epoch": 0.4491639070582118, "grad_norm": 1.3463005434003565, "learning_rate": 2.8972846374463387e-05, "loss": 0.899, "num_input_tokens_seen": 740218976, "step": 4103 }, { "epoch": 0.44927337912914966, "grad_norm": 1.2114171708587844, "learning_rate": 2.896435678471512e-05, "loss": 0.6348, "num_input_tokens_seen": 740425056, "step": 4104 }, { "epoch": 0.4493828512000876, "grad_norm": 1.1941238676672963, "learning_rate": 2.895586672599102e-05, "loss": 0.7896, "num_input_tokens_seen": 740608288, "step": 4105 }, { "epoch": 0.4494923232710255, "grad_norm": 1.338892295818998, "learning_rate": 2.894737619929545e-05, "loss": 0.7365, "num_input_tokens_seen": 740798240, "step": 4106 }, { "epoch": 0.4496017953419634, "grad_norm": 1.3675617250811911, "learning_rate": 2.893888520563282e-05, "loss": 0.9102, "num_input_tokens_seen": 740975424, "step": 4107 }, { "epoch": 0.4497112674129013, "grad_norm": 1.5579151789306218, "learning_rate": 2.8930393746007606e-05, "loss": 0.6858, "num_input_tokens_seen": 741119232, "step": 4108 }, { "epoch": 0.44982073948383916, "grad_norm": 1.2900748237082962, "learning_rate": 2.8921901821424313e-05, "loss": 0.6842, "num_input_tokens_seen": 741298656, "step": 4109 }, { "epoch": 0.4499302115547771, "grad_norm": 1.2854552689273502, "learning_rate": 2.8913409432887546e-05, "loss": 0.8468, "num_input_tokens_seen": 741493088, "step": 4110 }, { "epoch": 0.450039683625715, "grad_norm": 1.1859438889724665, "learning_rate": 2.8904916581401913e-05, "loss": 0.7091, "num_input_tokens_seen": 741676992, "step": 4111 }, { "epoch": 0.45014915569665287, "grad_norm": 1.1854907655714266, "learning_rate": 2.8896423267972123e-05, "loss": 0.7575, "num_input_tokens_seen": 741867168, "step": 4112 }, { "epoch": 0.4502586277675908, "grad_norm": 1.277587447411482, "learning_rate": 2.8887929493602905e-05, "loss": 0.7856, "num_input_tokens_seen": 742041664, "step": 4113 }, { "epoch": 0.4503680998385287, "grad_norm": 1.4362878249643234, "learning_rate": 2.8879435259299065e-05, "loss": 0.7802, "num_input_tokens_seen": 742257600, "step": 4114 }, { "epoch": 0.4504775719094666, "grad_norm": 1.0975896197421802, "learning_rate": 2.8870940566065442e-05, "loss": 0.6139, "num_input_tokens_seen": 742423136, "step": 4115 }, { "epoch": 0.4505870439804045, "grad_norm": 1.2801853501709182, "learning_rate": 2.8862445414906953e-05, "loss": 0.6623, "num_input_tokens_seen": 742599648, "step": 4116 }, { "epoch": 0.4506965160513424, "grad_norm": 1.3315052325081085, "learning_rate": 2.8853949806828558e-05, "loss": 0.8687, "num_input_tokens_seen": 742803488, "step": 4117 }, { "epoch": 0.4508059881222803, "grad_norm": 1.2328272906867812, "learning_rate": 2.884545374283526e-05, "loss": 0.6805, "num_input_tokens_seen": 743003968, "step": 4118 }, { "epoch": 0.4509154601932182, "grad_norm": 1.3638032717138413, "learning_rate": 2.8836957223932137e-05, "loss": 0.7523, "num_input_tokens_seen": 743171744, "step": 4119 }, { "epoch": 0.45102493226415613, "grad_norm": 1.2615258509330385, "learning_rate": 2.8828460251124317e-05, "loss": 0.7095, "num_input_tokens_seen": 743366624, "step": 4120 }, { "epoch": 0.451134404335094, "grad_norm": 1.2202959281782806, "learning_rate": 2.881996282541697e-05, "loss": 0.7463, "num_input_tokens_seen": 743542688, "step": 4121 }, { "epoch": 0.4512438764060319, "grad_norm": 1.2708850391479563, "learning_rate": 2.8811464947815314e-05, "loss": 0.5331, "num_input_tokens_seen": 743706208, "step": 4122 }, { "epoch": 0.45135334847696984, "grad_norm": 1.2381112737149915, "learning_rate": 2.8802966619324645e-05, "loss": 0.794, "num_input_tokens_seen": 743927968, "step": 4123 }, { "epoch": 0.4514628205479077, "grad_norm": 1.2563007344491615, "learning_rate": 2.8794467840950295e-05, "loss": 0.6574, "num_input_tokens_seen": 744106720, "step": 4124 }, { "epoch": 0.4515722926188456, "grad_norm": 1.1973303424214459, "learning_rate": 2.8785968613697655e-05, "loss": 0.5533, "num_input_tokens_seen": 744302496, "step": 4125 }, { "epoch": 0.4516817646897835, "grad_norm": 1.370714531864896, "learning_rate": 2.877746893857216e-05, "loss": 0.8233, "num_input_tokens_seen": 744490432, "step": 4126 }, { "epoch": 0.4517912367607214, "grad_norm": 1.3939716915474405, "learning_rate": 2.8768968816579312e-05, "loss": 0.7654, "num_input_tokens_seen": 744665152, "step": 4127 }, { "epoch": 0.45190070883165934, "grad_norm": 1.183035589871053, "learning_rate": 2.8760468248724665e-05, "loss": 0.6564, "num_input_tokens_seen": 744849728, "step": 4128 }, { "epoch": 0.4520101809025972, "grad_norm": 1.0883759489571196, "learning_rate": 2.875196723601381e-05, "loss": 0.5281, "num_input_tokens_seen": 745029600, "step": 4129 }, { "epoch": 0.4521196529735351, "grad_norm": 1.2730548494363714, "learning_rate": 2.8743465779452394e-05, "loss": 0.6389, "num_input_tokens_seen": 745204096, "step": 4130 }, { "epoch": 0.45222912504447305, "grad_norm": 1.314996162234896, "learning_rate": 2.8734963880046145e-05, "loss": 0.5771, "num_input_tokens_seen": 745374336, "step": 4131 }, { "epoch": 0.4523385971154109, "grad_norm": 1.2735180507529396, "learning_rate": 2.8726461538800802e-05, "loss": 0.5602, "num_input_tokens_seen": 745508288, "step": 4132 }, { "epoch": 0.45244806918634883, "grad_norm": 1.2086670787342069, "learning_rate": 2.871795875672219e-05, "loss": 0.5364, "num_input_tokens_seen": 745704512, "step": 4133 }, { "epoch": 0.45255754125728676, "grad_norm": 1.2813025673107785, "learning_rate": 2.870945553481616e-05, "loss": 0.6495, "num_input_tokens_seen": 745868032, "step": 4134 }, { "epoch": 0.4526670133282246, "grad_norm": 1.4160204236179468, "learning_rate": 2.8700951874088634e-05, "loss": 0.5812, "num_input_tokens_seen": 746040960, "step": 4135 }, { "epoch": 0.45277648539916254, "grad_norm": 1.3833179304498664, "learning_rate": 2.869244777554557e-05, "loss": 0.7929, "num_input_tokens_seen": 746244576, "step": 4136 }, { "epoch": 0.45288595747010046, "grad_norm": 1.394552278760829, "learning_rate": 2.8683943240192997e-05, "loss": 0.6314, "num_input_tokens_seen": 746431840, "step": 4137 }, { "epoch": 0.45299542954103833, "grad_norm": 1.4350726323902898, "learning_rate": 2.867543826903698e-05, "loss": 0.8459, "num_input_tokens_seen": 746588416, "step": 4138 }, { "epoch": 0.45310490161197625, "grad_norm": 1.5085164536794846, "learning_rate": 2.866693286308364e-05, "loss": 0.8173, "num_input_tokens_seen": 746765152, "step": 4139 }, { "epoch": 0.4532143736829142, "grad_norm": 1.3490650277183662, "learning_rate": 2.8658427023339156e-05, "loss": 0.755, "num_input_tokens_seen": 746951072, "step": 4140 }, { "epoch": 0.45332384575385204, "grad_norm": 1.3978569487973895, "learning_rate": 2.864992075080975e-05, "loss": 1.021, "num_input_tokens_seen": 747156480, "step": 4141 }, { "epoch": 0.45343331782478996, "grad_norm": 1.2348514072123362, "learning_rate": 2.8641414046501697e-05, "loss": 0.7432, "num_input_tokens_seen": 747331648, "step": 4142 }, { "epoch": 0.4535427898957278, "grad_norm": 1.1519509890780046, "learning_rate": 2.8632906911421313e-05, "loss": 0.7253, "num_input_tokens_seen": 747502560, "step": 4143 }, { "epoch": 0.45365226196666575, "grad_norm": 1.1812612381657872, "learning_rate": 2.8624399346575e-05, "loss": 0.7932, "num_input_tokens_seen": 747707296, "step": 4144 }, { "epoch": 0.45376173403760367, "grad_norm": 1.168129236939218, "learning_rate": 2.861589135296917e-05, "loss": 0.5306, "num_input_tokens_seen": 747858944, "step": 4145 }, { "epoch": 0.45387120610854154, "grad_norm": 1.2128729826646902, "learning_rate": 2.8607382931610306e-05, "loss": 0.6339, "num_input_tokens_seen": 748054496, "step": 4146 }, { "epoch": 0.45398067817947946, "grad_norm": 1.2366602543427279, "learning_rate": 2.8598874083504933e-05, "loss": 0.949, "num_input_tokens_seen": 748261248, "step": 4147 }, { "epoch": 0.4540901502504174, "grad_norm": 1.4464824168629569, "learning_rate": 2.8590364809659632e-05, "loss": 0.7696, "num_input_tokens_seen": 748423424, "step": 4148 }, { "epoch": 0.45419962232135525, "grad_norm": 1.3309636018392, "learning_rate": 2.858185511108104e-05, "loss": 0.7185, "num_input_tokens_seen": 748609792, "step": 4149 }, { "epoch": 0.45430909439229317, "grad_norm": 1.3455315689556806, "learning_rate": 2.8573344988775834e-05, "loss": 0.822, "num_input_tokens_seen": 748801760, "step": 4150 }, { "epoch": 0.4544185664632311, "grad_norm": 1.2034541583932745, "learning_rate": 2.8564834443750753e-05, "loss": 0.5067, "num_input_tokens_seen": 748969536, "step": 4151 }, { "epoch": 0.45452803853416895, "grad_norm": 1.3145343817752126, "learning_rate": 2.8556323477012577e-05, "loss": 0.6684, "num_input_tokens_seen": 749141344, "step": 4152 }, { "epoch": 0.4546375106051069, "grad_norm": 1.2208301203471708, "learning_rate": 2.8547812089568128e-05, "loss": 0.6541, "num_input_tokens_seen": 749343168, "step": 4153 }, { "epoch": 0.4547469826760448, "grad_norm": 1.3737802730900543, "learning_rate": 2.8539300282424288e-05, "loss": 0.8786, "num_input_tokens_seen": 749514304, "step": 4154 }, { "epoch": 0.45485645474698266, "grad_norm": 1.2000365807997555, "learning_rate": 2.8530788056587993e-05, "loss": 0.5559, "num_input_tokens_seen": 749690592, "step": 4155 }, { "epoch": 0.4549659268179206, "grad_norm": 1.3730117554602432, "learning_rate": 2.852227541306622e-05, "loss": 0.8524, "num_input_tokens_seen": 749885248, "step": 4156 }, { "epoch": 0.4550753988888585, "grad_norm": 1.4625269765077633, "learning_rate": 2.851376235286599e-05, "loss": 1.0456, "num_input_tokens_seen": 750072736, "step": 4157 }, { "epoch": 0.4551848709597964, "grad_norm": 1.3331359578114632, "learning_rate": 2.85052488769944e-05, "loss": 0.7398, "num_input_tokens_seen": 750250816, "step": 4158 }, { "epoch": 0.4552943430307343, "grad_norm": 1.242489232298709, "learning_rate": 2.849673498645857e-05, "loss": 0.9143, "num_input_tokens_seen": 750441664, "step": 4159 }, { "epoch": 0.45540381510167216, "grad_norm": 1.2360964011032614, "learning_rate": 2.848822068226567e-05, "loss": 0.4653, "num_input_tokens_seen": 750596672, "step": 4160 }, { "epoch": 0.4555132871726101, "grad_norm": 1.2439809843492815, "learning_rate": 2.8479705965422937e-05, "loss": 0.5955, "num_input_tokens_seen": 750740480, "step": 4161 }, { "epoch": 0.455622759243548, "grad_norm": 1.25067721142082, "learning_rate": 2.8471190836937638e-05, "loss": 0.7815, "num_input_tokens_seen": 750924384, "step": 4162 }, { "epoch": 0.45573223131448587, "grad_norm": 1.2171675850606503, "learning_rate": 2.84626752978171e-05, "loss": 0.5404, "num_input_tokens_seen": 751105376, "step": 4163 }, { "epoch": 0.4558417033854238, "grad_norm": 1.2678410473421673, "learning_rate": 2.845415934906869e-05, "loss": 0.7871, "num_input_tokens_seen": 751283680, "step": 4164 }, { "epoch": 0.4559511754563617, "grad_norm": 1.1273246694788726, "learning_rate": 2.8445642991699835e-05, "loss": 0.6632, "num_input_tokens_seen": 751448992, "step": 4165 }, { "epoch": 0.4560606475272996, "grad_norm": 1.4048841413059152, "learning_rate": 2.8437126226718e-05, "loss": 0.8513, "num_input_tokens_seen": 751643648, "step": 4166 }, { "epoch": 0.4561701195982375, "grad_norm": 1.3980918755527378, "learning_rate": 2.8428609055130707e-05, "loss": 0.9216, "num_input_tokens_seen": 751836736, "step": 4167 }, { "epoch": 0.4562795916691754, "grad_norm": 1.2650215688381976, "learning_rate": 2.8420091477945514e-05, "loss": 0.6976, "num_input_tokens_seen": 752004736, "step": 4168 }, { "epoch": 0.4563890637401133, "grad_norm": 1.2477577602914818, "learning_rate": 2.8411573496170034e-05, "loss": 0.7811, "num_input_tokens_seen": 752211712, "step": 4169 }, { "epoch": 0.4564985358110512, "grad_norm": 1.3665225782654231, "learning_rate": 2.840305511081194e-05, "loss": 0.796, "num_input_tokens_seen": 752391136, "step": 4170 }, { "epoch": 0.45660800788198913, "grad_norm": 1.2057840267523627, "learning_rate": 2.8394536322878916e-05, "loss": 0.5451, "num_input_tokens_seen": 752544128, "step": 4171 }, { "epoch": 0.456717479952927, "grad_norm": 1.2595499016635183, "learning_rate": 2.838601713337875e-05, "loss": 0.8127, "num_input_tokens_seen": 752744160, "step": 4172 }, { "epoch": 0.4568269520238649, "grad_norm": 1.4750977370645046, "learning_rate": 2.8377497543319227e-05, "loss": 0.8205, "num_input_tokens_seen": 752907008, "step": 4173 }, { "epoch": 0.45693642409480284, "grad_norm": 1.2341591368623044, "learning_rate": 2.8368977553708198e-05, "loss": 0.8, "num_input_tokens_seen": 753096288, "step": 4174 }, { "epoch": 0.4570458961657407, "grad_norm": 1.1664399823716953, "learning_rate": 2.836045716555357e-05, "loss": 0.739, "num_input_tokens_seen": 753271456, "step": 4175 }, { "epoch": 0.4571553682366786, "grad_norm": 1.0743186765490227, "learning_rate": 2.835193637986328e-05, "loss": 0.4571, "num_input_tokens_seen": 753479776, "step": 4176 }, { "epoch": 0.4572648403076165, "grad_norm": 1.1095538465301784, "learning_rate": 2.8343415197645317e-05, "loss": 0.5259, "num_input_tokens_seen": 753660320, "step": 4177 }, { "epoch": 0.4573743123785544, "grad_norm": 1.1759759988800862, "learning_rate": 2.8334893619907737e-05, "loss": 0.5846, "num_input_tokens_seen": 753830784, "step": 4178 }, { "epoch": 0.45748378444949234, "grad_norm": 1.324000427793451, "learning_rate": 2.8326371647658618e-05, "loss": 0.8426, "num_input_tokens_seen": 754016480, "step": 4179 }, { "epoch": 0.4575932565204302, "grad_norm": 1.268119943081327, "learning_rate": 2.831784928190609e-05, "loss": 0.6645, "num_input_tokens_seen": 754192768, "step": 4180 }, { "epoch": 0.4577027285913681, "grad_norm": 1.1263118618520451, "learning_rate": 2.8309326523658324e-05, "loss": 0.5332, "num_input_tokens_seen": 754377120, "step": 4181 }, { "epoch": 0.45781220066230605, "grad_norm": 1.3690212486128337, "learning_rate": 2.830080337392357e-05, "loss": 0.7576, "num_input_tokens_seen": 754573568, "step": 4182 }, { "epoch": 0.4579216727332439, "grad_norm": 1.4577628336763313, "learning_rate": 2.8292279833710084e-05, "loss": 0.9079, "num_input_tokens_seen": 754772256, "step": 4183 }, { "epoch": 0.45803114480418183, "grad_norm": 1.1385579649284234, "learning_rate": 2.828375590402618e-05, "loss": 0.5969, "num_input_tokens_seen": 754943840, "step": 4184 }, { "epoch": 0.45814061687511975, "grad_norm": 1.367009832554737, "learning_rate": 2.8275231585880236e-05, "loss": 0.7089, "num_input_tokens_seen": 755118112, "step": 4185 }, { "epoch": 0.4582500889460576, "grad_norm": 1.394065971652379, "learning_rate": 2.826670688028066e-05, "loss": 0.7822, "num_input_tokens_seen": 755281632, "step": 4186 }, { "epoch": 0.45835956101699554, "grad_norm": 1.2202012640238256, "learning_rate": 2.8258181788235906e-05, "loss": 0.8449, "num_input_tokens_seen": 755453888, "step": 4187 }, { "epoch": 0.45846903308793346, "grad_norm": 1.4534554054602504, "learning_rate": 2.824965631075447e-05, "loss": 0.7468, "num_input_tokens_seen": 755618976, "step": 4188 }, { "epoch": 0.45857850515887133, "grad_norm": 1.1984601930063785, "learning_rate": 2.8241130448844905e-05, "loss": 0.5656, "num_input_tokens_seen": 755802432, "step": 4189 }, { "epoch": 0.45868797722980925, "grad_norm": 1.3493398571164537, "learning_rate": 2.82326042035158e-05, "loss": 0.7551, "num_input_tokens_seen": 755932352, "step": 4190 }, { "epoch": 0.4587974493007472, "grad_norm": 1.2502578946150191, "learning_rate": 2.8224077575775803e-05, "loss": 0.6234, "num_input_tokens_seen": 756128352, "step": 4191 }, { "epoch": 0.45890692137168504, "grad_norm": 1.34932223189886, "learning_rate": 2.8215550566633588e-05, "loss": 0.8083, "num_input_tokens_seen": 756295680, "step": 4192 }, { "epoch": 0.45901639344262296, "grad_norm": 1.456401781022707, "learning_rate": 2.820702317709789e-05, "loss": 0.7964, "num_input_tokens_seen": 756443968, "step": 4193 }, { "epoch": 0.4591258655135608, "grad_norm": 1.1897004971146217, "learning_rate": 2.8198495408177484e-05, "loss": 0.5165, "num_input_tokens_seen": 756595840, "step": 4194 }, { "epoch": 0.45923533758449875, "grad_norm": 1.2685945312257032, "learning_rate": 2.8189967260881183e-05, "loss": 0.6723, "num_input_tokens_seen": 756772800, "step": 4195 }, { "epoch": 0.45934480965543667, "grad_norm": 1.4054728585862248, "learning_rate": 2.818143873621785e-05, "loss": 0.6793, "num_input_tokens_seen": 756963424, "step": 4196 }, { "epoch": 0.45945428172637454, "grad_norm": 1.4732242369212494, "learning_rate": 2.8172909835196404e-05, "loss": 0.6344, "num_input_tokens_seen": 757089984, "step": 4197 }, { "epoch": 0.45956375379731246, "grad_norm": 1.366679505931432, "learning_rate": 2.8164380558825782e-05, "loss": 0.7127, "num_input_tokens_seen": 757284640, "step": 4198 }, { "epoch": 0.4596732258682504, "grad_norm": 1.203219422437522, "learning_rate": 2.8155850908114996e-05, "loss": 0.6374, "num_input_tokens_seen": 757471008, "step": 4199 }, { "epoch": 0.45978269793918825, "grad_norm": 1.2543648805005774, "learning_rate": 2.814732088407308e-05, "loss": 0.5336, "num_input_tokens_seen": 757610560, "step": 4200 }, { "epoch": 0.45989217001012617, "grad_norm": 1.361253470914705, "learning_rate": 2.8138790487709115e-05, "loss": 0.9575, "num_input_tokens_seen": 757804992, "step": 4201 }, { "epoch": 0.4600016420810641, "grad_norm": 1.2054486315686912, "learning_rate": 2.8130259720032237e-05, "loss": 0.5293, "num_input_tokens_seen": 757957312, "step": 4202 }, { "epoch": 0.46011111415200195, "grad_norm": 1.2420072124812351, "learning_rate": 2.812172858205162e-05, "loss": 0.639, "num_input_tokens_seen": 758151744, "step": 4203 }, { "epoch": 0.4602205862229399, "grad_norm": 1.2818858267012117, "learning_rate": 2.8113197074776477e-05, "loss": 0.6157, "num_input_tokens_seen": 758309888, "step": 4204 }, { "epoch": 0.4603300582938778, "grad_norm": 1.4501794473827045, "learning_rate": 2.8104665199216074e-05, "loss": 0.9556, "num_input_tokens_seen": 758483936, "step": 4205 }, { "epoch": 0.46043953036481566, "grad_norm": 1.36479059737087, "learning_rate": 2.809613295637971e-05, "loss": 0.7719, "num_input_tokens_seen": 758672320, "step": 4206 }, { "epoch": 0.4605490024357536, "grad_norm": 1.2816974044821146, "learning_rate": 2.8087600347276744e-05, "loss": 0.7458, "num_input_tokens_seen": 758860032, "step": 4207 }, { "epoch": 0.4606584745066915, "grad_norm": 1.2272843101866802, "learning_rate": 2.8079067372916555e-05, "loss": 0.5115, "num_input_tokens_seen": 759009440, "step": 4208 }, { "epoch": 0.4607679465776294, "grad_norm": 1.3232083587263694, "learning_rate": 2.8070534034308583e-05, "loss": 0.8742, "num_input_tokens_seen": 759190656, "step": 4209 }, { "epoch": 0.4608774186485673, "grad_norm": 1.1811047560593733, "learning_rate": 2.8062000332462302e-05, "loss": 0.5874, "num_input_tokens_seen": 759398528, "step": 4210 }, { "epoch": 0.46098689071950516, "grad_norm": 1.3814402954062233, "learning_rate": 2.805346626838723e-05, "loss": 0.7034, "num_input_tokens_seen": 759572128, "step": 4211 }, { "epoch": 0.4610963627904431, "grad_norm": 1.383028821673831, "learning_rate": 2.8044931843092948e-05, "loss": 1.0248, "num_input_tokens_seen": 759758272, "step": 4212 }, { "epoch": 0.461205834861381, "grad_norm": 1.3300931820458313, "learning_rate": 2.8036397057589038e-05, "loss": 0.7229, "num_input_tokens_seen": 759958528, "step": 4213 }, { "epoch": 0.46131530693231887, "grad_norm": 1.3504369381837977, "learning_rate": 2.8027861912885168e-05, "loss": 0.6246, "num_input_tokens_seen": 760135264, "step": 4214 }, { "epoch": 0.4614247790032568, "grad_norm": 1.248714596740037, "learning_rate": 2.801932640999102e-05, "loss": 0.7071, "num_input_tokens_seen": 760295200, "step": 4215 }, { "epoch": 0.4615342510741947, "grad_norm": 1.2968828591717578, "learning_rate": 2.8010790549916333e-05, "loss": 0.7642, "num_input_tokens_seen": 760491872, "step": 4216 }, { "epoch": 0.4616437231451326, "grad_norm": 1.2382129445776644, "learning_rate": 2.8002254333670874e-05, "loss": 0.6831, "num_input_tokens_seen": 760671520, "step": 4217 }, { "epoch": 0.4617531952160705, "grad_norm": 1.187406887620197, "learning_rate": 2.7993717762264464e-05, "loss": 0.835, "num_input_tokens_seen": 760858784, "step": 4218 }, { "epoch": 0.4618626672870084, "grad_norm": 1.1704444800648626, "learning_rate": 2.7985180836706975e-05, "loss": 0.7763, "num_input_tokens_seen": 761024096, "step": 4219 }, { "epoch": 0.4619721393579463, "grad_norm": 1.2623808927344649, "learning_rate": 2.7976643558008297e-05, "loss": 0.5668, "num_input_tokens_seen": 761196576, "step": 4220 }, { "epoch": 0.4620816114288842, "grad_norm": 1.298956108958102, "learning_rate": 2.7968105927178372e-05, "loss": 0.6902, "num_input_tokens_seen": 761399744, "step": 4221 }, { "epoch": 0.46219108349982213, "grad_norm": 1.2983750549798236, "learning_rate": 2.7959567945227195e-05, "loss": 0.7692, "num_input_tokens_seen": 761567968, "step": 4222 }, { "epoch": 0.46230055557076, "grad_norm": 1.11139967005946, "learning_rate": 2.7951029613164782e-05, "loss": 0.603, "num_input_tokens_seen": 761750528, "step": 4223 }, { "epoch": 0.4624100276416979, "grad_norm": 1.3648266708798977, "learning_rate": 2.7942490932001212e-05, "loss": 0.8188, "num_input_tokens_seen": 761936000, "step": 4224 }, { "epoch": 0.46251949971263584, "grad_norm": 1.293935869290424, "learning_rate": 2.7933951902746587e-05, "loss": 0.5685, "num_input_tokens_seen": 762116096, "step": 4225 }, { "epoch": 0.4626289717835737, "grad_norm": 1.1692557500306755, "learning_rate": 2.792541252641106e-05, "loss": 0.7955, "num_input_tokens_seen": 762314336, "step": 4226 }, { "epoch": 0.4627384438545116, "grad_norm": 1.312745343846796, "learning_rate": 2.791687280400483e-05, "loss": 0.8521, "num_input_tokens_seen": 762476736, "step": 4227 }, { "epoch": 0.4628479159254495, "grad_norm": 1.235551104808204, "learning_rate": 2.790833273653812e-05, "loss": 0.8275, "num_input_tokens_seen": 762664896, "step": 4228 }, { "epoch": 0.4629573879963874, "grad_norm": 1.2783263274095742, "learning_rate": 2.7899792325021207e-05, "loss": 0.8029, "num_input_tokens_seen": 762828192, "step": 4229 }, { "epoch": 0.46306686006732534, "grad_norm": 1.3189562089431246, "learning_rate": 2.7891251570464406e-05, "loss": 0.8204, "num_input_tokens_seen": 763015904, "step": 4230 }, { "epoch": 0.4631763321382632, "grad_norm": 1.2940059777199875, "learning_rate": 2.788271047387807e-05, "loss": 0.7079, "num_input_tokens_seen": 763202496, "step": 4231 }, { "epoch": 0.4632858042092011, "grad_norm": 1.3705357125106108, "learning_rate": 2.7874169036272597e-05, "loss": 0.6894, "num_input_tokens_seen": 763336672, "step": 4232 }, { "epoch": 0.46339527628013905, "grad_norm": 1.3336218233734836, "learning_rate": 2.7865627258658417e-05, "loss": 0.6564, "num_input_tokens_seen": 763531776, "step": 4233 }, { "epoch": 0.4635047483510769, "grad_norm": 1.3311398563578691, "learning_rate": 2.7857085142046013e-05, "loss": 0.676, "num_input_tokens_seen": 763696416, "step": 4234 }, { "epoch": 0.46361422042201483, "grad_norm": 1.1828993973250195, "learning_rate": 2.78485426874459e-05, "loss": 0.6123, "num_input_tokens_seen": 763880768, "step": 4235 }, { "epoch": 0.46372369249295275, "grad_norm": 1.0938519021656725, "learning_rate": 2.783999989586863e-05, "loss": 0.6461, "num_input_tokens_seen": 764069152, "step": 4236 }, { "epoch": 0.4638331645638906, "grad_norm": 1.215013253018032, "learning_rate": 2.7831456768324805e-05, "loss": 0.7703, "num_input_tokens_seen": 764253504, "step": 4237 }, { "epoch": 0.46394263663482854, "grad_norm": 1.2724442147878268, "learning_rate": 2.7822913305825054e-05, "loss": 0.8396, "num_input_tokens_seen": 764451296, "step": 4238 }, { "epoch": 0.46405210870576646, "grad_norm": 1.376821005685037, "learning_rate": 2.7814369509380055e-05, "loss": 0.8019, "num_input_tokens_seen": 764630048, "step": 4239 }, { "epoch": 0.46416158077670433, "grad_norm": 1.3829602092582203, "learning_rate": 2.7805825380000528e-05, "loss": 0.733, "num_input_tokens_seen": 764801184, "step": 4240 }, { "epoch": 0.46427105284764225, "grad_norm": 1.1879337923886486, "learning_rate": 2.779728091869722e-05, "loss": 0.7258, "num_input_tokens_seen": 764970528, "step": 4241 }, { "epoch": 0.4643805249185802, "grad_norm": 1.2341935712537793, "learning_rate": 2.778873612648093e-05, "loss": 0.8455, "num_input_tokens_seen": 765174816, "step": 4242 }, { "epoch": 0.46448999698951804, "grad_norm": 1.3127963321946052, "learning_rate": 2.778019100436248e-05, "loss": 0.7934, "num_input_tokens_seen": 765373280, "step": 4243 }, { "epoch": 0.46459946906045596, "grad_norm": 1.2977076236079679, "learning_rate": 2.7771645553352753e-05, "loss": 0.6965, "num_input_tokens_seen": 765577568, "step": 4244 }, { "epoch": 0.4647089411313938, "grad_norm": 1.2633256157779407, "learning_rate": 2.7763099774462646e-05, "loss": 0.5751, "num_input_tokens_seen": 765738624, "step": 4245 }, { "epoch": 0.46481841320233175, "grad_norm": 1.1165016042831928, "learning_rate": 2.775455366870313e-05, "loss": 0.6169, "num_input_tokens_seen": 765927008, "step": 4246 }, { "epoch": 0.46492788527326967, "grad_norm": 1.1620446451576107, "learning_rate": 2.774600723708518e-05, "loss": 0.6674, "num_input_tokens_seen": 766137792, "step": 4247 }, { "epoch": 0.46503735734420754, "grad_norm": 1.3231495279337022, "learning_rate": 2.7737460480619827e-05, "loss": 0.6421, "num_input_tokens_seen": 766287424, "step": 4248 }, { "epoch": 0.46514682941514546, "grad_norm": 1.1214201980240028, "learning_rate": 2.7728913400318125e-05, "loss": 0.6302, "num_input_tokens_seen": 766499328, "step": 4249 }, { "epoch": 0.4652563014860834, "grad_norm": 1.2061724524203503, "learning_rate": 2.7720365997191188e-05, "loss": 0.6673, "num_input_tokens_seen": 766685920, "step": 4250 }, { "epoch": 0.46536577355702125, "grad_norm": 1.3158912443656927, "learning_rate": 2.7711818272250152e-05, "loss": 0.8285, "num_input_tokens_seen": 766883040, "step": 4251 }, { "epoch": 0.46547524562795917, "grad_norm": 1.21019540012931, "learning_rate": 2.7703270226506196e-05, "loss": 0.6953, "num_input_tokens_seen": 767028640, "step": 4252 }, { "epoch": 0.4655847176988971, "grad_norm": 1.14887431654019, "learning_rate": 2.769472186097054e-05, "loss": 0.6327, "num_input_tokens_seen": 767231360, "step": 4253 }, { "epoch": 0.46569418976983495, "grad_norm": 1.1949041828542981, "learning_rate": 2.7686173176654446e-05, "loss": 0.6156, "num_input_tokens_seen": 767412576, "step": 4254 }, { "epoch": 0.4658036618407729, "grad_norm": 1.2641014536359547, "learning_rate": 2.7677624174569187e-05, "loss": 0.8213, "num_input_tokens_seen": 767595808, "step": 4255 }, { "epoch": 0.4659131339117108, "grad_norm": 1.3712512412191666, "learning_rate": 2.766907485572612e-05, "loss": 0.5815, "num_input_tokens_seen": 767783520, "step": 4256 }, { "epoch": 0.46602260598264866, "grad_norm": 1.1528251030538852, "learning_rate": 2.7660525221136595e-05, "loss": 0.5881, "num_input_tokens_seen": 767953984, "step": 4257 }, { "epoch": 0.4661320780535866, "grad_norm": 1.3587497720305841, "learning_rate": 2.7651975271812026e-05, "loss": 0.9978, "num_input_tokens_seen": 768112352, "step": 4258 }, { "epoch": 0.4662415501245245, "grad_norm": 1.2607331574794014, "learning_rate": 2.7643425008763845e-05, "loss": 0.6524, "num_input_tokens_seen": 768274976, "step": 4259 }, { "epoch": 0.4663510221954624, "grad_norm": 1.3087883192571759, "learning_rate": 2.7634874433003545e-05, "loss": 0.7458, "num_input_tokens_seen": 768426848, "step": 4260 }, { "epoch": 0.4664604942664003, "grad_norm": 1.3315418944233839, "learning_rate": 2.762632354554264e-05, "loss": 0.9385, "num_input_tokens_seen": 768608288, "step": 4261 }, { "epoch": 0.46656996633733816, "grad_norm": 1.293606050748245, "learning_rate": 2.7617772347392672e-05, "loss": 0.793, "num_input_tokens_seen": 768771584, "step": 4262 }, { "epoch": 0.4666794384082761, "grad_norm": 1.2724442616304648, "learning_rate": 2.760922083956525e-05, "loss": 0.6927, "num_input_tokens_seen": 768930400, "step": 4263 }, { "epoch": 0.466788910479214, "grad_norm": 1.3741831086846297, "learning_rate": 2.7600669023071978e-05, "loss": 0.6679, "num_input_tokens_seen": 769113632, "step": 4264 }, { "epoch": 0.46689838255015187, "grad_norm": 1.168181741233421, "learning_rate": 2.7592116898924537e-05, "loss": 0.5807, "num_input_tokens_seen": 769252512, "step": 4265 }, { "epoch": 0.4670078546210898, "grad_norm": 1.4159937363609962, "learning_rate": 2.7583564468134615e-05, "loss": 0.7771, "num_input_tokens_seen": 769449408, "step": 4266 }, { "epoch": 0.4671173266920277, "grad_norm": 4.224063719890171, "learning_rate": 2.7575011731713968e-05, "loss": 1.1046, "num_input_tokens_seen": 769641600, "step": 4267 }, { "epoch": 0.4672267987629656, "grad_norm": 1.2798962768610216, "learning_rate": 2.756645869067435e-05, "loss": 0.7965, "num_input_tokens_seen": 769851488, "step": 4268 }, { "epoch": 0.4673362708339035, "grad_norm": 1.249300522600042, "learning_rate": 2.7557905346027578e-05, "loss": 0.7947, "num_input_tokens_seen": 770039648, "step": 4269 }, { "epoch": 0.4674457429048414, "grad_norm": 1.1650307232157795, "learning_rate": 2.7549351698785492e-05, "loss": 0.6467, "num_input_tokens_seen": 770225344, "step": 4270 }, { "epoch": 0.4675552149757793, "grad_norm": 1.224197391404026, "learning_rate": 2.7540797749959974e-05, "loss": 0.552, "num_input_tokens_seen": 770402976, "step": 4271 }, { "epoch": 0.4676646870467172, "grad_norm": 1.2499016723106446, "learning_rate": 2.753224350056293e-05, "loss": 0.8069, "num_input_tokens_seen": 770615104, "step": 4272 }, { "epoch": 0.46777415911765513, "grad_norm": 1.1093427626533061, "learning_rate": 2.7523688951606337e-05, "loss": 0.6014, "num_input_tokens_seen": 770809984, "step": 4273 }, { "epoch": 0.467883631188593, "grad_norm": 1.0893583686684003, "learning_rate": 2.751513410410216e-05, "loss": 0.6194, "num_input_tokens_seen": 770975968, "step": 4274 }, { "epoch": 0.4679931032595309, "grad_norm": 1.2920704282337372, "learning_rate": 2.7506578959062424e-05, "loss": 0.8015, "num_input_tokens_seen": 771149344, "step": 4275 }, { "epoch": 0.46810257533046884, "grad_norm": 1.1291570681812686, "learning_rate": 2.7498023517499183e-05, "loss": 0.6043, "num_input_tokens_seen": 771318912, "step": 4276 }, { "epoch": 0.4682120474014067, "grad_norm": 1.2988757131936373, "learning_rate": 2.7489467780424544e-05, "loss": 0.8178, "num_input_tokens_seen": 771494304, "step": 4277 }, { "epoch": 0.4683215194723446, "grad_norm": 1.2631388133943875, "learning_rate": 2.7480911748850624e-05, "loss": 0.7673, "num_input_tokens_seen": 771662976, "step": 4278 }, { "epoch": 0.4684309915432825, "grad_norm": 1.244140193825352, "learning_rate": 2.7472355423789582e-05, "loss": 0.6127, "num_input_tokens_seen": 771844640, "step": 4279 }, { "epoch": 0.4685404636142204, "grad_norm": 1.1991988686768105, "learning_rate": 2.746379880625362e-05, "loss": 0.5756, "num_input_tokens_seen": 772007488, "step": 4280 }, { "epoch": 0.46864993568515834, "grad_norm": 1.3445637035720015, "learning_rate": 2.7455241897254974e-05, "loss": 0.6482, "num_input_tokens_seen": 772185792, "step": 4281 }, { "epoch": 0.4687594077560962, "grad_norm": 1.2877425400509168, "learning_rate": 2.7446684697805907e-05, "loss": 0.6445, "num_input_tokens_seen": 772344160, "step": 4282 }, { "epoch": 0.4688688798270341, "grad_norm": 1.3389390832919863, "learning_rate": 2.743812720891872e-05, "loss": 0.6815, "num_input_tokens_seen": 772547776, "step": 4283 }, { "epoch": 0.46897835189797205, "grad_norm": 1.072813745148467, "learning_rate": 2.742956943160574e-05, "loss": 0.5607, "num_input_tokens_seen": 772730336, "step": 4284 }, { "epoch": 0.4690878239689099, "grad_norm": 1.3123505371005448, "learning_rate": 2.742101136687934e-05, "loss": 0.8053, "num_input_tokens_seen": 772911776, "step": 4285 }, { "epoch": 0.46919729603984783, "grad_norm": 1.2796263758182536, "learning_rate": 2.7412453015751916e-05, "loss": 0.5849, "num_input_tokens_seen": 773125696, "step": 4286 }, { "epoch": 0.46930676811078575, "grad_norm": 1.440942870713109, "learning_rate": 2.7403894379235916e-05, "loss": 1.0683, "num_input_tokens_seen": 773328864, "step": 4287 }, { "epoch": 0.4694162401817236, "grad_norm": 1.1563531984206559, "learning_rate": 2.7395335458343813e-05, "loss": 0.5099, "num_input_tokens_seen": 773491488, "step": 4288 }, { "epoch": 0.46952571225266154, "grad_norm": 1.4089035429795789, "learning_rate": 2.7386776254088103e-05, "loss": 0.7875, "num_input_tokens_seen": 773648512, "step": 4289 }, { "epoch": 0.46963518432359946, "grad_norm": 1.1837512122433023, "learning_rate": 2.7378216767481322e-05, "loss": 0.5539, "num_input_tokens_seen": 773823680, "step": 4290 }, { "epoch": 0.46974465639453733, "grad_norm": 1.4104331634924072, "learning_rate": 2.736965699953605e-05, "loss": 0.6998, "num_input_tokens_seen": 774021248, "step": 4291 }, { "epoch": 0.46985412846547525, "grad_norm": 1.2165434498425938, "learning_rate": 2.7361096951264882e-05, "loss": 0.8498, "num_input_tokens_seen": 774221952, "step": 4292 }, { "epoch": 0.4699636005364132, "grad_norm": 1.2203163938179364, "learning_rate": 2.7352536623680454e-05, "loss": 0.7234, "num_input_tokens_seen": 774398688, "step": 4293 }, { "epoch": 0.47007307260735104, "grad_norm": 1.507228678995574, "learning_rate": 2.7343976017795443e-05, "loss": 0.7814, "num_input_tokens_seen": 774533088, "step": 4294 }, { "epoch": 0.47018254467828896, "grad_norm": 1.1859692696197321, "learning_rate": 2.7335415134622548e-05, "loss": 0.6258, "num_input_tokens_seen": 774680928, "step": 4295 }, { "epoch": 0.4702920167492268, "grad_norm": 1.3226734698775, "learning_rate": 2.732685397517451e-05, "loss": 0.8843, "num_input_tokens_seen": 774872000, "step": 4296 }, { "epoch": 0.47040148882016475, "grad_norm": 1.348949442162786, "learning_rate": 2.731829254046409e-05, "loss": 0.8768, "num_input_tokens_seen": 775069344, "step": 4297 }, { "epoch": 0.47051096089110267, "grad_norm": 1.3002838466781275, "learning_rate": 2.7309730831504105e-05, "loss": 0.6972, "num_input_tokens_seen": 775241824, "step": 4298 }, { "epoch": 0.47062043296204054, "grad_norm": 1.1763314008771661, "learning_rate": 2.7301168849307364e-05, "loss": 0.8387, "num_input_tokens_seen": 775444768, "step": 4299 }, { "epoch": 0.47072990503297846, "grad_norm": 1.261065854484943, "learning_rate": 2.7292606594886756e-05, "loss": 0.7989, "num_input_tokens_seen": 775631360, "step": 4300 }, { "epoch": 0.4708393771039164, "grad_norm": 1.094767832594905, "learning_rate": 2.728404406925517e-05, "loss": 0.7526, "num_input_tokens_seen": 775824448, "step": 4301 }, { "epoch": 0.47094884917485424, "grad_norm": 1.5190275872634142, "learning_rate": 2.727548127342554e-05, "loss": 0.6731, "num_input_tokens_seen": 776024928, "step": 4302 }, { "epoch": 0.47105832124579217, "grad_norm": 1.1692213403412335, "learning_rate": 2.7266918208410824e-05, "loss": 0.7333, "num_input_tokens_seen": 776206592, "step": 4303 }, { "epoch": 0.4711677933167301, "grad_norm": 1.1719917747807826, "learning_rate": 2.7258354875224014e-05, "loss": 0.6393, "num_input_tokens_seen": 776357568, "step": 4304 }, { "epoch": 0.47127726538766795, "grad_norm": 1.334375550316869, "learning_rate": 2.7249791274878146e-05, "loss": 0.778, "num_input_tokens_seen": 776530944, "step": 4305 }, { "epoch": 0.4713867374586059, "grad_norm": 1.3539961145055064, "learning_rate": 2.724122740838626e-05, "loss": 0.6682, "num_input_tokens_seen": 776713952, "step": 4306 }, { "epoch": 0.4714962095295438, "grad_norm": 1.4045202000741683, "learning_rate": 2.723266327676146e-05, "loss": 0.8789, "num_input_tokens_seen": 776894496, "step": 4307 }, { "epoch": 0.47160568160048166, "grad_norm": 1.3563752235227498, "learning_rate": 2.722409888101686e-05, "loss": 0.6702, "num_input_tokens_seen": 777080416, "step": 4308 }, { "epoch": 0.4717151536714196, "grad_norm": 1.1416108038848845, "learning_rate": 2.7215534222165622e-05, "loss": 0.8059, "num_input_tokens_seen": 777285152, "step": 4309 }, { "epoch": 0.4718246257423575, "grad_norm": 1.296370948365118, "learning_rate": 2.720696930122092e-05, "loss": 0.7218, "num_input_tokens_seen": 777477792, "step": 4310 }, { "epoch": 0.4719340978132954, "grad_norm": 1.187880003510657, "learning_rate": 2.7198404119195965e-05, "loss": 0.9108, "num_input_tokens_seen": 777660800, "step": 4311 }, { "epoch": 0.4720435698842333, "grad_norm": 1.2945100753792858, "learning_rate": 2.718983867710401e-05, "loss": 0.8167, "num_input_tokens_seen": 777846048, "step": 4312 }, { "epoch": 0.47215304195517116, "grad_norm": 1.1190907731861481, "learning_rate": 2.7181272975958318e-05, "loss": 0.5095, "num_input_tokens_seen": 778036000, "step": 4313 }, { "epoch": 0.4722625140261091, "grad_norm": 1.1494707921628642, "learning_rate": 2.717270701677221e-05, "loss": 0.6635, "num_input_tokens_seen": 778228192, "step": 4314 }, { "epoch": 0.472371986097047, "grad_norm": 1.2551234151942088, "learning_rate": 2.7164140800559013e-05, "loss": 0.7656, "num_input_tokens_seen": 778402912, "step": 4315 }, { "epoch": 0.47248145816798487, "grad_norm": 1.0867902851487667, "learning_rate": 2.7155574328332095e-05, "loss": 0.6086, "num_input_tokens_seen": 778608544, "step": 4316 }, { "epoch": 0.4725909302389228, "grad_norm": 1.265050310182358, "learning_rate": 2.7147007601104858e-05, "loss": 0.6855, "num_input_tokens_seen": 778766016, "step": 4317 }, { "epoch": 0.4727004023098607, "grad_norm": 1.4518932741733614, "learning_rate": 2.713844061989072e-05, "loss": 0.7241, "num_input_tokens_seen": 778937152, "step": 4318 }, { "epoch": 0.4728098743807986, "grad_norm": 1.2339973354836358, "learning_rate": 2.7129873385703146e-05, "loss": 0.8103, "num_input_tokens_seen": 779109856, "step": 4319 }, { "epoch": 0.4729193464517365, "grad_norm": 1.271284471492709, "learning_rate": 2.712130589955562e-05, "loss": 0.6347, "num_input_tokens_seen": 779272256, "step": 4320 }, { "epoch": 0.4730288185226744, "grad_norm": 1.3444300860083989, "learning_rate": 2.711273816246167e-05, "loss": 0.6893, "num_input_tokens_seen": 779438688, "step": 4321 }, { "epoch": 0.4731382905936123, "grad_norm": 1.355486339268081, "learning_rate": 2.710417017543483e-05, "loss": 0.82, "num_input_tokens_seen": 779630208, "step": 4322 }, { "epoch": 0.4732477626645502, "grad_norm": 1.1399321868871384, "learning_rate": 2.7095601939488685e-05, "loss": 0.7439, "num_input_tokens_seen": 779832928, "step": 4323 }, { "epoch": 0.47335723473548813, "grad_norm": 1.1519022985029268, "learning_rate": 2.7087033455636834e-05, "loss": 0.677, "num_input_tokens_seen": 780015488, "step": 4324 }, { "epoch": 0.473466706806426, "grad_norm": 1.2510416935590645, "learning_rate": 2.7078464724892917e-05, "loss": 0.853, "num_input_tokens_seen": 780225376, "step": 4325 }, { "epoch": 0.4735761788773639, "grad_norm": 1.3989196883729662, "learning_rate": 2.706989574827059e-05, "loss": 0.6221, "num_input_tokens_seen": 780401664, "step": 4326 }, { "epoch": 0.47368565094830184, "grad_norm": 1.195828843634459, "learning_rate": 2.7061326526783555e-05, "loss": 0.599, "num_input_tokens_seen": 780583552, "step": 4327 }, { "epoch": 0.4737951230192397, "grad_norm": 1.4324244582145373, "learning_rate": 2.7052757061445534e-05, "loss": 0.8147, "num_input_tokens_seen": 780764096, "step": 4328 }, { "epoch": 0.4739045950901776, "grad_norm": 1.3614655566248879, "learning_rate": 2.7044187353270268e-05, "loss": 0.9329, "num_input_tokens_seen": 780949568, "step": 4329 }, { "epoch": 0.4740140671611155, "grad_norm": 1.308191220447757, "learning_rate": 2.703561740327156e-05, "loss": 0.7073, "num_input_tokens_seen": 781107264, "step": 4330 }, { "epoch": 0.4741235392320534, "grad_norm": 1.3019810547395207, "learning_rate": 2.7027047212463198e-05, "loss": 0.7422, "num_input_tokens_seen": 781264960, "step": 4331 }, { "epoch": 0.47423301130299134, "grad_norm": 1.2380446921063513, "learning_rate": 2.7018476781859027e-05, "loss": 0.7077, "num_input_tokens_seen": 781456032, "step": 4332 }, { "epoch": 0.4743424833739292, "grad_norm": 1.215838647420738, "learning_rate": 2.7009906112472904e-05, "loss": 0.6652, "num_input_tokens_seen": 781644192, "step": 4333 }, { "epoch": 0.4744519554448671, "grad_norm": 1.2192101710221919, "learning_rate": 2.700133520531874e-05, "loss": 0.8589, "num_input_tokens_seen": 781852960, "step": 4334 }, { "epoch": 0.47456142751580505, "grad_norm": 1.266635256093387, "learning_rate": 2.6992764061410446e-05, "loss": 0.7215, "num_input_tokens_seen": 782022528, "step": 4335 }, { "epoch": 0.4746708995867429, "grad_norm": 1.266238593664885, "learning_rate": 2.6984192681761972e-05, "loss": 0.7996, "num_input_tokens_seen": 782194784, "step": 4336 }, { "epoch": 0.47478037165768083, "grad_norm": 1.3154649850121087, "learning_rate": 2.6975621067387296e-05, "loss": 0.9537, "num_input_tokens_seen": 782408480, "step": 4337 }, { "epoch": 0.47488984372861875, "grad_norm": 1.2027482768212745, "learning_rate": 2.6967049219300427e-05, "loss": 0.5328, "num_input_tokens_seen": 782588352, "step": 4338 }, { "epoch": 0.4749993157995566, "grad_norm": 1.1932410947929737, "learning_rate": 2.6958477138515393e-05, "loss": 0.9314, "num_input_tokens_seen": 782750080, "step": 4339 }, { "epoch": 0.47510878787049454, "grad_norm": 1.391154670568247, "learning_rate": 2.6949904826046258e-05, "loss": 0.6225, "num_input_tokens_seen": 782889632, "step": 4340 }, { "epoch": 0.47521825994143246, "grad_norm": 1.6605500595647318, "learning_rate": 2.6941332282907107e-05, "loss": 0.7861, "num_input_tokens_seen": 783088768, "step": 4341 }, { "epoch": 0.47532773201237033, "grad_norm": 1.2476690971272024, "learning_rate": 2.693275951011206e-05, "loss": 0.7144, "num_input_tokens_seen": 783261920, "step": 4342 }, { "epoch": 0.47543720408330825, "grad_norm": 1.2042599068856417, "learning_rate": 2.692418650867526e-05, "loss": 0.658, "num_input_tokens_seen": 783433056, "step": 4343 }, { "epoch": 0.4755466761542462, "grad_norm": 1.2915715162034973, "learning_rate": 2.6915613279610874e-05, "loss": 0.9023, "num_input_tokens_seen": 783623904, "step": 4344 }, { "epoch": 0.47565614822518404, "grad_norm": 1.2821938829404904, "learning_rate": 2.6907039823933093e-05, "loss": 0.5613, "num_input_tokens_seen": 783771520, "step": 4345 }, { "epoch": 0.47576562029612196, "grad_norm": 1.0964858579336807, "learning_rate": 2.6898466142656154e-05, "loss": 0.6151, "num_input_tokens_seen": 783918240, "step": 4346 }, { "epoch": 0.4758750923670598, "grad_norm": 1.1909571066818716, "learning_rate": 2.6889892236794294e-05, "loss": 0.5388, "num_input_tokens_seen": 784084672, "step": 4347 }, { "epoch": 0.47598456443799775, "grad_norm": 1.3339261535181983, "learning_rate": 2.68813181073618e-05, "loss": 0.6254, "num_input_tokens_seen": 784248416, "step": 4348 }, { "epoch": 0.47609403650893567, "grad_norm": 1.1534206207867603, "learning_rate": 2.687274375537297e-05, "loss": 0.5564, "num_input_tokens_seen": 784416864, "step": 4349 }, { "epoch": 0.47620350857987354, "grad_norm": 1.4026825052060325, "learning_rate": 2.686416918184213e-05, "loss": 0.8855, "num_input_tokens_seen": 784620032, "step": 4350 }, { "epoch": 0.47631298065081146, "grad_norm": 1.2786167346054336, "learning_rate": 2.6855594387783638e-05, "loss": 0.7481, "num_input_tokens_seen": 784818048, "step": 4351 }, { "epoch": 0.4764224527217494, "grad_norm": 1.3043401564151451, "learning_rate": 2.6847019374211886e-05, "loss": 0.8672, "num_input_tokens_seen": 785028608, "step": 4352 }, { "epoch": 0.47653192479268724, "grad_norm": 1.2167720883999817, "learning_rate": 2.6838444142141267e-05, "loss": 0.6204, "num_input_tokens_seen": 785206240, "step": 4353 }, { "epoch": 0.47664139686362517, "grad_norm": 1.3053228692868255, "learning_rate": 2.6829868692586218e-05, "loss": 0.649, "num_input_tokens_seen": 785376032, "step": 4354 }, { "epoch": 0.4767508689345631, "grad_norm": 1.2870137898403644, "learning_rate": 2.6821293026561206e-05, "loss": 0.7916, "num_input_tokens_seen": 785576064, "step": 4355 }, { "epoch": 0.47686034100550095, "grad_norm": 1.2666890416344163, "learning_rate": 2.6812717145080713e-05, "loss": 0.7191, "num_input_tokens_seen": 785761088, "step": 4356 }, { "epoch": 0.4769698130764389, "grad_norm": 1.174995641497381, "learning_rate": 2.6804141049159243e-05, "loss": 0.5745, "num_input_tokens_seen": 785963808, "step": 4357 }, { "epoch": 0.4770792851473768, "grad_norm": 1.2691628728614923, "learning_rate": 2.6795564739811335e-05, "loss": 0.6032, "num_input_tokens_seen": 786128672, "step": 4358 }, { "epoch": 0.47718875721831466, "grad_norm": 1.2199820135698058, "learning_rate": 2.6786988218051556e-05, "loss": 0.8609, "num_input_tokens_seen": 786316832, "step": 4359 }, { "epoch": 0.4772982292892526, "grad_norm": 1.1696606887406613, "learning_rate": 2.6778411484894478e-05, "loss": 0.5873, "num_input_tokens_seen": 786475872, "step": 4360 }, { "epoch": 0.4774077013601905, "grad_norm": 1.3189414314790235, "learning_rate": 2.6769834541354727e-05, "loss": 0.7035, "num_input_tokens_seen": 786682400, "step": 4361 }, { "epoch": 0.47751717343112837, "grad_norm": 1.2947044132425276, "learning_rate": 2.6761257388446924e-05, "loss": 0.8495, "num_input_tokens_seen": 786849952, "step": 4362 }, { "epoch": 0.4776266455020663, "grad_norm": 1.332737575987591, "learning_rate": 2.675268002718575e-05, "loss": 0.8792, "num_input_tokens_seen": 787042816, "step": 4363 }, { "epoch": 0.47773611757300416, "grad_norm": 1.1524418675952122, "learning_rate": 2.674410245858588e-05, "loss": 0.5287, "num_input_tokens_seen": 787210144, "step": 4364 }, { "epoch": 0.4778455896439421, "grad_norm": 1.2503616286743777, "learning_rate": 2.6735524683662017e-05, "loss": 0.5813, "num_input_tokens_seen": 787370080, "step": 4365 }, { "epoch": 0.47795506171488, "grad_norm": 1.4101696911961126, "learning_rate": 2.6726946703428908e-05, "loss": 0.9046, "num_input_tokens_seen": 787563168, "step": 4366 }, { "epoch": 0.47806453378581787, "grad_norm": 1.203256624626408, "learning_rate": 2.6718368518901295e-05, "loss": 0.648, "num_input_tokens_seen": 787731840, "step": 4367 }, { "epoch": 0.4781740058567558, "grad_norm": 1.237420345401547, "learning_rate": 2.670979013109398e-05, "loss": 0.7588, "num_input_tokens_seen": 787943744, "step": 4368 }, { "epoch": 0.4782834779276937, "grad_norm": 1.118073705495624, "learning_rate": 2.6701211541021757e-05, "loss": 0.715, "num_input_tokens_seen": 788124736, "step": 4369 }, { "epoch": 0.4783929499986316, "grad_norm": 1.3492065358451195, "learning_rate": 2.6692632749699463e-05, "loss": 0.9237, "num_input_tokens_seen": 788331040, "step": 4370 }, { "epoch": 0.4785024220695695, "grad_norm": 1.226602298552547, "learning_rate": 2.6684053758141948e-05, "loss": 0.5633, "num_input_tokens_seen": 788517184, "step": 4371 }, { "epoch": 0.4786118941405074, "grad_norm": 1.3493959559005582, "learning_rate": 2.6675474567364096e-05, "loss": 0.7447, "num_input_tokens_seen": 788688096, "step": 4372 }, { "epoch": 0.4787213662114453, "grad_norm": 1.2108448116146122, "learning_rate": 2.666689517838081e-05, "loss": 0.6098, "num_input_tokens_seen": 788846464, "step": 4373 }, { "epoch": 0.4788308382823832, "grad_norm": 1.1037213969169617, "learning_rate": 2.6658315592206995e-05, "loss": 0.5732, "num_input_tokens_seen": 789037088, "step": 4374 }, { "epoch": 0.47894031035332113, "grad_norm": 1.2022125945956357, "learning_rate": 2.664973580985763e-05, "loss": 0.6179, "num_input_tokens_seen": 789227264, "step": 4375 }, { "epoch": 0.479049782424259, "grad_norm": 1.411478875305167, "learning_rate": 2.6641155832347668e-05, "loss": 0.8444, "num_input_tokens_seen": 789424160, "step": 4376 }, { "epoch": 0.4791592544951969, "grad_norm": 1.2571423089735272, "learning_rate": 2.663257566069211e-05, "loss": 0.643, "num_input_tokens_seen": 789584992, "step": 4377 }, { "epoch": 0.47926872656613484, "grad_norm": 1.2997142551068595, "learning_rate": 2.6623995295905974e-05, "loss": 0.863, "num_input_tokens_seen": 789774720, "step": 4378 }, { "epoch": 0.4793781986370727, "grad_norm": 1.2931769626920606, "learning_rate": 2.6615414739004297e-05, "loss": 0.5948, "num_input_tokens_seen": 789935328, "step": 4379 }, { "epoch": 0.4794876707080106, "grad_norm": 1.2997511717271375, "learning_rate": 2.6606833991002146e-05, "loss": 0.7567, "num_input_tokens_seen": 790125056, "step": 4380 }, { "epoch": 0.4795971427789485, "grad_norm": 1.2871349835778734, "learning_rate": 2.6598253052914596e-05, "loss": 0.6668, "num_input_tokens_seen": 790312320, "step": 4381 }, { "epoch": 0.4797066148498864, "grad_norm": 1.218511851591595, "learning_rate": 2.6589671925756777e-05, "loss": 0.7412, "num_input_tokens_seen": 790492640, "step": 4382 }, { "epoch": 0.47981608692082434, "grad_norm": 1.3942395073391725, "learning_rate": 2.6581090610543796e-05, "loss": 0.6251, "num_input_tokens_seen": 790665568, "step": 4383 }, { "epoch": 0.4799255589917622, "grad_norm": 1.285629834512347, "learning_rate": 2.6572509108290826e-05, "loss": 0.6585, "num_input_tokens_seen": 790838272, "step": 4384 }, { "epoch": 0.4800350310627001, "grad_norm": 1.297632846242284, "learning_rate": 2.6563927420013036e-05, "loss": 0.7041, "num_input_tokens_seen": 791025536, "step": 4385 }, { "epoch": 0.48014450313363805, "grad_norm": 1.2224006263737546, "learning_rate": 2.6555345546725625e-05, "loss": 0.7529, "num_input_tokens_seen": 791225792, "step": 4386 }, { "epoch": 0.4802539752045759, "grad_norm": 1.2233133226225348, "learning_rate": 2.6546763489443806e-05, "loss": 0.6692, "num_input_tokens_seen": 791427616, "step": 4387 }, { "epoch": 0.48036344727551383, "grad_norm": 1.3688510474264861, "learning_rate": 2.6538181249182813e-05, "loss": 0.7764, "num_input_tokens_seen": 791612416, "step": 4388 }, { "epoch": 0.48047291934645175, "grad_norm": 1.275024501714708, "learning_rate": 2.652959882695793e-05, "loss": 0.6676, "num_input_tokens_seen": 791777952, "step": 4389 }, { "epoch": 0.4805823914173896, "grad_norm": 1.0914354084245845, "learning_rate": 2.6521016223784427e-05, "loss": 0.5715, "num_input_tokens_seen": 791954240, "step": 4390 }, { "epoch": 0.48069186348832754, "grad_norm": 1.3224354667278355, "learning_rate": 2.6512433440677613e-05, "loss": 0.6608, "num_input_tokens_seen": 792132992, "step": 4391 }, { "epoch": 0.48080133555926546, "grad_norm": 1.1836136920117664, "learning_rate": 2.6503850478652815e-05, "loss": 0.5923, "num_input_tokens_seen": 792313536, "step": 4392 }, { "epoch": 0.48091080763020333, "grad_norm": 1.2689560746330601, "learning_rate": 2.6495267338725375e-05, "loss": 0.6078, "num_input_tokens_seen": 792500576, "step": 4393 }, { "epoch": 0.48102027970114125, "grad_norm": 1.0164211380591677, "learning_rate": 2.6486684021910667e-05, "loss": 0.5514, "num_input_tokens_seen": 792689632, "step": 4394 }, { "epoch": 0.4811297517720792, "grad_norm": 1.3368397393197782, "learning_rate": 2.647810052922409e-05, "loss": 0.6569, "num_input_tokens_seen": 792885184, "step": 4395 }, { "epoch": 0.48123922384301704, "grad_norm": 1.1165450055950938, "learning_rate": 2.6469516861681042e-05, "loss": 0.5656, "num_input_tokens_seen": 793107840, "step": 4396 }, { "epoch": 0.48134869591395496, "grad_norm": 1.216737356919299, "learning_rate": 2.6460933020296962e-05, "loss": 0.6172, "num_input_tokens_seen": 793283232, "step": 4397 }, { "epoch": 0.4814581679848929, "grad_norm": 1.206465999258555, "learning_rate": 2.6452349006087295e-05, "loss": 0.6068, "num_input_tokens_seen": 793481696, "step": 4398 }, { "epoch": 0.48156764005583075, "grad_norm": 1.2832244453226682, "learning_rate": 2.644376482006752e-05, "loss": 0.6319, "num_input_tokens_seen": 793630880, "step": 4399 }, { "epoch": 0.48167711212676867, "grad_norm": 1.4106762210063284, "learning_rate": 2.6435180463253123e-05, "loss": 0.7313, "num_input_tokens_seen": 793814784, "step": 4400 }, { "epoch": 0.48178658419770654, "grad_norm": 1.3832573040594087, "learning_rate": 2.6426595936659616e-05, "loss": 0.9118, "num_input_tokens_seen": 793982336, "step": 4401 }, { "epoch": 0.48189605626864446, "grad_norm": 1.2218573644589787, "learning_rate": 2.6418011241302543e-05, "loss": 0.8365, "num_input_tokens_seen": 794196480, "step": 4402 }, { "epoch": 0.4820055283395824, "grad_norm": 1.3881759079791853, "learning_rate": 2.6409426378197456e-05, "loss": 0.6519, "num_input_tokens_seen": 794353504, "step": 4403 }, { "epoch": 0.48211500041052024, "grad_norm": 1.2866074736885833, "learning_rate": 2.6400841348359913e-05, "loss": 0.7739, "num_input_tokens_seen": 794543232, "step": 4404 }, { "epoch": 0.48222447248145817, "grad_norm": 1.21560316482978, "learning_rate": 2.6392256152805517e-05, "loss": 0.6613, "num_input_tokens_seen": 794742592, "step": 4405 }, { "epoch": 0.4823339445523961, "grad_norm": 1.1821237128300055, "learning_rate": 2.6383670792549885e-05, "loss": 0.6293, "num_input_tokens_seen": 794908800, "step": 4406 }, { "epoch": 0.48244341662333395, "grad_norm": 1.1757847478567416, "learning_rate": 2.6375085268608645e-05, "loss": 0.769, "num_input_tokens_seen": 795058432, "step": 4407 }, { "epoch": 0.4825528886942719, "grad_norm": 1.2007016137544424, "learning_rate": 2.636649958199744e-05, "loss": 0.9351, "num_input_tokens_seen": 795263392, "step": 4408 }, { "epoch": 0.4826623607652098, "grad_norm": 1.1254793311567943, "learning_rate": 2.635791373373195e-05, "loss": 0.62, "num_input_tokens_seen": 795431616, "step": 4409 }, { "epoch": 0.48277183283614766, "grad_norm": 1.281124062280147, "learning_rate": 2.634932772482786e-05, "loss": 0.8784, "num_input_tokens_seen": 795633440, "step": 4410 }, { "epoch": 0.4828813049070856, "grad_norm": 1.1951160643488603, "learning_rate": 2.634074155630088e-05, "loss": 0.6956, "num_input_tokens_seen": 795817120, "step": 4411 }, { "epoch": 0.4829907769780235, "grad_norm": 1.2187421749548475, "learning_rate": 2.6332155229166738e-05, "loss": 0.7658, "num_input_tokens_seen": 796006848, "step": 4412 }, { "epoch": 0.48310024904896137, "grad_norm": 1.3450913942051106, "learning_rate": 2.6323568744441173e-05, "loss": 0.9806, "num_input_tokens_seen": 796193440, "step": 4413 }, { "epoch": 0.4832097211198993, "grad_norm": 1.8922198796041556, "learning_rate": 2.631498210313997e-05, "loss": 1.08, "num_input_tokens_seen": 796393696, "step": 4414 }, { "epoch": 0.4833191931908372, "grad_norm": 1.2065818962508221, "learning_rate": 2.630639530627888e-05, "loss": 0.8097, "num_input_tokens_seen": 796585440, "step": 4415 }, { "epoch": 0.4834286652617751, "grad_norm": 1.2974899856092716, "learning_rate": 2.6297808354873733e-05, "loss": 0.7643, "num_input_tokens_seen": 796793760, "step": 4416 }, { "epoch": 0.483538137332713, "grad_norm": 1.0828927807132496, "learning_rate": 2.6289221249940337e-05, "loss": 0.5665, "num_input_tokens_seen": 796984384, "step": 4417 }, { "epoch": 0.48364760940365087, "grad_norm": 1.1645110629364128, "learning_rate": 2.6280633992494536e-05, "loss": 0.4882, "num_input_tokens_seen": 797142080, "step": 4418 }, { "epoch": 0.4837570814745888, "grad_norm": 1.354566114150572, "learning_rate": 2.627204658355218e-05, "loss": 0.8295, "num_input_tokens_seen": 797341216, "step": 4419 }, { "epoch": 0.4838665535455267, "grad_norm": 1.185838088541869, "learning_rate": 2.6263459024129144e-05, "loss": 0.5711, "num_input_tokens_seen": 797519072, "step": 4420 }, { "epoch": 0.4839760256164646, "grad_norm": 1.1749995475118862, "learning_rate": 2.6254871315241318e-05, "loss": 0.6282, "num_input_tokens_seen": 797736352, "step": 4421 }, { "epoch": 0.4840854976874025, "grad_norm": 1.313454417215731, "learning_rate": 2.6246283457904612e-05, "loss": 0.6268, "num_input_tokens_seen": 797910400, "step": 4422 }, { "epoch": 0.4841949697583404, "grad_norm": 1.2434597098338827, "learning_rate": 2.6237695453134964e-05, "loss": 0.7688, "num_input_tokens_seen": 798082880, "step": 4423 }, { "epoch": 0.4843044418292783, "grad_norm": 1.3370079971650646, "learning_rate": 2.6229107301948308e-05, "loss": 0.7146, "num_input_tokens_seen": 798269920, "step": 4424 }, { "epoch": 0.4844139139002162, "grad_norm": 1.517151683066381, "learning_rate": 2.62205190053606e-05, "loss": 0.7889, "num_input_tokens_seen": 798445536, "step": 4425 }, { "epoch": 0.48452338597115413, "grad_norm": 1.3503893131937195, "learning_rate": 2.6211930564387832e-05, "loss": 0.8621, "num_input_tokens_seen": 798624512, "step": 4426 }, { "epoch": 0.484632858042092, "grad_norm": 1.1744614097228405, "learning_rate": 2.6203341980045996e-05, "loss": 0.5768, "num_input_tokens_seen": 798803264, "step": 4427 }, { "epoch": 0.4847423301130299, "grad_norm": 1.2681933116609538, "learning_rate": 2.6194753253351102e-05, "loss": 0.6256, "num_input_tokens_seen": 798996128, "step": 4428 }, { "epoch": 0.48485180218396784, "grad_norm": 1.2964352701007318, "learning_rate": 2.6186164385319186e-05, "loss": 0.7118, "num_input_tokens_seen": 799178688, "step": 4429 }, { "epoch": 0.4849612742549057, "grad_norm": 1.2641536024387134, "learning_rate": 2.6177575376966284e-05, "loss": 0.6527, "num_input_tokens_seen": 799344224, "step": 4430 }, { "epoch": 0.4850707463258436, "grad_norm": 1.3665159483508298, "learning_rate": 2.6168986229308473e-05, "loss": 0.8343, "num_input_tokens_seen": 799533728, "step": 4431 }, { "epoch": 0.48518021839678155, "grad_norm": 1.326222392090584, "learning_rate": 2.6160396943361827e-05, "loss": 0.7435, "num_input_tokens_seen": 799706432, "step": 4432 }, { "epoch": 0.4852896904677194, "grad_norm": 1.3648577212845212, "learning_rate": 2.6151807520142436e-05, "loss": 0.6554, "num_input_tokens_seen": 799876672, "step": 4433 }, { "epoch": 0.48539916253865734, "grad_norm": 1.2112641909056372, "learning_rate": 2.6143217960666416e-05, "loss": 0.7752, "num_input_tokens_seen": 800091936, "step": 4434 }, { "epoch": 0.4855086346095952, "grad_norm": 1.2009844874159166, "learning_rate": 2.6134628265949903e-05, "loss": 0.9503, "num_input_tokens_seen": 800296448, "step": 4435 }, { "epoch": 0.4856181066805331, "grad_norm": 1.4932857286721972, "learning_rate": 2.6126038437009025e-05, "loss": 0.8758, "num_input_tokens_seen": 800480352, "step": 4436 }, { "epoch": 0.48572757875147105, "grad_norm": 1.1747311182998958, "learning_rate": 2.6117448474859958e-05, "loss": 0.5211, "num_input_tokens_seen": 800652832, "step": 4437 }, { "epoch": 0.4858370508224089, "grad_norm": 1.3072980244799826, "learning_rate": 2.6108858380518874e-05, "loss": 0.6511, "num_input_tokens_seen": 800797312, "step": 4438 }, { "epoch": 0.48594652289334683, "grad_norm": 1.32313623883966, "learning_rate": 2.6100268155001968e-05, "loss": 0.6788, "num_input_tokens_seen": 800938432, "step": 4439 }, { "epoch": 0.48605599496428475, "grad_norm": 1.3428984760674318, "learning_rate": 2.6091677799325436e-05, "loss": 0.7295, "num_input_tokens_seen": 801115616, "step": 4440 }, { "epoch": 0.4861654670352226, "grad_norm": 1.2327648719319104, "learning_rate": 2.608308731450551e-05, "loss": 0.701, "num_input_tokens_seen": 801276896, "step": 4441 }, { "epoch": 0.48627493910616054, "grad_norm": 1.3238768459707362, "learning_rate": 2.607449670155842e-05, "loss": 0.8499, "num_input_tokens_seen": 801458560, "step": 4442 }, { "epoch": 0.48638441117709846, "grad_norm": 1.2204123188764897, "learning_rate": 2.6065905961500432e-05, "loss": 0.5845, "num_input_tokens_seen": 801611776, "step": 4443 }, { "epoch": 0.48649388324803633, "grad_norm": 1.3153308131745975, "learning_rate": 2.60573150953478e-05, "loss": 0.7975, "num_input_tokens_seen": 801803296, "step": 4444 }, { "epoch": 0.48660335531897425, "grad_norm": 1.1824728817078327, "learning_rate": 2.6048724104116818e-05, "loss": 0.7043, "num_input_tokens_seen": 801974656, "step": 4445 }, { "epoch": 0.4867128273899122, "grad_norm": 1.2057292545502578, "learning_rate": 2.6040132988823775e-05, "loss": 0.7022, "num_input_tokens_seen": 802158336, "step": 4446 }, { "epoch": 0.48682229946085004, "grad_norm": 1.4058485941726597, "learning_rate": 2.603154175048499e-05, "loss": 0.887, "num_input_tokens_seen": 802326560, "step": 4447 }, { "epoch": 0.48693177153178796, "grad_norm": 1.3472211052320815, "learning_rate": 2.602295039011679e-05, "loss": 0.7277, "num_input_tokens_seen": 802533312, "step": 4448 }, { "epoch": 0.4870412436027259, "grad_norm": 1.3328691906516663, "learning_rate": 2.6014358908735504e-05, "loss": 0.5252, "num_input_tokens_seen": 802682720, "step": 4449 }, { "epoch": 0.48715071567366375, "grad_norm": 1.3599142999855005, "learning_rate": 2.600576730735751e-05, "loss": 0.667, "num_input_tokens_seen": 802874912, "step": 4450 }, { "epoch": 0.48726018774460167, "grad_norm": 1.1965288941672183, "learning_rate": 2.5997175586999163e-05, "loss": 0.7728, "num_input_tokens_seen": 803060384, "step": 4451 }, { "epoch": 0.48736965981553954, "grad_norm": 1.397647655923349, "learning_rate": 2.5988583748676854e-05, "loss": 0.9502, "num_input_tokens_seen": 803274304, "step": 4452 }, { "epoch": 0.48747913188647746, "grad_norm": 1.1878902898183976, "learning_rate": 2.597999179340697e-05, "loss": 0.6277, "num_input_tokens_seen": 803426848, "step": 4453 }, { "epoch": 0.4875886039574154, "grad_norm": 1.2759738307230535, "learning_rate": 2.5971399722205936e-05, "loss": 0.8243, "num_input_tokens_seen": 803614784, "step": 4454 }, { "epoch": 0.48769807602835324, "grad_norm": 1.2278093451665484, "learning_rate": 2.596280753609017e-05, "loss": 0.7554, "num_input_tokens_seen": 803776512, "step": 4455 }, { "epoch": 0.48780754809929117, "grad_norm": 1.4316079837257765, "learning_rate": 2.5954215236076113e-05, "loss": 0.887, "num_input_tokens_seen": 803953920, "step": 4456 }, { "epoch": 0.4879170201702291, "grad_norm": 1.2404785396498594, "learning_rate": 2.594562282318021e-05, "loss": 0.8002, "num_input_tokens_seen": 804149472, "step": 4457 }, { "epoch": 0.48802649224116695, "grad_norm": 1.079876955140308, "learning_rate": 2.5937030298418945e-05, "loss": 0.7096, "num_input_tokens_seen": 804347936, "step": 4458 }, { "epoch": 0.4881359643121049, "grad_norm": 1.5053430604752756, "learning_rate": 2.5928437662808785e-05, "loss": 0.8319, "num_input_tokens_seen": 804512352, "step": 4459 }, { "epoch": 0.4882454363830428, "grad_norm": 1.3584071693167192, "learning_rate": 2.5919844917366225e-05, "loss": 0.8599, "num_input_tokens_seen": 804707008, "step": 4460 }, { "epoch": 0.48835490845398066, "grad_norm": 1.1634302150222615, "learning_rate": 2.5911252063107772e-05, "loss": 0.6703, "num_input_tokens_seen": 804879936, "step": 4461 }, { "epoch": 0.4884643805249186, "grad_norm": 1.2121628007292593, "learning_rate": 2.5902659101049933e-05, "loss": 0.961, "num_input_tokens_seen": 805064736, "step": 4462 }, { "epoch": 0.4885738525958565, "grad_norm": 1.1800688575600446, "learning_rate": 2.589406603220925e-05, "loss": 0.6585, "num_input_tokens_seen": 805259392, "step": 4463 }, { "epoch": 0.48868332466679437, "grad_norm": 1.3468302973787896, "learning_rate": 2.5885472857602273e-05, "loss": 0.9244, "num_input_tokens_seen": 805432992, "step": 4464 }, { "epoch": 0.4887927967377323, "grad_norm": 1.3231252921297367, "learning_rate": 2.5876879578245543e-05, "loss": 0.7632, "num_input_tokens_seen": 805600320, "step": 4465 }, { "epoch": 0.4889022688086702, "grad_norm": 1.3018338180526112, "learning_rate": 2.5868286195155638e-05, "loss": 0.7125, "num_input_tokens_seen": 805756896, "step": 4466 }, { "epoch": 0.4890117408796081, "grad_norm": 1.3985322888178917, "learning_rate": 2.585969270934913e-05, "loss": 0.6898, "num_input_tokens_seen": 805904288, "step": 4467 }, { "epoch": 0.489121212950546, "grad_norm": 1.3297287178150954, "learning_rate": 2.5851099121842624e-05, "loss": 0.6502, "num_input_tokens_seen": 806088864, "step": 4468 }, { "epoch": 0.48923068502148387, "grad_norm": 1.0852610175666013, "learning_rate": 2.5842505433652713e-05, "loss": 0.5917, "num_input_tokens_seen": 806281280, "step": 4469 }, { "epoch": 0.4893401570924218, "grad_norm": 1.2513179030462034, "learning_rate": 2.583391164579603e-05, "loss": 0.8224, "num_input_tokens_seen": 806451072, "step": 4470 }, { "epoch": 0.4894496291633597, "grad_norm": 1.1744518685675145, "learning_rate": 2.5825317759289185e-05, "loss": 0.6617, "num_input_tokens_seen": 806652448, "step": 4471 }, { "epoch": 0.4895591012342976, "grad_norm": 1.22622734398867, "learning_rate": 2.581672377514883e-05, "loss": 0.7852, "num_input_tokens_seen": 806852704, "step": 4472 }, { "epoch": 0.4896685733052355, "grad_norm": 1.2184578961223087, "learning_rate": 2.580812969439162e-05, "loss": 0.7199, "num_input_tokens_seen": 807056544, "step": 4473 }, { "epoch": 0.4897780453761734, "grad_norm": 1.2638530336028713, "learning_rate": 2.5799535518034205e-05, "loss": 0.7812, "num_input_tokens_seen": 807243584, "step": 4474 }, { "epoch": 0.4898875174471113, "grad_norm": 1.3513437876940744, "learning_rate": 2.579094124709327e-05, "loss": 0.6906, "num_input_tokens_seen": 807424352, "step": 4475 }, { "epoch": 0.4899969895180492, "grad_norm": 1.3102541282456304, "learning_rate": 2.578234688258549e-05, "loss": 0.6976, "num_input_tokens_seen": 807598176, "step": 4476 }, { "epoch": 0.49010646158898713, "grad_norm": 1.3291703990457362, "learning_rate": 2.5773752425527576e-05, "loss": 0.6548, "num_input_tokens_seen": 807757440, "step": 4477 }, { "epoch": 0.490215933659925, "grad_norm": 1.2660291050161228, "learning_rate": 2.576515787693622e-05, "loss": 0.6487, "num_input_tokens_seen": 807940896, "step": 4478 }, { "epoch": 0.4903254057308629, "grad_norm": 1.3810888636571985, "learning_rate": 2.5756563237828158e-05, "loss": 0.793, "num_input_tokens_seen": 808070368, "step": 4479 }, { "epoch": 0.49043487780180084, "grad_norm": 1.2639098142398448, "learning_rate": 2.574796850922011e-05, "loss": 0.7771, "num_input_tokens_seen": 808270624, "step": 4480 }, { "epoch": 0.4905443498727387, "grad_norm": 1.1680945899560509, "learning_rate": 2.573937369212882e-05, "loss": 0.6574, "num_input_tokens_seen": 808464832, "step": 4481 }, { "epoch": 0.4906538219436766, "grad_norm": 1.0879174165833916, "learning_rate": 2.5730778787571035e-05, "loss": 0.702, "num_input_tokens_seen": 808650528, "step": 4482 }, { "epoch": 0.49076329401461455, "grad_norm": 1.164683438788627, "learning_rate": 2.5722183796563508e-05, "loss": 0.6131, "num_input_tokens_seen": 808821664, "step": 4483 }, { "epoch": 0.4908727660855524, "grad_norm": 1.3588262743979234, "learning_rate": 2.571358872012303e-05, "loss": 0.9082, "num_input_tokens_seen": 809001984, "step": 4484 }, { "epoch": 0.49098223815649034, "grad_norm": 1.2052897510157063, "learning_rate": 2.5704993559266364e-05, "loss": 0.7736, "num_input_tokens_seen": 809177824, "step": 4485 }, { "epoch": 0.4910917102274282, "grad_norm": 1.259452177574701, "learning_rate": 2.569639831501031e-05, "loss": 0.8074, "num_input_tokens_seen": 809369120, "step": 4486 }, { "epoch": 0.4912011822983661, "grad_norm": 1.3487571752885898, "learning_rate": 2.5687802988371667e-05, "loss": 0.8023, "num_input_tokens_seen": 809534880, "step": 4487 }, { "epoch": 0.49131065436930405, "grad_norm": 1.2181148830441906, "learning_rate": 2.5679207580367242e-05, "loss": 0.7795, "num_input_tokens_seen": 809707584, "step": 4488 }, { "epoch": 0.4914201264402419, "grad_norm": 1.2999114373191147, "learning_rate": 2.5670612092013857e-05, "loss": 0.8149, "num_input_tokens_seen": 809882528, "step": 4489 }, { "epoch": 0.49152959851117983, "grad_norm": 1.169791900598509, "learning_rate": 2.5662016524328346e-05, "loss": 0.557, "num_input_tokens_seen": 810067776, "step": 4490 }, { "epoch": 0.49163907058211775, "grad_norm": 1.1927368238397347, "learning_rate": 2.565342087832755e-05, "loss": 0.682, "num_input_tokens_seen": 810261536, "step": 4491 }, { "epoch": 0.4917485426530556, "grad_norm": 1.2546408809680274, "learning_rate": 2.5644825155028314e-05, "loss": 0.7489, "num_input_tokens_seen": 810443648, "step": 4492 }, { "epoch": 0.49185801472399354, "grad_norm": 1.2167478401505076, "learning_rate": 2.563622935544749e-05, "loss": 0.6142, "num_input_tokens_seen": 810632032, "step": 4493 }, { "epoch": 0.49196748679493146, "grad_norm": 1.1629782113537717, "learning_rate": 2.5627633480601953e-05, "loss": 0.6247, "num_input_tokens_seen": 810838112, "step": 4494 }, { "epoch": 0.49207695886586933, "grad_norm": 1.2598367833585131, "learning_rate": 2.561903753150857e-05, "loss": 0.7199, "num_input_tokens_seen": 811036128, "step": 4495 }, { "epoch": 0.49218643093680725, "grad_norm": 1.332301942946247, "learning_rate": 2.561044150918423e-05, "loss": 0.7065, "num_input_tokens_seen": 811214208, "step": 4496 }, { "epoch": 0.4922959030077452, "grad_norm": 1.3260104680892375, "learning_rate": 2.5601845414645832e-05, "loss": 0.6394, "num_input_tokens_seen": 811364064, "step": 4497 }, { "epoch": 0.49240537507868304, "grad_norm": 1.369039229748376, "learning_rate": 2.5593249248910272e-05, "loss": 0.8001, "num_input_tokens_seen": 811551328, "step": 4498 }, { "epoch": 0.49251484714962096, "grad_norm": 1.227859540209589, "learning_rate": 2.558465301299445e-05, "loss": 0.5506, "num_input_tokens_seen": 811716640, "step": 4499 }, { "epoch": 0.4926243192205589, "grad_norm": 1.277706549681737, "learning_rate": 2.5576056707915308e-05, "loss": 0.7206, "num_input_tokens_seen": 811911520, "step": 4500 }, { "epoch": 0.49273379129149675, "grad_norm": 1.3343197581092765, "learning_rate": 2.556746033468975e-05, "loss": 0.8743, "num_input_tokens_seen": 812097440, "step": 4501 }, { "epoch": 0.49284326336243467, "grad_norm": 1.3625798648210485, "learning_rate": 2.5558863894334722e-05, "loss": 0.8324, "num_input_tokens_seen": 812271040, "step": 4502 }, { "epoch": 0.49295273543337254, "grad_norm": 1.2362281795076413, "learning_rate": 2.5550267387867162e-05, "loss": 0.6287, "num_input_tokens_seen": 812446432, "step": 4503 }, { "epoch": 0.49306220750431046, "grad_norm": 1.2112488869267282, "learning_rate": 2.5541670816304026e-05, "loss": 0.6071, "num_input_tokens_seen": 812626528, "step": 4504 }, { "epoch": 0.4931716795752484, "grad_norm": 1.1709942368871318, "learning_rate": 2.5533074180662265e-05, "loss": 0.555, "num_input_tokens_seen": 812805728, "step": 4505 }, { "epoch": 0.49328115164618624, "grad_norm": 1.1981102607309966, "learning_rate": 2.5524477481958846e-05, "loss": 0.6928, "num_input_tokens_seen": 812972608, "step": 4506 }, { "epoch": 0.49339062371712417, "grad_norm": 1.311741200857602, "learning_rate": 2.551588072121075e-05, "loss": 0.7494, "num_input_tokens_seen": 813116192, "step": 4507 }, { "epoch": 0.4935000957880621, "grad_norm": 1.2468403460053812, "learning_rate": 2.5507283899434948e-05, "loss": 0.7713, "num_input_tokens_seen": 813316448, "step": 4508 }, { "epoch": 0.49360956785899995, "grad_norm": 1.165386394388972, "learning_rate": 2.549868701764842e-05, "loss": 0.6535, "num_input_tokens_seen": 813518496, "step": 4509 }, { "epoch": 0.4937190399299379, "grad_norm": 1.237728537061952, "learning_rate": 2.5490090076868177e-05, "loss": 0.6622, "num_input_tokens_seen": 813707104, "step": 4510 }, { "epoch": 0.4938285120008758, "grad_norm": 1.2404180435789798, "learning_rate": 2.5481493078111218e-05, "loss": 0.791, "num_input_tokens_seen": 813908928, "step": 4511 }, { "epoch": 0.49393798407181366, "grad_norm": 1.2287376700449184, "learning_rate": 2.547289602239455e-05, "loss": 0.7506, "num_input_tokens_seen": 814108288, "step": 4512 }, { "epoch": 0.4940474561427516, "grad_norm": 1.2418240189323093, "learning_rate": 2.5464298910735186e-05, "loss": 0.7561, "num_input_tokens_seen": 814300032, "step": 4513 }, { "epoch": 0.4941569282136895, "grad_norm": 1.1918356700023103, "learning_rate": 2.545570174415015e-05, "loss": 0.5613, "num_input_tokens_seen": 814487520, "step": 4514 }, { "epoch": 0.49426640028462737, "grad_norm": 1.3383293824634375, "learning_rate": 2.5447104523656466e-05, "loss": 0.8227, "num_input_tokens_seen": 814686208, "step": 4515 }, { "epoch": 0.4943758723555653, "grad_norm": 1.270457050502787, "learning_rate": 2.543850725027117e-05, "loss": 0.7865, "num_input_tokens_seen": 814892512, "step": 4516 }, { "epoch": 0.4944853444265032, "grad_norm": 1.2889216490579551, "learning_rate": 2.54299099250113e-05, "loss": 0.9123, "num_input_tokens_seen": 815059616, "step": 4517 }, { "epoch": 0.4945948164974411, "grad_norm": 1.1211555277852938, "learning_rate": 2.5421312548893917e-05, "loss": 0.6339, "num_input_tokens_seen": 815235904, "step": 4518 }, { "epoch": 0.494704288568379, "grad_norm": 1.2767608432462452, "learning_rate": 2.5412715122936064e-05, "loss": 0.6855, "num_input_tokens_seen": 815434592, "step": 4519 }, { "epoch": 0.49481376063931687, "grad_norm": 1.3511107467053436, "learning_rate": 2.5404117648154794e-05, "loss": 0.7103, "num_input_tokens_seen": 815609984, "step": 4520 }, { "epoch": 0.4949232327102548, "grad_norm": 1.1600615080607282, "learning_rate": 2.5395520125567185e-05, "loss": 0.5366, "num_input_tokens_seen": 815789408, "step": 4521 }, { "epoch": 0.4950327047811927, "grad_norm": 1.2713469212737245, "learning_rate": 2.53869225561903e-05, "loss": 0.8518, "num_input_tokens_seen": 815966368, "step": 4522 }, { "epoch": 0.4951421768521306, "grad_norm": 1.2312213836287773, "learning_rate": 2.537832494104121e-05, "loss": 0.9355, "num_input_tokens_seen": 816182528, "step": 4523 }, { "epoch": 0.4952516489230685, "grad_norm": 1.2007478291136293, "learning_rate": 2.536972728113701e-05, "loss": 0.8543, "num_input_tokens_seen": 816363296, "step": 4524 }, { "epoch": 0.4953611209940064, "grad_norm": 1.3096649340553956, "learning_rate": 2.536112957749478e-05, "loss": 0.8236, "num_input_tokens_seen": 816545184, "step": 4525 }, { "epoch": 0.4954705930649443, "grad_norm": 1.3253846114134915, "learning_rate": 2.535253183113161e-05, "loss": 0.8922, "num_input_tokens_seen": 816747680, "step": 4526 }, { "epoch": 0.4955800651358822, "grad_norm": 1.3786658058502, "learning_rate": 2.5343934043064598e-05, "loss": 0.7905, "num_input_tokens_seen": 816949952, "step": 4527 }, { "epoch": 0.49568953720682013, "grad_norm": 1.3621393356332017, "learning_rate": 2.5335336214310845e-05, "loss": 0.7685, "num_input_tokens_seen": 817106304, "step": 4528 }, { "epoch": 0.495799009277758, "grad_norm": 1.34960005628623, "learning_rate": 2.5326738345887462e-05, "loss": 0.7375, "num_input_tokens_seen": 817296256, "step": 4529 }, { "epoch": 0.4959084813486959, "grad_norm": 1.3628995519527747, "learning_rate": 2.531814043881155e-05, "loss": 0.7674, "num_input_tokens_seen": 817505024, "step": 4530 }, { "epoch": 0.49601795341963384, "grad_norm": 1.0944765130350897, "learning_rate": 2.5309542494100234e-05, "loss": 0.5189, "num_input_tokens_seen": 817702144, "step": 4531 }, { "epoch": 0.4961274254905717, "grad_norm": 1.361461397540675, "learning_rate": 2.5300944512770636e-05, "loss": 0.9222, "num_input_tokens_seen": 817887168, "step": 4532 }, { "epoch": 0.4962368975615096, "grad_norm": 1.2761128879315717, "learning_rate": 2.529234649583988e-05, "loss": 0.7377, "num_input_tokens_seen": 818062784, "step": 4533 }, { "epoch": 0.49634636963244755, "grad_norm": 1.309619148817333, "learning_rate": 2.528374844432509e-05, "loss": 0.8108, "num_input_tokens_seen": 818240864, "step": 4534 }, { "epoch": 0.4964558417033854, "grad_norm": 1.250556774118098, "learning_rate": 2.5275150359243405e-05, "loss": 0.6954, "num_input_tokens_seen": 818429696, "step": 4535 }, { "epoch": 0.49656531377432334, "grad_norm": 1.30955174238791, "learning_rate": 2.526655224161196e-05, "loss": 0.7044, "num_input_tokens_seen": 818623232, "step": 4536 }, { "epoch": 0.4966747858452612, "grad_norm": 1.3488416682672868, "learning_rate": 2.5257954092447878e-05, "loss": 0.7814, "num_input_tokens_seen": 818797504, "step": 4537 }, { "epoch": 0.4967842579161991, "grad_norm": 1.2213751568105564, "learning_rate": 2.5249355912768334e-05, "loss": 0.7245, "num_input_tokens_seen": 818972224, "step": 4538 }, { "epoch": 0.49689372998713704, "grad_norm": 1.3219314671353013, "learning_rate": 2.5240757703590462e-05, "loss": 0.6803, "num_input_tokens_seen": 819165536, "step": 4539 }, { "epoch": 0.4970032020580749, "grad_norm": 1.3443872581926848, "learning_rate": 2.523215946593141e-05, "loss": 0.9152, "num_input_tokens_seen": 819351680, "step": 4540 }, { "epoch": 0.49711267412901283, "grad_norm": 1.1649004077500345, "learning_rate": 2.5223561200808334e-05, "loss": 0.5306, "num_input_tokens_seen": 819517888, "step": 4541 }, { "epoch": 0.49722214619995075, "grad_norm": 1.1040397547115415, "learning_rate": 2.5214962909238397e-05, "loss": 0.593, "num_input_tokens_seen": 819709856, "step": 4542 }, { "epoch": 0.4973316182708886, "grad_norm": 1.430168196116579, "learning_rate": 2.5206364592238767e-05, "loss": 0.857, "num_input_tokens_seen": 819889728, "step": 4543 }, { "epoch": 0.49744109034182654, "grad_norm": 1.2920868508050043, "learning_rate": 2.5197766250826586e-05, "loss": 0.8211, "num_input_tokens_seen": 820062208, "step": 4544 }, { "epoch": 0.49755056241276446, "grad_norm": 1.263505597502579, "learning_rate": 2.518916788601905e-05, "loss": 0.6393, "num_input_tokens_seen": 820259328, "step": 4545 }, { "epoch": 0.49766003448370233, "grad_norm": 1.3303764557648128, "learning_rate": 2.5180569498833308e-05, "loss": 0.7956, "num_input_tokens_seen": 820425312, "step": 4546 }, { "epoch": 0.49776950655464025, "grad_norm": 1.2822148946447662, "learning_rate": 2.5171971090286546e-05, "loss": 0.6533, "num_input_tokens_seen": 820572704, "step": 4547 }, { "epoch": 0.4978789786255782, "grad_norm": 1.3538028472842272, "learning_rate": 2.5163372661395923e-05, "loss": 0.6965, "num_input_tokens_seen": 820751680, "step": 4548 }, { "epoch": 0.49798845069651604, "grad_norm": 1.2103596446820228, "learning_rate": 2.5154774213178634e-05, "loss": 0.7379, "num_input_tokens_seen": 820959776, "step": 4549 }, { "epoch": 0.49809792276745396, "grad_norm": 1.547755520605742, "learning_rate": 2.5146175746651847e-05, "loss": 0.9381, "num_input_tokens_seen": 821163168, "step": 4550 }, { "epoch": 0.4982073948383919, "grad_norm": 1.2905850752966148, "learning_rate": 2.5137577262832746e-05, "loss": 0.7848, "num_input_tokens_seen": 821354912, "step": 4551 }, { "epoch": 0.49831686690932975, "grad_norm": 1.4535796571942523, "learning_rate": 2.512897876273852e-05, "loss": 0.6358, "num_input_tokens_seen": 821543968, "step": 4552 }, { "epoch": 0.49842633898026767, "grad_norm": 1.198835578862967, "learning_rate": 2.5120380247386356e-05, "loss": 0.6543, "num_input_tokens_seen": 821714208, "step": 4553 }, { "epoch": 0.49853581105120554, "grad_norm": 1.339431966334711, "learning_rate": 2.5111781717793436e-05, "loss": 0.9203, "num_input_tokens_seen": 821902816, "step": 4554 }, { "epoch": 0.49864528312214346, "grad_norm": 1.2616659808860404, "learning_rate": 2.5103183174976953e-05, "loss": 0.7813, "num_input_tokens_seen": 822076416, "step": 4555 }, { "epoch": 0.4987547551930814, "grad_norm": 1.183201136167057, "learning_rate": 2.5094584619954105e-05, "loss": 0.6934, "num_input_tokens_seen": 822256288, "step": 4556 }, { "epoch": 0.49886422726401924, "grad_norm": 1.2309685070648384, "learning_rate": 2.508598605374206e-05, "loss": 0.7223, "num_input_tokens_seen": 822462368, "step": 4557 }, { "epoch": 0.49897369933495717, "grad_norm": 1.282325200443707, "learning_rate": 2.5077387477358044e-05, "loss": 0.7381, "num_input_tokens_seen": 822635296, "step": 4558 }, { "epoch": 0.4990831714058951, "grad_norm": 1.4584740252975543, "learning_rate": 2.5068788891819235e-05, "loss": 0.8709, "num_input_tokens_seen": 822812032, "step": 4559 }, { "epoch": 0.49919264347683295, "grad_norm": 1.4305823188837843, "learning_rate": 2.5060190298142828e-05, "loss": 0.7359, "num_input_tokens_seen": 822985632, "step": 4560 }, { "epoch": 0.4993021155477709, "grad_norm": 1.145059717445122, "learning_rate": 2.5051591697346028e-05, "loss": 0.5459, "num_input_tokens_seen": 823168192, "step": 4561 }, { "epoch": 0.4994115876187088, "grad_norm": 1.1744066992801199, "learning_rate": 2.504299309044602e-05, "loss": 0.5383, "num_input_tokens_seen": 823329472, "step": 4562 }, { "epoch": 0.49952105968964666, "grad_norm": 1.3122132760169232, "learning_rate": 2.503439447846002e-05, "loss": 0.6406, "num_input_tokens_seen": 823516736, "step": 4563 }, { "epoch": 0.4996305317605846, "grad_norm": 1.2508221306872063, "learning_rate": 2.5025795862405216e-05, "loss": 0.6389, "num_input_tokens_seen": 823698624, "step": 4564 }, { "epoch": 0.4997400038315225, "grad_norm": 1.173610267162333, "learning_rate": 2.5017197243298813e-05, "loss": 0.7223, "num_input_tokens_seen": 823886560, "step": 4565 }, { "epoch": 0.49984947590246037, "grad_norm": 1.1686426980439866, "learning_rate": 2.500859862215801e-05, "loss": 0.6084, "num_input_tokens_seen": 824049408, "step": 4566 }, { "epoch": 0.4999589479733983, "grad_norm": 1.316971428322993, "learning_rate": 2.5e-05, "loss": 0.7948, "num_input_tokens_seen": 824223904, "step": 4567 }, { "epoch": 0.5000684200443362, "grad_norm": 1.2865894523653159, "learning_rate": 2.4991401377841993e-05, "loss": 0.7047, "num_input_tokens_seen": 824387872, "step": 4568 }, { "epoch": 0.5001778921152741, "grad_norm": 1.3054328662408186, "learning_rate": 2.4982802756701193e-05, "loss": 0.7034, "num_input_tokens_seen": 824582080, "step": 4569 }, { "epoch": 0.500287364186212, "grad_norm": 1.2730784934027772, "learning_rate": 2.4974204137594786e-05, "loss": 0.5718, "num_input_tokens_seen": 824740672, "step": 4570 }, { "epoch": 0.5003968362571499, "grad_norm": 1.2619497365297343, "learning_rate": 2.4965605521539982e-05, "loss": 0.6985, "num_input_tokens_seen": 824934880, "step": 4571 }, { "epoch": 0.5005063083280878, "grad_norm": 1.4405692133166366, "learning_rate": 2.4957006909553982e-05, "loss": 0.7332, "num_input_tokens_seen": 825149696, "step": 4572 }, { "epoch": 0.5006157803990257, "grad_norm": 1.353344662921129, "learning_rate": 2.4948408302653985e-05, "loss": 0.7051, "num_input_tokens_seen": 825298208, "step": 4573 }, { "epoch": 0.5007252524699636, "grad_norm": 1.4885172018608304, "learning_rate": 2.4939809701857174e-05, "loss": 0.8565, "num_input_tokens_seen": 825473824, "step": 4574 }, { "epoch": 0.5008347245409015, "grad_norm": 1.3740171474375171, "learning_rate": 2.4931211108180767e-05, "loss": 0.8587, "num_input_tokens_seen": 825658176, "step": 4575 }, { "epoch": 0.5009441966118394, "grad_norm": 1.1885575053000808, "learning_rate": 2.492261252264196e-05, "loss": 0.739, "num_input_tokens_seen": 825855072, "step": 4576 }, { "epoch": 0.5010536686827773, "grad_norm": 1.2751477698170455, "learning_rate": 2.491401394625794e-05, "loss": 1.0133, "num_input_tokens_seen": 826050400, "step": 4577 }, { "epoch": 0.5011631407537153, "grad_norm": 1.1164068949479309, "learning_rate": 2.490541538004591e-05, "loss": 0.5579, "num_input_tokens_seen": 826219744, "step": 4578 }, { "epoch": 0.5012726128246531, "grad_norm": 1.094644997762891, "learning_rate": 2.489681682502305e-05, "loss": 0.6077, "num_input_tokens_seen": 826419104, "step": 4579 }, { "epoch": 0.501382084895591, "grad_norm": 1.1413039707988804, "learning_rate": 2.4888218282206573e-05, "loss": 0.6397, "num_input_tokens_seen": 826569408, "step": 4580 }, { "epoch": 0.5014915569665289, "grad_norm": 1.3304252003627923, "learning_rate": 2.4879619752613653e-05, "loss": 0.6157, "num_input_tokens_seen": 826747936, "step": 4581 }, { "epoch": 0.5016010290374668, "grad_norm": 1.1733618903658578, "learning_rate": 2.487102123726148e-05, "loss": 0.7546, "num_input_tokens_seen": 826927584, "step": 4582 }, { "epoch": 0.5017105011084048, "grad_norm": 1.3321004273470556, "learning_rate": 2.486242273716726e-05, "loss": 0.7435, "num_input_tokens_seen": 827114176, "step": 4583 }, { "epoch": 0.5018199731793426, "grad_norm": 1.4391263800576064, "learning_rate": 2.4853824253348162e-05, "loss": 0.742, "num_input_tokens_seen": 827277920, "step": 4584 }, { "epoch": 0.5019294452502805, "grad_norm": 1.1880538050608156, "learning_rate": 2.484522578682138e-05, "loss": 0.8854, "num_input_tokens_seen": 827485792, "step": 4585 }, { "epoch": 0.5020389173212184, "grad_norm": 1.2700093938825519, "learning_rate": 2.4836627338604083e-05, "loss": 0.7862, "num_input_tokens_seen": 827667008, "step": 4586 }, { "epoch": 0.5021483893921563, "grad_norm": 1.2725195355217958, "learning_rate": 2.482802890971347e-05, "loss": 0.7606, "num_input_tokens_seen": 827844192, "step": 4587 }, { "epoch": 0.5022578614630943, "grad_norm": 1.2970717407273844, "learning_rate": 2.4819430501166695e-05, "loss": 0.7711, "num_input_tokens_seen": 828021376, "step": 4588 }, { "epoch": 0.5023673335340322, "grad_norm": 1.585673718995236, "learning_rate": 2.4810832113980952e-05, "loss": 0.5647, "num_input_tokens_seen": 828230368, "step": 4589 }, { "epoch": 0.50247680560497, "grad_norm": 1.304835328276038, "learning_rate": 2.4802233749173416e-05, "loss": 0.815, "num_input_tokens_seen": 828409344, "step": 4590 }, { "epoch": 0.5025862776759079, "grad_norm": 1.3231741236675065, "learning_rate": 2.479363540776124e-05, "loss": 0.7442, "num_input_tokens_seen": 828594816, "step": 4591 }, { "epoch": 0.5026957497468458, "grad_norm": 1.3180402807554976, "learning_rate": 2.4785037090761605e-05, "loss": 0.8391, "num_input_tokens_seen": 828758112, "step": 4592 }, { "epoch": 0.5028052218177838, "grad_norm": 1.431661774816064, "learning_rate": 2.477643879919167e-05, "loss": 0.8676, "num_input_tokens_seen": 828955904, "step": 4593 }, { "epoch": 0.5029146938887217, "grad_norm": 1.3799109756392611, "learning_rate": 2.47678405340686e-05, "loss": 0.7869, "num_input_tokens_seen": 829134656, "step": 4594 }, { "epoch": 0.5030241659596596, "grad_norm": 1.319502542566276, "learning_rate": 2.4759242296409547e-05, "loss": 0.5883, "num_input_tokens_seen": 829305568, "step": 4595 }, { "epoch": 0.5031336380305974, "grad_norm": 1.3982146101479518, "learning_rate": 2.4750644087231668e-05, "loss": 0.9888, "num_input_tokens_seen": 829496864, "step": 4596 }, { "epoch": 0.5032431101015353, "grad_norm": 1.4107608080430842, "learning_rate": 2.4742045907552124e-05, "loss": 0.9745, "num_input_tokens_seen": 829668448, "step": 4597 }, { "epoch": 0.5033525821724733, "grad_norm": 1.5174972139486915, "learning_rate": 2.473344775838805e-05, "loss": 0.8008, "num_input_tokens_seen": 829842496, "step": 4598 }, { "epoch": 0.5034620542434112, "grad_norm": 1.2536030340344217, "learning_rate": 2.4724849640756608e-05, "loss": 0.6628, "num_input_tokens_seen": 830023264, "step": 4599 }, { "epoch": 0.5035715263143491, "grad_norm": 1.4094131075492482, "learning_rate": 2.4716251555674913e-05, "loss": 0.6408, "num_input_tokens_seen": 830164384, "step": 4600 }, { "epoch": 0.5036809983852869, "grad_norm": 1.246928398881897, "learning_rate": 2.4707653504160118e-05, "loss": 0.8331, "num_input_tokens_seen": 830370912, "step": 4601 }, { "epoch": 0.5037904704562248, "grad_norm": 1.1761544481276789, "learning_rate": 2.4699055487229366e-05, "loss": 0.454, "num_input_tokens_seen": 830553472, "step": 4602 }, { "epoch": 0.5038999425271627, "grad_norm": 1.3009060636557501, "learning_rate": 2.4690457505899765e-05, "loss": 0.6247, "num_input_tokens_seen": 830729312, "step": 4603 }, { "epoch": 0.5040094145981007, "grad_norm": 1.324225456648416, "learning_rate": 2.468185956118845e-05, "loss": 0.6947, "num_input_tokens_seen": 830910528, "step": 4604 }, { "epoch": 0.5041188866690386, "grad_norm": 1.31045040679498, "learning_rate": 2.4673261654112543e-05, "loss": 0.7686, "num_input_tokens_seen": 831076512, "step": 4605 }, { "epoch": 0.5042283587399765, "grad_norm": 1.1198732863850709, "learning_rate": 2.466466378568916e-05, "loss": 0.673, "num_input_tokens_seen": 831275200, "step": 4606 }, { "epoch": 0.5043378308109143, "grad_norm": 1.3962148979917834, "learning_rate": 2.4656065956935408e-05, "loss": 0.8783, "num_input_tokens_seen": 831425504, "step": 4607 }, { "epoch": 0.5044473028818522, "grad_norm": 1.2124143393234605, "learning_rate": 2.464746816886839e-05, "loss": 0.8276, "num_input_tokens_seen": 831625088, "step": 4608 }, { "epoch": 0.5045567749527902, "grad_norm": 1.3702861439726832, "learning_rate": 2.4638870422505225e-05, "loss": 0.8796, "num_input_tokens_seen": 831803840, "step": 4609 }, { "epoch": 0.5046662470237281, "grad_norm": 1.3741690986169162, "learning_rate": 2.4630272718862992e-05, "loss": 0.8266, "num_input_tokens_seen": 831984832, "step": 4610 }, { "epoch": 0.504775719094666, "grad_norm": 1.353772864179474, "learning_rate": 2.4621675058958792e-05, "loss": 0.7756, "num_input_tokens_seen": 832184192, "step": 4611 }, { "epoch": 0.5048851911656039, "grad_norm": 1.4875589855904747, "learning_rate": 2.4613077443809706e-05, "loss": 0.6661, "num_input_tokens_seen": 832330464, "step": 4612 }, { "epoch": 0.5049946632365417, "grad_norm": 1.3244177294647888, "learning_rate": 2.460447987443282e-05, "loss": 0.7282, "num_input_tokens_seen": 832506528, "step": 4613 }, { "epoch": 0.5051041353074797, "grad_norm": 1.3990547480228903, "learning_rate": 2.459588235184521e-05, "loss": 0.8362, "num_input_tokens_seen": 832690432, "step": 4614 }, { "epoch": 0.5052136073784176, "grad_norm": 1.3047998345720524, "learning_rate": 2.4587284877063942e-05, "loss": 0.7608, "num_input_tokens_seen": 832862688, "step": 4615 }, { "epoch": 0.5053230794493555, "grad_norm": 1.313920569267549, "learning_rate": 2.4578687451106085e-05, "loss": 0.8531, "num_input_tokens_seen": 833037632, "step": 4616 }, { "epoch": 0.5054325515202934, "grad_norm": 1.4393898524297828, "learning_rate": 2.4570090074988697e-05, "loss": 0.8333, "num_input_tokens_seen": 833191072, "step": 4617 }, { "epoch": 0.5055420235912312, "grad_norm": 1.2772339006605093, "learning_rate": 2.456149274972884e-05, "loss": 0.7134, "num_input_tokens_seen": 833361312, "step": 4618 }, { "epoch": 0.5056514956621692, "grad_norm": 1.2092721624293425, "learning_rate": 2.455289547634354e-05, "loss": 0.7392, "num_input_tokens_seen": 833536704, "step": 4619 }, { "epoch": 0.5057609677331071, "grad_norm": 1.3312976864436123, "learning_rate": 2.4544298255849862e-05, "loss": 0.8226, "num_input_tokens_seen": 833730240, "step": 4620 }, { "epoch": 0.505870439804045, "grad_norm": 1.2941585264340891, "learning_rate": 2.453570108926482e-05, "loss": 0.7372, "num_input_tokens_seen": 833907424, "step": 4621 }, { "epoch": 0.5059799118749829, "grad_norm": 1.3427296690309891, "learning_rate": 2.4527103977605447e-05, "loss": 0.8083, "num_input_tokens_seen": 834112160, "step": 4622 }, { "epoch": 0.5060893839459208, "grad_norm": 1.232650082795562, "learning_rate": 2.4518506921888788e-05, "loss": 0.6943, "num_input_tokens_seen": 834284640, "step": 4623 }, { "epoch": 0.5061988560168587, "grad_norm": 1.3553504562428387, "learning_rate": 2.4509909923131822e-05, "loss": 0.8215, "num_input_tokens_seen": 834460256, "step": 4624 }, { "epoch": 0.5063083280877966, "grad_norm": 1.3848639320160572, "learning_rate": 2.450131298235158e-05, "loss": 0.7743, "num_input_tokens_seen": 834666560, "step": 4625 }, { "epoch": 0.5064178001587345, "grad_norm": 1.4245783499918503, "learning_rate": 2.449271610056506e-05, "loss": 0.6635, "num_input_tokens_seen": 834838144, "step": 4626 }, { "epoch": 0.5065272722296724, "grad_norm": 1.3386492951856728, "learning_rate": 2.448411927878926e-05, "loss": 0.8605, "num_input_tokens_seen": 835026752, "step": 4627 }, { "epoch": 0.5066367443006103, "grad_norm": 1.364414214314655, "learning_rate": 2.447552251804116e-05, "loss": 0.8306, "num_input_tokens_seen": 835210656, "step": 4628 }, { "epoch": 0.5067462163715483, "grad_norm": 1.1632584220537077, "learning_rate": 2.4466925819337734e-05, "loss": 0.7278, "num_input_tokens_seen": 835398368, "step": 4629 }, { "epoch": 0.5068556884424861, "grad_norm": 1.2421484407246002, "learning_rate": 2.4458329183695983e-05, "loss": 0.5558, "num_input_tokens_seen": 835576448, "step": 4630 }, { "epoch": 0.506965160513424, "grad_norm": 1.388412945049651, "learning_rate": 2.444973261213284e-05, "loss": 0.8294, "num_input_tokens_seen": 835769312, "step": 4631 }, { "epoch": 0.5070746325843619, "grad_norm": 1.211621294462031, "learning_rate": 2.4441136105665284e-05, "loss": 0.6983, "num_input_tokens_seen": 835947168, "step": 4632 }, { "epoch": 0.5071841046552998, "grad_norm": 1.3643482918250547, "learning_rate": 2.4432539665310252e-05, "loss": 0.8351, "num_input_tokens_seen": 836126816, "step": 4633 }, { "epoch": 0.5072935767262378, "grad_norm": 1.1952115683234492, "learning_rate": 2.44239432920847e-05, "loss": 0.6348, "num_input_tokens_seen": 836296608, "step": 4634 }, { "epoch": 0.5074030487971756, "grad_norm": 1.4269380240237861, "learning_rate": 2.441534698700555e-05, "loss": 0.947, "num_input_tokens_seen": 836493280, "step": 4635 }, { "epoch": 0.5075125208681135, "grad_norm": 1.2846490386646907, "learning_rate": 2.4406750751089737e-05, "loss": 0.6459, "num_input_tokens_seen": 836655456, "step": 4636 }, { "epoch": 0.5076219929390514, "grad_norm": 1.296442120476383, "learning_rate": 2.4398154585354174e-05, "loss": 0.8388, "num_input_tokens_seen": 836819200, "step": 4637 }, { "epoch": 0.5077314650099893, "grad_norm": 1.4268152119646145, "learning_rate": 2.438955849081577e-05, "loss": 0.8457, "num_input_tokens_seen": 836976000, "step": 4638 }, { "epoch": 0.5078409370809273, "grad_norm": 1.3416972338509545, "learning_rate": 2.4380962468491438e-05, "loss": 0.6062, "num_input_tokens_seen": 837168192, "step": 4639 }, { "epoch": 0.5079504091518652, "grad_norm": 1.178923049716051, "learning_rate": 2.4372366519398053e-05, "loss": 0.8231, "num_input_tokens_seen": 837378976, "step": 4640 }, { "epoch": 0.508059881222803, "grad_norm": 1.2384943733564935, "learning_rate": 2.4363770644552523e-05, "loss": 0.7786, "num_input_tokens_seen": 837553920, "step": 4641 }, { "epoch": 0.5081693532937409, "grad_norm": 1.3435864459905162, "learning_rate": 2.4355174844971695e-05, "loss": 0.6899, "num_input_tokens_seen": 837756192, "step": 4642 }, { "epoch": 0.5082788253646788, "grad_norm": 1.3161732725003663, "learning_rate": 2.434657912167245e-05, "loss": 0.7296, "num_input_tokens_seen": 837910304, "step": 4643 }, { "epoch": 0.5083882974356168, "grad_norm": 1.35444294483139, "learning_rate": 2.4337983475671657e-05, "loss": 0.6771, "num_input_tokens_seen": 838068896, "step": 4644 }, { "epoch": 0.5084977695065547, "grad_norm": 1.206927989505634, "learning_rate": 2.4329387907986145e-05, "loss": 0.6447, "num_input_tokens_seen": 838261536, "step": 4645 }, { "epoch": 0.5086072415774926, "grad_norm": 1.1229395010660657, "learning_rate": 2.4320792419632764e-05, "loss": 0.5491, "num_input_tokens_seen": 838451040, "step": 4646 }, { "epoch": 0.5087167136484304, "grad_norm": 1.358294046258983, "learning_rate": 2.431219701162834e-05, "loss": 0.762, "num_input_tokens_seen": 838617024, "step": 4647 }, { "epoch": 0.5088261857193683, "grad_norm": 1.1874619528046026, "learning_rate": 2.4303601684989698e-05, "loss": 0.7074, "num_input_tokens_seen": 838774048, "step": 4648 }, { "epoch": 0.5089356577903063, "grad_norm": 1.3408790347693913, "learning_rate": 2.429500644073364e-05, "loss": 0.8319, "num_input_tokens_seen": 838955488, "step": 4649 }, { "epoch": 0.5090451298612442, "grad_norm": 1.2332834656832143, "learning_rate": 2.428641127987697e-05, "loss": 0.854, "num_input_tokens_seen": 839140512, "step": 4650 }, { "epoch": 0.5091546019321821, "grad_norm": 1.200578198610444, "learning_rate": 2.4277816203436498e-05, "loss": 0.7887, "num_input_tokens_seen": 839327776, "step": 4651 }, { "epoch": 0.5092640740031199, "grad_norm": 1.4310298100638086, "learning_rate": 2.426922121242897e-05, "loss": 0.8172, "num_input_tokens_seen": 839476512, "step": 4652 }, { "epoch": 0.5093735460740578, "grad_norm": 1.2830370094390622, "learning_rate": 2.426062630787119e-05, "loss": 0.7144, "num_input_tokens_seen": 839655264, "step": 4653 }, { "epoch": 0.5094830181449957, "grad_norm": 1.294146367413521, "learning_rate": 2.4252031490779894e-05, "loss": 0.8514, "num_input_tokens_seen": 839833792, "step": 4654 }, { "epoch": 0.5095924902159337, "grad_norm": 1.2464445092817522, "learning_rate": 2.4243436762171848e-05, "loss": 0.6819, "num_input_tokens_seen": 839998432, "step": 4655 }, { "epoch": 0.5097019622868716, "grad_norm": 1.2656766150981646, "learning_rate": 2.4234842123063786e-05, "loss": 0.9939, "num_input_tokens_seen": 840208544, "step": 4656 }, { "epoch": 0.5098114343578095, "grad_norm": 1.1919942936760692, "learning_rate": 2.4226247574472433e-05, "loss": 0.635, "num_input_tokens_seen": 840393344, "step": 4657 }, { "epoch": 0.5099209064287473, "grad_norm": 1.1211641934158072, "learning_rate": 2.421765311741452e-05, "loss": 0.7471, "num_input_tokens_seen": 840566944, "step": 4658 }, { "epoch": 0.5100303784996852, "grad_norm": 1.1885314778262603, "learning_rate": 2.420905875290674e-05, "loss": 0.5094, "num_input_tokens_seen": 840736288, "step": 4659 }, { "epoch": 0.5101398505706232, "grad_norm": 1.3687908253612528, "learning_rate": 2.4200464481965807e-05, "loss": 0.7844, "num_input_tokens_seen": 840937664, "step": 4660 }, { "epoch": 0.5102493226415611, "grad_norm": 1.2569745038526323, "learning_rate": 2.419187030560839e-05, "loss": 0.7691, "num_input_tokens_seen": 841136128, "step": 4661 }, { "epoch": 0.510358794712499, "grad_norm": 1.2216068920033514, "learning_rate": 2.4183276224851178e-05, "loss": 0.6193, "num_input_tokens_seen": 841324288, "step": 4662 }, { "epoch": 0.5104682667834369, "grad_norm": 1.4192671282891958, "learning_rate": 2.417468224071082e-05, "loss": 0.6841, "num_input_tokens_seen": 841469888, "step": 4663 }, { "epoch": 0.5105777388543747, "grad_norm": 1.3784413621182887, "learning_rate": 2.4166088354203974e-05, "loss": 0.9889, "num_input_tokens_seen": 841637888, "step": 4664 }, { "epoch": 0.5106872109253127, "grad_norm": 1.1226863381874017, "learning_rate": 2.415749456634729e-05, "loss": 0.5168, "num_input_tokens_seen": 841817760, "step": 4665 }, { "epoch": 0.5107966829962506, "grad_norm": 1.3353268304007215, "learning_rate": 2.4148900878157378e-05, "loss": 0.864, "num_input_tokens_seen": 842032576, "step": 4666 }, { "epoch": 0.5109061550671885, "grad_norm": 1.1305093964899464, "learning_rate": 2.4140307290650874e-05, "loss": 0.4939, "num_input_tokens_seen": 842226560, "step": 4667 }, { "epoch": 0.5110156271381264, "grad_norm": 1.3594812373598915, "learning_rate": 2.4131713804844364e-05, "loss": 0.8269, "num_input_tokens_seen": 842414496, "step": 4668 }, { "epoch": 0.5111250992090642, "grad_norm": 1.2018365277159855, "learning_rate": 2.412312042175446e-05, "loss": 0.5688, "num_input_tokens_seen": 842608704, "step": 4669 }, { "epoch": 0.5112345712800022, "grad_norm": 1.3975817656780143, "learning_rate": 2.4114527142397732e-05, "loss": 0.7895, "num_input_tokens_seen": 842780064, "step": 4670 }, { "epoch": 0.5113440433509401, "grad_norm": 1.3330140725013266, "learning_rate": 2.4105933967790744e-05, "loss": 0.6927, "num_input_tokens_seen": 842981440, "step": 4671 }, { "epoch": 0.511453515421878, "grad_norm": 1.3212760253623952, "learning_rate": 2.4097340898950073e-05, "loss": 0.772, "num_input_tokens_seen": 843179456, "step": 4672 }, { "epoch": 0.5115629874928159, "grad_norm": 1.419832712178625, "learning_rate": 2.4088747936892237e-05, "loss": 1.0709, "num_input_tokens_seen": 843370528, "step": 4673 }, { "epoch": 0.5116724595637538, "grad_norm": 1.1643464426991215, "learning_rate": 2.4080155082633784e-05, "loss": 0.7427, "num_input_tokens_seen": 843543232, "step": 4674 }, { "epoch": 0.5117819316346917, "grad_norm": 1.2382730670261461, "learning_rate": 2.407156233719122e-05, "loss": 0.7398, "num_input_tokens_seen": 843756480, "step": 4675 }, { "epoch": 0.5118914037056296, "grad_norm": 1.2433424085297793, "learning_rate": 2.4062969701581054e-05, "loss": 0.7332, "num_input_tokens_seen": 843955392, "step": 4676 }, { "epoch": 0.5120008757765675, "grad_norm": 1.3228913802886666, "learning_rate": 2.4054377176819793e-05, "loss": 0.5909, "num_input_tokens_seen": 844163488, "step": 4677 }, { "epoch": 0.5121103478475054, "grad_norm": 1.2310026433499526, "learning_rate": 2.4045784763923893e-05, "loss": 0.7168, "num_input_tokens_seen": 844345824, "step": 4678 }, { "epoch": 0.5122198199184433, "grad_norm": 1.2186174076042193, "learning_rate": 2.4037192463909837e-05, "loss": 0.8186, "num_input_tokens_seen": 844507776, "step": 4679 }, { "epoch": 0.5123292919893813, "grad_norm": 1.1722373910698887, "learning_rate": 2.4028600277794066e-05, "loss": 0.9017, "num_input_tokens_seen": 844713856, "step": 4680 }, { "epoch": 0.5124387640603191, "grad_norm": 1.1198994725105276, "learning_rate": 2.4020008206593036e-05, "loss": 0.6588, "num_input_tokens_seen": 844884096, "step": 4681 }, { "epoch": 0.512548236131257, "grad_norm": 1.2310455907781814, "learning_rate": 2.401141625132315e-05, "loss": 0.6475, "num_input_tokens_seen": 845043360, "step": 4682 }, { "epoch": 0.5126577082021949, "grad_norm": 1.2792762347005022, "learning_rate": 2.4002824413000836e-05, "loss": 0.6861, "num_input_tokens_seen": 845185824, "step": 4683 }, { "epoch": 0.5127671802731328, "grad_norm": 1.3503503380179278, "learning_rate": 2.3994232692642496e-05, "loss": 0.6432, "num_input_tokens_seen": 845354944, "step": 4684 }, { "epoch": 0.5128766523440708, "grad_norm": 1.1989756284914093, "learning_rate": 2.3985641091264495e-05, "loss": 0.8197, "num_input_tokens_seen": 845546912, "step": 4685 }, { "epoch": 0.5129861244150086, "grad_norm": 1.2903602771963052, "learning_rate": 2.3977049609883222e-05, "loss": 0.7045, "num_input_tokens_seen": 845696768, "step": 4686 }, { "epoch": 0.5130955964859465, "grad_norm": 1.3370563216832754, "learning_rate": 2.3968458249515016e-05, "loss": 0.7707, "num_input_tokens_seen": 845863200, "step": 4687 }, { "epoch": 0.5132050685568844, "grad_norm": 1.4749293908497971, "learning_rate": 2.395986701117623e-05, "loss": 1.0119, "num_input_tokens_seen": 846036576, "step": 4688 }, { "epoch": 0.5133145406278223, "grad_norm": 1.3514203123590893, "learning_rate": 2.3951275895883188e-05, "loss": 0.8651, "num_input_tokens_seen": 846219136, "step": 4689 }, { "epoch": 0.5134240126987603, "grad_norm": 1.3188438599075312, "learning_rate": 2.3942684904652204e-05, "loss": 0.7929, "num_input_tokens_seen": 846402368, "step": 4690 }, { "epoch": 0.5135334847696982, "grad_norm": 1.4133939901567194, "learning_rate": 2.3934094038499577e-05, "loss": 0.8649, "num_input_tokens_seen": 846574400, "step": 4691 }, { "epoch": 0.513642956840636, "grad_norm": 1.439092003336612, "learning_rate": 2.392550329844158e-05, "loss": 0.7526, "num_input_tokens_seen": 846742624, "step": 4692 }, { "epoch": 0.5137524289115739, "grad_norm": 1.2371959081461865, "learning_rate": 2.3916912685494502e-05, "loss": 0.6734, "num_input_tokens_seen": 846930336, "step": 4693 }, { "epoch": 0.5138619009825118, "grad_norm": 1.2808490684120206, "learning_rate": 2.390832220067457e-05, "loss": 0.7444, "num_input_tokens_seen": 847123648, "step": 4694 }, { "epoch": 0.5139713730534498, "grad_norm": 1.4416859259816557, "learning_rate": 2.3899731844998048e-05, "loss": 0.8045, "num_input_tokens_seen": 847290752, "step": 4695 }, { "epoch": 0.5140808451243877, "grad_norm": 1.2806551878439694, "learning_rate": 2.3891141619481132e-05, "loss": 0.7587, "num_input_tokens_seen": 847467488, "step": 4696 }, { "epoch": 0.5141903171953256, "grad_norm": 1.2543306197073647, "learning_rate": 2.388255152514004e-05, "loss": 0.682, "num_input_tokens_seen": 847657664, "step": 4697 }, { "epoch": 0.5142997892662634, "grad_norm": 1.2565809108217383, "learning_rate": 2.387396156299098e-05, "loss": 0.4822, "num_input_tokens_seen": 847830144, "step": 4698 }, { "epoch": 0.5144092613372013, "grad_norm": 1.3025256766133249, "learning_rate": 2.3865371734050107e-05, "loss": 0.6122, "num_input_tokens_seen": 847996352, "step": 4699 }, { "epoch": 0.5145187334081393, "grad_norm": 1.3375404066595602, "learning_rate": 2.385678203933359e-05, "loss": 0.687, "num_input_tokens_seen": 848172416, "step": 4700 }, { "epoch": 0.5146282054790772, "grad_norm": 1.0770043339832487, "learning_rate": 2.3848192479857566e-05, "loss": 0.6665, "num_input_tokens_seen": 848379392, "step": 4701 }, { "epoch": 0.5147376775500151, "grad_norm": 1.2323074408602095, "learning_rate": 2.3839603056638186e-05, "loss": 0.7487, "num_input_tokens_seen": 848538208, "step": 4702 }, { "epoch": 0.5148471496209529, "grad_norm": 1.180613728878892, "learning_rate": 2.3831013770691533e-05, "loss": 0.6476, "num_input_tokens_seen": 848689408, "step": 4703 }, { "epoch": 0.5149566216918908, "grad_norm": 1.3587708281576214, "learning_rate": 2.3822424623033712e-05, "loss": 0.9179, "num_input_tokens_seen": 848866592, "step": 4704 }, { "epoch": 0.5150660937628287, "grad_norm": 1.2238085499551632, "learning_rate": 2.381383561468082e-05, "loss": 0.6197, "num_input_tokens_seen": 849069088, "step": 4705 }, { "epoch": 0.5151755658337667, "grad_norm": 1.3361607861333349, "learning_rate": 2.38052467466489e-05, "loss": 0.8342, "num_input_tokens_seen": 849207072, "step": 4706 }, { "epoch": 0.5152850379047046, "grad_norm": 1.3067798853905692, "learning_rate": 2.3796658019954013e-05, "loss": 0.8924, "num_input_tokens_seen": 849402400, "step": 4707 }, { "epoch": 0.5153945099756425, "grad_norm": 1.1663359900007566, "learning_rate": 2.3788069435612174e-05, "loss": 0.6358, "num_input_tokens_seen": 849567712, "step": 4708 }, { "epoch": 0.5155039820465803, "grad_norm": 1.306507052449617, "learning_rate": 2.3779480994639406e-05, "loss": 0.8459, "num_input_tokens_seen": 849753184, "step": 4709 }, { "epoch": 0.5156134541175182, "grad_norm": 1.2026574848967133, "learning_rate": 2.37708926980517e-05, "loss": 0.6796, "num_input_tokens_seen": 849909984, "step": 4710 }, { "epoch": 0.5157229261884562, "grad_norm": 1.218358784021428, "learning_rate": 2.3762304546865042e-05, "loss": 0.707, "num_input_tokens_seen": 850046400, "step": 4711 }, { "epoch": 0.5158323982593941, "grad_norm": 1.1207536453980846, "learning_rate": 2.375371654209539e-05, "loss": 0.7439, "num_input_tokens_seen": 850255392, "step": 4712 }, { "epoch": 0.515941870330332, "grad_norm": 1.2363857840400456, "learning_rate": 2.3745128684758684e-05, "loss": 0.8206, "num_input_tokens_seen": 850433696, "step": 4713 }, { "epoch": 0.5160513424012699, "grad_norm": 1.2761566058443123, "learning_rate": 2.3736540975870865e-05, "loss": 0.9746, "num_input_tokens_seen": 850622304, "step": 4714 }, { "epoch": 0.5161608144722077, "grad_norm": 1.2111224125607998, "learning_rate": 2.3727953416447826e-05, "loss": 0.6455, "num_input_tokens_seen": 850818976, "step": 4715 }, { "epoch": 0.5162702865431457, "grad_norm": 1.10475270296048, "learning_rate": 2.3719366007505477e-05, "loss": 0.5936, "num_input_tokens_seen": 850986976, "step": 4716 }, { "epoch": 0.5163797586140836, "grad_norm": 1.364918160552754, "learning_rate": 2.3710778750059665e-05, "loss": 0.8448, "num_input_tokens_seen": 851166176, "step": 4717 }, { "epoch": 0.5164892306850215, "grad_norm": 1.3010518018919768, "learning_rate": 2.3702191645126266e-05, "loss": 0.7961, "num_input_tokens_seen": 851374272, "step": 4718 }, { "epoch": 0.5165987027559594, "grad_norm": 1.3351917527748052, "learning_rate": 2.3693604693721126e-05, "loss": 0.6708, "num_input_tokens_seen": 851537792, "step": 4719 }, { "epoch": 0.5167081748268972, "grad_norm": 1.2334355509558723, "learning_rate": 2.368501789686004e-05, "loss": 0.6459, "num_input_tokens_seen": 851719456, "step": 4720 }, { "epoch": 0.5168176468978352, "grad_norm": 1.1451665787940857, "learning_rate": 2.367643125555883e-05, "loss": 0.5755, "num_input_tokens_seen": 851915680, "step": 4721 }, { "epoch": 0.5169271189687731, "grad_norm": 1.2266296562713659, "learning_rate": 2.3667844770833265e-05, "loss": 0.8051, "num_input_tokens_seen": 852133408, "step": 4722 }, { "epoch": 0.517036591039711, "grad_norm": 1.2243696400952013, "learning_rate": 2.3659258443699128e-05, "loss": 0.665, "num_input_tokens_seen": 852343968, "step": 4723 }, { "epoch": 0.5171460631106489, "grad_norm": 1.2160718787808442, "learning_rate": 2.3650672275172145e-05, "loss": 0.6773, "num_input_tokens_seen": 852535712, "step": 4724 }, { "epoch": 0.5172555351815868, "grad_norm": 1.2912486179193003, "learning_rate": 2.3642086266268053e-05, "loss": 0.6971, "num_input_tokens_seen": 852701024, "step": 4725 }, { "epoch": 0.5173650072525247, "grad_norm": 1.350096199352175, "learning_rate": 2.363350041800257e-05, "loss": 0.8445, "num_input_tokens_seen": 852890080, "step": 4726 }, { "epoch": 0.5174744793234626, "grad_norm": 1.2717712828690861, "learning_rate": 2.362491473139136e-05, "loss": 0.8064, "num_input_tokens_seen": 853063680, "step": 4727 }, { "epoch": 0.5175839513944005, "grad_norm": 1.275618575143965, "learning_rate": 2.3616329207450124e-05, "loss": 0.8204, "num_input_tokens_seen": 853249824, "step": 4728 }, { "epoch": 0.5176934234653384, "grad_norm": 1.3664096472356058, "learning_rate": 2.360774384719449e-05, "loss": 0.6753, "num_input_tokens_seen": 853431040, "step": 4729 }, { "epoch": 0.5178028955362763, "grad_norm": 1.372077827840294, "learning_rate": 2.359915865164009e-05, "loss": 0.8935, "num_input_tokens_seen": 853635104, "step": 4730 }, { "epoch": 0.5179123676072143, "grad_norm": 1.2865195883969434, "learning_rate": 2.3590573621802553e-05, "loss": 0.7694, "num_input_tokens_seen": 853842976, "step": 4731 }, { "epoch": 0.5180218396781521, "grad_norm": 1.2225217897622445, "learning_rate": 2.3581988758697463e-05, "loss": 0.7688, "num_input_tokens_seen": 854019488, "step": 4732 }, { "epoch": 0.51813131174909, "grad_norm": 1.2322259375400866, "learning_rate": 2.3573404063340386e-05, "loss": 0.905, "num_input_tokens_seen": 854198688, "step": 4733 }, { "epoch": 0.5182407838200279, "grad_norm": 1.1384613208859766, "learning_rate": 2.3564819536746883e-05, "loss": 0.593, "num_input_tokens_seen": 854389984, "step": 4734 }, { "epoch": 0.5183502558909658, "grad_norm": 1.3413904898977944, "learning_rate": 2.3556235179932494e-05, "loss": 0.7793, "num_input_tokens_seen": 854586208, "step": 4735 }, { "epoch": 0.5184597279619038, "grad_norm": 1.1832865703548283, "learning_rate": 2.354765099391271e-05, "loss": 0.7146, "num_input_tokens_seen": 854773920, "step": 4736 }, { "epoch": 0.5185692000328416, "grad_norm": 1.2303377596447316, "learning_rate": 2.353906697970304e-05, "loss": 0.7044, "num_input_tokens_seen": 854964320, "step": 4737 }, { "epoch": 0.5186786721037795, "grad_norm": 1.3564509809799892, "learning_rate": 2.3530483138318964e-05, "loss": 0.8837, "num_input_tokens_seen": 855109024, "step": 4738 }, { "epoch": 0.5187881441747174, "grad_norm": 1.2257461260789422, "learning_rate": 2.352189947077591e-05, "loss": 0.6832, "num_input_tokens_seen": 855285312, "step": 4739 }, { "epoch": 0.5188976162456553, "grad_norm": 1.305789550594191, "learning_rate": 2.3513315978089336e-05, "loss": 0.652, "num_input_tokens_seen": 855423968, "step": 4740 }, { "epoch": 0.5190070883165933, "grad_norm": 1.3412042944551836, "learning_rate": 2.3504732661274627e-05, "loss": 0.7119, "num_input_tokens_seen": 855593312, "step": 4741 }, { "epoch": 0.5191165603875312, "grad_norm": 1.2882129528478865, "learning_rate": 2.3496149521347194e-05, "loss": 0.8052, "num_input_tokens_seen": 855771616, "step": 4742 }, { "epoch": 0.519226032458469, "grad_norm": 1.3213527576688122, "learning_rate": 2.348756655932239e-05, "loss": 0.8435, "num_input_tokens_seen": 855980384, "step": 4743 }, { "epoch": 0.5193355045294069, "grad_norm": 1.188346109620297, "learning_rate": 2.3478983776215572e-05, "loss": 0.7079, "num_input_tokens_seen": 856150848, "step": 4744 }, { "epoch": 0.5194449766003448, "grad_norm": 1.2877415680413933, "learning_rate": 2.3470401173042076e-05, "loss": 0.647, "num_input_tokens_seen": 856319296, "step": 4745 }, { "epoch": 0.5195544486712828, "grad_norm": 1.2976115329853148, "learning_rate": 2.3461818750817186e-05, "loss": 0.6906, "num_input_tokens_seen": 856516864, "step": 4746 }, { "epoch": 0.5196639207422207, "grad_norm": 1.2102819329461552, "learning_rate": 2.3453236510556207e-05, "loss": 0.6833, "num_input_tokens_seen": 856678144, "step": 4747 }, { "epoch": 0.5197733928131586, "grad_norm": 1.3193119478903585, "learning_rate": 2.344465445327438e-05, "loss": 0.7495, "num_input_tokens_seen": 856839424, "step": 4748 }, { "epoch": 0.5198828648840964, "grad_norm": 1.133802362934072, "learning_rate": 2.3436072579986973e-05, "loss": 0.5976, "num_input_tokens_seen": 857019072, "step": 4749 }, { "epoch": 0.5199923369550343, "grad_norm": 1.2103744182118432, "learning_rate": 2.3427490891709176e-05, "loss": 0.6886, "num_input_tokens_seen": 857234336, "step": 4750 }, { "epoch": 0.5201018090259723, "grad_norm": 1.2780422435727647, "learning_rate": 2.3418909389456203e-05, "loss": 0.5229, "num_input_tokens_seen": 857394496, "step": 4751 }, { "epoch": 0.5202112810969102, "grad_norm": 1.1926976443593282, "learning_rate": 2.3410328074243232e-05, "loss": 0.6878, "num_input_tokens_seen": 857587136, "step": 4752 }, { "epoch": 0.5203207531678481, "grad_norm": 1.1328624845855402, "learning_rate": 2.340174694708541e-05, "loss": 0.6391, "num_input_tokens_seen": 857772832, "step": 4753 }, { "epoch": 0.5204302252387859, "grad_norm": 1.34683516547471, "learning_rate": 2.339316600899787e-05, "loss": 0.6634, "num_input_tokens_seen": 857929856, "step": 4754 }, { "epoch": 0.5205396973097238, "grad_norm": 1.4055272258237257, "learning_rate": 2.338458526099571e-05, "loss": 0.885, "num_input_tokens_seen": 858113088, "step": 4755 }, { "epoch": 0.5206491693806617, "grad_norm": 1.246316632753639, "learning_rate": 2.337600470409404e-05, "loss": 0.7307, "num_input_tokens_seen": 858268768, "step": 4756 }, { "epoch": 0.5207586414515997, "grad_norm": 1.1202806454948733, "learning_rate": 2.3367424339307895e-05, "loss": 0.7408, "num_input_tokens_seen": 858467456, "step": 4757 }, { "epoch": 0.5208681135225376, "grad_norm": 1.3495067331062023, "learning_rate": 2.3358844167652334e-05, "loss": 0.7521, "num_input_tokens_seen": 858660320, "step": 4758 }, { "epoch": 0.5209775855934755, "grad_norm": 1.3132504179885056, "learning_rate": 2.3350264190142377e-05, "loss": 0.7516, "num_input_tokens_seen": 858864160, "step": 4759 }, { "epoch": 0.5210870576644133, "grad_norm": 1.5470700381846667, "learning_rate": 2.3341684407793004e-05, "loss": 0.8894, "num_input_tokens_seen": 859019168, "step": 4760 }, { "epoch": 0.5211965297353512, "grad_norm": 1.0780031992346404, "learning_rate": 2.3333104821619207e-05, "loss": 0.5534, "num_input_tokens_seen": 859194112, "step": 4761 }, { "epoch": 0.5213060018062892, "grad_norm": 1.24871760390285, "learning_rate": 2.332452543263591e-05, "loss": 0.6153, "num_input_tokens_seen": 859324256, "step": 4762 }, { "epoch": 0.5214154738772271, "grad_norm": 1.4115814868459942, "learning_rate": 2.3315946241858058e-05, "loss": 0.7163, "num_input_tokens_seen": 859509952, "step": 4763 }, { "epoch": 0.521524945948165, "grad_norm": 1.2219863860344367, "learning_rate": 2.330736725030054e-05, "loss": 0.7455, "num_input_tokens_seen": 859683328, "step": 4764 }, { "epoch": 0.5216344180191029, "grad_norm": 1.2322617803170326, "learning_rate": 2.329878845897824e-05, "loss": 0.7198, "num_input_tokens_seen": 859834528, "step": 4765 }, { "epoch": 0.5217438900900407, "grad_norm": 1.2164940617985163, "learning_rate": 2.3290209868906025e-05, "loss": 0.7367, "num_input_tokens_seen": 860001632, "step": 4766 }, { "epoch": 0.5218533621609787, "grad_norm": 1.1055721598538564, "learning_rate": 2.3281631481098704e-05, "loss": 0.4768, "num_input_tokens_seen": 860206368, "step": 4767 }, { "epoch": 0.5219628342319166, "grad_norm": 1.3042052057115252, "learning_rate": 2.3273053296571104e-05, "loss": 0.7879, "num_input_tokens_seen": 860373696, "step": 4768 }, { "epoch": 0.5220723063028545, "grad_norm": 1.3323893135719895, "learning_rate": 2.326447531633799e-05, "loss": 0.9015, "num_input_tokens_seen": 860584704, "step": 4769 }, { "epoch": 0.5221817783737924, "grad_norm": 1.4549036932727226, "learning_rate": 2.3255897541414133e-05, "loss": 1.005, "num_input_tokens_seen": 860787872, "step": 4770 }, { "epoch": 0.5222912504447302, "grad_norm": 1.314669133614114, "learning_rate": 2.3247319972814256e-05, "loss": 0.8123, "num_input_tokens_seen": 860964384, "step": 4771 }, { "epoch": 0.5224007225156682, "grad_norm": 1.3239305120626872, "learning_rate": 2.3238742611553075e-05, "loss": 0.7913, "num_input_tokens_seen": 861138656, "step": 4772 }, { "epoch": 0.5225101945866061, "grad_norm": 1.2043010858838916, "learning_rate": 2.3230165458645282e-05, "loss": 0.646, "num_input_tokens_seen": 861280224, "step": 4773 }, { "epoch": 0.522619666657544, "grad_norm": 1.208214386752775, "learning_rate": 2.322158851510553e-05, "loss": 0.7289, "num_input_tokens_seen": 861469504, "step": 4774 }, { "epoch": 0.5227291387284819, "grad_norm": 1.2181125832439108, "learning_rate": 2.3213011781948456e-05, "loss": 0.8554, "num_input_tokens_seen": 861674912, "step": 4775 }, { "epoch": 0.5228386107994198, "grad_norm": 1.290238370089419, "learning_rate": 2.320443526018867e-05, "loss": 0.7139, "num_input_tokens_seen": 861871136, "step": 4776 }, { "epoch": 0.5229480828703577, "grad_norm": 1.265392376278563, "learning_rate": 2.319585895084077e-05, "loss": 0.7043, "num_input_tokens_seen": 862021664, "step": 4777 }, { "epoch": 0.5230575549412956, "grad_norm": 1.2623477466291924, "learning_rate": 2.3187282854919296e-05, "loss": 0.686, "num_input_tokens_seen": 862212960, "step": 4778 }, { "epoch": 0.5231670270122335, "grad_norm": 1.0760331246159158, "learning_rate": 2.3178706973438793e-05, "loss": 0.5277, "num_input_tokens_seen": 862412768, "step": 4779 }, { "epoch": 0.5232764990831714, "grad_norm": 1.4287690928494228, "learning_rate": 2.3170131307413788e-05, "loss": 0.7344, "num_input_tokens_seen": 862595552, "step": 4780 }, { "epoch": 0.5233859711541093, "grad_norm": 1.1822028721922444, "learning_rate": 2.3161555857858735e-05, "loss": 0.6211, "num_input_tokens_seen": 862802976, "step": 4781 }, { "epoch": 0.5234954432250473, "grad_norm": 1.154908200698924, "learning_rate": 2.3152980625788126e-05, "loss": 0.6523, "num_input_tokens_seen": 863005024, "step": 4782 }, { "epoch": 0.5236049152959851, "grad_norm": 1.2238042639760163, "learning_rate": 2.3144405612216365e-05, "loss": 0.6374, "num_input_tokens_seen": 863190048, "step": 4783 }, { "epoch": 0.523714387366923, "grad_norm": 1.3170803619290103, "learning_rate": 2.3135830818157877e-05, "loss": 0.6081, "num_input_tokens_seen": 863408224, "step": 4784 }, { "epoch": 0.5238238594378609, "grad_norm": 1.3468712773459692, "learning_rate": 2.3127256244627036e-05, "loss": 0.8334, "num_input_tokens_seen": 863591680, "step": 4785 }, { "epoch": 0.5239333315087988, "grad_norm": 1.3357008339573921, "learning_rate": 2.31186818926382e-05, "loss": 0.747, "num_input_tokens_seen": 863731680, "step": 4786 }, { "epoch": 0.5240428035797368, "grad_norm": 1.177606072720258, "learning_rate": 2.311010776320571e-05, "loss": 0.6623, "num_input_tokens_seen": 863892960, "step": 4787 }, { "epoch": 0.5241522756506746, "grad_norm": 1.2877445303538422, "learning_rate": 2.3101533857343848e-05, "loss": 0.8344, "num_input_tokens_seen": 864073504, "step": 4788 }, { "epoch": 0.5242617477216125, "grad_norm": 1.3544217774147216, "learning_rate": 2.3092960176066912e-05, "loss": 0.6367, "num_input_tokens_seen": 864219552, "step": 4789 }, { "epoch": 0.5243712197925504, "grad_norm": 1.3107158705425963, "learning_rate": 2.3084386720389135e-05, "loss": 0.597, "num_input_tokens_seen": 864410176, "step": 4790 }, { "epoch": 0.5244806918634883, "grad_norm": 1.2923140697924482, "learning_rate": 2.3075813491324743e-05, "loss": 0.6825, "num_input_tokens_seen": 864601920, "step": 4791 }, { "epoch": 0.5245901639344263, "grad_norm": 1.2152865666873955, "learning_rate": 2.3067240489887947e-05, "loss": 0.8041, "num_input_tokens_seen": 864798592, "step": 4792 }, { "epoch": 0.5246996360053642, "grad_norm": 1.3429041573445566, "learning_rate": 2.3058667717092895e-05, "loss": 0.6823, "num_input_tokens_seen": 864965024, "step": 4793 }, { "epoch": 0.524809108076302, "grad_norm": 1.283280647525718, "learning_rate": 2.305009517395375e-05, "loss": 0.7255, "num_input_tokens_seen": 865144896, "step": 4794 }, { "epoch": 0.5249185801472399, "grad_norm": 1.2608047339823725, "learning_rate": 2.3041522861484617e-05, "loss": 0.7192, "num_input_tokens_seen": 865338880, "step": 4795 }, { "epoch": 0.5250280522181778, "grad_norm": 1.1846833705804976, "learning_rate": 2.3032950780699582e-05, "loss": 0.683, "num_input_tokens_seen": 865540928, "step": 4796 }, { "epoch": 0.5251375242891158, "grad_norm": 1.2379296221719083, "learning_rate": 2.302437893261271e-05, "loss": 0.8557, "num_input_tokens_seen": 865735584, "step": 4797 }, { "epoch": 0.5252469963600537, "grad_norm": 1.281138205884599, "learning_rate": 2.3015807318238027e-05, "loss": 0.5805, "num_input_tokens_seen": 865913888, "step": 4798 }, { "epoch": 0.5253564684309916, "grad_norm": 1.2789074487045533, "learning_rate": 2.300723593858956e-05, "loss": 0.7204, "num_input_tokens_seen": 866098912, "step": 4799 }, { "epoch": 0.5254659405019294, "grad_norm": 1.384065308386063, "learning_rate": 2.299866479468126e-05, "loss": 0.8575, "num_input_tokens_seen": 866262880, "step": 4800 }, { "epoch": 0.5255754125728673, "grad_norm": 1.1749425792867376, "learning_rate": 2.29900938875271e-05, "loss": 0.937, "num_input_tokens_seen": 866459328, "step": 4801 }, { "epoch": 0.5256848846438053, "grad_norm": 1.2899132435479652, "learning_rate": 2.298152321814098e-05, "loss": 0.7265, "num_input_tokens_seen": 866625536, "step": 4802 }, { "epoch": 0.5257943567147432, "grad_norm": 1.0389844248414755, "learning_rate": 2.2972952787536815e-05, "loss": 0.417, "num_input_tokens_seen": 866801152, "step": 4803 }, { "epoch": 0.5259038287856811, "grad_norm": 1.303159950208466, "learning_rate": 2.2964382596728447e-05, "loss": 0.7237, "num_input_tokens_seen": 866957504, "step": 4804 }, { "epoch": 0.5260133008566189, "grad_norm": 1.2484586272284164, "learning_rate": 2.295581264672973e-05, "loss": 0.5086, "num_input_tokens_seen": 867122144, "step": 4805 }, { "epoch": 0.5261227729275568, "grad_norm": 1.0590012670370705, "learning_rate": 2.2947242938554475e-05, "loss": 0.5052, "num_input_tokens_seen": 867315008, "step": 4806 }, { "epoch": 0.5262322449984947, "grad_norm": 1.3989766110011836, "learning_rate": 2.2938673473216448e-05, "loss": 0.8607, "num_input_tokens_seen": 867491296, "step": 4807 }, { "epoch": 0.5263417170694327, "grad_norm": 1.429336504766044, "learning_rate": 2.293010425172942e-05, "loss": 0.6455, "num_input_tokens_seen": 867647872, "step": 4808 }, { "epoch": 0.5264511891403706, "grad_norm": 1.3134622679432433, "learning_rate": 2.2921535275107092e-05, "loss": 0.6339, "num_input_tokens_seen": 867824832, "step": 4809 }, { "epoch": 0.5265606612113085, "grad_norm": 1.2305126848400163, "learning_rate": 2.291296654436318e-05, "loss": 0.6224, "num_input_tokens_seen": 868034944, "step": 4810 }, { "epoch": 0.5266701332822463, "grad_norm": 1.2780820247075275, "learning_rate": 2.2904398060511324e-05, "loss": 0.8253, "num_input_tokens_seen": 868213696, "step": 4811 }, { "epoch": 0.5267796053531842, "grad_norm": 1.219994276611786, "learning_rate": 2.289582982456517e-05, "loss": 0.6109, "num_input_tokens_seen": 868419776, "step": 4812 }, { "epoch": 0.5268890774241222, "grad_norm": 1.2195948093622888, "learning_rate": 2.2887261837538335e-05, "loss": 0.6017, "num_input_tokens_seen": 868614880, "step": 4813 }, { "epoch": 0.5269985494950601, "grad_norm": 1.3239625666100718, "learning_rate": 2.2878694100444377e-05, "loss": 0.7352, "num_input_tokens_seen": 868771680, "step": 4814 }, { "epoch": 0.527108021565998, "grad_norm": 1.2748933672725924, "learning_rate": 2.2870126614296856e-05, "loss": 0.7668, "num_input_tokens_seen": 868944608, "step": 4815 }, { "epoch": 0.5272174936369359, "grad_norm": 1.2785638237762795, "learning_rate": 2.2861559380109287e-05, "loss": 0.78, "num_input_tokens_seen": 869149792, "step": 4816 }, { "epoch": 0.5273269657078737, "grad_norm": 1.2664352460738642, "learning_rate": 2.2852992398895155e-05, "loss": 0.6638, "num_input_tokens_seen": 869318912, "step": 4817 }, { "epoch": 0.5274364377788117, "grad_norm": 1.208892079221343, "learning_rate": 2.284442567166791e-05, "loss": 0.7344, "num_input_tokens_seen": 869535072, "step": 4818 }, { "epoch": 0.5275459098497496, "grad_norm": 1.293210931783312, "learning_rate": 2.283585919944099e-05, "loss": 0.8286, "num_input_tokens_seen": 869713600, "step": 4819 }, { "epoch": 0.5276553819206875, "grad_norm": 1.3626463977661827, "learning_rate": 2.28272929832278e-05, "loss": 0.7133, "num_input_tokens_seen": 869864800, "step": 4820 }, { "epoch": 0.5277648539916254, "grad_norm": 1.298072813304688, "learning_rate": 2.2818727024041685e-05, "loss": 0.7637, "num_input_tokens_seen": 870032352, "step": 4821 }, { "epoch": 0.5278743260625632, "grad_norm": 1.279019810616355, "learning_rate": 2.2810161322896e-05, "loss": 0.7537, "num_input_tokens_seen": 870229024, "step": 4822 }, { "epoch": 0.5279837981335012, "grad_norm": 1.2956275915625086, "learning_rate": 2.2801595880804037e-05, "loss": 0.7722, "num_input_tokens_seen": 870419648, "step": 4823 }, { "epoch": 0.5280932702044391, "grad_norm": 1.2038958668493127, "learning_rate": 2.279303069877909e-05, "loss": 0.7127, "num_input_tokens_seen": 870605120, "step": 4824 }, { "epoch": 0.528202742275377, "grad_norm": 1.2099232517682155, "learning_rate": 2.2784465777834383e-05, "loss": 0.8827, "num_input_tokens_seen": 870809408, "step": 4825 }, { "epoch": 0.5283122143463149, "grad_norm": 1.1987927702273657, "learning_rate": 2.2775901118983138e-05, "loss": 0.6874, "num_input_tokens_seen": 871003392, "step": 4826 }, { "epoch": 0.5284216864172528, "grad_norm": 1.1822452227955143, "learning_rate": 2.2767336723238543e-05, "loss": 0.7421, "num_input_tokens_seen": 871181472, "step": 4827 }, { "epoch": 0.5285311584881907, "grad_norm": 1.2705127853777103, "learning_rate": 2.2758772591613743e-05, "loss": 0.71, "num_input_tokens_seen": 871355744, "step": 4828 }, { "epoch": 0.5286406305591286, "grad_norm": 1.3279651770076313, "learning_rate": 2.275020872512187e-05, "loss": 0.8326, "num_input_tokens_seen": 871553536, "step": 4829 }, { "epoch": 0.5287501026300665, "grad_norm": 1.4227898138572308, "learning_rate": 2.274164512477599e-05, "loss": 0.9164, "num_input_tokens_seen": 871702496, "step": 4830 }, { "epoch": 0.5288595747010044, "grad_norm": 1.265617747344912, "learning_rate": 2.2733081791589188e-05, "loss": 0.7647, "num_input_tokens_seen": 871857280, "step": 4831 }, { "epoch": 0.5289690467719423, "grad_norm": 1.2573876463067055, "learning_rate": 2.272451872657447e-05, "loss": 0.6724, "num_input_tokens_seen": 871994816, "step": 4832 }, { "epoch": 0.5290785188428803, "grad_norm": 1.2117526848355806, "learning_rate": 2.2715955930744828e-05, "loss": 0.6445, "num_input_tokens_seen": 872168640, "step": 4833 }, { "epoch": 0.5291879909138181, "grad_norm": 1.259784787054483, "learning_rate": 2.2707393405113246e-05, "loss": 0.8357, "num_input_tokens_seen": 872334624, "step": 4834 }, { "epoch": 0.529297462984756, "grad_norm": 1.123935831933024, "learning_rate": 2.2698831150692635e-05, "loss": 0.654, "num_input_tokens_seen": 872512704, "step": 4835 }, { "epoch": 0.5294069350556939, "grad_norm": 1.2643885754220883, "learning_rate": 2.2690269168495904e-05, "loss": 0.7207, "num_input_tokens_seen": 872695712, "step": 4836 }, { "epoch": 0.5295164071266318, "grad_norm": 1.3174895405218443, "learning_rate": 2.2681707459535912e-05, "loss": 0.8417, "num_input_tokens_seen": 872890816, "step": 4837 }, { "epoch": 0.5296258791975698, "grad_norm": 1.266984327329244, "learning_rate": 2.2673146024825496e-05, "loss": 0.5884, "num_input_tokens_seen": 873069344, "step": 4838 }, { "epoch": 0.5297353512685076, "grad_norm": 1.2588285050823345, "learning_rate": 2.2664584865377454e-05, "loss": 0.6925, "num_input_tokens_seen": 873238240, "step": 4839 }, { "epoch": 0.5298448233394455, "grad_norm": 1.2071301071028935, "learning_rate": 2.2656023982204556e-05, "loss": 0.6294, "num_input_tokens_seen": 873401984, "step": 4840 }, { "epoch": 0.5299542954103834, "grad_norm": 1.1226106808306373, "learning_rate": 2.2647463376319556e-05, "loss": 0.6402, "num_input_tokens_seen": 873573120, "step": 4841 }, { "epoch": 0.5300637674813213, "grad_norm": 1.3237957123958568, "learning_rate": 2.2638903048735124e-05, "loss": 0.8164, "num_input_tokens_seen": 873760832, "step": 4842 }, { "epoch": 0.5301732395522593, "grad_norm": 1.2283478881632692, "learning_rate": 2.263034300046396e-05, "loss": 0.5049, "num_input_tokens_seen": 873896352, "step": 4843 }, { "epoch": 0.5302827116231972, "grad_norm": 1.191172092127735, "learning_rate": 2.262178323251868e-05, "loss": 0.7309, "num_input_tokens_seen": 874062112, "step": 4844 }, { "epoch": 0.530392183694135, "grad_norm": 1.260097915235735, "learning_rate": 2.2613223745911906e-05, "loss": 0.5604, "num_input_tokens_seen": 874224512, "step": 4845 }, { "epoch": 0.5305016557650729, "grad_norm": 1.206304337458642, "learning_rate": 2.2604664541656193e-05, "loss": 0.6768, "num_input_tokens_seen": 874417152, "step": 4846 }, { "epoch": 0.5306111278360108, "grad_norm": 1.3506146674089732, "learning_rate": 2.2596105620764083e-05, "loss": 0.8151, "num_input_tokens_seen": 874611584, "step": 4847 }, { "epoch": 0.5307205999069488, "grad_norm": 1.3417313516712943, "learning_rate": 2.2587546984248086e-05, "loss": 0.7907, "num_input_tokens_seen": 874769952, "step": 4848 }, { "epoch": 0.5308300719778867, "grad_norm": 1.239538763618081, "learning_rate": 2.2578988633120667e-05, "loss": 0.6988, "num_input_tokens_seen": 874949376, "step": 4849 }, { "epoch": 0.5309395440488246, "grad_norm": 1.2869266733445277, "learning_rate": 2.2570430568394275e-05, "loss": 0.7918, "num_input_tokens_seen": 875129472, "step": 4850 }, { "epoch": 0.5310490161197624, "grad_norm": 1.2744374474027862, "learning_rate": 2.256187279108129e-05, "loss": 0.8634, "num_input_tokens_seen": 875325248, "step": 4851 }, { "epoch": 0.5311584881907003, "grad_norm": 1.417848187830464, "learning_rate": 2.2553315302194102e-05, "loss": 0.8175, "num_input_tokens_seen": 875504672, "step": 4852 }, { "epoch": 0.5312679602616383, "grad_norm": 1.307312614404803, "learning_rate": 2.254475810274503e-05, "loss": 0.7567, "num_input_tokens_seen": 875701792, "step": 4853 }, { "epoch": 0.5313774323325762, "grad_norm": 1.30678330627465, "learning_rate": 2.2536201193746375e-05, "loss": 0.7238, "num_input_tokens_seen": 875894880, "step": 4854 }, { "epoch": 0.5314869044035141, "grad_norm": 1.393646557993517, "learning_rate": 2.2527644576210423e-05, "loss": 0.7429, "num_input_tokens_seen": 876098272, "step": 4855 }, { "epoch": 0.5315963764744519, "grad_norm": 1.1929394471692294, "learning_rate": 2.2519088251149385e-05, "loss": 0.548, "num_input_tokens_seen": 876279264, "step": 4856 }, { "epoch": 0.5317058485453898, "grad_norm": 1.3415651523893533, "learning_rate": 2.2510532219575465e-05, "loss": 0.845, "num_input_tokens_seen": 876488256, "step": 4857 }, { "epoch": 0.5318153206163277, "grad_norm": 1.310954228181923, "learning_rate": 2.2501976482500823e-05, "loss": 0.8563, "num_input_tokens_seen": 876711360, "step": 4858 }, { "epoch": 0.5319247926872657, "grad_norm": 1.2465624271678737, "learning_rate": 2.2493421040937585e-05, "loss": 0.8203, "num_input_tokens_seen": 876921472, "step": 4859 }, { "epoch": 0.5320342647582036, "grad_norm": 1.2339158954801297, "learning_rate": 2.248486589589785e-05, "loss": 0.7309, "num_input_tokens_seen": 877076928, "step": 4860 }, { "epoch": 0.5321437368291415, "grad_norm": 1.3927827874622616, "learning_rate": 2.2476311048393666e-05, "loss": 0.8316, "num_input_tokens_seen": 877270240, "step": 4861 }, { "epoch": 0.5322532089000793, "grad_norm": 1.4873963776727321, "learning_rate": 2.246775649943707e-05, "loss": 0.8748, "num_input_tokens_seen": 877451680, "step": 4862 }, { "epoch": 0.5323626809710172, "grad_norm": 1.187831179966979, "learning_rate": 2.2459202250040032e-05, "loss": 0.7787, "num_input_tokens_seen": 877605344, "step": 4863 }, { "epoch": 0.5324721530419552, "grad_norm": 1.1990050084993367, "learning_rate": 2.2450648301214517e-05, "loss": 0.7077, "num_input_tokens_seen": 877791040, "step": 4864 }, { "epoch": 0.5325816251128931, "grad_norm": 1.2106977656154065, "learning_rate": 2.2442094653972428e-05, "loss": 0.8287, "num_input_tokens_seen": 877963296, "step": 4865 }, { "epoch": 0.532691097183831, "grad_norm": 1.2426062304026362, "learning_rate": 2.243354130932565e-05, "loss": 0.6878, "num_input_tokens_seen": 878135104, "step": 4866 }, { "epoch": 0.5328005692547689, "grad_norm": 1.2911502923717906, "learning_rate": 2.242498826828604e-05, "loss": 0.7541, "num_input_tokens_seen": 878321920, "step": 4867 }, { "epoch": 0.5329100413257067, "grad_norm": 1.088600625566299, "learning_rate": 2.241643553186538e-05, "loss": 0.7641, "num_input_tokens_seen": 878513440, "step": 4868 }, { "epoch": 0.5330195133966447, "grad_norm": 1.0897620932775938, "learning_rate": 2.240788310107547e-05, "loss": 0.4879, "num_input_tokens_seen": 878692864, "step": 4869 }, { "epoch": 0.5331289854675826, "grad_norm": 1.389025550582567, "learning_rate": 2.2399330976928028e-05, "loss": 0.8938, "num_input_tokens_seen": 878879456, "step": 4870 }, { "epoch": 0.5332384575385205, "grad_norm": 1.1905967588905064, "learning_rate": 2.2390779160434767e-05, "loss": 0.5721, "num_input_tokens_seen": 879064928, "step": 4871 }, { "epoch": 0.5333479296094584, "grad_norm": 1.2630696814879967, "learning_rate": 2.2382227652607333e-05, "loss": 0.7968, "num_input_tokens_seen": 879269664, "step": 4872 }, { "epoch": 0.5334574016803962, "grad_norm": 1.4085812636344066, "learning_rate": 2.2373676454457364e-05, "loss": 0.6469, "num_input_tokens_seen": 879455360, "step": 4873 }, { "epoch": 0.5335668737513342, "grad_norm": 1.2052246697103635, "learning_rate": 2.2365125566996457e-05, "loss": 0.7378, "num_input_tokens_seen": 879658976, "step": 4874 }, { "epoch": 0.5336763458222721, "grad_norm": 1.5566011086381681, "learning_rate": 2.2356574991236154e-05, "loss": 0.8755, "num_input_tokens_seen": 879803008, "step": 4875 }, { "epoch": 0.53378581789321, "grad_norm": 1.5573730277387225, "learning_rate": 2.2348024728187983e-05, "loss": 0.7691, "num_input_tokens_seen": 880011776, "step": 4876 }, { "epoch": 0.5338952899641479, "grad_norm": 1.1231680365178267, "learning_rate": 2.2339474778863408e-05, "loss": 0.5424, "num_input_tokens_seen": 880214720, "step": 4877 }, { "epoch": 0.5340047620350858, "grad_norm": 1.3837148879368257, "learning_rate": 2.2330925144273884e-05, "loss": 0.7147, "num_input_tokens_seen": 880385856, "step": 4878 }, { "epoch": 0.5341142341060237, "grad_norm": 1.3794588273178938, "learning_rate": 2.2322375825430815e-05, "loss": 0.6893, "num_input_tokens_seen": 880555200, "step": 4879 }, { "epoch": 0.5342237061769616, "grad_norm": 1.467401900383721, "learning_rate": 2.231382682334556e-05, "loss": 0.6847, "num_input_tokens_seen": 880750752, "step": 4880 }, { "epoch": 0.5343331782478995, "grad_norm": 1.2382956422757605, "learning_rate": 2.2305278139029465e-05, "loss": 0.7432, "num_input_tokens_seen": 880914272, "step": 4881 }, { "epoch": 0.5344426503188374, "grad_norm": 1.5022119266880143, "learning_rate": 2.2296729773493806e-05, "loss": 0.9886, "num_input_tokens_seen": 881082720, "step": 4882 }, { "epoch": 0.5345521223897753, "grad_norm": 1.312595454786674, "learning_rate": 2.228818172774986e-05, "loss": 0.8082, "num_input_tokens_seen": 881295968, "step": 4883 }, { "epoch": 0.5346615944607133, "grad_norm": 1.1856068025679551, "learning_rate": 2.2279634002808818e-05, "loss": 0.5442, "num_input_tokens_seen": 881448064, "step": 4884 }, { "epoch": 0.5347710665316511, "grad_norm": 1.2758387293951703, "learning_rate": 2.2271086599681887e-05, "loss": 0.6449, "num_input_tokens_seen": 881614944, "step": 4885 }, { "epoch": 0.534880538602589, "grad_norm": 1.1167580906370165, "learning_rate": 2.2262539519380182e-05, "loss": 0.7576, "num_input_tokens_seen": 881781152, "step": 4886 }, { "epoch": 0.5349900106735269, "grad_norm": 1.296371867926355, "learning_rate": 2.225399276291482e-05, "loss": 0.8247, "num_input_tokens_seen": 881989472, "step": 4887 }, { "epoch": 0.5350994827444648, "grad_norm": 1.2309257506376101, "learning_rate": 2.2245446331296874e-05, "loss": 0.6711, "num_input_tokens_seen": 882151872, "step": 4888 }, { "epoch": 0.5352089548154028, "grad_norm": 1.0745299287393781, "learning_rate": 2.223690022553735e-05, "loss": 0.6098, "num_input_tokens_seen": 882358176, "step": 4889 }, { "epoch": 0.5353184268863406, "grad_norm": 1.37233706016087, "learning_rate": 2.2228354446647252e-05, "loss": 0.6655, "num_input_tokens_seen": 882541632, "step": 4890 }, { "epoch": 0.5354278989572785, "grad_norm": 1.2667839489564667, "learning_rate": 2.2219808995637524e-05, "loss": 0.6633, "num_input_tokens_seen": 882725760, "step": 4891 }, { "epoch": 0.5355373710282164, "grad_norm": 1.2616579968260813, "learning_rate": 2.2211263873519082e-05, "loss": 0.9358, "num_input_tokens_seen": 882919968, "step": 4892 }, { "epoch": 0.5356468430991543, "grad_norm": 1.2701298637526022, "learning_rate": 2.2202719081302785e-05, "loss": 0.6534, "num_input_tokens_seen": 883092224, "step": 4893 }, { "epoch": 0.5357563151700923, "grad_norm": 1.1998743011284998, "learning_rate": 2.219417461999947e-05, "loss": 0.8227, "num_input_tokens_seen": 883288448, "step": 4894 }, { "epoch": 0.5358657872410302, "grad_norm": 1.2450470549335548, "learning_rate": 2.218563049061995e-05, "loss": 0.9085, "num_input_tokens_seen": 883475488, "step": 4895 }, { "epoch": 0.535975259311968, "grad_norm": 1.326897311784455, "learning_rate": 2.217708669417495e-05, "loss": 0.6109, "num_input_tokens_seen": 883618400, "step": 4896 }, { "epoch": 0.5360847313829059, "grad_norm": 1.2862397701942836, "learning_rate": 2.2168543231675204e-05, "loss": 0.7116, "num_input_tokens_seen": 883790432, "step": 4897 }, { "epoch": 0.5361942034538438, "grad_norm": 1.2651699802350431, "learning_rate": 2.2160000104131372e-05, "loss": 0.6061, "num_input_tokens_seen": 883917440, "step": 4898 }, { "epoch": 0.5363036755247818, "grad_norm": 1.2408945804456388, "learning_rate": 2.215145731255411e-05, "loss": 0.8024, "num_input_tokens_seen": 884096640, "step": 4899 }, { "epoch": 0.5364131475957197, "grad_norm": 1.3663412908573782, "learning_rate": 2.2142914857953993e-05, "loss": 0.6452, "num_input_tokens_seen": 884271136, "step": 4900 }, { "epoch": 0.5365226196666576, "grad_norm": 1.2370925637184877, "learning_rate": 2.2134372741341585e-05, "loss": 0.6526, "num_input_tokens_seen": 884481696, "step": 4901 }, { "epoch": 0.5366320917375954, "grad_norm": 1.2855322847144017, "learning_rate": 2.2125830963727412e-05, "loss": 0.858, "num_input_tokens_seen": 884645216, "step": 4902 }, { "epoch": 0.5367415638085333, "grad_norm": 1.1548748602513503, "learning_rate": 2.2117289526121934e-05, "loss": 0.685, "num_input_tokens_seen": 884837184, "step": 4903 }, { "epoch": 0.5368510358794713, "grad_norm": 1.169902412876778, "learning_rate": 2.2108748429535603e-05, "loss": 0.8191, "num_input_tokens_seen": 885007872, "step": 4904 }, { "epoch": 0.5369605079504092, "grad_norm": 1.2719964140505597, "learning_rate": 2.21002076749788e-05, "loss": 0.7797, "num_input_tokens_seen": 885215072, "step": 4905 }, { "epoch": 0.5370699800213471, "grad_norm": 1.345419799920753, "learning_rate": 2.209166726346189e-05, "loss": 0.7178, "num_input_tokens_seen": 885391136, "step": 4906 }, { "epoch": 0.5371794520922849, "grad_norm": 1.4551814305033455, "learning_rate": 2.2083127195995176e-05, "loss": 0.8455, "num_input_tokens_seen": 885553536, "step": 4907 }, { "epoch": 0.5372889241632228, "grad_norm": 1.3620752722516323, "learning_rate": 2.2074587473588936e-05, "loss": 0.718, "num_input_tokens_seen": 885725792, "step": 4908 }, { "epoch": 0.5373983962341607, "grad_norm": 1.3231188501963818, "learning_rate": 2.206604809725342e-05, "loss": 0.7993, "num_input_tokens_seen": 885928960, "step": 4909 }, { "epoch": 0.5375078683050987, "grad_norm": 1.2196389404294625, "learning_rate": 2.205750906799879e-05, "loss": 0.7634, "num_input_tokens_seen": 886114208, "step": 4910 }, { "epoch": 0.5376173403760366, "grad_norm": 1.1107705163806, "learning_rate": 2.204897038683522e-05, "loss": 0.5729, "num_input_tokens_seen": 886288032, "step": 4911 }, { "epoch": 0.5377268124469745, "grad_norm": 1.2001893033279947, "learning_rate": 2.2040432054772807e-05, "loss": 0.8703, "num_input_tokens_seen": 886492992, "step": 4912 }, { "epoch": 0.5378362845179123, "grad_norm": 1.3537487803530284, "learning_rate": 2.2031894072821633e-05, "loss": 0.6991, "num_input_tokens_seen": 886663904, "step": 4913 }, { "epoch": 0.5379457565888502, "grad_norm": 1.2112296459586116, "learning_rate": 2.2023356441991712e-05, "loss": 0.5344, "num_input_tokens_seen": 886837056, "step": 4914 }, { "epoch": 0.5380552286597882, "grad_norm": 1.200786894845197, "learning_rate": 2.2014819163293028e-05, "loss": 0.6827, "num_input_tokens_seen": 886992960, "step": 4915 }, { "epoch": 0.5381647007307261, "grad_norm": 1.2442444379244164, "learning_rate": 2.200628223773554e-05, "loss": 0.7791, "num_input_tokens_seen": 887167904, "step": 4916 }, { "epoch": 0.538274172801664, "grad_norm": 1.2769373442619079, "learning_rate": 2.199774566632913e-05, "loss": 0.8381, "num_input_tokens_seen": 887362560, "step": 4917 }, { "epoch": 0.5383836448726019, "grad_norm": 1.3248905064668464, "learning_rate": 2.198920945008368e-05, "loss": 1.0244, "num_input_tokens_seen": 887569088, "step": 4918 }, { "epoch": 0.5384931169435397, "grad_norm": 1.1621043485644973, "learning_rate": 2.198067359000899e-05, "loss": 0.5345, "num_input_tokens_seen": 887761504, "step": 4919 }, { "epoch": 0.5386025890144777, "grad_norm": 1.2379096884687548, "learning_rate": 2.1972138087114835e-05, "loss": 0.6864, "num_input_tokens_seen": 887918752, "step": 4920 }, { "epoch": 0.5387120610854156, "grad_norm": 1.2587389170244934, "learning_rate": 2.1963602942410968e-05, "loss": 0.5941, "num_input_tokens_seen": 888087200, "step": 4921 }, { "epoch": 0.5388215331563535, "grad_norm": 1.226690346439173, "learning_rate": 2.195506815690706e-05, "loss": 0.7483, "num_input_tokens_seen": 888303808, "step": 4922 }, { "epoch": 0.5389310052272914, "grad_norm": 1.2577610360747378, "learning_rate": 2.1946533731612773e-05, "loss": 0.5528, "num_input_tokens_seen": 888461952, "step": 4923 }, { "epoch": 0.5390404772982292, "grad_norm": 1.1504808270723152, "learning_rate": 2.1937999667537704e-05, "loss": 0.7557, "num_input_tokens_seen": 888637120, "step": 4924 }, { "epoch": 0.5391499493691672, "grad_norm": 1.2170944707183071, "learning_rate": 2.192946596569143e-05, "loss": 0.7313, "num_input_tokens_seen": 888831776, "step": 4925 }, { "epoch": 0.5392594214401051, "grad_norm": 1.2778103877250917, "learning_rate": 2.192093262708345e-05, "loss": 0.622, "num_input_tokens_seen": 889020384, "step": 4926 }, { "epoch": 0.539368893511043, "grad_norm": 1.2391666171380251, "learning_rate": 2.1912399652723255e-05, "loss": 0.668, "num_input_tokens_seen": 889217056, "step": 4927 }, { "epoch": 0.5394783655819809, "grad_norm": 1.4062064270050616, "learning_rate": 2.190386704362029e-05, "loss": 0.78, "num_input_tokens_seen": 889398720, "step": 4928 }, { "epoch": 0.5395878376529188, "grad_norm": 1.2832233305433316, "learning_rate": 2.1895334800783925e-05, "loss": 0.7584, "num_input_tokens_seen": 889598976, "step": 4929 }, { "epoch": 0.5396973097238567, "grad_norm": 1.4613957629831364, "learning_rate": 2.188680292522353e-05, "loss": 0.7702, "num_input_tokens_seen": 889806400, "step": 4930 }, { "epoch": 0.5398067817947946, "grad_norm": 1.2150526449783852, "learning_rate": 2.1878271417948385e-05, "loss": 0.6694, "num_input_tokens_seen": 890024128, "step": 4931 }, { "epoch": 0.5399162538657325, "grad_norm": 1.277906614640715, "learning_rate": 2.1869740279967768e-05, "loss": 0.7419, "num_input_tokens_seen": 890206912, "step": 4932 }, { "epoch": 0.5400257259366704, "grad_norm": 1.3461954796819038, "learning_rate": 2.1861209512290888e-05, "loss": 1.0243, "num_input_tokens_seen": 890378272, "step": 4933 }, { "epoch": 0.5401351980076083, "grad_norm": 1.2089302901340107, "learning_rate": 2.1852679115926926e-05, "loss": 0.7983, "num_input_tokens_seen": 890549856, "step": 4934 }, { "epoch": 0.5402446700785463, "grad_norm": 1.443457407841944, "learning_rate": 2.184414909188501e-05, "loss": 0.7991, "num_input_tokens_seen": 890693440, "step": 4935 }, { "epoch": 0.5403541421494841, "grad_norm": 1.2263035105968807, "learning_rate": 2.1835619441174214e-05, "loss": 0.851, "num_input_tokens_seen": 890868832, "step": 4936 }, { "epoch": 0.540463614220422, "grad_norm": 1.3252511740080046, "learning_rate": 2.1827090164803605e-05, "loss": 0.8924, "num_input_tokens_seen": 891052512, "step": 4937 }, { "epoch": 0.5405730862913599, "grad_norm": 1.3257566091195419, "learning_rate": 2.181856126378215e-05, "loss": 0.5744, "num_input_tokens_seen": 891258592, "step": 4938 }, { "epoch": 0.5406825583622978, "grad_norm": 1.22639955057819, "learning_rate": 2.181003273911883e-05, "loss": 0.7564, "num_input_tokens_seen": 891467136, "step": 4939 }, { "epoch": 0.5407920304332358, "grad_norm": 1.3604510203177003, "learning_rate": 2.1801504591822526e-05, "loss": 0.6313, "num_input_tokens_seen": 891658656, "step": 4940 }, { "epoch": 0.5409015025041736, "grad_norm": 1.458758764428595, "learning_rate": 2.179297682290211e-05, "loss": 0.8119, "num_input_tokens_seen": 891823520, "step": 4941 }, { "epoch": 0.5410109745751115, "grad_norm": 1.2357293919491246, "learning_rate": 2.178444943336642e-05, "loss": 0.6588, "num_input_tokens_seen": 892011456, "step": 4942 }, { "epoch": 0.5411204466460494, "grad_norm": 1.3240874909020297, "learning_rate": 2.1775922424224203e-05, "loss": 0.5618, "num_input_tokens_seen": 892196928, "step": 4943 }, { "epoch": 0.5412299187169873, "grad_norm": 1.3283335802436194, "learning_rate": 2.1767395796484207e-05, "loss": 0.7244, "num_input_tokens_seen": 892331776, "step": 4944 }, { "epoch": 0.5413393907879253, "grad_norm": 1.1435830416531798, "learning_rate": 2.17588695511551e-05, "loss": 0.5291, "num_input_tokens_seen": 892524192, "step": 4945 }, { "epoch": 0.5414488628588632, "grad_norm": 1.1830938309535177, "learning_rate": 2.1750343689245544e-05, "loss": 0.7808, "num_input_tokens_seen": 892695104, "step": 4946 }, { "epoch": 0.541558334929801, "grad_norm": 1.2624135186063812, "learning_rate": 2.1741818211764103e-05, "loss": 0.6706, "num_input_tokens_seen": 892873632, "step": 4947 }, { "epoch": 0.5416678070007389, "grad_norm": 1.2905745914396585, "learning_rate": 2.173329311971934e-05, "loss": 0.7458, "num_input_tokens_seen": 893013856, "step": 4948 }, { "epoch": 0.5417772790716768, "grad_norm": 1.2122643369557335, "learning_rate": 2.1724768414119766e-05, "loss": 0.5248, "num_input_tokens_seen": 893171328, "step": 4949 }, { "epoch": 0.5418867511426148, "grad_norm": 1.1659617962704625, "learning_rate": 2.171624409597382e-05, "loss": 0.6172, "num_input_tokens_seen": 893381888, "step": 4950 }, { "epoch": 0.5419962232135527, "grad_norm": 1.2681362057976158, "learning_rate": 2.170772016628993e-05, "loss": 0.9146, "num_input_tokens_seen": 893562208, "step": 4951 }, { "epoch": 0.5421056952844906, "grad_norm": 1.2687055044405224, "learning_rate": 2.1699196626076437e-05, "loss": 0.7089, "num_input_tokens_seen": 893741856, "step": 4952 }, { "epoch": 0.5422151673554284, "grad_norm": 1.3272833457346258, "learning_rate": 2.169067347634168e-05, "loss": 0.6406, "num_input_tokens_seen": 893917472, "step": 4953 }, { "epoch": 0.5423246394263663, "grad_norm": 1.2390635813916406, "learning_rate": 2.168215071809392e-05, "loss": 0.6338, "num_input_tokens_seen": 894091968, "step": 4954 }, { "epoch": 0.5424341114973042, "grad_norm": 1.2803731569610026, "learning_rate": 2.167362835234139e-05, "loss": 0.7927, "num_input_tokens_seen": 894268256, "step": 4955 }, { "epoch": 0.5425435835682422, "grad_norm": 1.2039542869970303, "learning_rate": 2.166510638009227e-05, "loss": 0.5165, "num_input_tokens_seen": 894431776, "step": 4956 }, { "epoch": 0.5426530556391801, "grad_norm": 1.1871114898391668, "learning_rate": 2.1656584802354678e-05, "loss": 0.6557, "num_input_tokens_seen": 894613216, "step": 4957 }, { "epoch": 0.5427625277101179, "grad_norm": 1.2410985624164101, "learning_rate": 2.1648063620136733e-05, "loss": 0.7383, "num_input_tokens_seen": 894808096, "step": 4958 }, { "epoch": 0.5428719997810558, "grad_norm": 1.1149492049514576, "learning_rate": 2.1639542834446434e-05, "loss": 0.4883, "num_input_tokens_seen": 894985280, "step": 4959 }, { "epoch": 0.5429814718519937, "grad_norm": 1.1263660508294453, "learning_rate": 2.163102244629181e-05, "loss": 0.7986, "num_input_tokens_seen": 895175232, "step": 4960 }, { "epoch": 0.5430909439229317, "grad_norm": 1.1184955222253776, "learning_rate": 2.162250245668078e-05, "loss": 0.5528, "num_input_tokens_seen": 895342560, "step": 4961 }, { "epoch": 0.5432004159938696, "grad_norm": 1.3579051522797498, "learning_rate": 2.1613982866621252e-05, "loss": 0.5849, "num_input_tokens_seen": 895472256, "step": 4962 }, { "epoch": 0.5433098880648075, "grad_norm": 1.3277315791236146, "learning_rate": 2.1605463677121086e-05, "loss": 0.5903, "num_input_tokens_seen": 895659520, "step": 4963 }, { "epoch": 0.5434193601357453, "grad_norm": 1.3134445243246453, "learning_rate": 2.159694488918807e-05, "loss": 0.805, "num_input_tokens_seen": 895858208, "step": 4964 }, { "epoch": 0.5435288322066832, "grad_norm": 1.1542640849198682, "learning_rate": 2.158842650382997e-05, "loss": 0.7356, "num_input_tokens_seen": 896044800, "step": 4965 }, { "epoch": 0.5436383042776212, "grad_norm": 1.6006465618762133, "learning_rate": 2.157990852205449e-05, "loss": 0.8172, "num_input_tokens_seen": 896234304, "step": 4966 }, { "epoch": 0.5437477763485591, "grad_norm": 1.3948711333690864, "learning_rate": 2.1571390944869306e-05, "loss": 0.9159, "num_input_tokens_seen": 896397376, "step": 4967 }, { "epoch": 0.543857248419497, "grad_norm": 1.2911132683237132, "learning_rate": 2.1562873773282005e-05, "loss": 1.0406, "num_input_tokens_seen": 896594944, "step": 4968 }, { "epoch": 0.5439667204904349, "grad_norm": 1.2623366505163174, "learning_rate": 2.1554357008300164e-05, "loss": 0.6399, "num_input_tokens_seen": 896740992, "step": 4969 }, { "epoch": 0.5440761925613727, "grad_norm": 1.2482483511664977, "learning_rate": 2.1545840650931317e-05, "loss": 0.7745, "num_input_tokens_seen": 896937888, "step": 4970 }, { "epoch": 0.5441856646323107, "grad_norm": 1.3343034533006772, "learning_rate": 2.1537324702182907e-05, "loss": 0.8711, "num_input_tokens_seen": 897123136, "step": 4971 }, { "epoch": 0.5442951367032486, "grad_norm": 1.2086028204135537, "learning_rate": 2.1528809163062375e-05, "loss": 0.6281, "num_input_tokens_seen": 897322272, "step": 4972 }, { "epoch": 0.5444046087741865, "grad_norm": 1.2408899692106312, "learning_rate": 2.1520294034577072e-05, "loss": 0.6747, "num_input_tokens_seen": 897513120, "step": 4973 }, { "epoch": 0.5445140808451244, "grad_norm": 1.3232875914719568, "learning_rate": 2.1511779317734336e-05, "loss": 0.8038, "num_input_tokens_seen": 897697472, "step": 4974 }, { "epoch": 0.5446235529160622, "grad_norm": 1.2736207795039036, "learning_rate": 2.1503265013541433e-05, "loss": 0.6547, "num_input_tokens_seen": 897860320, "step": 4975 }, { "epoch": 0.5447330249870002, "grad_norm": 1.2654831889259386, "learning_rate": 2.1494751123005605e-05, "loss": 0.8771, "num_input_tokens_seen": 898075360, "step": 4976 }, { "epoch": 0.5448424970579381, "grad_norm": 1.298523280572905, "learning_rate": 2.1486237647134014e-05, "loss": 0.6275, "num_input_tokens_seen": 898273376, "step": 4977 }, { "epoch": 0.544951969128876, "grad_norm": 1.2864613964131861, "learning_rate": 2.147772458693379e-05, "loss": 0.6879, "num_input_tokens_seen": 898481920, "step": 4978 }, { "epoch": 0.5450614411998139, "grad_norm": 1.3487688419981008, "learning_rate": 2.146921194341202e-05, "loss": 0.6392, "num_input_tokens_seen": 898649472, "step": 4979 }, { "epoch": 0.5451709132707518, "grad_norm": 1.1488833956635285, "learning_rate": 2.1460699717575718e-05, "loss": 0.5415, "num_input_tokens_seen": 898834048, "step": 4980 }, { "epoch": 0.5452803853416897, "grad_norm": 1.1583531814814063, "learning_rate": 2.1452187910431875e-05, "loss": 0.8757, "num_input_tokens_seen": 899041696, "step": 4981 }, { "epoch": 0.5453898574126276, "grad_norm": 1.2712442432074418, "learning_rate": 2.1443676522987432e-05, "loss": 0.8687, "num_input_tokens_seen": 899218880, "step": 4982 }, { "epoch": 0.5454993294835655, "grad_norm": 1.37382235814615, "learning_rate": 2.1435165556249246e-05, "loss": 0.728, "num_input_tokens_seen": 899391808, "step": 4983 }, { "epoch": 0.5456088015545034, "grad_norm": 1.3908272071156433, "learning_rate": 2.142665501122417e-05, "loss": 0.769, "num_input_tokens_seen": 899596096, "step": 4984 }, { "epoch": 0.5457182736254413, "grad_norm": 1.191168689494597, "learning_rate": 2.141814488891896e-05, "loss": 0.6841, "num_input_tokens_seen": 899767456, "step": 4985 }, { "epoch": 0.5458277456963793, "grad_norm": 1.2286435592822216, "learning_rate": 2.1409635190340373e-05, "loss": 0.6675, "num_input_tokens_seen": 899954720, "step": 4986 }, { "epoch": 0.5459372177673171, "grad_norm": 1.1165036863034694, "learning_rate": 2.1401125916495072e-05, "loss": 0.5039, "num_input_tokens_seen": 900154528, "step": 4987 }, { "epoch": 0.546046689838255, "grad_norm": 1.1694273755380087, "learning_rate": 2.1392617068389697e-05, "loss": 0.5491, "num_input_tokens_seen": 900317824, "step": 4988 }, { "epoch": 0.5461561619091929, "grad_norm": 1.3792440632811118, "learning_rate": 2.1384108647030836e-05, "loss": 0.8438, "num_input_tokens_seen": 900527488, "step": 4989 }, { "epoch": 0.5462656339801308, "grad_norm": 1.2340486795323373, "learning_rate": 2.1375600653425003e-05, "loss": 0.7102, "num_input_tokens_seen": 900698176, "step": 4990 }, { "epoch": 0.5463751060510688, "grad_norm": 1.3574311839271633, "learning_rate": 2.136709308857869e-05, "loss": 0.7787, "num_input_tokens_seen": 900909856, "step": 4991 }, { "epoch": 0.5464845781220066, "grad_norm": 1.2510408359665643, "learning_rate": 2.135858595349831e-05, "loss": 0.7027, "num_input_tokens_seen": 901063744, "step": 4992 }, { "epoch": 0.5465940501929445, "grad_norm": 1.1856164047742062, "learning_rate": 2.135007924919026e-05, "loss": 0.5379, "num_input_tokens_seen": 901246976, "step": 4993 }, { "epoch": 0.5467035222638824, "grad_norm": 1.4209694126925678, "learning_rate": 2.134157297666085e-05, "loss": 0.6421, "num_input_tokens_seen": 901387872, "step": 4994 }, { "epoch": 0.5468129943348203, "grad_norm": 1.5261214650960149, "learning_rate": 2.133306713691636e-05, "loss": 0.7068, "num_input_tokens_seen": 901588352, "step": 4995 }, { "epoch": 0.5469224664057583, "grad_norm": 1.2687415850881754, "learning_rate": 2.1324561730963025e-05, "loss": 0.7041, "num_input_tokens_seen": 901753216, "step": 4996 }, { "epoch": 0.5470319384766962, "grad_norm": 1.1995685079775231, "learning_rate": 2.1316056759807006e-05, "loss": 0.5395, "num_input_tokens_seen": 901928160, "step": 4997 }, { "epoch": 0.547141410547634, "grad_norm": 1.2632427164876006, "learning_rate": 2.1307552224454435e-05, "loss": 0.7918, "num_input_tokens_seen": 902123936, "step": 4998 }, { "epoch": 0.5472508826185719, "grad_norm": 1.5421431638456984, "learning_rate": 2.129904812591137e-05, "loss": 0.791, "num_input_tokens_seen": 902269088, "step": 4999 }, { "epoch": 0.5473603546895098, "grad_norm": 1.3400665631771513, "learning_rate": 2.129054446518385e-05, "loss": 0.989, "num_input_tokens_seen": 902477632, "step": 5000 }, { "epoch": 0.5474698267604478, "grad_norm": 1.4204002529518978, "learning_rate": 2.1282041243277816e-05, "loss": 0.8198, "num_input_tokens_seen": 902657504, "step": 5001 }, { "epoch": 0.5475792988313857, "grad_norm": 1.2502888822529727, "learning_rate": 2.1273538461199194e-05, "loss": 0.8118, "num_input_tokens_seen": 902853728, "step": 5002 }, { "epoch": 0.5476887709023236, "grad_norm": 1.2650252439973044, "learning_rate": 2.1265036119953864e-05, "loss": 0.6153, "num_input_tokens_seen": 903033376, "step": 5003 }, { "epoch": 0.5477982429732614, "grad_norm": 1.3317203998029552, "learning_rate": 2.12565342205476e-05, "loss": 0.7814, "num_input_tokens_seen": 903221088, "step": 5004 }, { "epoch": 0.5479077150441993, "grad_norm": 1.3905553478842472, "learning_rate": 2.1248032763986203e-05, "loss": 0.6778, "num_input_tokens_seen": 903366240, "step": 5005 }, { "epoch": 0.5480171871151372, "grad_norm": 1.2395147683978416, "learning_rate": 2.1239531751275344e-05, "loss": 0.8157, "num_input_tokens_seen": 903549696, "step": 5006 }, { "epoch": 0.5481266591860752, "grad_norm": 1.2170807582292975, "learning_rate": 2.123103118342069e-05, "loss": 0.8017, "num_input_tokens_seen": 903746144, "step": 5007 }, { "epoch": 0.5482361312570131, "grad_norm": 1.343180202949068, "learning_rate": 2.1222531061427843e-05, "loss": 0.8181, "num_input_tokens_seen": 903936768, "step": 5008 }, { "epoch": 0.5483456033279509, "grad_norm": 1.3433785923290131, "learning_rate": 2.1214031386302347e-05, "loss": 0.6677, "num_input_tokens_seen": 904072064, "step": 5009 }, { "epoch": 0.5484550753988888, "grad_norm": 1.35317178517571, "learning_rate": 2.1205532159049714e-05, "loss": 0.8803, "num_input_tokens_seen": 904252832, "step": 5010 }, { "epoch": 0.5485645474698267, "grad_norm": 1.4038915566517718, "learning_rate": 2.1197033380675357e-05, "loss": 0.8263, "num_input_tokens_seen": 904433600, "step": 5011 }, { "epoch": 0.5486740195407647, "grad_norm": 1.1902610903509492, "learning_rate": 2.1188535052184695e-05, "loss": 0.8637, "num_input_tokens_seen": 904634528, "step": 5012 }, { "epoch": 0.5487834916117026, "grad_norm": 1.1820791896621496, "learning_rate": 2.118003717458304e-05, "loss": 0.5752, "num_input_tokens_seen": 904804768, "step": 5013 }, { "epoch": 0.5488929636826405, "grad_norm": 1.083735586948317, "learning_rate": 2.1171539748875692e-05, "loss": 0.6304, "num_input_tokens_seen": 904981728, "step": 5014 }, { "epoch": 0.5490024357535783, "grad_norm": 1.236575422819575, "learning_rate": 2.1163042776067865e-05, "loss": 1.0495, "num_input_tokens_seen": 905188704, "step": 5015 }, { "epoch": 0.5491119078245162, "grad_norm": 1.2263481292969145, "learning_rate": 2.1154546257164744e-05, "loss": 0.6804, "num_input_tokens_seen": 905377088, "step": 5016 }, { "epoch": 0.5492213798954542, "grad_norm": 1.422642594952229, "learning_rate": 2.114605019317145e-05, "loss": 0.694, "num_input_tokens_seen": 905549568, "step": 5017 }, { "epoch": 0.5493308519663921, "grad_norm": 1.1707434722698857, "learning_rate": 2.1137554585093056e-05, "loss": 0.8062, "num_input_tokens_seen": 905728992, "step": 5018 }, { "epoch": 0.54944032403733, "grad_norm": 1.2008432822749602, "learning_rate": 2.1129059433934567e-05, "loss": 0.5581, "num_input_tokens_seen": 905901696, "step": 5019 }, { "epoch": 0.5495497961082679, "grad_norm": 1.3353971314253599, "learning_rate": 2.1120564740700945e-05, "loss": 0.7647, "num_input_tokens_seen": 906082688, "step": 5020 }, { "epoch": 0.5496592681792057, "grad_norm": 1.1398190302845035, "learning_rate": 2.1112070506397105e-05, "loss": 0.6781, "num_input_tokens_seen": 906252704, "step": 5021 }, { "epoch": 0.5497687402501437, "grad_norm": 1.3965764795620723, "learning_rate": 2.1103576732027882e-05, "loss": 0.715, "num_input_tokens_seen": 906422720, "step": 5022 }, { "epoch": 0.5498782123210816, "grad_norm": 1.3258467487796977, "learning_rate": 2.1095083418598083e-05, "loss": 0.7709, "num_input_tokens_seen": 906594080, "step": 5023 }, { "epoch": 0.5499876843920195, "grad_norm": 1.1666419106082146, "learning_rate": 2.1086590567112463e-05, "loss": 0.7461, "num_input_tokens_seen": 906786272, "step": 5024 }, { "epoch": 0.5500971564629574, "grad_norm": 1.1920991977372473, "learning_rate": 2.1078098178575686e-05, "loss": 0.5154, "num_input_tokens_seen": 906967040, "step": 5025 }, { "epoch": 0.5502066285338952, "grad_norm": 1.2662353456784592, "learning_rate": 2.1069606253992406e-05, "loss": 0.8948, "num_input_tokens_seen": 907167968, "step": 5026 }, { "epoch": 0.5503161006048332, "grad_norm": 1.2063230640637992, "learning_rate": 2.1061114794367185e-05, "loss": 0.7384, "num_input_tokens_seen": 907356352, "step": 5027 }, { "epoch": 0.5504255726757711, "grad_norm": 1.3240952785847115, "learning_rate": 2.1052623800704557e-05, "loss": 0.9204, "num_input_tokens_seen": 907557056, "step": 5028 }, { "epoch": 0.550535044746709, "grad_norm": 1.3671721103347005, "learning_rate": 2.1044133274008983e-05, "loss": 0.8109, "num_input_tokens_seen": 907734688, "step": 5029 }, { "epoch": 0.5506445168176469, "grad_norm": 1.2088568255017105, "learning_rate": 2.1035643215284882e-05, "loss": 0.7335, "num_input_tokens_seen": 907937184, "step": 5030 }, { "epoch": 0.5507539888885848, "grad_norm": 1.2399653586040318, "learning_rate": 2.1027153625536616e-05, "loss": 0.5591, "num_input_tokens_seen": 908108544, "step": 5031 }, { "epoch": 0.5508634609595227, "grad_norm": 1.3818279576155408, "learning_rate": 2.1018664505768476e-05, "loss": 0.6129, "num_input_tokens_seen": 908271840, "step": 5032 }, { "epoch": 0.5509729330304606, "grad_norm": 1.3599826287815089, "learning_rate": 2.101017585698472e-05, "loss": 0.66, "num_input_tokens_seen": 908423040, "step": 5033 }, { "epoch": 0.5510824051013985, "grad_norm": 1.2446140125291942, "learning_rate": 2.1001687680189524e-05, "loss": 0.777, "num_input_tokens_seen": 908625088, "step": 5034 }, { "epoch": 0.5511918771723364, "grad_norm": 1.3149521037904999, "learning_rate": 2.0993199976387043e-05, "loss": 0.6753, "num_input_tokens_seen": 908813024, "step": 5035 }, { "epoch": 0.5513013492432743, "grad_norm": 1.4274712566682006, "learning_rate": 2.0984712746581337e-05, "loss": 0.6891, "num_input_tokens_seen": 908989088, "step": 5036 }, { "epoch": 0.5514108213142123, "grad_norm": 1.3019256598056121, "learning_rate": 2.0976225991776434e-05, "loss": 0.6448, "num_input_tokens_seen": 909151040, "step": 5037 }, { "epoch": 0.5515202933851501, "grad_norm": 1.330150002933487, "learning_rate": 2.0967739712976308e-05, "loss": 0.7178, "num_input_tokens_seen": 909338976, "step": 5038 }, { "epoch": 0.551629765456088, "grad_norm": 1.1746611979209678, "learning_rate": 2.0959253911184867e-05, "loss": 0.6415, "num_input_tokens_seen": 909522208, "step": 5039 }, { "epoch": 0.5517392375270259, "grad_norm": 1.2120819098520077, "learning_rate": 2.0950768587405963e-05, "loss": 0.7438, "num_input_tokens_seen": 909708352, "step": 5040 }, { "epoch": 0.5518487095979638, "grad_norm": 1.2838416057021207, "learning_rate": 2.0942283742643392e-05, "loss": 0.7387, "num_input_tokens_seen": 909893376, "step": 5041 }, { "epoch": 0.5519581816689018, "grad_norm": 1.2117647360284824, "learning_rate": 2.0933799377900907e-05, "loss": 0.6153, "num_input_tokens_seen": 910082432, "step": 5042 }, { "epoch": 0.5520676537398396, "grad_norm": 1.3656287326368914, "learning_rate": 2.0925315494182168e-05, "loss": 0.7827, "num_input_tokens_seen": 910248416, "step": 5043 }, { "epoch": 0.5521771258107775, "grad_norm": 1.2655305709462141, "learning_rate": 2.091683209249082e-05, "loss": 0.7814, "num_input_tokens_seen": 910433216, "step": 5044 }, { "epoch": 0.5522865978817154, "grad_norm": 1.2962369153384095, "learning_rate": 2.090834917383044e-05, "loss": 0.6318, "num_input_tokens_seen": 910613312, "step": 5045 }, { "epoch": 0.5523960699526533, "grad_norm": 1.2595987845249315, "learning_rate": 2.089986673920452e-05, "loss": 0.6972, "num_input_tokens_seen": 910800128, "step": 5046 }, { "epoch": 0.5525055420235913, "grad_norm": 1.184090739739239, "learning_rate": 2.0891384789616535e-05, "loss": 0.7616, "num_input_tokens_seen": 911003296, "step": 5047 }, { "epoch": 0.5526150140945292, "grad_norm": 1.2167470563606442, "learning_rate": 2.088290332606987e-05, "loss": 0.7694, "num_input_tokens_seen": 911171520, "step": 5048 }, { "epoch": 0.552724486165467, "grad_norm": 1.4091816331793134, "learning_rate": 2.0874422349567866e-05, "loss": 0.6619, "num_input_tokens_seen": 911342432, "step": 5049 }, { "epoch": 0.5528339582364049, "grad_norm": 1.4352628465830704, "learning_rate": 2.0865941861113818e-05, "loss": 0.9613, "num_input_tokens_seen": 911553664, "step": 5050 }, { "epoch": 0.5529434303073428, "grad_norm": 1.5795345998357386, "learning_rate": 2.085746186171094e-05, "loss": 0.6672, "num_input_tokens_seen": 911740928, "step": 5051 }, { "epoch": 0.5530529023782808, "grad_norm": 1.2687362764114285, "learning_rate": 2.0848982352362413e-05, "loss": 0.8157, "num_input_tokens_seen": 911899968, "step": 5052 }, { "epoch": 0.5531623744492187, "grad_norm": 1.3854873132499976, "learning_rate": 2.0840503334071332e-05, "loss": 0.8, "num_input_tokens_seen": 912071104, "step": 5053 }, { "epoch": 0.5532718465201566, "grad_norm": 1.3909357988033892, "learning_rate": 2.0832024807840762e-05, "loss": 0.812, "num_input_tokens_seen": 912278528, "step": 5054 }, { "epoch": 0.5533813185910944, "grad_norm": 1.2773305909765136, "learning_rate": 2.082354677467368e-05, "loss": 0.767, "num_input_tokens_seen": 912476544, "step": 5055 }, { "epoch": 0.5534907906620323, "grad_norm": 1.1668715921443138, "learning_rate": 2.081506923557303e-05, "loss": 0.699, "num_input_tokens_seen": 912651936, "step": 5056 }, { "epoch": 0.5536002627329702, "grad_norm": 1.135722769442504, "learning_rate": 2.08065921915417e-05, "loss": 0.5955, "num_input_tokens_seen": 912847712, "step": 5057 }, { "epoch": 0.5537097348039082, "grad_norm": 1.1648255989610088, "learning_rate": 2.079811564358249e-05, "loss": 0.8396, "num_input_tokens_seen": 913037440, "step": 5058 }, { "epoch": 0.5538192068748461, "grad_norm": 1.251466320214087, "learning_rate": 2.0789639592698164e-05, "loss": 0.7926, "num_input_tokens_seen": 913226048, "step": 5059 }, { "epoch": 0.5539286789457839, "grad_norm": 1.4373908623439564, "learning_rate": 2.0781164039891432e-05, "loss": 0.8552, "num_input_tokens_seen": 913391584, "step": 5060 }, { "epoch": 0.5540381510167218, "grad_norm": 1.2120524042370282, "learning_rate": 2.0772688986164928e-05, "loss": 0.7468, "num_input_tokens_seen": 913563392, "step": 5061 }, { "epoch": 0.5541476230876597, "grad_norm": 1.3006390101472418, "learning_rate": 2.076421443252123e-05, "loss": 0.5846, "num_input_tokens_seen": 913742368, "step": 5062 }, { "epoch": 0.5542570951585977, "grad_norm": 1.401417035535466, "learning_rate": 2.0755740379962864e-05, "loss": 0.8922, "num_input_tokens_seen": 913942624, "step": 5063 }, { "epoch": 0.5543665672295356, "grad_norm": 1.2147022913176246, "learning_rate": 2.0747266829492312e-05, "loss": 0.7978, "num_input_tokens_seen": 914136160, "step": 5064 }, { "epoch": 0.5544760393004735, "grad_norm": 1.182020848159758, "learning_rate": 2.0738793782111954e-05, "loss": 0.8024, "num_input_tokens_seen": 914324096, "step": 5065 }, { "epoch": 0.5545855113714113, "grad_norm": 1.2587058170750616, "learning_rate": 2.0730321238824156e-05, "loss": 0.6321, "num_input_tokens_seen": 914487392, "step": 5066 }, { "epoch": 0.5546949834423492, "grad_norm": 1.1858838276535035, "learning_rate": 2.072184920063118e-05, "loss": 0.6425, "num_input_tokens_seen": 914703328, "step": 5067 }, { "epoch": 0.5548044555132872, "grad_norm": 1.0842146345391115, "learning_rate": 2.0713377668535276e-05, "loss": 0.7127, "num_input_tokens_seen": 914892608, "step": 5068 }, { "epoch": 0.5549139275842251, "grad_norm": 1.2430783322388768, "learning_rate": 2.070490664353859e-05, "loss": 0.7138, "num_input_tokens_seen": 915070464, "step": 5069 }, { "epoch": 0.555023399655163, "grad_norm": 1.2659315985902497, "learning_rate": 2.0696436126643236e-05, "loss": 0.9494, "num_input_tokens_seen": 915258400, "step": 5070 }, { "epoch": 0.5551328717261009, "grad_norm": 1.1646635820631446, "learning_rate": 2.0687966118851268e-05, "loss": 0.8068, "num_input_tokens_seen": 915457536, "step": 5071 }, { "epoch": 0.5552423437970387, "grad_norm": 1.3033481050143678, "learning_rate": 2.067949662116466e-05, "loss": 0.7889, "num_input_tokens_seen": 915602912, "step": 5072 }, { "epoch": 0.5553518158679767, "grad_norm": 1.3238156135442862, "learning_rate": 2.067102763458535e-05, "loss": 0.6716, "num_input_tokens_seen": 915756352, "step": 5073 }, { "epoch": 0.5554612879389146, "grad_norm": 1.246997613064853, "learning_rate": 2.0662559160115186e-05, "loss": 0.6023, "num_input_tokens_seen": 915930848, "step": 5074 }, { "epoch": 0.5555707600098525, "grad_norm": 1.2410749335401232, "learning_rate": 2.065409119875599e-05, "loss": 0.823, "num_input_tokens_seen": 916127968, "step": 5075 }, { "epoch": 0.5556802320807904, "grad_norm": 1.4161608391327205, "learning_rate": 2.0645623751509495e-05, "loss": 0.7421, "num_input_tokens_seen": 916274688, "step": 5076 }, { "epoch": 0.5557897041517282, "grad_norm": 1.2184551077874386, "learning_rate": 2.0637156819377378e-05, "loss": 0.6162, "num_input_tokens_seen": 916468896, "step": 5077 }, { "epoch": 0.5558991762226662, "grad_norm": 1.2970756926997238, "learning_rate": 2.0628690403361285e-05, "loss": 0.9308, "num_input_tokens_seen": 916642944, "step": 5078 }, { "epoch": 0.5560086482936041, "grad_norm": 1.4171919783718088, "learning_rate": 2.0620224504462742e-05, "loss": 0.7521, "num_input_tokens_seen": 916838496, "step": 5079 }, { "epoch": 0.556118120364542, "grad_norm": 1.298353111196432, "learning_rate": 2.061175912368328e-05, "loss": 0.5736, "num_input_tokens_seen": 917018368, "step": 5080 }, { "epoch": 0.5562275924354799, "grad_norm": 1.2501536751696658, "learning_rate": 2.0603294262024323e-05, "loss": 0.65, "num_input_tokens_seen": 917200928, "step": 5081 }, { "epoch": 0.5563370645064178, "grad_norm": 1.160996976948695, "learning_rate": 2.059482992048725e-05, "loss": 0.7929, "num_input_tokens_seen": 917408128, "step": 5082 }, { "epoch": 0.5564465365773557, "grad_norm": 1.3643115940292827, "learning_rate": 2.058636610007337e-05, "loss": 0.8905, "num_input_tokens_seen": 917584416, "step": 5083 }, { "epoch": 0.5565560086482936, "grad_norm": 1.219644022974743, "learning_rate": 2.057790280178394e-05, "loss": 0.6044, "num_input_tokens_seen": 917769664, "step": 5084 }, { "epoch": 0.5566654807192315, "grad_norm": 1.2334240497699498, "learning_rate": 2.056944002662017e-05, "loss": 0.8594, "num_input_tokens_seen": 917963424, "step": 5085 }, { "epoch": 0.5567749527901694, "grad_norm": 1.4013287794850986, "learning_rate": 2.0560977775583162e-05, "loss": 0.8714, "num_input_tokens_seen": 918172416, "step": 5086 }, { "epoch": 0.5568844248611073, "grad_norm": 1.3189530003946544, "learning_rate": 2.0552516049674007e-05, "loss": 0.6985, "num_input_tokens_seen": 918320032, "step": 5087 }, { "epoch": 0.5569938969320453, "grad_norm": 1.290488084823069, "learning_rate": 2.0544054849893696e-05, "loss": 0.6979, "num_input_tokens_seen": 918512000, "step": 5088 }, { "epoch": 0.5571033690029831, "grad_norm": 1.084231621665431, "learning_rate": 2.0535594177243183e-05, "loss": 0.7105, "num_input_tokens_seen": 918718080, "step": 5089 }, { "epoch": 0.557212841073921, "grad_norm": 1.282125546034542, "learning_rate": 2.0527134032723337e-05, "loss": 0.9929, "num_input_tokens_seen": 918926400, "step": 5090 }, { "epoch": 0.5573223131448589, "grad_norm": 1.3551065362243009, "learning_rate": 2.0518674417334982e-05, "loss": 0.7281, "num_input_tokens_seen": 919061472, "step": 5091 }, { "epoch": 0.5574317852157968, "grad_norm": 1.1970163306713322, "learning_rate": 2.0510215332078884e-05, "loss": 0.6037, "num_input_tokens_seen": 919263744, "step": 5092 }, { "epoch": 0.5575412572867348, "grad_norm": 1.4942085200821893, "learning_rate": 2.050175677795572e-05, "loss": 0.7978, "num_input_tokens_seen": 919453920, "step": 5093 }, { "epoch": 0.5576507293576726, "grad_norm": 1.2771407966484378, "learning_rate": 2.0493298755966145e-05, "loss": 0.7985, "num_input_tokens_seen": 919602880, "step": 5094 }, { "epoch": 0.5577602014286105, "grad_norm": 1.2601931770528678, "learning_rate": 2.0484841267110698e-05, "loss": 0.8073, "num_input_tokens_seen": 919786112, "step": 5095 }, { "epoch": 0.5578696734995484, "grad_norm": 1.2347140751872536, "learning_rate": 2.0476384312389914e-05, "loss": 0.6233, "num_input_tokens_seen": 919982112, "step": 5096 }, { "epoch": 0.5579791455704863, "grad_norm": 1.3606158326400446, "learning_rate": 2.04679278928042e-05, "loss": 0.6873, "num_input_tokens_seen": 920157952, "step": 5097 }, { "epoch": 0.5580886176414243, "grad_norm": 1.3929435175708669, "learning_rate": 2.0459472009353957e-05, "loss": 0.8741, "num_input_tokens_seen": 920362016, "step": 5098 }, { "epoch": 0.5581980897123622, "grad_norm": 1.3946243810677101, "learning_rate": 2.0451016663039503e-05, "loss": 0.8312, "num_input_tokens_seen": 920548608, "step": 5099 }, { "epoch": 0.5583075617833, "grad_norm": 1.234784263656013, "learning_rate": 2.0442561854861076e-05, "loss": 0.6009, "num_input_tokens_seen": 920718400, "step": 5100 }, { "epoch": 0.5584170338542379, "grad_norm": 1.2224455338028866, "learning_rate": 2.043410758581887e-05, "loss": 0.7294, "num_input_tokens_seen": 920903648, "step": 5101 }, { "epoch": 0.5585265059251758, "grad_norm": 1.0793248562632083, "learning_rate": 2.042565385691301e-05, "loss": 0.5218, "num_input_tokens_seen": 921050144, "step": 5102 }, { "epoch": 0.5586359779961138, "grad_norm": 1.2509549784529217, "learning_rate": 2.041720066914355e-05, "loss": 0.7643, "num_input_tokens_seen": 921245696, "step": 5103 }, { "epoch": 0.5587454500670517, "grad_norm": 1.1205250962633082, "learning_rate": 2.040874802351049e-05, "loss": 0.7074, "num_input_tokens_seen": 921428704, "step": 5104 }, { "epoch": 0.5588549221379896, "grad_norm": 1.3700597184106296, "learning_rate": 2.040029592101376e-05, "loss": 0.8414, "num_input_tokens_seen": 921607904, "step": 5105 }, { "epoch": 0.5589643942089274, "grad_norm": 1.307929117889458, "learning_rate": 2.039184436265324e-05, "loss": 0.8857, "num_input_tokens_seen": 921800768, "step": 5106 }, { "epoch": 0.5590738662798653, "grad_norm": 1.3589184312207583, "learning_rate": 2.038339334942871e-05, "loss": 0.7862, "num_input_tokens_seen": 921958464, "step": 5107 }, { "epoch": 0.5591833383508032, "grad_norm": 1.1809699559149174, "learning_rate": 2.0374942882339935e-05, "loss": 0.882, "num_input_tokens_seen": 922137664, "step": 5108 }, { "epoch": 0.5592928104217412, "grad_norm": 1.1507248708636102, "learning_rate": 2.0366492962386563e-05, "loss": 0.6233, "num_input_tokens_seen": 922345312, "step": 5109 }, { "epoch": 0.5594022824926791, "grad_norm": 1.2843248640610025, "learning_rate": 2.0358043590568215e-05, "loss": 0.6916, "num_input_tokens_seen": 922530560, "step": 5110 }, { "epoch": 0.5595117545636169, "grad_norm": 1.3343007730390999, "learning_rate": 2.034959476788445e-05, "loss": 0.6966, "num_input_tokens_seen": 922694752, "step": 5111 }, { "epoch": 0.5596212266345548, "grad_norm": 1.1553456661962416, "learning_rate": 2.034114649533472e-05, "loss": 0.9519, "num_input_tokens_seen": 922889632, "step": 5112 }, { "epoch": 0.5597306987054927, "grad_norm": 1.3822892055571014, "learning_rate": 2.033269877391846e-05, "loss": 0.7289, "num_input_tokens_seen": 923084736, "step": 5113 }, { "epoch": 0.5598401707764307, "grad_norm": 1.204379084549982, "learning_rate": 2.032425160463501e-05, "loss": 0.6876, "num_input_tokens_seen": 923232800, "step": 5114 }, { "epoch": 0.5599496428473686, "grad_norm": 1.2258528582126398, "learning_rate": 2.0315804988483665e-05, "loss": 0.7089, "num_input_tokens_seen": 923392288, "step": 5115 }, { "epoch": 0.5600591149183065, "grad_norm": 1.1675856864737952, "learning_rate": 2.030735892646362e-05, "loss": 0.5511, "num_input_tokens_seen": 923554240, "step": 5116 }, { "epoch": 0.5601685869892443, "grad_norm": 1.4479146266712482, "learning_rate": 2.029891341957405e-05, "loss": 0.9287, "num_input_tokens_seen": 923715296, "step": 5117 }, { "epoch": 0.5602780590601822, "grad_norm": 1.2702138151568065, "learning_rate": 2.0290468468814045e-05, "loss": 0.8578, "num_input_tokens_seen": 923896736, "step": 5118 }, { "epoch": 0.5603875311311202, "grad_norm": 1.293007564826426, "learning_rate": 2.0282024075182603e-05, "loss": 0.7242, "num_input_tokens_seen": 924071680, "step": 5119 }, { "epoch": 0.5604970032020581, "grad_norm": 1.1942826865077016, "learning_rate": 2.0273580239678706e-05, "loss": 0.6784, "num_input_tokens_seen": 924244608, "step": 5120 }, { "epoch": 0.560606475272996, "grad_norm": 1.2853839058104264, "learning_rate": 2.0265136963301225e-05, "loss": 0.7525, "num_input_tokens_seen": 924404544, "step": 5121 }, { "epoch": 0.5607159473439339, "grad_norm": 1.3411522084303755, "learning_rate": 2.025669424704899e-05, "loss": 0.72, "num_input_tokens_seen": 924576800, "step": 5122 }, { "epoch": 0.5608254194148717, "grad_norm": 1.2725929314754711, "learning_rate": 2.0248252091920757e-05, "loss": 0.6505, "num_input_tokens_seen": 924776608, "step": 5123 }, { "epoch": 0.5609348914858097, "grad_norm": 1.4460894171898724, "learning_rate": 2.0239810498915213e-05, "loss": 0.6875, "num_input_tokens_seen": 924922880, "step": 5124 }, { "epoch": 0.5610443635567476, "grad_norm": 1.2321295776994237, "learning_rate": 2.0231369469030996e-05, "loss": 0.6832, "num_input_tokens_seen": 925113280, "step": 5125 }, { "epoch": 0.5611538356276855, "grad_norm": 1.1781559279857179, "learning_rate": 2.0222929003266645e-05, "loss": 0.6811, "num_input_tokens_seen": 925296064, "step": 5126 }, { "epoch": 0.5612633076986234, "grad_norm": 1.262043866765946, "learning_rate": 2.0214489102620675e-05, "loss": 0.6185, "num_input_tokens_seen": 925482656, "step": 5127 }, { "epoch": 0.5613727797695612, "grad_norm": 1.4029207875382246, "learning_rate": 2.0206049768091482e-05, "loss": 0.8353, "num_input_tokens_seen": 925655136, "step": 5128 }, { "epoch": 0.5614822518404992, "grad_norm": 1.4281830924007297, "learning_rate": 2.019761100067745e-05, "loss": 0.8278, "num_input_tokens_seen": 925840384, "step": 5129 }, { "epoch": 0.5615917239114371, "grad_norm": 1.2857505096903308, "learning_rate": 2.0189172801376845e-05, "loss": 0.6623, "num_input_tokens_seen": 926042656, "step": 5130 }, { "epoch": 0.561701195982375, "grad_norm": 1.2753348228783556, "learning_rate": 2.01807351711879e-05, "loss": 0.7527, "num_input_tokens_seen": 926250080, "step": 5131 }, { "epoch": 0.5618106680533129, "grad_norm": 1.299143587967349, "learning_rate": 2.0172298111108782e-05, "loss": 0.6741, "num_input_tokens_seen": 926436000, "step": 5132 }, { "epoch": 0.5619201401242508, "grad_norm": 1.3152029724269654, "learning_rate": 2.016386162213756e-05, "loss": 0.657, "num_input_tokens_seen": 926636928, "step": 5133 }, { "epoch": 0.5620296121951887, "grad_norm": 1.2493905012000173, "learning_rate": 2.0155425705272268e-05, "loss": 0.8948, "num_input_tokens_seen": 926828672, "step": 5134 }, { "epoch": 0.5621390842661266, "grad_norm": 1.2971805993771695, "learning_rate": 2.0146990361510844e-05, "loss": 0.6581, "num_input_tokens_seen": 926991520, "step": 5135 }, { "epoch": 0.5622485563370645, "grad_norm": 1.3424615227212966, "learning_rate": 2.0138555591851198e-05, "loss": 0.7163, "num_input_tokens_seen": 927157056, "step": 5136 }, { "epoch": 0.5623580284080024, "grad_norm": 1.3165630847088439, "learning_rate": 2.013012139729112e-05, "loss": 0.7373, "num_input_tokens_seen": 927351040, "step": 5137 }, { "epoch": 0.5624675004789403, "grad_norm": 1.2173985544353116, "learning_rate": 2.0121687778828372e-05, "loss": 0.7935, "num_input_tokens_seen": 927555104, "step": 5138 }, { "epoch": 0.5625769725498783, "grad_norm": 1.436393809742391, "learning_rate": 2.0113254737460643e-05, "loss": 0.7021, "num_input_tokens_seen": 927690176, "step": 5139 }, { "epoch": 0.5626864446208161, "grad_norm": 1.3745194809255739, "learning_rate": 2.0104822274185525e-05, "loss": 0.7872, "num_input_tokens_seen": 927858400, "step": 5140 }, { "epoch": 0.562795916691754, "grad_norm": 1.4596894180785056, "learning_rate": 2.009639039000059e-05, "loss": 0.7356, "num_input_tokens_seen": 928032000, "step": 5141 }, { "epoch": 0.5629053887626919, "grad_norm": 1.3291236264585775, "learning_rate": 2.0087959085903282e-05, "loss": 0.6888, "num_input_tokens_seen": 928218816, "step": 5142 }, { "epoch": 0.5630148608336298, "grad_norm": 1.2461024556109985, "learning_rate": 2.0079528362891032e-05, "loss": 0.8143, "num_input_tokens_seen": 928408096, "step": 5143 }, { "epoch": 0.5631243329045678, "grad_norm": 1.3483433395520563, "learning_rate": 2.0071098221961168e-05, "loss": 0.8208, "num_input_tokens_seen": 928609472, "step": 5144 }, { "epoch": 0.5632338049755056, "grad_norm": 1.3347553131089225, "learning_rate": 2.0062668664110957e-05, "loss": 0.7201, "num_input_tokens_seen": 928823168, "step": 5145 }, { "epoch": 0.5633432770464435, "grad_norm": 1.12118492686115, "learning_rate": 2.005423969033761e-05, "loss": 0.5968, "num_input_tokens_seen": 928981536, "step": 5146 }, { "epoch": 0.5634527491173814, "grad_norm": 1.228447358822812, "learning_rate": 2.004581130163825e-05, "loss": 0.6834, "num_input_tokens_seen": 929173280, "step": 5147 }, { "epoch": 0.5635622211883193, "grad_norm": 1.389965801229968, "learning_rate": 2.0037383499009948e-05, "loss": 0.6464, "num_input_tokens_seen": 929341952, "step": 5148 }, { "epoch": 0.5636716932592573, "grad_norm": 1.2304003984146812, "learning_rate": 2.0028956283449686e-05, "loss": 0.6408, "num_input_tokens_seen": 929548256, "step": 5149 }, { "epoch": 0.5637811653301952, "grad_norm": 1.2116769317356544, "learning_rate": 2.00205296559544e-05, "loss": 0.91, "num_input_tokens_seen": 929753216, "step": 5150 }, { "epoch": 0.563890637401133, "grad_norm": 1.2797846439364913, "learning_rate": 2.0012103617520926e-05, "loss": 0.525, "num_input_tokens_seen": 929905984, "step": 5151 }, { "epoch": 0.5640001094720709, "grad_norm": 1.3159793874813377, "learning_rate": 2.000367816914606e-05, "loss": 0.6845, "num_input_tokens_seen": 930061216, "step": 5152 }, { "epoch": 0.5641095815430088, "grad_norm": 0.9789362020697663, "learning_rate": 1.9995253311826526e-05, "loss": 0.5558, "num_input_tokens_seen": 930281184, "step": 5153 }, { "epoch": 0.5642190536139468, "grad_norm": 1.1629193216255047, "learning_rate": 1.9986829046558944e-05, "loss": 0.56, "num_input_tokens_seen": 930460832, "step": 5154 }, { "epoch": 0.5643285256848847, "grad_norm": 1.2215255530296558, "learning_rate": 1.997840537433991e-05, "loss": 0.9095, "num_input_tokens_seen": 930661088, "step": 5155 }, { "epoch": 0.5644379977558226, "grad_norm": 1.1711131353218043, "learning_rate": 1.9969982296165915e-05, "loss": 0.5955, "num_input_tokens_seen": 930841856, "step": 5156 }, { "epoch": 0.5645474698267604, "grad_norm": 1.3170531633142348, "learning_rate": 1.996155981303341e-05, "loss": 0.639, "num_input_tokens_seen": 930995520, "step": 5157 }, { "epoch": 0.5646569418976983, "grad_norm": 1.2366260332353671, "learning_rate": 1.9953137925938737e-05, "loss": 0.6926, "num_input_tokens_seen": 931182112, "step": 5158 }, { "epoch": 0.5647664139686362, "grad_norm": 1.281973332242283, "learning_rate": 1.9944716635878197e-05, "loss": 0.7113, "num_input_tokens_seen": 931346752, "step": 5159 }, { "epoch": 0.5648758860395742, "grad_norm": 1.2712420864102723, "learning_rate": 1.9936295943848028e-05, "loss": 0.6718, "num_input_tokens_seen": 931512064, "step": 5160 }, { "epoch": 0.5649853581105121, "grad_norm": 1.2808459040104128, "learning_rate": 1.9927875850844356e-05, "loss": 0.6866, "num_input_tokens_seen": 931686784, "step": 5161 }, { "epoch": 0.5650948301814499, "grad_norm": 1.235431894698487, "learning_rate": 1.9919456357863286e-05, "loss": 0.6033, "num_input_tokens_seen": 931836640, "step": 5162 }, { "epoch": 0.5652043022523878, "grad_norm": 1.204786812613481, "learning_rate": 1.9911037465900807e-05, "loss": 0.6833, "num_input_tokens_seen": 932000608, "step": 5163 }, { "epoch": 0.5653137743233257, "grad_norm": 1.3125824675310211, "learning_rate": 1.990261917595287e-05, "loss": 0.7289, "num_input_tokens_seen": 932180928, "step": 5164 }, { "epoch": 0.5654232463942637, "grad_norm": 1.2447633726769638, "learning_rate": 1.9894201489015342e-05, "loss": 0.6289, "num_input_tokens_seen": 932348480, "step": 5165 }, { "epoch": 0.5655327184652016, "grad_norm": 1.1508913877034523, "learning_rate": 1.9885784406084012e-05, "loss": 0.7782, "num_input_tokens_seen": 932545824, "step": 5166 }, { "epoch": 0.5656421905361395, "grad_norm": 1.2571840789619417, "learning_rate": 1.9877367928154618e-05, "loss": 0.8401, "num_input_tokens_seen": 932760192, "step": 5167 }, { "epoch": 0.5657516626070773, "grad_norm": 1.2721784955959117, "learning_rate": 1.9868952056222795e-05, "loss": 0.6959, "num_input_tokens_seen": 932947456, "step": 5168 }, { "epoch": 0.5658611346780152, "grad_norm": 1.3999943937461863, "learning_rate": 1.9860536791284148e-05, "loss": 0.7142, "num_input_tokens_seen": 933128896, "step": 5169 }, { "epoch": 0.5659706067489532, "grad_norm": 1.2219814107857654, "learning_rate": 1.985212213433416e-05, "loss": 0.7648, "num_input_tokens_seen": 933336768, "step": 5170 }, { "epoch": 0.5660800788198911, "grad_norm": 1.3738377167011937, "learning_rate": 1.9843708086368287e-05, "loss": 0.7751, "num_input_tokens_seen": 933525376, "step": 5171 }, { "epoch": 0.566189550890829, "grad_norm": 1.216811717327089, "learning_rate": 1.9835294648381898e-05, "loss": 0.8413, "num_input_tokens_seen": 933713536, "step": 5172 }, { "epoch": 0.5662990229617669, "grad_norm": 1.2211979953934342, "learning_rate": 1.9826881821370268e-05, "loss": 0.7625, "num_input_tokens_seen": 933899904, "step": 5173 }, { "epoch": 0.5664084950327047, "grad_norm": 1.2398533513659475, "learning_rate": 1.9818469606328642e-05, "loss": 0.7137, "num_input_tokens_seen": 934067008, "step": 5174 }, { "epoch": 0.5665179671036427, "grad_norm": 1.51150345562973, "learning_rate": 1.9810058004252146e-05, "loss": 0.749, "num_input_tokens_seen": 934240384, "step": 5175 }, { "epoch": 0.5666274391745806, "grad_norm": 1.4867914708243601, "learning_rate": 1.9801647016135868e-05, "loss": 0.8713, "num_input_tokens_seen": 934399200, "step": 5176 }, { "epoch": 0.5667369112455185, "grad_norm": 1.437880258437315, "learning_rate": 1.9793236642974806e-05, "loss": 0.8931, "num_input_tokens_seen": 934586240, "step": 5177 }, { "epoch": 0.5668463833164564, "grad_norm": 1.2670395563690462, "learning_rate": 1.9784826885763903e-05, "loss": 0.5831, "num_input_tokens_seen": 934777088, "step": 5178 }, { "epoch": 0.5669558553873942, "grad_norm": 1.3032066026930162, "learning_rate": 1.977641774549801e-05, "loss": 0.6634, "num_input_tokens_seen": 934931872, "step": 5179 }, { "epoch": 0.5670653274583322, "grad_norm": 1.4159762252061725, "learning_rate": 1.9768009223171907e-05, "loss": 0.813, "num_input_tokens_seen": 935108384, "step": 5180 }, { "epoch": 0.5671747995292701, "grad_norm": 1.264582404691972, "learning_rate": 1.9759601319780317e-05, "loss": 0.5376, "num_input_tokens_seen": 935251744, "step": 5181 }, { "epoch": 0.567284271600208, "grad_norm": 1.214618870568069, "learning_rate": 1.9751194036317868e-05, "loss": 0.6096, "num_input_tokens_seen": 935409888, "step": 5182 }, { "epoch": 0.5673937436711459, "grad_norm": 1.2502480737571775, "learning_rate": 1.9742787373779137e-05, "loss": 0.7021, "num_input_tokens_seen": 935602752, "step": 5183 }, { "epoch": 0.5675032157420838, "grad_norm": 1.3684419816652214, "learning_rate": 1.9734381333158604e-05, "loss": 0.8124, "num_input_tokens_seen": 935778368, "step": 5184 }, { "epoch": 0.5676126878130217, "grad_norm": 1.316971066251946, "learning_rate": 1.9725975915450687e-05, "loss": 0.7234, "num_input_tokens_seen": 935953312, "step": 5185 }, { "epoch": 0.5677221598839596, "grad_norm": 1.3209139565652275, "learning_rate": 1.971757112164975e-05, "loss": 0.8517, "num_input_tokens_seen": 936139456, "step": 5186 }, { "epoch": 0.5678316319548975, "grad_norm": 1.1557055557587466, "learning_rate": 1.970916695275004e-05, "loss": 0.5732, "num_input_tokens_seen": 936308128, "step": 5187 }, { "epoch": 0.5679411040258354, "grad_norm": 1.247804573429419, "learning_rate": 1.9700763409745773e-05, "loss": 0.5845, "num_input_tokens_seen": 936423488, "step": 5188 }, { "epoch": 0.5680505760967733, "grad_norm": 1.322864030807133, "learning_rate": 1.9692360493631058e-05, "loss": 0.6367, "num_input_tokens_seen": 936588800, "step": 5189 }, { "epoch": 0.5681600481677113, "grad_norm": 1.391811893027727, "learning_rate": 1.968395820539996e-05, "loss": 0.9478, "num_input_tokens_seen": 936772480, "step": 5190 }, { "epoch": 0.5682695202386491, "grad_norm": 1.142432413835847, "learning_rate": 1.967555654604643e-05, "loss": 0.7164, "num_input_tokens_seen": 936951680, "step": 5191 }, { "epoch": 0.568378992309587, "grad_norm": 1.3467658155754338, "learning_rate": 1.9667155516564385e-05, "loss": 0.7618, "num_input_tokens_seen": 937154176, "step": 5192 }, { "epoch": 0.5684884643805249, "grad_norm": 1.2486546906383875, "learning_rate": 1.9658755117947657e-05, "loss": 0.9073, "num_input_tokens_seen": 937346144, "step": 5193 }, { "epoch": 0.5685979364514628, "grad_norm": 1.2975995900589734, "learning_rate": 1.965035535118998e-05, "loss": 0.8494, "num_input_tokens_seen": 937540128, "step": 5194 }, { "epoch": 0.5687074085224008, "grad_norm": 1.2278395401095408, "learning_rate": 1.9641956217285048e-05, "loss": 0.866, "num_input_tokens_seen": 937719328, "step": 5195 }, { "epoch": 0.5688168805933386, "grad_norm": 1.3096700768300553, "learning_rate": 1.9633557717226443e-05, "loss": 0.8548, "num_input_tokens_seen": 937892256, "step": 5196 }, { "epoch": 0.5689263526642765, "grad_norm": 1.364087016965718, "learning_rate": 1.96251598520077e-05, "loss": 0.6422, "num_input_tokens_seen": 938082208, "step": 5197 }, { "epoch": 0.5690358247352144, "grad_norm": 1.1702915429147498, "learning_rate": 1.9616762622622272e-05, "loss": 0.7881, "num_input_tokens_seen": 938291648, "step": 5198 }, { "epoch": 0.5691452968061523, "grad_norm": 1.307932535767733, "learning_rate": 1.960836603006354e-05, "loss": 0.8533, "num_input_tokens_seen": 938441728, "step": 5199 }, { "epoch": 0.5692547688770903, "grad_norm": 1.173001828441837, "learning_rate": 1.9599970075324797e-05, "loss": 0.5863, "num_input_tokens_seen": 938616448, "step": 5200 }, { "epoch": 0.5693642409480282, "grad_norm": 1.2281832343896075, "learning_rate": 1.959157475939927e-05, "loss": 0.6881, "num_input_tokens_seen": 938810656, "step": 5201 }, { "epoch": 0.569473713018966, "grad_norm": 1.3837547755654347, "learning_rate": 1.9583180083280118e-05, "loss": 0.7814, "num_input_tokens_seen": 939003520, "step": 5202 }, { "epoch": 0.5695831850899039, "grad_norm": 1.4158492628566912, "learning_rate": 1.9574786047960394e-05, "loss": 0.948, "num_input_tokens_seen": 939188992, "step": 5203 }, { "epoch": 0.5696926571608418, "grad_norm": 1.1572251203181942, "learning_rate": 1.9566392654433123e-05, "loss": 0.581, "num_input_tokens_seen": 939354304, "step": 5204 }, { "epoch": 0.5698021292317798, "grad_norm": 1.2424876012509702, "learning_rate": 1.95579999036912e-05, "loss": 0.5946, "num_input_tokens_seen": 939508864, "step": 5205 }, { "epoch": 0.5699116013027177, "grad_norm": 1.2444995500127731, "learning_rate": 1.9549607796727487e-05, "loss": 0.8921, "num_input_tokens_seen": 939702400, "step": 5206 }, { "epoch": 0.5700210733736556, "grad_norm": 1.4123012048369494, "learning_rate": 1.9541216334534764e-05, "loss": 0.6433, "num_input_tokens_seen": 939870624, "step": 5207 }, { "epoch": 0.5701305454445934, "grad_norm": 1.2029933114250362, "learning_rate": 1.9532825518105702e-05, "loss": 0.6541, "num_input_tokens_seen": 940038848, "step": 5208 }, { "epoch": 0.5702400175155313, "grad_norm": 1.232875299244264, "learning_rate": 1.9524435348432933e-05, "loss": 0.6135, "num_input_tokens_seen": 940228800, "step": 5209 }, { "epoch": 0.5703494895864692, "grad_norm": 1.22651356399009, "learning_rate": 1.9516045826508994e-05, "loss": 0.6384, "num_input_tokens_seen": 940396576, "step": 5210 }, { "epoch": 0.5704589616574072, "grad_norm": 1.2246611126400164, "learning_rate": 1.9507656953326355e-05, "loss": 0.839, "num_input_tokens_seen": 940591680, "step": 5211 }, { "epoch": 0.5705684337283451, "grad_norm": 1.2721238644978958, "learning_rate": 1.949926872987739e-05, "loss": 0.7935, "num_input_tokens_seen": 940755872, "step": 5212 }, { "epoch": 0.5706779057992829, "grad_norm": 1.3259402537716247, "learning_rate": 1.9490881157154422e-05, "loss": 0.7336, "num_input_tokens_seen": 940936864, "step": 5213 }, { "epoch": 0.5707873778702208, "grad_norm": 1.2970954983332048, "learning_rate": 1.948249423614969e-05, "loss": 0.6243, "num_input_tokens_seen": 941108896, "step": 5214 }, { "epoch": 0.5708968499411587, "grad_norm": 1.2606663995090595, "learning_rate": 1.9474107967855332e-05, "loss": 0.6991, "num_input_tokens_seen": 941273760, "step": 5215 }, { "epoch": 0.5710063220120967, "grad_norm": 1.3148917703365055, "learning_rate": 1.9465722353263445e-05, "loss": 0.7685, "num_input_tokens_seen": 941485888, "step": 5216 }, { "epoch": 0.5711157940830346, "grad_norm": 1.3955586386114704, "learning_rate": 1.945733739336602e-05, "loss": 0.839, "num_input_tokens_seen": 941657248, "step": 5217 }, { "epoch": 0.5712252661539725, "grad_norm": 1.226996521180809, "learning_rate": 1.9448953089154982e-05, "loss": 0.6621, "num_input_tokens_seen": 941825696, "step": 5218 }, { "epoch": 0.5713347382249103, "grad_norm": 1.3320715666008518, "learning_rate": 1.9440569441622182e-05, "loss": 0.8085, "num_input_tokens_seen": 942024160, "step": 5219 }, { "epoch": 0.5714442102958482, "grad_norm": 1.173037549978025, "learning_rate": 1.9432186451759397e-05, "loss": 0.682, "num_input_tokens_seen": 942222624, "step": 5220 }, { "epoch": 0.5715536823667862, "grad_norm": 1.1725106613390235, "learning_rate": 1.9423804120558307e-05, "loss": 0.6697, "num_input_tokens_seen": 942443264, "step": 5221 }, { "epoch": 0.5716631544377241, "grad_norm": 1.4050159974052903, "learning_rate": 1.9415422449010523e-05, "loss": 0.7611, "num_input_tokens_seen": 942636576, "step": 5222 }, { "epoch": 0.571772626508662, "grad_norm": 1.351981346190728, "learning_rate": 1.94070414381076e-05, "loss": 0.7642, "num_input_tokens_seen": 942819584, "step": 5223 }, { "epoch": 0.5718820985795999, "grad_norm": 1.1707900047204016, "learning_rate": 1.9398661088840974e-05, "loss": 0.6555, "num_input_tokens_seen": 943009536, "step": 5224 }, { "epoch": 0.5719915706505377, "grad_norm": 1.2309126280387634, "learning_rate": 1.9390281402202043e-05, "loss": 0.7859, "num_input_tokens_seen": 943229952, "step": 5225 }, { "epoch": 0.5721010427214757, "grad_norm": 1.1816397674809647, "learning_rate": 1.9381902379182085e-05, "loss": 0.6229, "num_input_tokens_seen": 943406912, "step": 5226 }, { "epoch": 0.5722105147924136, "grad_norm": 1.1631435376821335, "learning_rate": 1.9373524020772337e-05, "loss": 0.9615, "num_input_tokens_seen": 943615008, "step": 5227 }, { "epoch": 0.5723199868633515, "grad_norm": 1.245222546476818, "learning_rate": 1.9365146327963955e-05, "loss": 0.8624, "num_input_tokens_seen": 943813472, "step": 5228 }, { "epoch": 0.5724294589342894, "grad_norm": 1.1869220331001913, "learning_rate": 1.9356769301747972e-05, "loss": 0.6925, "num_input_tokens_seen": 944006784, "step": 5229 }, { "epoch": 0.5725389310052272, "grad_norm": 1.1227218555667946, "learning_rate": 1.9348392943115405e-05, "loss": 0.61, "num_input_tokens_seen": 944207712, "step": 5230 }, { "epoch": 0.5726484030761652, "grad_norm": 1.2557282327432007, "learning_rate": 1.9340017253057142e-05, "loss": 0.6085, "num_input_tokens_seen": 944378848, "step": 5231 }, { "epoch": 0.5727578751471031, "grad_norm": 1.1505020165390518, "learning_rate": 1.933164223256403e-05, "loss": 0.6213, "num_input_tokens_seen": 944577760, "step": 5232 }, { "epoch": 0.572867347218041, "grad_norm": 1.223973792228741, "learning_rate": 1.932326788262679e-05, "loss": 0.6846, "num_input_tokens_seen": 944750464, "step": 5233 }, { "epoch": 0.5729768192889789, "grad_norm": 1.3195698923852375, "learning_rate": 1.931489420423611e-05, "loss": 0.7816, "num_input_tokens_seen": 944905248, "step": 5234 }, { "epoch": 0.5730862913599168, "grad_norm": 1.306791790028527, "learning_rate": 1.930652119838259e-05, "loss": 0.6278, "num_input_tokens_seen": 945085344, "step": 5235 }, { "epoch": 0.5731957634308547, "grad_norm": 1.6302004276689257, "learning_rate": 1.9298148866056716e-05, "loss": 0.7355, "num_input_tokens_seen": 945249088, "step": 5236 }, { "epoch": 0.5733052355017926, "grad_norm": 1.1537778045511105, "learning_rate": 1.9289777208248942e-05, "loss": 0.7272, "num_input_tokens_seen": 945391552, "step": 5237 }, { "epoch": 0.5734147075727305, "grad_norm": 1.0983411071070022, "learning_rate": 1.92814062259496e-05, "loss": 0.6367, "num_input_tokens_seen": 945579040, "step": 5238 }, { "epoch": 0.5735241796436684, "grad_norm": 1.5070808493449854, "learning_rate": 1.9273035920148966e-05, "loss": 0.783, "num_input_tokens_seen": 945723968, "step": 5239 }, { "epoch": 0.5736336517146063, "grad_norm": 1.255076257660964, "learning_rate": 1.9264666291837242e-05, "loss": 0.7364, "num_input_tokens_seen": 945894656, "step": 5240 }, { "epoch": 0.5737431237855443, "grad_norm": 1.3275509210651526, "learning_rate": 1.9256297342004527e-05, "loss": 0.8578, "num_input_tokens_seen": 946075200, "step": 5241 }, { "epoch": 0.5738525958564821, "grad_norm": 1.3192076264169639, "learning_rate": 1.924792907164086e-05, "loss": 0.9042, "num_input_tokens_seen": 946239840, "step": 5242 }, { "epoch": 0.57396206792742, "grad_norm": 1.248637505887521, "learning_rate": 1.9239561481736183e-05, "loss": 0.6339, "num_input_tokens_seen": 946408512, "step": 5243 }, { "epoch": 0.5740715399983579, "grad_norm": 1.2099363556982627, "learning_rate": 1.9231194573280383e-05, "loss": 0.8207, "num_input_tokens_seen": 946589728, "step": 5244 }, { "epoch": 0.5741810120692958, "grad_norm": 1.3051319393239824, "learning_rate": 1.9222828347263222e-05, "loss": 0.6888, "num_input_tokens_seen": 946794688, "step": 5245 }, { "epoch": 0.5742904841402338, "grad_norm": 1.276779190031564, "learning_rate": 1.9214462804674425e-05, "loss": 0.673, "num_input_tokens_seen": 946992032, "step": 5246 }, { "epoch": 0.5743999562111716, "grad_norm": 1.2261898179213424, "learning_rate": 1.9206097946503625e-05, "loss": 0.8339, "num_input_tokens_seen": 947158688, "step": 5247 }, { "epoch": 0.5745094282821095, "grad_norm": 1.2158412456625396, "learning_rate": 1.9197733773740356e-05, "loss": 0.7893, "num_input_tokens_seen": 947330048, "step": 5248 }, { "epoch": 0.5746189003530474, "grad_norm": 1.281903309688496, "learning_rate": 1.91893702873741e-05, "loss": 0.6079, "num_input_tokens_seen": 947504320, "step": 5249 }, { "epoch": 0.5747283724239853, "grad_norm": 1.3135698590239244, "learning_rate": 1.918100748839422e-05, "loss": 0.7136, "num_input_tokens_seen": 947674336, "step": 5250 }, { "epoch": 0.5748378444949233, "grad_norm": 1.3016958601132689, "learning_rate": 1.9172645377790037e-05, "loss": 0.7487, "num_input_tokens_seen": 947841216, "step": 5251 }, { "epoch": 0.5749473165658612, "grad_norm": 1.403615773102633, "learning_rate": 1.916428395655076e-05, "loss": 0.9854, "num_input_tokens_seen": 948018848, "step": 5252 }, { "epoch": 0.575056788636799, "grad_norm": 1.328121993117575, "learning_rate": 1.915592322566553e-05, "loss": 0.7018, "num_input_tokens_seen": 948181248, "step": 5253 }, { "epoch": 0.5751662607077369, "grad_norm": 1.2965652831354104, "learning_rate": 1.9147563186123423e-05, "loss": 0.8095, "num_input_tokens_seen": 948360672, "step": 5254 }, { "epoch": 0.5752757327786748, "grad_norm": 1.2146938022914233, "learning_rate": 1.9139203838913394e-05, "loss": 0.6982, "num_input_tokens_seen": 948560032, "step": 5255 }, { "epoch": 0.5753852048496128, "grad_norm": 1.1903871273603515, "learning_rate": 1.913084518502436e-05, "loss": 0.7193, "num_input_tokens_seen": 948714368, "step": 5256 }, { "epoch": 0.5754946769205507, "grad_norm": 1.1717394941187382, "learning_rate": 1.9122487225445107e-05, "loss": 0.7993, "num_input_tokens_seen": 948903648, "step": 5257 }, { "epoch": 0.5756041489914886, "grad_norm": 1.2769986774375, "learning_rate": 1.911412996116439e-05, "loss": 0.9461, "num_input_tokens_seen": 949118464, "step": 5258 }, { "epoch": 0.5757136210624264, "grad_norm": 1.4394152777366287, "learning_rate": 1.9105773393170836e-05, "loss": 0.8147, "num_input_tokens_seen": 949303712, "step": 5259 }, { "epoch": 0.5758230931333643, "grad_norm": 1.3309360151781953, "learning_rate": 1.9097417522453023e-05, "loss": 0.5687, "num_input_tokens_seen": 949504416, "step": 5260 }, { "epoch": 0.5759325652043022, "grad_norm": 1.1665987891932703, "learning_rate": 1.9089062349999437e-05, "loss": 0.6587, "num_input_tokens_seen": 949696608, "step": 5261 }, { "epoch": 0.5760420372752402, "grad_norm": 1.426042423104122, "learning_rate": 1.9080707876798475e-05, "loss": 0.7386, "num_input_tokens_seen": 949838848, "step": 5262 }, { "epoch": 0.5761515093461781, "grad_norm": 1.280791596076171, "learning_rate": 1.9072354103838458e-05, "loss": 0.5829, "num_input_tokens_seen": 950006848, "step": 5263 }, { "epoch": 0.5762609814171159, "grad_norm": 1.3763871132079697, "learning_rate": 1.9064001032107612e-05, "loss": 0.7869, "num_input_tokens_seen": 950194784, "step": 5264 }, { "epoch": 0.5763704534880538, "grad_norm": 1.3561273563324805, "learning_rate": 1.9055648662594107e-05, "loss": 0.6898, "num_input_tokens_seen": 950376224, "step": 5265 }, { "epoch": 0.5764799255589917, "grad_norm": 1.2870385204025083, "learning_rate": 1.904729699628599e-05, "loss": 0.7049, "num_input_tokens_seen": 950536832, "step": 5266 }, { "epoch": 0.5765893976299297, "grad_norm": 1.2162725258727969, "learning_rate": 1.9038946034171258e-05, "loss": 0.7944, "num_input_tokens_seen": 950711328, "step": 5267 }, { "epoch": 0.5766988697008676, "grad_norm": 1.2134987109962914, "learning_rate": 1.903059577723783e-05, "loss": 0.6113, "num_input_tokens_seen": 950870816, "step": 5268 }, { "epoch": 0.5768083417718055, "grad_norm": 1.2041532407933935, "learning_rate": 1.9022246226473494e-05, "loss": 0.6373, "num_input_tokens_seen": 951029184, "step": 5269 }, { "epoch": 0.5769178138427433, "grad_norm": 1.2743357199489294, "learning_rate": 1.9013897382866013e-05, "loss": 0.6858, "num_input_tokens_seen": 951206816, "step": 5270 }, { "epoch": 0.5770272859136812, "grad_norm": 1.2433151309152795, "learning_rate": 1.900554924740302e-05, "loss": 0.7019, "num_input_tokens_seen": 951358016, "step": 5271 }, { "epoch": 0.5771367579846192, "grad_norm": 1.2792104910302833, "learning_rate": 1.8997201821072097e-05, "loss": 0.6708, "num_input_tokens_seen": 951534528, "step": 5272 }, { "epoch": 0.5772462300555571, "grad_norm": 1.4785398131071315, "learning_rate": 1.8988855104860718e-05, "loss": 0.9943, "num_input_tokens_seen": 951704320, "step": 5273 }, { "epoch": 0.577355702126495, "grad_norm": 1.2350342354295711, "learning_rate": 1.8980509099756287e-05, "loss": 0.783, "num_input_tokens_seen": 951898752, "step": 5274 }, { "epoch": 0.5774651741974329, "grad_norm": 1.2809441364206688, "learning_rate": 1.8972163806746132e-05, "loss": 0.7669, "num_input_tokens_seen": 952101920, "step": 5275 }, { "epoch": 0.5775746462683707, "grad_norm": 1.4111479761701495, "learning_rate": 1.8963819226817468e-05, "loss": 0.5525, "num_input_tokens_seen": 952306880, "step": 5276 }, { "epoch": 0.5776841183393087, "grad_norm": 1.123609584314299, "learning_rate": 1.895547536095746e-05, "loss": 0.5393, "num_input_tokens_seen": 952478464, "step": 5277 }, { "epoch": 0.5777935904102466, "grad_norm": 1.374859932787868, "learning_rate": 1.8947132210153144e-05, "loss": 0.8122, "num_input_tokens_seen": 952644672, "step": 5278 }, { "epoch": 0.5779030624811845, "grad_norm": 1.2568765795226868, "learning_rate": 1.8938789775391536e-05, "loss": 0.6977, "num_input_tokens_seen": 952846048, "step": 5279 }, { "epoch": 0.5780125345521224, "grad_norm": 1.1908488486213338, "learning_rate": 1.8930448057659497e-05, "loss": 0.8444, "num_input_tokens_seen": 953036896, "step": 5280 }, { "epoch": 0.5781220066230603, "grad_norm": 1.4301438567354865, "learning_rate": 1.892210705794385e-05, "loss": 0.7312, "num_input_tokens_seen": 953207584, "step": 5281 }, { "epoch": 0.5782314786939982, "grad_norm": 1.2090011373661267, "learning_rate": 1.8913766777231322e-05, "loss": 0.7561, "num_input_tokens_seen": 953407616, "step": 5282 }, { "epoch": 0.5783409507649361, "grad_norm": 1.3871546100619738, "learning_rate": 1.8905427216508554e-05, "loss": 0.7085, "num_input_tokens_seen": 953551424, "step": 5283 }, { "epoch": 0.578450422835874, "grad_norm": 1.2069083833349408, "learning_rate": 1.8897088376762094e-05, "loss": 0.7052, "num_input_tokens_seen": 953737792, "step": 5284 }, { "epoch": 0.5785598949068119, "grad_norm": 1.2982064731323093, "learning_rate": 1.8888750258978404e-05, "loss": 0.8348, "num_input_tokens_seen": 953907808, "step": 5285 }, { "epoch": 0.5786693669777498, "grad_norm": 1.1238939888501598, "learning_rate": 1.8880412864143886e-05, "loss": 0.852, "num_input_tokens_seen": 954133824, "step": 5286 }, { "epoch": 0.5787788390486877, "grad_norm": 1.165471549071032, "learning_rate": 1.887207619324482e-05, "loss": 0.6797, "num_input_tokens_seen": 954309664, "step": 5287 }, { "epoch": 0.5788883111196256, "grad_norm": 1.1371518912404934, "learning_rate": 1.8863740247267426e-05, "loss": 0.6149, "num_input_tokens_seen": 954479904, "step": 5288 }, { "epoch": 0.5789977831905635, "grad_norm": 1.4534513045618922, "learning_rate": 1.8855405027197838e-05, "loss": 0.9868, "num_input_tokens_seen": 954656640, "step": 5289 }, { "epoch": 0.5791072552615014, "grad_norm": 1.1980839930338831, "learning_rate": 1.884707053402208e-05, "loss": 0.5796, "num_input_tokens_seen": 954838976, "step": 5290 }, { "epoch": 0.5792167273324393, "grad_norm": 1.4264590807694053, "learning_rate": 1.8838736768726125e-05, "loss": 0.7819, "num_input_tokens_seen": 955006752, "step": 5291 }, { "epoch": 0.5793261994033773, "grad_norm": 1.286444299701126, "learning_rate": 1.8830403732295823e-05, "loss": 0.9763, "num_input_tokens_seen": 955198048, "step": 5292 }, { "epoch": 0.5794356714743151, "grad_norm": 1.133657574319275, "learning_rate": 1.8822071425716968e-05, "loss": 0.85, "num_input_tokens_seen": 955400768, "step": 5293 }, { "epoch": 0.579545143545253, "grad_norm": 1.2793236183690848, "learning_rate": 1.881373984997525e-05, "loss": 0.9957, "num_input_tokens_seen": 955584896, "step": 5294 }, { "epoch": 0.5796546156161909, "grad_norm": 1.353687225957503, "learning_rate": 1.880540900605628e-05, "loss": 0.9051, "num_input_tokens_seen": 955756480, "step": 5295 }, { "epoch": 0.5797640876871288, "grad_norm": 1.2526252358846248, "learning_rate": 1.8797078894945596e-05, "loss": 0.7964, "num_input_tokens_seen": 955926272, "step": 5296 }, { "epoch": 0.5798735597580668, "grad_norm": 1.3286560791928523, "learning_rate": 1.8788749517628606e-05, "loss": 0.8012, "num_input_tokens_seen": 956122944, "step": 5297 }, { "epoch": 0.5799830318290047, "grad_norm": 1.3033292633165452, "learning_rate": 1.878042087509069e-05, "loss": 0.6837, "num_input_tokens_seen": 956307968, "step": 5298 }, { "epoch": 0.5800925038999425, "grad_norm": 1.1389982563839394, "learning_rate": 1.877209296831708e-05, "loss": 0.6519, "num_input_tokens_seen": 956492544, "step": 5299 }, { "epoch": 0.5802019759708804, "grad_norm": 1.1855684935155677, "learning_rate": 1.8763765798292966e-05, "loss": 0.6671, "num_input_tokens_seen": 956669280, "step": 5300 }, { "epoch": 0.5803114480418183, "grad_norm": 1.0968014792992309, "learning_rate": 1.8755439366003448e-05, "loss": 0.71, "num_input_tokens_seen": 956855872, "step": 5301 }, { "epoch": 0.5804209201127563, "grad_norm": 1.227830802099495, "learning_rate": 1.8747113672433505e-05, "loss": 0.7924, "num_input_tokens_seen": 957044928, "step": 5302 }, { "epoch": 0.5805303921836942, "grad_norm": 1.1060434202428326, "learning_rate": 1.8738788718568066e-05, "loss": 0.7529, "num_input_tokens_seen": 957248320, "step": 5303 }, { "epoch": 0.580639864254632, "grad_norm": 1.08002271734293, "learning_rate": 1.8730464505391953e-05, "loss": 0.72, "num_input_tokens_seen": 957406688, "step": 5304 }, { "epoch": 0.5807493363255699, "grad_norm": 1.309041598534021, "learning_rate": 1.8722141033889904e-05, "loss": 0.7287, "num_input_tokens_seen": 957584320, "step": 5305 }, { "epoch": 0.5808588083965078, "grad_norm": 1.2654778665802273, "learning_rate": 1.8713818305046566e-05, "loss": 0.8282, "num_input_tokens_seen": 957754560, "step": 5306 }, { "epoch": 0.5809682804674458, "grad_norm": 1.2623946325314932, "learning_rate": 1.87054963198465e-05, "loss": 0.8704, "num_input_tokens_seen": 957944064, "step": 5307 }, { "epoch": 0.5810777525383837, "grad_norm": 1.1258960440333334, "learning_rate": 1.86971750792742e-05, "loss": 0.7101, "num_input_tokens_seen": 958129760, "step": 5308 }, { "epoch": 0.5811872246093216, "grad_norm": 1.2306948499486725, "learning_rate": 1.8688854584314028e-05, "loss": 0.8386, "num_input_tokens_seen": 958335392, "step": 5309 }, { "epoch": 0.5812966966802594, "grad_norm": 1.1643687619806462, "learning_rate": 1.8680534835950302e-05, "loss": 0.6684, "num_input_tokens_seen": 958512352, "step": 5310 }, { "epoch": 0.5814061687511973, "grad_norm": 1.2482457248791021, "learning_rate": 1.8672215835167217e-05, "loss": 0.7258, "num_input_tokens_seen": 958689312, "step": 5311 }, { "epoch": 0.5815156408221352, "grad_norm": 1.1098470623746193, "learning_rate": 1.8663897582948912e-05, "loss": 0.6262, "num_input_tokens_seen": 958854624, "step": 5312 }, { "epoch": 0.5816251128930732, "grad_norm": 1.36563440664443, "learning_rate": 1.86555800802794e-05, "loss": 0.9159, "num_input_tokens_seen": 959038976, "step": 5313 }, { "epoch": 0.5817345849640111, "grad_norm": 1.2183926376298169, "learning_rate": 1.864726332814264e-05, "loss": 0.7262, "num_input_tokens_seen": 959190624, "step": 5314 }, { "epoch": 0.581844057034949, "grad_norm": 1.246220358034045, "learning_rate": 1.863894732752248e-05, "loss": 0.635, "num_input_tokens_seen": 959370048, "step": 5315 }, { "epoch": 0.5819535291058868, "grad_norm": 1.317997454662499, "learning_rate": 1.8630632079402693e-05, "loss": 0.5877, "num_input_tokens_seen": 959542304, "step": 5316 }, { "epoch": 0.5820630011768247, "grad_norm": 1.1692265400925883, "learning_rate": 1.8622317584766962e-05, "loss": 0.6888, "num_input_tokens_seen": 959735616, "step": 5317 }, { "epoch": 0.5821724732477627, "grad_norm": 1.2245193277985535, "learning_rate": 1.861400384459886e-05, "loss": 0.682, "num_input_tokens_seen": 959901376, "step": 5318 }, { "epoch": 0.5822819453187006, "grad_norm": 1.4632774014888243, "learning_rate": 1.86056908598819e-05, "loss": 1.0569, "num_input_tokens_seen": 960116416, "step": 5319 }, { "epoch": 0.5823914173896385, "grad_norm": 1.2799088041034776, "learning_rate": 1.8597378631599484e-05, "loss": 0.8573, "num_input_tokens_seen": 960311520, "step": 5320 }, { "epoch": 0.5825008894605763, "grad_norm": 1.3119162214907982, "learning_rate": 1.8589067160734935e-05, "loss": 0.6862, "num_input_tokens_seen": 960470336, "step": 5321 }, { "epoch": 0.5826103615315142, "grad_norm": 1.1827014543419396, "learning_rate": 1.8580756448271496e-05, "loss": 0.7469, "num_input_tokens_seen": 960672384, "step": 5322 }, { "epoch": 0.5827198336024522, "grad_norm": 1.2754661455914087, "learning_rate": 1.8572446495192288e-05, "loss": 0.8786, "num_input_tokens_seen": 960886304, "step": 5323 }, { "epoch": 0.5828293056733901, "grad_norm": 1.2216929092728523, "learning_rate": 1.8564137302480373e-05, "loss": 0.711, "num_input_tokens_seen": 961071776, "step": 5324 }, { "epoch": 0.582938777744328, "grad_norm": 1.2394147913283022, "learning_rate": 1.8555828871118715e-05, "loss": 0.8633, "num_input_tokens_seen": 961231040, "step": 5325 }, { "epoch": 0.5830482498152659, "grad_norm": 1.2604881875840084, "learning_rate": 1.8547521202090178e-05, "loss": 0.8133, "num_input_tokens_seen": 961409792, "step": 5326 }, { "epoch": 0.5831577218862037, "grad_norm": 1.3160320621776296, "learning_rate": 1.8539214296377545e-05, "loss": 0.7706, "num_input_tokens_seen": 961573760, "step": 5327 }, { "epoch": 0.5832671939571417, "grad_norm": 1.101472404529114, "learning_rate": 1.853090815496351e-05, "loss": 0.702, "num_input_tokens_seen": 961764384, "step": 5328 }, { "epoch": 0.5833766660280796, "grad_norm": 1.276172485911886, "learning_rate": 1.8522602778830688e-05, "loss": 0.9349, "num_input_tokens_seen": 961950080, "step": 5329 }, { "epoch": 0.5834861380990175, "grad_norm": 1.2062317011734909, "learning_rate": 1.851429816896156e-05, "loss": 0.6777, "num_input_tokens_seen": 962140256, "step": 5330 }, { "epoch": 0.5835956101699554, "grad_norm": 1.289236022367485, "learning_rate": 1.850599432633857e-05, "loss": 0.6275, "num_input_tokens_seen": 962322368, "step": 5331 }, { "epoch": 0.5837050822408933, "grad_norm": 1.1207432747340638, "learning_rate": 1.8497691251944027e-05, "loss": 0.6795, "num_input_tokens_seen": 962516576, "step": 5332 }, { "epoch": 0.5838145543118312, "grad_norm": 1.1480374320603324, "learning_rate": 1.848938894676019e-05, "loss": 0.7046, "num_input_tokens_seen": 962704288, "step": 5333 }, { "epoch": 0.5839240263827691, "grad_norm": 1.1798182029822721, "learning_rate": 1.8481087411769187e-05, "loss": 0.5481, "num_input_tokens_seen": 962881248, "step": 5334 }, { "epoch": 0.584033498453707, "grad_norm": 1.3313446065033516, "learning_rate": 1.8472786647953078e-05, "loss": 0.6449, "num_input_tokens_seen": 963031552, "step": 5335 }, { "epoch": 0.5841429705246449, "grad_norm": 1.3301253569474953, "learning_rate": 1.8464486656293834e-05, "loss": 1.0847, "num_input_tokens_seen": 963210528, "step": 5336 }, { "epoch": 0.5842524425955828, "grad_norm": 1.3787370529327776, "learning_rate": 1.845618743777332e-05, "loss": 0.6754, "num_input_tokens_seen": 963368672, "step": 5337 }, { "epoch": 0.5843619146665207, "grad_norm": 1.1893620197752501, "learning_rate": 1.8447888993373336e-05, "loss": 0.8798, "num_input_tokens_seen": 963577440, "step": 5338 }, { "epoch": 0.5844713867374586, "grad_norm": 1.0913295120862903, "learning_rate": 1.8439591324075545e-05, "loss": 0.6739, "num_input_tokens_seen": 963768064, "step": 5339 }, { "epoch": 0.5845808588083965, "grad_norm": 1.226415783313757, "learning_rate": 1.8431294430861572e-05, "loss": 0.6251, "num_input_tokens_seen": 963930464, "step": 5340 }, { "epoch": 0.5846903308793344, "grad_norm": 1.205037219869601, "learning_rate": 1.8422998314712897e-05, "loss": 0.8172, "num_input_tokens_seen": 964114816, "step": 5341 }, { "epoch": 0.5847998029502723, "grad_norm": 1.3453925096775423, "learning_rate": 1.841470297661095e-05, "loss": 0.885, "num_input_tokens_seen": 964317984, "step": 5342 }, { "epoch": 0.5849092750212103, "grad_norm": 1.1153679281726852, "learning_rate": 1.8406408417537064e-05, "loss": 0.9545, "num_input_tokens_seen": 964534816, "step": 5343 }, { "epoch": 0.5850187470921481, "grad_norm": 1.3104453580512192, "learning_rate": 1.8398114638472444e-05, "loss": 0.5781, "num_input_tokens_seen": 964693632, "step": 5344 }, { "epoch": 0.585128219163086, "grad_norm": 1.2689390238999025, "learning_rate": 1.8389821640398245e-05, "loss": 0.9383, "num_input_tokens_seen": 964883808, "step": 5345 }, { "epoch": 0.5852376912340239, "grad_norm": 1.2978241905385794, "learning_rate": 1.8381529424295516e-05, "loss": 0.7277, "num_input_tokens_seen": 965054048, "step": 5346 }, { "epoch": 0.5853471633049618, "grad_norm": 1.3334305201317156, "learning_rate": 1.8373237991145202e-05, "loss": 0.8436, "num_input_tokens_seen": 965228768, "step": 5347 }, { "epoch": 0.5854566353758998, "grad_norm": 1.2536117350441067, "learning_rate": 1.8364947341928156e-05, "loss": 0.8186, "num_input_tokens_seen": 965400576, "step": 5348 }, { "epoch": 0.5855661074468377, "grad_norm": 1.2737070746109806, "learning_rate": 1.8356657477625157e-05, "loss": 0.7624, "num_input_tokens_seen": 965569024, "step": 5349 }, { "epoch": 0.5856755795177755, "grad_norm": 1.186809590060422, "learning_rate": 1.8348368399216892e-05, "loss": 0.9171, "num_input_tokens_seen": 965762560, "step": 5350 }, { "epoch": 0.5857850515887134, "grad_norm": 1.3539560545462757, "learning_rate": 1.8340080107683915e-05, "loss": 0.7223, "num_input_tokens_seen": 965944000, "step": 5351 }, { "epoch": 0.5858945236596513, "grad_norm": 1.2065553189814593, "learning_rate": 1.833179260400674e-05, "loss": 0.6672, "num_input_tokens_seen": 966109984, "step": 5352 }, { "epoch": 0.5860039957305893, "grad_norm": 1.372949241325063, "learning_rate": 1.8323505889165747e-05, "loss": 0.8184, "num_input_tokens_seen": 966275072, "step": 5353 }, { "epoch": 0.5861134678015272, "grad_norm": 1.4776396682484938, "learning_rate": 1.831521996414125e-05, "loss": 0.7877, "num_input_tokens_seen": 966441056, "step": 5354 }, { "epoch": 0.586222939872465, "grad_norm": 1.227554843166888, "learning_rate": 1.8306934829913448e-05, "loss": 0.6966, "num_input_tokens_seen": 966613312, "step": 5355 }, { "epoch": 0.5863324119434029, "grad_norm": 1.1674308431265967, "learning_rate": 1.8298650487462455e-05, "loss": 0.7099, "num_input_tokens_seen": 966833504, "step": 5356 }, { "epoch": 0.5864418840143408, "grad_norm": 1.3328603809695847, "learning_rate": 1.8290366937768306e-05, "loss": 1.0304, "num_input_tokens_seen": 967043392, "step": 5357 }, { "epoch": 0.5865513560852788, "grad_norm": 1.2706780052106141, "learning_rate": 1.8282084181810915e-05, "loss": 0.8782, "num_input_tokens_seen": 967259104, "step": 5358 }, { "epoch": 0.5866608281562167, "grad_norm": 1.2746307773412393, "learning_rate": 1.827380222057013e-05, "loss": 0.6825, "num_input_tokens_seen": 967461600, "step": 5359 }, { "epoch": 0.5867703002271546, "grad_norm": 1.1461794186059726, "learning_rate": 1.8265521055025677e-05, "loss": 0.5396, "num_input_tokens_seen": 967644832, "step": 5360 }, { "epoch": 0.5868797722980924, "grad_norm": 1.330068579820169, "learning_rate": 1.825724068615721e-05, "loss": 0.9616, "num_input_tokens_seen": 967839264, "step": 5361 }, { "epoch": 0.5869892443690303, "grad_norm": 1.1331111645787582, "learning_rate": 1.824896111494429e-05, "loss": 0.6433, "num_input_tokens_seen": 967995616, "step": 5362 }, { "epoch": 0.5870987164399682, "grad_norm": 1.3319753148480733, "learning_rate": 1.8240682342366354e-05, "loss": 0.7917, "num_input_tokens_seen": 968177056, "step": 5363 }, { "epoch": 0.5872081885109062, "grad_norm": 1.2787904159110388, "learning_rate": 1.8232404369402784e-05, "loss": 0.6896, "num_input_tokens_seen": 968325344, "step": 5364 }, { "epoch": 0.5873176605818441, "grad_norm": 1.1877088864966492, "learning_rate": 1.822412719703283e-05, "loss": 0.84, "num_input_tokens_seen": 968509248, "step": 5365 }, { "epoch": 0.587427132652782, "grad_norm": 1.2458330318148618, "learning_rate": 1.8215850826235682e-05, "loss": 0.7646, "num_input_tokens_seen": 968724288, "step": 5366 }, { "epoch": 0.5875366047237198, "grad_norm": 1.2944926245204118, "learning_rate": 1.820757525799041e-05, "loss": 0.8142, "num_input_tokens_seen": 968907968, "step": 5367 }, { "epoch": 0.5876460767946577, "grad_norm": 1.3990487835185934, "learning_rate": 1.8199300493275993e-05, "loss": 0.8325, "num_input_tokens_seen": 969105088, "step": 5368 }, { "epoch": 0.5877555488655957, "grad_norm": 1.2744844497594028, "learning_rate": 1.8191026533071336e-05, "loss": 0.8287, "num_input_tokens_seen": 969294592, "step": 5369 }, { "epoch": 0.5878650209365336, "grad_norm": 1.3787252507275383, "learning_rate": 1.8182753378355218e-05, "loss": 0.884, "num_input_tokens_seen": 969442880, "step": 5370 }, { "epoch": 0.5879744930074715, "grad_norm": 1.357960765544333, "learning_rate": 1.817448103010635e-05, "loss": 1.0454, "num_input_tokens_seen": 969619392, "step": 5371 }, { "epoch": 0.5880839650784093, "grad_norm": 1.322678201564231, "learning_rate": 1.816620948930332e-05, "loss": 0.9974, "num_input_tokens_seen": 969809568, "step": 5372 }, { "epoch": 0.5881934371493472, "grad_norm": 1.2358826230835418, "learning_rate": 1.8157938756924656e-05, "loss": 0.7218, "num_input_tokens_seen": 969978912, "step": 5373 }, { "epoch": 0.5883029092202852, "grad_norm": 1.2282156039685574, "learning_rate": 1.8149668833948747e-05, "loss": 0.8229, "num_input_tokens_seen": 970169760, "step": 5374 }, { "epoch": 0.5884123812912231, "grad_norm": 1.2773891055952786, "learning_rate": 1.8141399721353915e-05, "loss": 0.9436, "num_input_tokens_seen": 970359040, "step": 5375 }, { "epoch": 0.588521853362161, "grad_norm": 1.303055707886829, "learning_rate": 1.81331314201184e-05, "loss": 0.7799, "num_input_tokens_seen": 970564448, "step": 5376 }, { "epoch": 0.5886313254330989, "grad_norm": 1.1902345492716837, "learning_rate": 1.8124863931220293e-05, "loss": 0.7778, "num_input_tokens_seen": 970747232, "step": 5377 }, { "epoch": 0.5887407975040367, "grad_norm": 1.3968569385291103, "learning_rate": 1.811659725563765e-05, "loss": 0.7139, "num_input_tokens_seen": 970927552, "step": 5378 }, { "epoch": 0.5888502695749747, "grad_norm": 1.302483484452695, "learning_rate": 1.8108331394348388e-05, "loss": 0.7163, "num_input_tokens_seen": 971126464, "step": 5379 }, { "epoch": 0.5889597416459126, "grad_norm": 1.2196526730271473, "learning_rate": 1.8100066348330356e-05, "loss": 0.8249, "num_input_tokens_seen": 971299840, "step": 5380 }, { "epoch": 0.5890692137168505, "grad_norm": 1.228284222713649, "learning_rate": 1.8091802118561272e-05, "loss": 0.6595, "num_input_tokens_seen": 971497632, "step": 5381 }, { "epoch": 0.5891786857877884, "grad_norm": 1.12934977652624, "learning_rate": 1.80835387060188e-05, "loss": 0.7831, "num_input_tokens_seen": 971697664, "step": 5382 }, { "epoch": 0.5892881578587263, "grad_norm": 1.2670904552774296, "learning_rate": 1.8075276111680478e-05, "loss": 0.8302, "num_input_tokens_seen": 971884928, "step": 5383 }, { "epoch": 0.5893976299296642, "grad_norm": 1.2096103409658152, "learning_rate": 1.806701433652375e-05, "loss": 0.7442, "num_input_tokens_seen": 972069728, "step": 5384 }, { "epoch": 0.5895071020006021, "grad_norm": 1.2617615967080251, "learning_rate": 1.805875338152598e-05, "loss": 0.673, "num_input_tokens_seen": 972274688, "step": 5385 }, { "epoch": 0.58961657407154, "grad_norm": 1.3112798196112927, "learning_rate": 1.8050493247664404e-05, "loss": 0.8682, "num_input_tokens_seen": 972455456, "step": 5386 }, { "epoch": 0.5897260461424779, "grad_norm": 1.5071253816860881, "learning_rate": 1.80422339359162e-05, "loss": 1.0582, "num_input_tokens_seen": 972633760, "step": 5387 }, { "epoch": 0.5898355182134158, "grad_norm": 1.3773545133329181, "learning_rate": 1.8033975447258416e-05, "loss": 0.77, "num_input_tokens_seen": 972775104, "step": 5388 }, { "epoch": 0.5899449902843537, "grad_norm": 1.1572601957302615, "learning_rate": 1.802571778266802e-05, "loss": 0.5463, "num_input_tokens_seen": 972933472, "step": 5389 }, { "epoch": 0.5900544623552916, "grad_norm": 1.3083366483091567, "learning_rate": 1.8017460943121878e-05, "loss": 0.7448, "num_input_tokens_seen": 973131488, "step": 5390 }, { "epoch": 0.5901639344262295, "grad_norm": 1.1750470781025952, "learning_rate": 1.8009204929596757e-05, "loss": 0.6134, "num_input_tokens_seen": 973299712, "step": 5391 }, { "epoch": 0.5902734064971674, "grad_norm": 1.3222240627411357, "learning_rate": 1.8000949743069336e-05, "loss": 0.8962, "num_input_tokens_seen": 973499072, "step": 5392 }, { "epoch": 0.5903828785681053, "grad_norm": 1.258306370797878, "learning_rate": 1.7992695384516174e-05, "loss": 0.713, "num_input_tokens_seen": 973654304, "step": 5393 }, { "epoch": 0.5904923506390433, "grad_norm": 1.3926842262542187, "learning_rate": 1.7984441854913758e-05, "loss": 0.9427, "num_input_tokens_seen": 973814016, "step": 5394 }, { "epoch": 0.5906018227099811, "grad_norm": 1.26842537471204, "learning_rate": 1.7976189155238448e-05, "loss": 0.6623, "num_input_tokens_seen": 973995008, "step": 5395 }, { "epoch": 0.590711294780919, "grad_norm": 1.4504918678263492, "learning_rate": 1.796793728646654e-05, "loss": 0.9194, "num_input_tokens_seen": 974191904, "step": 5396 }, { "epoch": 0.5908207668518569, "grad_norm": 1.2949833229640704, "learning_rate": 1.7959686249574214e-05, "loss": 0.8079, "num_input_tokens_seen": 974402688, "step": 5397 }, { "epoch": 0.5909302389227948, "grad_norm": 1.200644872164357, "learning_rate": 1.7951436045537536e-05, "loss": 0.5146, "num_input_tokens_seen": 974560160, "step": 5398 }, { "epoch": 0.5910397109937328, "grad_norm": 1.4589748152454125, "learning_rate": 1.7943186675332504e-05, "loss": 0.9154, "num_input_tokens_seen": 974739360, "step": 5399 }, { "epoch": 0.5911491830646707, "grad_norm": 1.346490814928414, "learning_rate": 1.7934938139934993e-05, "loss": 0.7965, "num_input_tokens_seen": 974869504, "step": 5400 }, { "epoch": 0.5912586551356085, "grad_norm": 1.1140994628937826, "learning_rate": 1.7926690440320803e-05, "loss": 0.6232, "num_input_tokens_seen": 975040640, "step": 5401 }, { "epoch": 0.5913681272065464, "grad_norm": 1.2371654115975685, "learning_rate": 1.7918443577465605e-05, "loss": 0.7882, "num_input_tokens_seen": 975214464, "step": 5402 }, { "epoch": 0.5914775992774843, "grad_norm": 1.2632156798567928, "learning_rate": 1.7910197552344993e-05, "loss": 0.8397, "num_input_tokens_seen": 975398368, "step": 5403 }, { "epoch": 0.5915870713484223, "grad_norm": 1.2127734141451618, "learning_rate": 1.7901952365934467e-05, "loss": 0.854, "num_input_tokens_seen": 975589216, "step": 5404 }, { "epoch": 0.5916965434193602, "grad_norm": 1.326306343283991, "learning_rate": 1.78937080192094e-05, "loss": 0.7249, "num_input_tokens_seen": 975723168, "step": 5405 }, { "epoch": 0.591806015490298, "grad_norm": 1.2897409214410844, "learning_rate": 1.7885464513145094e-05, "loss": 0.8784, "num_input_tokens_seen": 975884896, "step": 5406 }, { "epoch": 0.5919154875612359, "grad_norm": 1.2183238042327433, "learning_rate": 1.7877221848716726e-05, "loss": 0.7616, "num_input_tokens_seen": 976095232, "step": 5407 }, { "epoch": 0.5920249596321738, "grad_norm": 1.3297619324772894, "learning_rate": 1.7868980026899405e-05, "loss": 0.8733, "num_input_tokens_seen": 976256512, "step": 5408 }, { "epoch": 0.5921344317031118, "grad_norm": 1.3481374130420327, "learning_rate": 1.786073904866811e-05, "loss": 0.7395, "num_input_tokens_seen": 976448480, "step": 5409 }, { "epoch": 0.5922439037740497, "grad_norm": 1.2611379319941194, "learning_rate": 1.785249891499774e-05, "loss": 0.9165, "num_input_tokens_seen": 976637536, "step": 5410 }, { "epoch": 0.5923533758449876, "grad_norm": 1.1836187781781637, "learning_rate": 1.7844259626863083e-05, "loss": 0.7528, "num_input_tokens_seen": 976831744, "step": 5411 }, { "epoch": 0.5924628479159254, "grad_norm": 1.3355271004626792, "learning_rate": 1.783602118523883e-05, "loss": 0.8915, "num_input_tokens_seen": 977008928, "step": 5412 }, { "epoch": 0.5925723199868633, "grad_norm": 1.3647817753748936, "learning_rate": 1.7827783591099583e-05, "loss": 0.6286, "num_input_tokens_seen": 977186784, "step": 5413 }, { "epoch": 0.5926817920578012, "grad_norm": 1.1193726517116467, "learning_rate": 1.781954684541982e-05, "loss": 0.6468, "num_input_tokens_seen": 977389952, "step": 5414 }, { "epoch": 0.5927912641287392, "grad_norm": 1.2960362307229052, "learning_rate": 1.7811310949173947e-05, "loss": 0.8473, "num_input_tokens_seen": 977569824, "step": 5415 }, { "epoch": 0.5929007361996771, "grad_norm": 1.3328274224039611, "learning_rate": 1.7803075903336238e-05, "loss": 0.8091, "num_input_tokens_seen": 977736928, "step": 5416 }, { "epoch": 0.593010208270615, "grad_norm": 1.194430505809234, "learning_rate": 1.7794841708880888e-05, "loss": 0.8207, "num_input_tokens_seen": 977954432, "step": 5417 }, { "epoch": 0.5931196803415528, "grad_norm": 1.302610788960219, "learning_rate": 1.7786608366782002e-05, "loss": 0.9478, "num_input_tokens_seen": 978171040, "step": 5418 }, { "epoch": 0.5932291524124907, "grad_norm": 1.145369551461972, "learning_rate": 1.7778375878013547e-05, "loss": 0.6887, "num_input_tokens_seen": 978349120, "step": 5419 }, { "epoch": 0.5933386244834287, "grad_norm": 1.225464882514412, "learning_rate": 1.7770144243549425e-05, "loss": 0.5265, "num_input_tokens_seen": 978545568, "step": 5420 }, { "epoch": 0.5934480965543666, "grad_norm": 1.313596676010661, "learning_rate": 1.7761913464363413e-05, "loss": 0.7012, "num_input_tokens_seen": 978732384, "step": 5421 }, { "epoch": 0.5935575686253045, "grad_norm": 1.237809485451317, "learning_rate": 1.7753683541429212e-05, "loss": 0.8515, "num_input_tokens_seen": 978922112, "step": 5422 }, { "epoch": 0.5936670406962423, "grad_norm": 1.313673675381609, "learning_rate": 1.7745454475720387e-05, "loss": 0.6774, "num_input_tokens_seen": 979108256, "step": 5423 }, { "epoch": 0.5937765127671802, "grad_norm": 1.231142035461054, "learning_rate": 1.7737226268210423e-05, "loss": 0.6143, "num_input_tokens_seen": 979285888, "step": 5424 }, { "epoch": 0.5938859848381182, "grad_norm": 1.2834968869327943, "learning_rate": 1.7728998919872723e-05, "loss": 0.9674, "num_input_tokens_seen": 979485248, "step": 5425 }, { "epoch": 0.5939954569090561, "grad_norm": 1.296417845156575, "learning_rate": 1.7720772431680538e-05, "loss": 0.8077, "num_input_tokens_seen": 979658176, "step": 5426 }, { "epoch": 0.594104928979994, "grad_norm": 1.2731693664221202, "learning_rate": 1.771254680460707e-05, "loss": 0.6688, "num_input_tokens_seen": 979855520, "step": 5427 }, { "epoch": 0.5942144010509319, "grad_norm": 1.4557112781970094, "learning_rate": 1.770432203962537e-05, "loss": 0.8152, "num_input_tokens_seen": 980028672, "step": 5428 }, { "epoch": 0.5943238731218697, "grad_norm": 1.344499645050104, "learning_rate": 1.7696098137708434e-05, "loss": 0.91, "num_input_tokens_seen": 980241024, "step": 5429 }, { "epoch": 0.5944333451928077, "grad_norm": 1.2113246175892969, "learning_rate": 1.7687875099829127e-05, "loss": 0.6866, "num_input_tokens_seen": 980414624, "step": 5430 }, { "epoch": 0.5945428172637456, "grad_norm": 1.2928345947738131, "learning_rate": 1.767965292696021e-05, "loss": 0.6133, "num_input_tokens_seen": 980602784, "step": 5431 }, { "epoch": 0.5946522893346835, "grad_norm": 1.4099363541273129, "learning_rate": 1.7671431620074365e-05, "loss": 0.789, "num_input_tokens_seen": 980753984, "step": 5432 }, { "epoch": 0.5947617614056214, "grad_norm": 1.4191329002432636, "learning_rate": 1.7663211180144146e-05, "loss": 1.0414, "num_input_tokens_seen": 980930048, "step": 5433 }, { "epoch": 0.5948712334765593, "grad_norm": 1.4314685005082675, "learning_rate": 1.7654991608142024e-05, "loss": 0.7856, "num_input_tokens_seen": 981098496, "step": 5434 }, { "epoch": 0.5949807055474972, "grad_norm": 1.405777364354256, "learning_rate": 1.7646772905040347e-05, "loss": 0.825, "num_input_tokens_seen": 981279264, "step": 5435 }, { "epoch": 0.5950901776184351, "grad_norm": 1.407046495150635, "learning_rate": 1.7638555071811373e-05, "loss": 0.8719, "num_input_tokens_seen": 981448384, "step": 5436 }, { "epoch": 0.595199649689373, "grad_norm": 1.1789759327775375, "learning_rate": 1.763033810942728e-05, "loss": 0.6106, "num_input_tokens_seen": 981624672, "step": 5437 }, { "epoch": 0.5953091217603109, "grad_norm": 1.2374633590976554, "learning_rate": 1.7622122018860082e-05, "loss": 0.7378, "num_input_tokens_seen": 981779008, "step": 5438 }, { "epoch": 0.5954185938312488, "grad_norm": 1.175013345277361, "learning_rate": 1.761390680108176e-05, "loss": 0.6691, "num_input_tokens_seen": 981948128, "step": 5439 }, { "epoch": 0.5955280659021867, "grad_norm": 1.1829554785076084, "learning_rate": 1.7605692457064133e-05, "loss": 0.6119, "num_input_tokens_seen": 982133600, "step": 5440 }, { "epoch": 0.5956375379731246, "grad_norm": 1.211177875902799, "learning_rate": 1.7597478987778956e-05, "loss": 0.7315, "num_input_tokens_seen": 982314816, "step": 5441 }, { "epoch": 0.5957470100440625, "grad_norm": 1.2671036736058843, "learning_rate": 1.758926639419786e-05, "loss": 0.8267, "num_input_tokens_seen": 982511040, "step": 5442 }, { "epoch": 0.5958564821150004, "grad_norm": 1.17799614927702, "learning_rate": 1.7581054677292387e-05, "loss": 0.7293, "num_input_tokens_seen": 982694272, "step": 5443 }, { "epoch": 0.5959659541859383, "grad_norm": 1.2439224794496102, "learning_rate": 1.7572843838033964e-05, "loss": 0.8918, "num_input_tokens_seen": 982891840, "step": 5444 }, { "epoch": 0.5960754262568763, "grad_norm": 1.2652471943906902, "learning_rate": 1.756463387739391e-05, "loss": 0.8333, "num_input_tokens_seen": 983074176, "step": 5445 }, { "epoch": 0.5961848983278141, "grad_norm": 1.1944803071121242, "learning_rate": 1.7556424796343462e-05, "loss": 0.6789, "num_input_tokens_seen": 983230976, "step": 5446 }, { "epoch": 0.596294370398752, "grad_norm": 1.1465991091140701, "learning_rate": 1.754821659585373e-05, "loss": 0.8542, "num_input_tokens_seen": 983404800, "step": 5447 }, { "epoch": 0.5964038424696899, "grad_norm": 1.241701570869405, "learning_rate": 1.754000927689573e-05, "loss": 0.7352, "num_input_tokens_seen": 983607296, "step": 5448 }, { "epoch": 0.5965133145406278, "grad_norm": 1.2268784717388266, "learning_rate": 1.7531802840440364e-05, "loss": 0.5752, "num_input_tokens_seen": 983796352, "step": 5449 }, { "epoch": 0.5966227866115658, "grad_norm": 1.3983588169914543, "learning_rate": 1.7523597287458447e-05, "loss": 0.8487, "num_input_tokens_seen": 983955616, "step": 5450 }, { "epoch": 0.5967322586825037, "grad_norm": 1.4307817117795067, "learning_rate": 1.7515392618920686e-05, "loss": 0.7848, "num_input_tokens_seen": 984128768, "step": 5451 }, { "epoch": 0.5968417307534415, "grad_norm": 1.227641317619757, "learning_rate": 1.750718883579766e-05, "loss": 0.751, "num_input_tokens_seen": 984304832, "step": 5452 }, { "epoch": 0.5969512028243794, "grad_norm": 1.0891941007463486, "learning_rate": 1.749898593905988e-05, "loss": 0.5144, "num_input_tokens_seen": 984487616, "step": 5453 }, { "epoch": 0.5970606748953173, "grad_norm": 1.276368868650863, "learning_rate": 1.7490783929677714e-05, "loss": 0.6408, "num_input_tokens_seen": 984659424, "step": 5454 }, { "epoch": 0.5971701469662553, "grad_norm": 1.2986434461043814, "learning_rate": 1.748258280862147e-05, "loss": 0.6533, "num_input_tokens_seen": 984848928, "step": 5455 }, { "epoch": 0.5972796190371932, "grad_norm": 1.2941939435361756, "learning_rate": 1.7474382576861293e-05, "loss": 0.7597, "num_input_tokens_seen": 984977280, "step": 5456 }, { "epoch": 0.597389091108131, "grad_norm": 1.2324229390038492, "learning_rate": 1.7466183235367274e-05, "loss": 0.8465, "num_input_tokens_seen": 985152000, "step": 5457 }, { "epoch": 0.5974985631790689, "grad_norm": 1.4234935226051877, "learning_rate": 1.745798478510938e-05, "loss": 0.9797, "num_input_tokens_seen": 985338592, "step": 5458 }, { "epoch": 0.5976080352500068, "grad_norm": 1.3422027373864505, "learning_rate": 1.744978722705747e-05, "loss": 0.9235, "num_input_tokens_seen": 985530784, "step": 5459 }, { "epoch": 0.5977175073209448, "grad_norm": 1.1192552863961793, "learning_rate": 1.74415905621813e-05, "loss": 0.6718, "num_input_tokens_seen": 985720288, "step": 5460 }, { "epoch": 0.5978269793918827, "grad_norm": 1.3054595764312682, "learning_rate": 1.74333947914505e-05, "loss": 0.7027, "num_input_tokens_seen": 985897920, "step": 5461 }, { "epoch": 0.5979364514628206, "grad_norm": 1.3268349609227654, "learning_rate": 1.7425199915834646e-05, "loss": 0.7748, "num_input_tokens_seen": 986072416, "step": 5462 }, { "epoch": 0.5980459235337584, "grad_norm": 1.249807247559253, "learning_rate": 1.7417005936303155e-05, "loss": 0.7614, "num_input_tokens_seen": 986280064, "step": 5463 }, { "epoch": 0.5981553956046963, "grad_norm": 1.0890127315670262, "learning_rate": 1.740881285382537e-05, "loss": 0.5321, "num_input_tokens_seen": 986455008, "step": 5464 }, { "epoch": 0.5982648676756342, "grad_norm": 1.168231590065531, "learning_rate": 1.7400620669370513e-05, "loss": 0.7024, "num_input_tokens_seen": 986648768, "step": 5465 }, { "epoch": 0.5983743397465722, "grad_norm": 1.1546578141501556, "learning_rate": 1.7392429383907698e-05, "loss": 0.7324, "num_input_tokens_seen": 986835808, "step": 5466 }, { "epoch": 0.5984838118175101, "grad_norm": 1.3318515331934437, "learning_rate": 1.7384238998405954e-05, "loss": 0.8376, "num_input_tokens_seen": 987027104, "step": 5467 }, { "epoch": 0.598593283888448, "grad_norm": 1.2778371155593589, "learning_rate": 1.7376049513834165e-05, "loss": 0.9533, "num_input_tokens_seen": 987226912, "step": 5468 }, { "epoch": 0.5987027559593858, "grad_norm": 1.0914414702557713, "learning_rate": 1.7367860931161155e-05, "loss": 0.5967, "num_input_tokens_seen": 987399616, "step": 5469 }, { "epoch": 0.5988122280303237, "grad_norm": 1.4023396696493562, "learning_rate": 1.73596732513556e-05, "loss": 0.8264, "num_input_tokens_seen": 987573664, "step": 5470 }, { "epoch": 0.5989217001012617, "grad_norm": 1.062767051346414, "learning_rate": 1.7351486475386088e-05, "loss": 0.6711, "num_input_tokens_seen": 987790720, "step": 5471 }, { "epoch": 0.5990311721721996, "grad_norm": 1.297456587945371, "learning_rate": 1.7343300604221118e-05, "loss": 0.8188, "num_input_tokens_seen": 987949984, "step": 5472 }, { "epoch": 0.5991406442431375, "grad_norm": 1.1963908498629037, "learning_rate": 1.733511563882904e-05, "loss": 0.7175, "num_input_tokens_seen": 988148000, "step": 5473 }, { "epoch": 0.5992501163140753, "grad_norm": 1.2154909721958798, "learning_rate": 1.732693158017813e-05, "loss": 0.8653, "num_input_tokens_seen": 988364608, "step": 5474 }, { "epoch": 0.5993595883850132, "grad_norm": 1.1248055925877316, "learning_rate": 1.7318748429236547e-05, "loss": 0.5839, "num_input_tokens_seen": 988550976, "step": 5475 }, { "epoch": 0.5994690604559512, "grad_norm": 1.296363132068269, "learning_rate": 1.7310566186972345e-05, "loss": 0.83, "num_input_tokens_seen": 988704416, "step": 5476 }, { "epoch": 0.5995785325268891, "grad_norm": 1.3052715433356141, "learning_rate": 1.7302384854353455e-05, "loss": 1.0223, "num_input_tokens_seen": 988870848, "step": 5477 }, { "epoch": 0.599688004597827, "grad_norm": 1.220945679027226, "learning_rate": 1.7294204432347723e-05, "loss": 0.7546, "num_input_tokens_seen": 989056992, "step": 5478 }, { "epoch": 0.5997974766687649, "grad_norm": 1.3476951261800934, "learning_rate": 1.7286024921922887e-05, "loss": 0.7647, "num_input_tokens_seen": 989236864, "step": 5479 }, { "epoch": 0.5999069487397027, "grad_norm": 1.3806349424916842, "learning_rate": 1.7277846324046544e-05, "loss": 0.8436, "num_input_tokens_seen": 989414272, "step": 5480 }, { "epoch": 0.6000164208106407, "grad_norm": 1.4535051074284386, "learning_rate": 1.7269668639686225e-05, "loss": 1.1026, "num_input_tokens_seen": 989620800, "step": 5481 }, { "epoch": 0.6001258928815786, "grad_norm": 1.477002598000258, "learning_rate": 1.7261491869809327e-05, "loss": 1.0973, "num_input_tokens_seen": 989802688, "step": 5482 }, { "epoch": 0.6002353649525165, "grad_norm": 1.25193955626849, "learning_rate": 1.7253316015383145e-05, "loss": 0.6267, "num_input_tokens_seen": 989968000, "step": 5483 }, { "epoch": 0.6003448370234544, "grad_norm": 1.3491772458087985, "learning_rate": 1.7245141077374865e-05, "loss": 0.842, "num_input_tokens_seen": 990164448, "step": 5484 }, { "epoch": 0.6004543090943923, "grad_norm": 1.3357805304860564, "learning_rate": 1.723696705675158e-05, "loss": 0.7046, "num_input_tokens_seen": 990334464, "step": 5485 }, { "epoch": 0.6005637811653302, "grad_norm": 1.3396759817893156, "learning_rate": 1.7228793954480246e-05, "loss": 0.7015, "num_input_tokens_seen": 990489696, "step": 5486 }, { "epoch": 0.6006732532362681, "grad_norm": 1.2240998152537081, "learning_rate": 1.722062177152773e-05, "loss": 0.7332, "num_input_tokens_seen": 990666656, "step": 5487 }, { "epoch": 0.600782725307206, "grad_norm": 1.292305122013121, "learning_rate": 1.7212450508860794e-05, "loss": 0.6961, "num_input_tokens_seen": 990806656, "step": 5488 }, { "epoch": 0.6008921973781439, "grad_norm": 1.2058175907983544, "learning_rate": 1.720428016744607e-05, "loss": 0.9414, "num_input_tokens_seen": 990993696, "step": 5489 }, { "epoch": 0.6010016694490818, "grad_norm": 1.41750729621207, "learning_rate": 1.7196110748250095e-05, "loss": 0.8922, "num_input_tokens_seen": 991158784, "step": 5490 }, { "epoch": 0.6011111415200197, "grad_norm": 1.200472992821638, "learning_rate": 1.718794225223931e-05, "loss": 0.7054, "num_input_tokens_seen": 991330592, "step": 5491 }, { "epoch": 0.6012206135909576, "grad_norm": 1.3093351626652086, "learning_rate": 1.7179774680380014e-05, "loss": 0.8146, "num_input_tokens_seen": 991511584, "step": 5492 }, { "epoch": 0.6013300856618955, "grad_norm": 1.323650451965912, "learning_rate": 1.7171608033638435e-05, "loss": 0.7095, "num_input_tokens_seen": 991679808, "step": 5493 }, { "epoch": 0.6014395577328334, "grad_norm": 1.196266043305316, "learning_rate": 1.716344231298065e-05, "loss": 0.9056, "num_input_tokens_seen": 991882304, "step": 5494 }, { "epoch": 0.6015490298037713, "grad_norm": 1.4802714596342672, "learning_rate": 1.715527751937266e-05, "loss": 0.9579, "num_input_tokens_seen": 992036416, "step": 5495 }, { "epoch": 0.6016585018747093, "grad_norm": 1.4152572389925275, "learning_rate": 1.714711365378034e-05, "loss": 0.9317, "num_input_tokens_seen": 992193888, "step": 5496 }, { "epoch": 0.6017679739456471, "grad_norm": 1.1793796567112333, "learning_rate": 1.713895071716946e-05, "loss": 0.6919, "num_input_tokens_seen": 992388096, "step": 5497 }, { "epoch": 0.601877446016585, "grad_norm": 1.1934191100408496, "learning_rate": 1.7130788710505696e-05, "loss": 0.6749, "num_input_tokens_seen": 992584992, "step": 5498 }, { "epoch": 0.6019869180875229, "grad_norm": 1.2315813635286748, "learning_rate": 1.7122627634754573e-05, "loss": 0.6773, "num_input_tokens_seen": 992744704, "step": 5499 }, { "epoch": 0.6020963901584608, "grad_norm": 1.393859616061718, "learning_rate": 1.7114467490881553e-05, "loss": 0.8476, "num_input_tokens_seen": 992923008, "step": 5500 }, { "epoch": 0.6022058622293988, "grad_norm": 1.2088860639740937, "learning_rate": 1.7106308279851945e-05, "loss": 0.6068, "num_input_tokens_seen": 993128864, "step": 5501 }, { "epoch": 0.6023153343003367, "grad_norm": 1.3915235209750785, "learning_rate": 1.7098150002630988e-05, "loss": 0.6729, "num_input_tokens_seen": 993310976, "step": 5502 }, { "epoch": 0.6024248063712745, "grad_norm": 1.2951748285883595, "learning_rate": 1.7089992660183766e-05, "loss": 0.6883, "num_input_tokens_seen": 993510336, "step": 5503 }, { "epoch": 0.6025342784422124, "grad_norm": 1.18269933766464, "learning_rate": 1.7081836253475292e-05, "loss": 0.7234, "num_input_tokens_seen": 993709248, "step": 5504 }, { "epoch": 0.6026437505131503, "grad_norm": 1.1426583547219373, "learning_rate": 1.7073680783470457e-05, "loss": 0.6737, "num_input_tokens_seen": 993902560, "step": 5505 }, { "epoch": 0.6027532225840883, "grad_norm": 1.2360146171730158, "learning_rate": 1.7065526251134033e-05, "loss": 0.6561, "num_input_tokens_seen": 994063392, "step": 5506 }, { "epoch": 0.6028626946550262, "grad_norm": 1.36329287985839, "learning_rate": 1.7057372657430687e-05, "loss": 0.8774, "num_input_tokens_seen": 994213696, "step": 5507 }, { "epoch": 0.602972166725964, "grad_norm": 1.214889917998964, "learning_rate": 1.7049220003324964e-05, "loss": 0.7296, "num_input_tokens_seen": 994381472, "step": 5508 }, { "epoch": 0.6030816387969019, "grad_norm": 1.390310830620938, "learning_rate": 1.7041068289781326e-05, "loss": 0.6512, "num_input_tokens_seen": 994562912, "step": 5509 }, { "epoch": 0.6031911108678398, "grad_norm": 1.2407467241891557, "learning_rate": 1.7032917517764085e-05, "loss": 1.0969, "num_input_tokens_seen": 994788928, "step": 5510 }, { "epoch": 0.6033005829387778, "grad_norm": 1.229063945723827, "learning_rate": 1.7024767688237475e-05, "loss": 0.6186, "num_input_tokens_seen": 994947520, "step": 5511 }, { "epoch": 0.6034100550097157, "grad_norm": 1.4021450741434363, "learning_rate": 1.7016618802165607e-05, "loss": 0.8869, "num_input_tokens_seen": 995099392, "step": 5512 }, { "epoch": 0.6035195270806536, "grad_norm": 1.1884281145437912, "learning_rate": 1.7008470860512466e-05, "loss": 0.6334, "num_input_tokens_seen": 995279936, "step": 5513 }, { "epoch": 0.6036289991515914, "grad_norm": 1.2241448064210816, "learning_rate": 1.7000323864241953e-05, "loss": 0.5778, "num_input_tokens_seen": 995445472, "step": 5514 }, { "epoch": 0.6037384712225293, "grad_norm": 1.1965963910878086, "learning_rate": 1.699217781431782e-05, "loss": 0.7222, "num_input_tokens_seen": 995621312, "step": 5515 }, { "epoch": 0.6038479432934672, "grad_norm": 1.3034460591611612, "learning_rate": 1.6984032711703753e-05, "loss": 0.9345, "num_input_tokens_seen": 995824480, "step": 5516 }, { "epoch": 0.6039574153644052, "grad_norm": 1.2014992368634578, "learning_rate": 1.697588855736329e-05, "loss": 0.6892, "num_input_tokens_seen": 996008832, "step": 5517 }, { "epoch": 0.6040668874353431, "grad_norm": 1.4279203906909093, "learning_rate": 1.6967745352259868e-05, "loss": 0.9348, "num_input_tokens_seen": 996161152, "step": 5518 }, { "epoch": 0.604176359506281, "grad_norm": 1.2830109938687912, "learning_rate": 1.6959603097356823e-05, "loss": 0.7976, "num_input_tokens_seen": 996343264, "step": 5519 }, { "epoch": 0.6042858315772188, "grad_norm": 1.2917056641793148, "learning_rate": 1.6951461793617354e-05, "loss": 0.6922, "num_input_tokens_seen": 996519552, "step": 5520 }, { "epoch": 0.6043953036481567, "grad_norm": 1.421980717679831, "learning_rate": 1.694332144200458e-05, "loss": 0.6488, "num_input_tokens_seen": 996677248, "step": 5521 }, { "epoch": 0.6045047757190947, "grad_norm": 1.31602286803601, "learning_rate": 1.693518204348146e-05, "loss": 1.0386, "num_input_tokens_seen": 996856224, "step": 5522 }, { "epoch": 0.6046142477900326, "grad_norm": 1.4426259408644804, "learning_rate": 1.6927043599010906e-05, "loss": 0.8737, "num_input_tokens_seen": 996998912, "step": 5523 }, { "epoch": 0.6047237198609705, "grad_norm": 1.263637960883903, "learning_rate": 1.691890610955565e-05, "loss": 0.8438, "num_input_tokens_seen": 997210368, "step": 5524 }, { "epoch": 0.6048331919319083, "grad_norm": 1.2695297945454398, "learning_rate": 1.691076957607835e-05, "loss": 0.6916, "num_input_tokens_seen": 997382400, "step": 5525 }, { "epoch": 0.6049426640028462, "grad_norm": 1.3382853350379358, "learning_rate": 1.690263399954155e-05, "loss": 0.6764, "num_input_tokens_seen": 997560256, "step": 5526 }, { "epoch": 0.6050521360737842, "grad_norm": 1.3368967638923763, "learning_rate": 1.689449938090767e-05, "loss": 1.0718, "num_input_tokens_seen": 997747968, "step": 5527 }, { "epoch": 0.6051616081447221, "grad_norm": 1.3030328365822827, "learning_rate": 1.688636572113902e-05, "loss": 0.8101, "num_input_tokens_seen": 997946208, "step": 5528 }, { "epoch": 0.60527108021566, "grad_norm": 1.125413130032348, "learning_rate": 1.6878233021197783e-05, "loss": 0.5713, "num_input_tokens_seen": 998106816, "step": 5529 }, { "epoch": 0.6053805522865979, "grad_norm": 1.3076984588232408, "learning_rate": 1.687010128204607e-05, "loss": 0.7715, "num_input_tokens_seen": 998271904, "step": 5530 }, { "epoch": 0.6054900243575357, "grad_norm": 1.2258599571502335, "learning_rate": 1.6861970504645818e-05, "loss": 0.6131, "num_input_tokens_seen": 998465440, "step": 5531 }, { "epoch": 0.6055994964284737, "grad_norm": 1.4064222442256573, "learning_rate": 1.6853840689958904e-05, "loss": 0.7166, "num_input_tokens_seen": 998603424, "step": 5532 }, { "epoch": 0.6057089684994116, "grad_norm": 1.121442202336825, "learning_rate": 1.684571183894707e-05, "loss": 0.5986, "num_input_tokens_seen": 998796960, "step": 5533 }, { "epoch": 0.6058184405703495, "grad_norm": 1.1958123452253226, "learning_rate": 1.6837583952571927e-05, "loss": 0.7069, "num_input_tokens_seen": 998978848, "step": 5534 }, { "epoch": 0.6059279126412874, "grad_norm": 1.1669731872799487, "learning_rate": 1.682945703179501e-05, "loss": 0.6636, "num_input_tokens_seen": 999136096, "step": 5535 }, { "epoch": 0.6060373847122253, "grad_norm": 1.4265291943375584, "learning_rate": 1.6821331077577697e-05, "loss": 0.716, "num_input_tokens_seen": 999311488, "step": 5536 }, { "epoch": 0.6061468567831632, "grad_norm": 1.2856473593095794, "learning_rate": 1.6813206090881285e-05, "loss": 0.8667, "num_input_tokens_seen": 999493824, "step": 5537 }, { "epoch": 0.6062563288541011, "grad_norm": 1.4598075862709874, "learning_rate": 1.680508207266694e-05, "loss": 0.8186, "num_input_tokens_seen": 999650176, "step": 5538 }, { "epoch": 0.606365800925039, "grad_norm": 1.2566736882786838, "learning_rate": 1.6796959023895717e-05, "loss": 0.6468, "num_input_tokens_seen": 999826912, "step": 5539 }, { "epoch": 0.6064752729959769, "grad_norm": 1.3997875069391157, "learning_rate": 1.678883694552857e-05, "loss": 0.8095, "num_input_tokens_seen": 999996928, "step": 5540 }, { "epoch": 0.6065847450669148, "grad_norm": 1.143861802687084, "learning_rate": 1.678071583852631e-05, "loss": 0.8928, "num_input_tokens_seen": 1000215104, "step": 5541 }, { "epoch": 0.6066942171378527, "grad_norm": 1.4138839356028177, "learning_rate": 1.677259570384966e-05, "loss": 1.0532, "num_input_tokens_seen": 1000430144, "step": 5542 }, { "epoch": 0.6068036892087906, "grad_norm": 1.1887660555552522, "learning_rate": 1.67644765424592e-05, "loss": 0.5697, "num_input_tokens_seen": 1000591424, "step": 5543 }, { "epoch": 0.6069131612797285, "grad_norm": 1.3410701199489432, "learning_rate": 1.6756358355315433e-05, "loss": 0.7783, "num_input_tokens_seen": 1000786528, "step": 5544 }, { "epoch": 0.6070226333506664, "grad_norm": 1.2441660640397414, "learning_rate": 1.6748241143378702e-05, "loss": 0.8278, "num_input_tokens_seen": 1000958112, "step": 5545 }, { "epoch": 0.6071321054216043, "grad_norm": 1.3475924711753662, "learning_rate": 1.6740124907609266e-05, "loss": 0.8167, "num_input_tokens_seen": 1001154784, "step": 5546 }, { "epoch": 0.6072415774925423, "grad_norm": 1.1727385327508986, "learning_rate": 1.6732009648967272e-05, "loss": 0.7418, "num_input_tokens_seen": 1001326592, "step": 5547 }, { "epoch": 0.6073510495634801, "grad_norm": 1.237287489629271, "learning_rate": 1.6723895368412732e-05, "loss": 0.6757, "num_input_tokens_seen": 1001510496, "step": 5548 }, { "epoch": 0.607460521634418, "grad_norm": 1.2392730110745507, "learning_rate": 1.6715782066905544e-05, "loss": 0.7962, "num_input_tokens_seen": 1001681856, "step": 5549 }, { "epoch": 0.6075699937053559, "grad_norm": 1.165297192662006, "learning_rate": 1.6707669745405502e-05, "loss": 0.9985, "num_input_tokens_seen": 1001895104, "step": 5550 }, { "epoch": 0.6076794657762938, "grad_norm": 1.1112708963632878, "learning_rate": 1.6699558404872272e-05, "loss": 0.9074, "num_input_tokens_seen": 1002098048, "step": 5551 }, { "epoch": 0.6077889378472318, "grad_norm": 1.2547264387613377, "learning_rate": 1.6691448046265426e-05, "loss": 0.7959, "num_input_tokens_seen": 1002267840, "step": 5552 }, { "epoch": 0.6078984099181697, "grad_norm": 1.120082279316553, "learning_rate": 1.6683338670544384e-05, "loss": 0.6694, "num_input_tokens_seen": 1002453984, "step": 5553 }, { "epoch": 0.6080078819891075, "grad_norm": 1.2206290993179734, "learning_rate": 1.6675230278668484e-05, "loss": 0.9039, "num_input_tokens_seen": 1002635648, "step": 5554 }, { "epoch": 0.6081173540600454, "grad_norm": 1.3098709933262167, "learning_rate": 1.6667122871596925e-05, "loss": 0.5707, "num_input_tokens_seen": 1002845760, "step": 5555 }, { "epoch": 0.6082268261309833, "grad_norm": 1.3461936200732159, "learning_rate": 1.6659016450288805e-05, "loss": 0.8132, "num_input_tokens_seen": 1003031904, "step": 5556 }, { "epoch": 0.6083362982019213, "grad_norm": 1.1559307069932054, "learning_rate": 1.665091101570309e-05, "loss": 0.6512, "num_input_tokens_seen": 1003211776, "step": 5557 }, { "epoch": 0.6084457702728592, "grad_norm": 1.2653541275077083, "learning_rate": 1.6642806568798635e-05, "loss": 0.5813, "num_input_tokens_seen": 1003377536, "step": 5558 }, { "epoch": 0.608555242343797, "grad_norm": 1.208230025161257, "learning_rate": 1.6634703110534195e-05, "loss": 0.7503, "num_input_tokens_seen": 1003571072, "step": 5559 }, { "epoch": 0.6086647144147349, "grad_norm": 1.2228088771143453, "learning_rate": 1.662660064186838e-05, "loss": 0.5809, "num_input_tokens_seen": 1003759904, "step": 5560 }, { "epoch": 0.6087741864856728, "grad_norm": 1.3501901439698603, "learning_rate": 1.661849916375971e-05, "loss": 0.7881, "num_input_tokens_seen": 1003934624, "step": 5561 }, { "epoch": 0.6088836585566108, "grad_norm": 1.2823650346348883, "learning_rate": 1.6610398677166555e-05, "loss": 0.554, "num_input_tokens_seen": 1004079104, "step": 5562 }, { "epoch": 0.6089931306275487, "grad_norm": 1.2520718094247294, "learning_rate": 1.660229918304721e-05, "loss": 0.6521, "num_input_tokens_seen": 1004269504, "step": 5563 }, { "epoch": 0.6091026026984866, "grad_norm": 1.1919972439164424, "learning_rate": 1.659420068235981e-05, "loss": 0.705, "num_input_tokens_seen": 1004470880, "step": 5564 }, { "epoch": 0.6092120747694244, "grad_norm": 1.1559764950979468, "learning_rate": 1.6586103176062397e-05, "loss": 0.5571, "num_input_tokens_seen": 1004621184, "step": 5565 }, { "epoch": 0.6093215468403623, "grad_norm": 1.3032866398300058, "learning_rate": 1.6578006665112906e-05, "loss": 0.8197, "num_input_tokens_seen": 1004827712, "step": 5566 }, { "epoch": 0.6094310189113002, "grad_norm": 1.1640509790452054, "learning_rate": 1.6569911150469113e-05, "loss": 0.5778, "num_input_tokens_seen": 1005021024, "step": 5567 }, { "epoch": 0.6095404909822382, "grad_norm": 1.3685230380493023, "learning_rate": 1.6561816633088723e-05, "loss": 0.8989, "num_input_tokens_seen": 1005199552, "step": 5568 }, { "epoch": 0.6096499630531761, "grad_norm": 1.2864687168870892, "learning_rate": 1.6553723113929296e-05, "loss": 0.9195, "num_input_tokens_seen": 1005363968, "step": 5569 }, { "epoch": 0.609759435124114, "grad_norm": 1.3362492426271495, "learning_rate": 1.6545630593948276e-05, "loss": 0.7277, "num_input_tokens_seen": 1005554368, "step": 5570 }, { "epoch": 0.6098689071950518, "grad_norm": 1.1472120429032033, "learning_rate": 1.6537539074102988e-05, "loss": 0.6542, "num_input_tokens_seen": 1005733568, "step": 5571 }, { "epoch": 0.6099783792659897, "grad_norm": 1.2393284650521972, "learning_rate": 1.6529448555350653e-05, "loss": 0.6329, "num_input_tokens_seen": 1005929344, "step": 5572 }, { "epoch": 0.6100878513369277, "grad_norm": 1.1863476030335243, "learning_rate": 1.652135903864837e-05, "loss": 0.6468, "num_input_tokens_seen": 1006120192, "step": 5573 }, { "epoch": 0.6101973234078656, "grad_norm": 1.2807552033526615, "learning_rate": 1.6513270524953092e-05, "loss": 0.8085, "num_input_tokens_seen": 1006292672, "step": 5574 }, { "epoch": 0.6103067954788035, "grad_norm": 1.2225325159222484, "learning_rate": 1.65051830152217e-05, "loss": 0.9803, "num_input_tokens_seen": 1006487104, "step": 5575 }, { "epoch": 0.6104162675497413, "grad_norm": 1.229648910362894, "learning_rate": 1.6497096510410908e-05, "loss": 0.7258, "num_input_tokens_seen": 1006673696, "step": 5576 }, { "epoch": 0.6105257396206792, "grad_norm": 1.4313375821032017, "learning_rate": 1.648901101147735e-05, "loss": 0.9891, "num_input_tokens_seen": 1006853344, "step": 5577 }, { "epoch": 0.6106352116916172, "grad_norm": 1.3220490992542329, "learning_rate": 1.6480926519377514e-05, "loss": 0.7626, "num_input_tokens_seen": 1007018656, "step": 5578 }, { "epoch": 0.6107446837625551, "grad_norm": 1.2875026165833783, "learning_rate": 1.6472843035067784e-05, "loss": 1.0454, "num_input_tokens_seen": 1007218240, "step": 5579 }, { "epoch": 0.610854155833493, "grad_norm": 1.1678846450173848, "learning_rate": 1.6464760559504424e-05, "loss": 0.7578, "num_input_tokens_seen": 1007417152, "step": 5580 }, { "epoch": 0.6109636279044309, "grad_norm": 1.342485897835484, "learning_rate": 1.6456679093643572e-05, "loss": 0.8691, "num_input_tokens_seen": 1007611584, "step": 5581 }, { "epoch": 0.6110730999753687, "grad_norm": 1.3088618672826586, "learning_rate": 1.644859863844126e-05, "loss": 0.6419, "num_input_tokens_seen": 1007767712, "step": 5582 }, { "epoch": 0.6111825720463067, "grad_norm": 1.0986845690933733, "learning_rate": 1.644051919485337e-05, "loss": 0.6541, "num_input_tokens_seen": 1007960800, "step": 5583 }, { "epoch": 0.6112920441172446, "grad_norm": 1.17228452520442, "learning_rate": 1.643244076383571e-05, "loss": 0.6812, "num_input_tokens_seen": 1008135072, "step": 5584 }, { "epoch": 0.6114015161881825, "grad_norm": 1.4457958341164094, "learning_rate": 1.6424363346343912e-05, "loss": 0.9242, "num_input_tokens_seen": 1008317408, "step": 5585 }, { "epoch": 0.6115109882591204, "grad_norm": 1.3247774009138462, "learning_rate": 1.641628694333354e-05, "loss": 0.7242, "num_input_tokens_seen": 1008475776, "step": 5586 }, { "epoch": 0.6116204603300583, "grad_norm": 1.1809544107680447, "learning_rate": 1.6408211555760028e-05, "loss": 0.656, "num_input_tokens_seen": 1008647136, "step": 5587 }, { "epoch": 0.6117299324009962, "grad_norm": 1.1537372504095043, "learning_rate": 1.6400137184578647e-05, "loss": 0.6507, "num_input_tokens_seen": 1008812896, "step": 5588 }, { "epoch": 0.6118394044719341, "grad_norm": 1.2008885987743658, "learning_rate": 1.639206383074461e-05, "loss": 0.7057, "num_input_tokens_seen": 1008995680, "step": 5589 }, { "epoch": 0.611948876542872, "grad_norm": 1.114416888396981, "learning_rate": 1.6383991495212957e-05, "loss": 0.8946, "num_input_tokens_seen": 1009209600, "step": 5590 }, { "epoch": 0.6120583486138099, "grad_norm": 1.2602041501462278, "learning_rate": 1.6375920178938646e-05, "loss": 0.6025, "num_input_tokens_seen": 1009361696, "step": 5591 }, { "epoch": 0.6121678206847478, "grad_norm": 1.3082032033670241, "learning_rate": 1.6367849882876485e-05, "loss": 0.7432, "num_input_tokens_seen": 1009551200, "step": 5592 }, { "epoch": 0.6122772927556857, "grad_norm": 1.206706426177961, "learning_rate": 1.635978060798118e-05, "loss": 0.937, "num_input_tokens_seen": 1009746976, "step": 5593 }, { "epoch": 0.6123867648266236, "grad_norm": 1.2226579075031196, "learning_rate": 1.6351712355207323e-05, "loss": 0.8917, "num_input_tokens_seen": 1009928192, "step": 5594 }, { "epoch": 0.6124962368975615, "grad_norm": 1.311239636492072, "learning_rate": 1.6343645125509348e-05, "loss": 0.8546, "num_input_tokens_seen": 1010113216, "step": 5595 }, { "epoch": 0.6126057089684994, "grad_norm": 1.3037401517530829, "learning_rate": 1.633557891984162e-05, "loss": 0.7644, "num_input_tokens_seen": 1010279648, "step": 5596 }, { "epoch": 0.6127151810394373, "grad_norm": 1.0758226662325383, "learning_rate": 1.632751373915833e-05, "loss": 0.551, "num_input_tokens_seen": 1010474976, "step": 5597 }, { "epoch": 0.6128246531103753, "grad_norm": 1.1968217185488803, "learning_rate": 1.6319449584413596e-05, "loss": 0.7171, "num_input_tokens_seen": 1010668960, "step": 5598 }, { "epoch": 0.6129341251813131, "grad_norm": 1.2759633669542414, "learning_rate": 1.6311386456561373e-05, "loss": 0.7969, "num_input_tokens_seen": 1010881760, "step": 5599 }, { "epoch": 0.613043597252251, "grad_norm": 1.4288095582311264, "learning_rate": 1.6303324356555523e-05, "loss": 0.8477, "num_input_tokens_seen": 1011052224, "step": 5600 }, { "epoch": 0.6131530693231889, "grad_norm": 1.3012495143130933, "learning_rate": 1.6295263285349776e-05, "loss": 0.7495, "num_input_tokens_seen": 1011230752, "step": 5601 }, { "epoch": 0.6132625413941268, "grad_norm": 1.307876891598468, "learning_rate": 1.628720324389774e-05, "loss": 0.5814, "num_input_tokens_seen": 1011392704, "step": 5602 }, { "epoch": 0.6133720134650648, "grad_norm": 1.400366747026097, "learning_rate": 1.6279144233152922e-05, "loss": 0.9052, "num_input_tokens_seen": 1011561376, "step": 5603 }, { "epoch": 0.6134814855360027, "grad_norm": 1.164410942206802, "learning_rate": 1.6271086254068653e-05, "loss": 0.7543, "num_input_tokens_seen": 1011760512, "step": 5604 }, { "epoch": 0.6135909576069405, "grad_norm": 1.420829556293259, "learning_rate": 1.6263029307598198e-05, "loss": 0.8816, "num_input_tokens_seen": 1011946208, "step": 5605 }, { "epoch": 0.6137004296778784, "grad_norm": 1.2391932164785355, "learning_rate": 1.6254973394694672e-05, "loss": 0.8905, "num_input_tokens_seen": 1012138624, "step": 5606 }, { "epoch": 0.6138099017488163, "grad_norm": 1.1225338502125657, "learning_rate": 1.6246918516311072e-05, "loss": 0.633, "num_input_tokens_seen": 1012319840, "step": 5607 }, { "epoch": 0.6139193738197543, "grad_norm": 1.2878454763350873, "learning_rate": 1.623886467340029e-05, "loss": 0.9574, "num_input_tokens_seen": 1012523456, "step": 5608 }, { "epoch": 0.6140288458906922, "grad_norm": 1.2966108397811393, "learning_rate": 1.6230811866915057e-05, "loss": 0.9073, "num_input_tokens_seen": 1012695264, "step": 5609 }, { "epoch": 0.61413831796163, "grad_norm": 1.3307152565106546, "learning_rate": 1.622276009780802e-05, "loss": 0.5939, "num_input_tokens_seen": 1012852288, "step": 5610 }, { "epoch": 0.6142477900325679, "grad_norm": 1.438106989211026, "learning_rate": 1.621470936703169e-05, "loss": 0.8305, "num_input_tokens_seen": 1013010432, "step": 5611 }, { "epoch": 0.6143572621035058, "grad_norm": 1.2315991250404552, "learning_rate": 1.6206659675538445e-05, "loss": 0.6938, "num_input_tokens_seen": 1013190976, "step": 5612 }, { "epoch": 0.6144667341744438, "grad_norm": 1.453244522265524, "learning_rate": 1.6198611024280543e-05, "loss": 0.8629, "num_input_tokens_seen": 1013346432, "step": 5613 }, { "epoch": 0.6145762062453817, "grad_norm": 1.2766626624566637, "learning_rate": 1.6190563414210132e-05, "loss": 0.5833, "num_input_tokens_seen": 1013514432, "step": 5614 }, { "epoch": 0.6146856783163196, "grad_norm": 1.3852517978785286, "learning_rate": 1.6182516846279237e-05, "loss": 1.0842, "num_input_tokens_seen": 1013679296, "step": 5615 }, { "epoch": 0.6147951503872574, "grad_norm": 1.2978310335830556, "learning_rate": 1.6174471321439737e-05, "loss": 0.8542, "num_input_tokens_seen": 1013856928, "step": 5616 }, { "epoch": 0.6149046224581953, "grad_norm": 1.0875989430459243, "learning_rate": 1.6166426840643415e-05, "loss": 0.7794, "num_input_tokens_seen": 1014039040, "step": 5617 }, { "epoch": 0.6150140945291332, "grad_norm": 1.1013320248871605, "learning_rate": 1.6158383404841902e-05, "loss": 0.8752, "num_input_tokens_seen": 1014261472, "step": 5618 }, { "epoch": 0.6151235666000712, "grad_norm": 1.3257749522502988, "learning_rate": 1.615034101498673e-05, "loss": 0.6257, "num_input_tokens_seen": 1014436192, "step": 5619 }, { "epoch": 0.6152330386710091, "grad_norm": 1.7451288685036805, "learning_rate": 1.6142299672029307e-05, "loss": 0.7707, "num_input_tokens_seen": 1014618752, "step": 5620 }, { "epoch": 0.615342510741947, "grad_norm": 1.3244298806010248, "learning_rate": 1.613425937692089e-05, "loss": 0.9942, "num_input_tokens_seen": 1014798848, "step": 5621 }, { "epoch": 0.6154519828128848, "grad_norm": 1.2437106696576385, "learning_rate": 1.6126220130612646e-05, "loss": 0.788, "num_input_tokens_seen": 1014986560, "step": 5622 }, { "epoch": 0.6155614548838227, "grad_norm": 1.2222606274803454, "learning_rate": 1.6118181934055593e-05, "loss": 0.5454, "num_input_tokens_seen": 1015159264, "step": 5623 }, { "epoch": 0.6156709269547607, "grad_norm": 1.2214364496533117, "learning_rate": 1.611014478820064e-05, "loss": 0.8409, "num_input_tokens_seen": 1015347872, "step": 5624 }, { "epoch": 0.6157803990256986, "grad_norm": 1.3503667580479182, "learning_rate": 1.6102108693998568e-05, "loss": 0.7603, "num_input_tokens_seen": 1015498400, "step": 5625 }, { "epoch": 0.6158898710966365, "grad_norm": 1.1959899777595318, "learning_rate": 1.6094073652400014e-05, "loss": 0.652, "num_input_tokens_seen": 1015688576, "step": 5626 }, { "epoch": 0.6159993431675743, "grad_norm": 1.3067928390910404, "learning_rate": 1.608603966435554e-05, "loss": 0.8421, "num_input_tokens_seen": 1015868448, "step": 5627 }, { "epoch": 0.6161088152385122, "grad_norm": 1.1101403618466894, "learning_rate": 1.607800673081552e-05, "loss": 0.6916, "num_input_tokens_seen": 1016054816, "step": 5628 }, { "epoch": 0.6162182873094502, "grad_norm": 1.2855190704130015, "learning_rate": 1.6069974852730263e-05, "loss": 0.7484, "num_input_tokens_seen": 1016232224, "step": 5629 }, { "epoch": 0.6163277593803881, "grad_norm": 1.1758515599922337, "learning_rate": 1.6061944031049893e-05, "loss": 0.6238, "num_input_tokens_seen": 1016420384, "step": 5630 }, { "epoch": 0.616437231451326, "grad_norm": 1.1796645610675918, "learning_rate": 1.605391426672447e-05, "loss": 0.8591, "num_input_tokens_seen": 1016639904, "step": 5631 }, { "epoch": 0.6165467035222639, "grad_norm": 1.330345586249244, "learning_rate": 1.604588556070388e-05, "loss": 0.8722, "num_input_tokens_seen": 1016802976, "step": 5632 }, { "epoch": 0.6166561755932017, "grad_norm": 1.2591786991220237, "learning_rate": 1.6037857913937908e-05, "loss": 0.7539, "num_input_tokens_seen": 1016962688, "step": 5633 }, { "epoch": 0.6167656476641397, "grad_norm": 1.331177916388552, "learning_rate": 1.6029831327376217e-05, "loss": 0.7009, "num_input_tokens_seen": 1017139424, "step": 5634 }, { "epoch": 0.6168751197350776, "grad_norm": 1.347543684086774, "learning_rate": 1.6021805801968325e-05, "loss": 0.8175, "num_input_tokens_seen": 1017316384, "step": 5635 }, { "epoch": 0.6169845918060155, "grad_norm": 1.2696744397914344, "learning_rate": 1.6013781338663654e-05, "loss": 0.7916, "num_input_tokens_seen": 1017518656, "step": 5636 }, { "epoch": 0.6170940638769534, "grad_norm": 1.2527305819457262, "learning_rate": 1.6005757938411466e-05, "loss": 0.6901, "num_input_tokens_seen": 1017689792, "step": 5637 }, { "epoch": 0.6172035359478913, "grad_norm": 1.3430425866618685, "learning_rate": 1.5997735602160923e-05, "loss": 0.7235, "num_input_tokens_seen": 1017823296, "step": 5638 }, { "epoch": 0.6173130080188292, "grad_norm": 1.2247758718710309, "learning_rate": 1.5989714330861043e-05, "loss": 0.7091, "num_input_tokens_seen": 1018016832, "step": 5639 }, { "epoch": 0.6174224800897671, "grad_norm": 1.2242365365358205, "learning_rate": 1.5981694125460735e-05, "loss": 0.6641, "num_input_tokens_seen": 1018210816, "step": 5640 }, { "epoch": 0.617531952160705, "grad_norm": 1.1379898148535106, "learning_rate": 1.5973674986908778e-05, "loss": 0.7093, "num_input_tokens_seen": 1018406144, "step": 5641 }, { "epoch": 0.6176414242316429, "grad_norm": 1.150733003038729, "learning_rate": 1.596565691615381e-05, "loss": 0.5434, "num_input_tokens_seen": 1018583776, "step": 5642 }, { "epoch": 0.6177508963025808, "grad_norm": 1.2202523581793283, "learning_rate": 1.5957639914144358e-05, "loss": 1.1702, "num_input_tokens_seen": 1018783808, "step": 5643 }, { "epoch": 0.6178603683735187, "grad_norm": 1.1769086432446945, "learning_rate": 1.5949623981828815e-05, "loss": 0.6512, "num_input_tokens_seen": 1018968608, "step": 5644 }, { "epoch": 0.6179698404444566, "grad_norm": 1.4329593928427473, "learning_rate": 1.594160912015546e-05, "loss": 0.866, "num_input_tokens_seen": 1019151392, "step": 5645 }, { "epoch": 0.6180793125153945, "grad_norm": 1.2180546953514093, "learning_rate": 1.5933595330072425e-05, "loss": 0.755, "num_input_tokens_seen": 1019284896, "step": 5646 }, { "epoch": 0.6181887845863324, "grad_norm": 1.2684179970951688, "learning_rate": 1.5925582612527728e-05, "loss": 0.8724, "num_input_tokens_seen": 1019443936, "step": 5647 }, { "epoch": 0.6182982566572703, "grad_norm": 1.1644969871780122, "learning_rate": 1.591757096846927e-05, "loss": 0.588, "num_input_tokens_seen": 1019630528, "step": 5648 }, { "epoch": 0.6184077287282083, "grad_norm": 1.16623025065849, "learning_rate": 1.590956039884479e-05, "loss": 0.6737, "num_input_tokens_seen": 1019815552, "step": 5649 }, { "epoch": 0.6185172007991461, "grad_norm": 1.317604854978732, "learning_rate": 1.5901550904601952e-05, "loss": 1.056, "num_input_tokens_seen": 1019979744, "step": 5650 }, { "epoch": 0.618626672870084, "grad_norm": 1.495365214440103, "learning_rate": 1.589354248668824e-05, "loss": 0.8429, "num_input_tokens_seen": 1020164992, "step": 5651 }, { "epoch": 0.6187361449410219, "grad_norm": 1.2477653078345157, "learning_rate": 1.5885535146051046e-05, "loss": 0.6074, "num_input_tokens_seen": 1020344416, "step": 5652 }, { "epoch": 0.6188456170119598, "grad_norm": 1.3226250705097073, "learning_rate": 1.587752888363762e-05, "loss": 1.0089, "num_input_tokens_seen": 1020546688, "step": 5653 }, { "epoch": 0.6189550890828978, "grad_norm": 1.343326457497454, "learning_rate": 1.5869523700395085e-05, "loss": 0.9002, "num_input_tokens_seen": 1020694080, "step": 5654 }, { "epoch": 0.6190645611538357, "grad_norm": 1.1860029421153895, "learning_rate": 1.5861519597270442e-05, "loss": 0.7451, "num_input_tokens_seen": 1020870368, "step": 5655 }, { "epoch": 0.6191740332247735, "grad_norm": 1.2542232694980453, "learning_rate": 1.5853516575210558e-05, "loss": 0.8295, "num_input_tokens_seen": 1021055392, "step": 5656 }, { "epoch": 0.6192835052957114, "grad_norm": 1.1180707734351123, "learning_rate": 1.5845514635162188e-05, "loss": 0.5985, "num_input_tokens_seen": 1021213088, "step": 5657 }, { "epoch": 0.6193929773666493, "grad_norm": 1.3517051577389008, "learning_rate": 1.5837513778071927e-05, "loss": 0.7283, "num_input_tokens_seen": 1021382880, "step": 5658 }, { "epoch": 0.6195024494375873, "grad_norm": 1.3045640532974545, "learning_rate": 1.5829514004886282e-05, "loss": 0.6655, "num_input_tokens_seen": 1021566112, "step": 5659 }, { "epoch": 0.6196119215085252, "grad_norm": 1.2733153302149953, "learning_rate": 1.582151531655159e-05, "loss": 1.0023, "num_input_tokens_seen": 1021722464, "step": 5660 }, { "epoch": 0.619721393579463, "grad_norm": 1.248479347339285, "learning_rate": 1.5813517714014087e-05, "loss": 0.9385, "num_input_tokens_seen": 1021934816, "step": 5661 }, { "epoch": 0.6198308656504009, "grad_norm": 1.3466773859547954, "learning_rate": 1.5805521198219886e-05, "loss": 0.8711, "num_input_tokens_seen": 1022080192, "step": 5662 }, { "epoch": 0.6199403377213388, "grad_norm": 1.2031408036420204, "learning_rate": 1.579752577011494e-05, "loss": 0.6519, "num_input_tokens_seen": 1022259840, "step": 5663 }, { "epoch": 0.6200498097922768, "grad_norm": 1.3286860459045782, "learning_rate": 1.578953143064511e-05, "loss": 0.9084, "num_input_tokens_seen": 1022446208, "step": 5664 }, { "epoch": 0.6201592818632147, "grad_norm": 1.222703585454879, "learning_rate": 1.57815381807561e-05, "loss": 0.7223, "num_input_tokens_seen": 1022652064, "step": 5665 }, { "epoch": 0.6202687539341526, "grad_norm": 1.2374627329291532, "learning_rate": 1.577354602139351e-05, "loss": 0.6946, "num_input_tokens_seen": 1022787808, "step": 5666 }, { "epoch": 0.6203782260050904, "grad_norm": 1.1542719339687302, "learning_rate": 1.5765554953502777e-05, "loss": 0.7203, "num_input_tokens_seen": 1022960512, "step": 5667 }, { "epoch": 0.6204876980760283, "grad_norm": 1.2303476909849407, "learning_rate": 1.575756497802924e-05, "loss": 0.726, "num_input_tokens_seen": 1023137248, "step": 5668 }, { "epoch": 0.6205971701469662, "grad_norm": 1.321539494929196, "learning_rate": 1.574957609591811e-05, "loss": 0.8754, "num_input_tokens_seen": 1023316224, "step": 5669 }, { "epoch": 0.6207066422179042, "grad_norm": 1.1611615587274273, "learning_rate": 1.574158830811443e-05, "loss": 0.7545, "num_input_tokens_seen": 1023516480, "step": 5670 }, { "epoch": 0.6208161142888421, "grad_norm": 1.3066213289897386, "learning_rate": 1.5733601615563163e-05, "loss": 0.7692, "num_input_tokens_seen": 1023724128, "step": 5671 }, { "epoch": 0.62092558635978, "grad_norm": 1.300732860867979, "learning_rate": 1.5725616019209106e-05, "loss": 0.7723, "num_input_tokens_seen": 1023901984, "step": 5672 }, { "epoch": 0.6210350584307178, "grad_norm": 1.3241290396851388, "learning_rate": 1.5717631519996947e-05, "loss": 0.8368, "num_input_tokens_seen": 1024076928, "step": 5673 }, { "epoch": 0.6211445305016557, "grad_norm": 1.3538305403402628, "learning_rate": 1.5709648118871232e-05, "loss": 0.8955, "num_input_tokens_seen": 1024263968, "step": 5674 }, { "epoch": 0.6212540025725937, "grad_norm": 1.2472319472079394, "learning_rate": 1.5701665816776385e-05, "loss": 0.7904, "num_input_tokens_seen": 1024433760, "step": 5675 }, { "epoch": 0.6213634746435316, "grad_norm": 1.35942042483147, "learning_rate": 1.5693684614656697e-05, "loss": 0.8252, "num_input_tokens_seen": 1024613856, "step": 5676 }, { "epoch": 0.6214729467144695, "grad_norm": 1.2600731762572404, "learning_rate": 1.568570451345632e-05, "loss": 0.6227, "num_input_tokens_seen": 1024800672, "step": 5677 }, { "epoch": 0.6215824187854073, "grad_norm": 1.431806734240239, "learning_rate": 1.567772551411931e-05, "loss": 0.9248, "num_input_tokens_seen": 1024983232, "step": 5678 }, { "epoch": 0.6216918908563452, "grad_norm": 1.2515179953597753, "learning_rate": 1.5669747617589535e-05, "loss": 0.7289, "num_input_tokens_seen": 1025174528, "step": 5679 }, { "epoch": 0.6218013629272832, "grad_norm": 1.292188011086304, "learning_rate": 1.5661770824810785e-05, "loss": 0.6663, "num_input_tokens_seen": 1025353056, "step": 5680 }, { "epoch": 0.6219108349982211, "grad_norm": 1.1719218944067757, "learning_rate": 1.5653795136726705e-05, "loss": 0.6899, "num_input_tokens_seen": 1025542784, "step": 5681 }, { "epoch": 0.622020307069159, "grad_norm": 1.2631692019226293, "learning_rate": 1.5645820554280783e-05, "loss": 0.8408, "num_input_tokens_seen": 1025711456, "step": 5682 }, { "epoch": 0.6221297791400969, "grad_norm": 1.3060164393373217, "learning_rate": 1.5637847078416413e-05, "loss": 0.8344, "num_input_tokens_seen": 1025916640, "step": 5683 }, { "epoch": 0.6222392512110347, "grad_norm": 1.3769400088720394, "learning_rate": 1.562987471007683e-05, "loss": 0.9935, "num_input_tokens_seen": 1026105696, "step": 5684 }, { "epoch": 0.6223487232819727, "grad_norm": 1.409583020136467, "learning_rate": 1.5621903450205162e-05, "loss": 0.8621, "num_input_tokens_seen": 1026285792, "step": 5685 }, { "epoch": 0.6224581953529106, "grad_norm": 1.3653619840302416, "learning_rate": 1.561393329974438e-05, "loss": 0.8591, "num_input_tokens_seen": 1026441472, "step": 5686 }, { "epoch": 0.6225676674238485, "grad_norm": 1.3965716568140254, "learning_rate": 1.560596425963735e-05, "loss": 0.8509, "num_input_tokens_seen": 1026596032, "step": 5687 }, { "epoch": 0.6226771394947864, "grad_norm": 1.3614220826222085, "learning_rate": 1.559799633082679e-05, "loss": 0.9961, "num_input_tokens_seen": 1026760448, "step": 5688 }, { "epoch": 0.6227866115657243, "grad_norm": 1.1771511575969018, "learning_rate": 1.559002951425529e-05, "loss": 0.6539, "num_input_tokens_seen": 1026942336, "step": 5689 }, { "epoch": 0.6228960836366622, "grad_norm": 1.2553220937417835, "learning_rate": 1.5582063810865315e-05, "loss": 0.6516, "num_input_tokens_seen": 1027104736, "step": 5690 }, { "epoch": 0.6230055557076001, "grad_norm": 1.3364964258479413, "learning_rate": 1.557409922159918e-05, "loss": 0.752, "num_input_tokens_seen": 1027262208, "step": 5691 }, { "epoch": 0.623115027778538, "grad_norm": 1.301251254928526, "learning_rate": 1.5566135747399097e-05, "loss": 0.7543, "num_input_tokens_seen": 1027408032, "step": 5692 }, { "epoch": 0.6232244998494759, "grad_norm": 1.2823097219469144, "learning_rate": 1.555817338920711e-05, "loss": 0.7811, "num_input_tokens_seen": 1027578720, "step": 5693 }, { "epoch": 0.6233339719204138, "grad_norm": 1.1901143558436083, "learning_rate": 1.555021214796516e-05, "loss": 0.6918, "num_input_tokens_seen": 1027769792, "step": 5694 }, { "epoch": 0.6234434439913517, "grad_norm": 1.2240317410579558, "learning_rate": 1.5542252024615056e-05, "loss": 0.9691, "num_input_tokens_seen": 1027987968, "step": 5695 }, { "epoch": 0.6235529160622896, "grad_norm": 1.135150522426325, "learning_rate": 1.5534293020098454e-05, "loss": 0.7457, "num_input_tokens_seen": 1028158656, "step": 5696 }, { "epoch": 0.6236623881332275, "grad_norm": 1.1830101966676436, "learning_rate": 1.5526335135356895e-05, "loss": 0.8696, "num_input_tokens_seen": 1028331136, "step": 5697 }, { "epoch": 0.6237718602041654, "grad_norm": 1.1715979184962804, "learning_rate": 1.551837837133177e-05, "loss": 0.8051, "num_input_tokens_seen": 1028536096, "step": 5698 }, { "epoch": 0.6238813322751033, "grad_norm": 1.2058881266249106, "learning_rate": 1.5510422728964374e-05, "loss": 0.6651, "num_input_tokens_seen": 1028689536, "step": 5699 }, { "epoch": 0.6239908043460413, "grad_norm": 1.0963023785873702, "learning_rate": 1.5502468209195815e-05, "loss": 0.6841, "num_input_tokens_seen": 1028858880, "step": 5700 }, { "epoch": 0.6241002764169791, "grad_norm": 1.2976080879216894, "learning_rate": 1.549451481296711e-05, "loss": 1.0211, "num_input_tokens_seen": 1029048608, "step": 5701 }, { "epoch": 0.624209748487917, "grad_norm": 1.1683644928815315, "learning_rate": 1.548656254121914e-05, "loss": 0.8262, "num_input_tokens_seen": 1029253120, "step": 5702 }, { "epoch": 0.6243192205588549, "grad_norm": 1.3334867518295759, "learning_rate": 1.547861139489263e-05, "loss": 0.6913, "num_input_tokens_seen": 1029417312, "step": 5703 }, { "epoch": 0.6244286926297928, "grad_norm": 1.2753728657956704, "learning_rate": 1.5470661374928198e-05, "loss": 0.8695, "num_input_tokens_seen": 1029605472, "step": 5704 }, { "epoch": 0.6245381647007308, "grad_norm": 1.1599070991114033, "learning_rate": 1.5462712482266296e-05, "loss": 0.8171, "num_input_tokens_seen": 1029813120, "step": 5705 }, { "epoch": 0.6246476367716687, "grad_norm": 1.0599622151100188, "learning_rate": 1.545476471784728e-05, "loss": 0.7275, "num_input_tokens_seen": 1029999264, "step": 5706 }, { "epoch": 0.6247571088426065, "grad_norm": 1.209540761386151, "learning_rate": 1.544681808261135e-05, "loss": 0.8385, "num_input_tokens_seen": 1030182048, "step": 5707 }, { "epoch": 0.6248665809135444, "grad_norm": 1.268417057268282, "learning_rate": 1.5438872577498575e-05, "loss": 0.6031, "num_input_tokens_seen": 1030331680, "step": 5708 }, { "epoch": 0.6249760529844823, "grad_norm": 1.2888162551114073, "learning_rate": 1.5430928203448903e-05, "loss": 0.6648, "num_input_tokens_seen": 1030484000, "step": 5709 }, { "epoch": 0.6250855250554203, "grad_norm": 1.3448980882742112, "learning_rate": 1.5422984961402125e-05, "loss": 0.8431, "num_input_tokens_seen": 1030670592, "step": 5710 }, { "epoch": 0.6251949971263582, "grad_norm": 1.1913047872395433, "learning_rate": 1.541504285229793e-05, "loss": 0.8321, "num_input_tokens_seen": 1030849344, "step": 5711 }, { "epoch": 0.625304469197296, "grad_norm": 1.3850322945475666, "learning_rate": 1.5407101877075827e-05, "loss": 0.8123, "num_input_tokens_seen": 1031015552, "step": 5712 }, { "epoch": 0.6254139412682339, "grad_norm": 1.236460360766674, "learning_rate": 1.5399162036675245e-05, "loss": 0.7077, "num_input_tokens_seen": 1031200800, "step": 5713 }, { "epoch": 0.6255234133391718, "grad_norm": 1.2723433585894204, "learning_rate": 1.5391223332035434e-05, "loss": 0.6698, "num_input_tokens_seen": 1031360064, "step": 5714 }, { "epoch": 0.6256328854101098, "grad_norm": 1.266649420311285, "learning_rate": 1.5383285764095534e-05, "loss": 0.8364, "num_input_tokens_seen": 1031548896, "step": 5715 }, { "epoch": 0.6257423574810477, "grad_norm": 1.4520976218674926, "learning_rate": 1.5375349333794545e-05, "loss": 0.739, "num_input_tokens_seen": 1031703456, "step": 5716 }, { "epoch": 0.6258518295519856, "grad_norm": 1.3347263311419233, "learning_rate": 1.5367414042071333e-05, "loss": 0.8619, "num_input_tokens_seen": 1031851968, "step": 5717 }, { "epoch": 0.6259613016229234, "grad_norm": 1.3469440734569769, "learning_rate": 1.5359479889864625e-05, "loss": 0.9245, "num_input_tokens_seen": 1032032512, "step": 5718 }, { "epoch": 0.6260707736938613, "grad_norm": 1.2442398870086668, "learning_rate": 1.535154687811301e-05, "loss": 0.7887, "num_input_tokens_seen": 1032238592, "step": 5719 }, { "epoch": 0.6261802457647992, "grad_norm": 1.0741516092299104, "learning_rate": 1.534361500775497e-05, "loss": 0.6371, "num_input_tokens_seen": 1032438848, "step": 5720 }, { "epoch": 0.6262897178357372, "grad_norm": 1.2238363110475206, "learning_rate": 1.5335684279728798e-05, "loss": 0.8898, "num_input_tokens_seen": 1032620736, "step": 5721 }, { "epoch": 0.6263991899066751, "grad_norm": 1.3786629956652787, "learning_rate": 1.5327754694972705e-05, "loss": 0.8825, "num_input_tokens_seen": 1032786720, "step": 5722 }, { "epoch": 0.626508661977613, "grad_norm": 1.3167755333442097, "learning_rate": 1.531982625442475e-05, "loss": 0.8472, "num_input_tokens_seen": 1032952032, "step": 5723 }, { "epoch": 0.6266181340485508, "grad_norm": 1.2592627652800958, "learning_rate": 1.5311898959022832e-05, "loss": 0.7449, "num_input_tokens_seen": 1033103904, "step": 5724 }, { "epoch": 0.6267276061194887, "grad_norm": 1.1943248083885867, "learning_rate": 1.530397280970476e-05, "loss": 0.6532, "num_input_tokens_seen": 1033247936, "step": 5725 }, { "epoch": 0.6268370781904267, "grad_norm": 1.1640874360761142, "learning_rate": 1.5296047807408152e-05, "loss": 0.5987, "num_input_tokens_seen": 1033415936, "step": 5726 }, { "epoch": 0.6269465502613646, "grad_norm": 1.3474916661700154, "learning_rate": 1.5288123953070552e-05, "loss": 0.626, "num_input_tokens_seen": 1033545184, "step": 5727 }, { "epoch": 0.6270560223323025, "grad_norm": 1.1700987005580112, "learning_rate": 1.5280201247629312e-05, "loss": 0.5331, "num_input_tokens_seen": 1033710496, "step": 5728 }, { "epoch": 0.6271654944032403, "grad_norm": 1.301689403708195, "learning_rate": 1.527227969202169e-05, "loss": 0.8359, "num_input_tokens_seen": 1033872224, "step": 5729 }, { "epoch": 0.6272749664741782, "grad_norm": 1.3277369212715084, "learning_rate": 1.5264359287184783e-05, "loss": 0.7496, "num_input_tokens_seen": 1034061952, "step": 5730 }, { "epoch": 0.6273844385451162, "grad_norm": 1.2020000301081801, "learning_rate": 1.5256440034055557e-05, "loss": 0.7454, "num_input_tokens_seen": 1034254816, "step": 5731 }, { "epoch": 0.6274939106160541, "grad_norm": 1.2030225933138157, "learning_rate": 1.5248521933570858e-05, "loss": 0.7045, "num_input_tokens_seen": 1034429536, "step": 5732 }, { "epoch": 0.627603382686992, "grad_norm": 1.1003586704542292, "learning_rate": 1.5240604986667362e-05, "loss": 0.6308, "num_input_tokens_seen": 1034622624, "step": 5733 }, { "epoch": 0.6277128547579299, "grad_norm": 1.12461645157904, "learning_rate": 1.5232689194281652e-05, "loss": 0.5376, "num_input_tokens_seen": 1034785024, "step": 5734 }, { "epoch": 0.6278223268288677, "grad_norm": 1.3329885851692536, "learning_rate": 1.5224774557350125e-05, "loss": 0.8973, "num_input_tokens_seen": 1034974304, "step": 5735 }, { "epoch": 0.6279317988998057, "grad_norm": 1.2214915910635158, "learning_rate": 1.5216861076809083e-05, "loss": 0.7951, "num_input_tokens_seen": 1035155520, "step": 5736 }, { "epoch": 0.6280412709707436, "grad_norm": 1.3059796085401443, "learning_rate": 1.5208948753594677e-05, "loss": 0.7006, "num_input_tokens_seen": 1035318368, "step": 5737 }, { "epoch": 0.6281507430416815, "grad_norm": 1.337809049249452, "learning_rate": 1.5201037588642916e-05, "loss": 0.7764, "num_input_tokens_seen": 1035498240, "step": 5738 }, { "epoch": 0.6282602151126194, "grad_norm": 1.2910365854659567, "learning_rate": 1.5193127582889677e-05, "loss": 0.8466, "num_input_tokens_seen": 1035694240, "step": 5739 }, { "epoch": 0.6283696871835573, "grad_norm": 1.2153850956110763, "learning_rate": 1.5185218737270694e-05, "loss": 0.7353, "num_input_tokens_seen": 1035879936, "step": 5740 }, { "epoch": 0.6284791592544952, "grad_norm": 1.2735324660387535, "learning_rate": 1.5177311052721568e-05, "loss": 0.9621, "num_input_tokens_seen": 1036058464, "step": 5741 }, { "epoch": 0.6285886313254331, "grad_norm": 1.374741790108759, "learning_rate": 1.5169404530177778e-05, "loss": 0.8621, "num_input_tokens_seen": 1036263200, "step": 5742 }, { "epoch": 0.628698103396371, "grad_norm": 1.0878406013205844, "learning_rate": 1.5161499170574629e-05, "loss": 0.7314, "num_input_tokens_seen": 1036439264, "step": 5743 }, { "epoch": 0.6288075754673089, "grad_norm": 1.3508266584751922, "learning_rate": 1.515359497484733e-05, "loss": 0.8541, "num_input_tokens_seen": 1036611968, "step": 5744 }, { "epoch": 0.6289170475382468, "grad_norm": 1.3381798201925832, "learning_rate": 1.5145691943930914e-05, "loss": 0.6829, "num_input_tokens_seen": 1036769216, "step": 5745 }, { "epoch": 0.6290265196091847, "grad_norm": 1.278960112297949, "learning_rate": 1.513779007876031e-05, "loss": 0.8594, "num_input_tokens_seen": 1036967008, "step": 5746 }, { "epoch": 0.6291359916801226, "grad_norm": 1.272063983120745, "learning_rate": 1.5129889380270279e-05, "loss": 0.9295, "num_input_tokens_seen": 1037176672, "step": 5747 }, { "epoch": 0.6292454637510605, "grad_norm": 1.2119297023504834, "learning_rate": 1.5121989849395465e-05, "loss": 0.8273, "num_input_tokens_seen": 1037380288, "step": 5748 }, { "epoch": 0.6293549358219984, "grad_norm": 1.2761253588991037, "learning_rate": 1.5114091487070376e-05, "loss": 0.8229, "num_input_tokens_seen": 1037576512, "step": 5749 }, { "epoch": 0.6294644078929363, "grad_norm": 1.1979947880136412, "learning_rate": 1.5106194294229359e-05, "loss": 0.6341, "num_input_tokens_seen": 1037746304, "step": 5750 }, { "epoch": 0.6295738799638743, "grad_norm": 1.432966297677067, "learning_rate": 1.5098298271806649e-05, "loss": 1.0341, "num_input_tokens_seen": 1037949920, "step": 5751 }, { "epoch": 0.6296833520348121, "grad_norm": 1.0815905282820508, "learning_rate": 1.5090403420736315e-05, "loss": 0.6469, "num_input_tokens_seen": 1038119936, "step": 5752 }, { "epoch": 0.62979282410575, "grad_norm": 1.2195787302044814, "learning_rate": 1.5082509741952328e-05, "loss": 0.6648, "num_input_tokens_seen": 1038300480, "step": 5753 }, { "epoch": 0.6299022961766879, "grad_norm": 1.3760573050076945, "learning_rate": 1.5074617236388467e-05, "loss": 1.0703, "num_input_tokens_seen": 1038505440, "step": 5754 }, { "epoch": 0.6300117682476258, "grad_norm": 1.1074613802959659, "learning_rate": 1.506672590497841e-05, "loss": 0.8052, "num_input_tokens_seen": 1038677696, "step": 5755 }, { "epoch": 0.6301212403185638, "grad_norm": 1.1727908815159538, "learning_rate": 1.5058835748655703e-05, "loss": 0.8284, "num_input_tokens_seen": 1038899456, "step": 5756 }, { "epoch": 0.6302307123895017, "grad_norm": 1.3196900384559218, "learning_rate": 1.5050946768353708e-05, "loss": 0.852, "num_input_tokens_seen": 1039074624, "step": 5757 }, { "epoch": 0.6303401844604395, "grad_norm": 1.2745783089801332, "learning_rate": 1.5043058965005702e-05, "loss": 0.8474, "num_input_tokens_seen": 1039276896, "step": 5758 }, { "epoch": 0.6304496565313774, "grad_norm": 1.3230040163453511, "learning_rate": 1.5035172339544781e-05, "loss": 0.8027, "num_input_tokens_seen": 1039453856, "step": 5759 }, { "epoch": 0.6305591286023153, "grad_norm": 1.1795084735360621, "learning_rate": 1.5027286892903924e-05, "loss": 0.7073, "num_input_tokens_seen": 1039662176, "step": 5760 }, { "epoch": 0.6306686006732533, "grad_norm": 0.9986811225735712, "learning_rate": 1.501940262601596e-05, "loss": 0.5238, "num_input_tokens_seen": 1039833536, "step": 5761 }, { "epoch": 0.6307780727441912, "grad_norm": 1.2683641438912936, "learning_rate": 1.5011519539813584e-05, "loss": 0.8213, "num_input_tokens_seen": 1040011168, "step": 5762 }, { "epoch": 0.630887544815129, "grad_norm": 1.0404222239288745, "learning_rate": 1.5003637635229361e-05, "loss": 0.5296, "num_input_tokens_seen": 1040223072, "step": 5763 }, { "epoch": 0.6309970168860669, "grad_norm": 1.3316541121077643, "learning_rate": 1.4995756913195688e-05, "loss": 0.9224, "num_input_tokens_seen": 1040389952, "step": 5764 }, { "epoch": 0.6311064889570048, "grad_norm": 1.0409270458217548, "learning_rate": 1.4987877374644858e-05, "loss": 0.5188, "num_input_tokens_seen": 1040558400, "step": 5765 }, { "epoch": 0.6312159610279428, "grad_norm": 1.272915458158345, "learning_rate": 1.4979999020508983e-05, "loss": 0.7875, "num_input_tokens_seen": 1040736480, "step": 5766 }, { "epoch": 0.6313254330988807, "grad_norm": 1.1756783373839013, "learning_rate": 1.4972121851720078e-05, "loss": 0.6848, "num_input_tokens_seen": 1040884544, "step": 5767 }, { "epoch": 0.6314349051698186, "grad_norm": 1.221890486987376, "learning_rate": 1.4964245869209979e-05, "loss": 0.8038, "num_input_tokens_seen": 1041052768, "step": 5768 }, { "epoch": 0.6315443772407564, "grad_norm": 1.1000320950074078, "learning_rate": 1.4956371073910408e-05, "loss": 0.8989, "num_input_tokens_seen": 1041249888, "step": 5769 }, { "epoch": 0.6316538493116943, "grad_norm": 1.2850486919068842, "learning_rate": 1.4948497466752943e-05, "loss": 0.7042, "num_input_tokens_seen": 1041427296, "step": 5770 }, { "epoch": 0.6317633213826322, "grad_norm": 1.390889860497381, "learning_rate": 1.494062504866901e-05, "loss": 0.7241, "num_input_tokens_seen": 1041585888, "step": 5771 }, { "epoch": 0.6318727934535702, "grad_norm": 1.334578151559829, "learning_rate": 1.4932753820589912e-05, "loss": 1.0199, "num_input_tokens_seen": 1041787936, "step": 5772 }, { "epoch": 0.6319822655245081, "grad_norm": 1.3016995233075823, "learning_rate": 1.492488378344678e-05, "loss": 0.6415, "num_input_tokens_seen": 1041982816, "step": 5773 }, { "epoch": 0.632091737595446, "grad_norm": 1.3517083767351905, "learning_rate": 1.4917014938170648e-05, "loss": 0.7796, "num_input_tokens_seen": 1042157984, "step": 5774 }, { "epoch": 0.6322012096663838, "grad_norm": 1.10702965197182, "learning_rate": 1.4909147285692366e-05, "loss": 0.5959, "num_input_tokens_seen": 1042354432, "step": 5775 }, { "epoch": 0.6323106817373217, "grad_norm": 1.1727011248232095, "learning_rate": 1.4901280826942665e-05, "loss": 0.7966, "num_input_tokens_seen": 1042562752, "step": 5776 }, { "epoch": 0.6324201538082597, "grad_norm": 1.15339007962007, "learning_rate": 1.4893415562852148e-05, "loss": 0.6455, "num_input_tokens_seen": 1042744416, "step": 5777 }, { "epoch": 0.6325296258791976, "grad_norm": 1.3195625748651256, "learning_rate": 1.4885551494351242e-05, "loss": 0.6854, "num_input_tokens_seen": 1042935264, "step": 5778 }, { "epoch": 0.6326390979501355, "grad_norm": 1.1578494685110245, "learning_rate": 1.4877688622370262e-05, "loss": 0.8252, "num_input_tokens_seen": 1043127904, "step": 5779 }, { "epoch": 0.6327485700210733, "grad_norm": 1.2302959017981996, "learning_rate": 1.4869826947839366e-05, "loss": 0.7803, "num_input_tokens_seen": 1043299936, "step": 5780 }, { "epoch": 0.6328580420920112, "grad_norm": 1.416175317638567, "learning_rate": 1.4861966471688577e-05, "loss": 0.7851, "num_input_tokens_seen": 1043512288, "step": 5781 }, { "epoch": 0.6329675141629492, "grad_norm": 1.2843106627302874, "learning_rate": 1.4854107194847771e-05, "loss": 0.863, "num_input_tokens_seen": 1043714560, "step": 5782 }, { "epoch": 0.6330769862338871, "grad_norm": 1.086660021272088, "learning_rate": 1.4846249118246686e-05, "loss": 0.6618, "num_input_tokens_seen": 1043872256, "step": 5783 }, { "epoch": 0.633186458304825, "grad_norm": 1.219614504819915, "learning_rate": 1.483839224281493e-05, "loss": 0.6614, "num_input_tokens_seen": 1044055936, "step": 5784 }, { "epoch": 0.6332959303757629, "grad_norm": 1.142634724576353, "learning_rate": 1.4830536569481934e-05, "loss": 0.5432, "num_input_tokens_seen": 1044238944, "step": 5785 }, { "epoch": 0.6334054024467007, "grad_norm": 1.2833354539151858, "learning_rate": 1.4822682099177035e-05, "loss": 0.7651, "num_input_tokens_seen": 1044412320, "step": 5786 }, { "epoch": 0.6335148745176387, "grad_norm": 1.2872770023740285, "learning_rate": 1.4814828832829374e-05, "loss": 0.6066, "num_input_tokens_seen": 1044565536, "step": 5787 }, { "epoch": 0.6336243465885766, "grad_norm": 1.233370118408811, "learning_rate": 1.4806976771368006e-05, "loss": 0.6904, "num_input_tokens_seen": 1044763328, "step": 5788 }, { "epoch": 0.6337338186595145, "grad_norm": 1.3210854609594287, "learning_rate": 1.4799125915721787e-05, "loss": 0.6534, "num_input_tokens_seen": 1044920576, "step": 5789 }, { "epoch": 0.6338432907304524, "grad_norm": 1.1935566490930023, "learning_rate": 1.479127626681947e-05, "loss": 0.692, "num_input_tokens_seen": 1045115904, "step": 5790 }, { "epoch": 0.6339527628013903, "grad_norm": 1.2455421590174423, "learning_rate": 1.4783427825589663e-05, "loss": 0.8748, "num_input_tokens_seen": 1045308320, "step": 5791 }, { "epoch": 0.6340622348723282, "grad_norm": 1.2699042608564368, "learning_rate": 1.4775580592960808e-05, "loss": 0.7778, "num_input_tokens_seen": 1045513280, "step": 5792 }, { "epoch": 0.6341717069432661, "grad_norm": 1.265852778153351, "learning_rate": 1.4767734569861233e-05, "loss": 0.8257, "num_input_tokens_seen": 1045706816, "step": 5793 }, { "epoch": 0.634281179014204, "grad_norm": 1.0582952324824835, "learning_rate": 1.4759889757219087e-05, "loss": 0.6331, "num_input_tokens_seen": 1045925664, "step": 5794 }, { "epoch": 0.6343906510851419, "grad_norm": 1.2811511397019888, "learning_rate": 1.4752046155962418e-05, "loss": 0.6771, "num_input_tokens_seen": 1046105312, "step": 5795 }, { "epoch": 0.6345001231560798, "grad_norm": 1.1953043220589323, "learning_rate": 1.4744203767019088e-05, "loss": 0.8011, "num_input_tokens_seen": 1046269280, "step": 5796 }, { "epoch": 0.6346095952270177, "grad_norm": 1.2706147719917753, "learning_rate": 1.4736362591316844e-05, "loss": 0.8938, "num_input_tokens_seen": 1046483424, "step": 5797 }, { "epoch": 0.6347190672979556, "grad_norm": 1.2486643330894562, "learning_rate": 1.4728522629783297e-05, "loss": 0.7271, "num_input_tokens_seen": 1046633280, "step": 5798 }, { "epoch": 0.6348285393688935, "grad_norm": 1.1537809041720848, "learning_rate": 1.4720683883345876e-05, "loss": 0.6378, "num_input_tokens_seen": 1046832416, "step": 5799 }, { "epoch": 0.6349380114398314, "grad_norm": 1.2104233665521626, "learning_rate": 1.4712846352931909e-05, "loss": 0.8368, "num_input_tokens_seen": 1047003552, "step": 5800 }, { "epoch": 0.6350474835107693, "grad_norm": 1.3654129282659893, "learning_rate": 1.4705010039468547e-05, "loss": 0.7684, "num_input_tokens_seen": 1047169984, "step": 5801 }, { "epoch": 0.6351569555817073, "grad_norm": 1.2037786219314586, "learning_rate": 1.4697174943882821e-05, "loss": 0.6879, "num_input_tokens_seen": 1047364864, "step": 5802 }, { "epoch": 0.6352664276526451, "grad_norm": 1.3226019968170917, "learning_rate": 1.4689341067101597e-05, "loss": 0.9187, "num_input_tokens_seen": 1047541600, "step": 5803 }, { "epoch": 0.635375899723583, "grad_norm": 1.2263820053738796, "learning_rate": 1.4681508410051615e-05, "loss": 0.6108, "num_input_tokens_seen": 1047731776, "step": 5804 }, { "epoch": 0.6354853717945209, "grad_norm": 1.180166589324723, "learning_rate": 1.4673676973659473e-05, "loss": 0.7885, "num_input_tokens_seen": 1047918368, "step": 5805 }, { "epoch": 0.6355948438654588, "grad_norm": 1.3408606315490883, "learning_rate": 1.4665846758851593e-05, "loss": 0.6892, "num_input_tokens_seen": 1048073600, "step": 5806 }, { "epoch": 0.6357043159363968, "grad_norm": 1.293285918734895, "learning_rate": 1.4658017766554295e-05, "loss": 0.7689, "num_input_tokens_seen": 1048244288, "step": 5807 }, { "epoch": 0.6358137880073347, "grad_norm": 1.324533025756812, "learning_rate": 1.4650189997693717e-05, "loss": 0.6985, "num_input_tokens_seen": 1048424832, "step": 5808 }, { "epoch": 0.6359232600782725, "grad_norm": 1.1692489191506648, "learning_rate": 1.4642363453195874e-05, "loss": 0.6583, "num_input_tokens_seen": 1048605600, "step": 5809 }, { "epoch": 0.6360327321492104, "grad_norm": 1.1897010482205026, "learning_rate": 1.4634538133986647e-05, "loss": 0.6551, "num_input_tokens_seen": 1048767776, "step": 5810 }, { "epoch": 0.6361422042201483, "grad_norm": 1.1199880324303337, "learning_rate": 1.4626714040991733e-05, "loss": 0.7421, "num_input_tokens_seen": 1048955264, "step": 5811 }, { "epoch": 0.6362516762910863, "grad_norm": 1.2682078814694924, "learning_rate": 1.4618891175136724e-05, "loss": 1.0403, "num_input_tokens_seen": 1049141632, "step": 5812 }, { "epoch": 0.6363611483620242, "grad_norm": 1.1741851923672117, "learning_rate": 1.4611069537347032e-05, "loss": 0.7462, "num_input_tokens_seen": 1049318592, "step": 5813 }, { "epoch": 0.636470620432962, "grad_norm": 1.2143894830410427, "learning_rate": 1.4603249128547968e-05, "loss": 0.7804, "num_input_tokens_seen": 1049490848, "step": 5814 }, { "epoch": 0.6365800925038999, "grad_norm": 1.1664550055644705, "learning_rate": 1.4595429949664647e-05, "loss": 0.5823, "num_input_tokens_seen": 1049665344, "step": 5815 }, { "epoch": 0.6366895645748378, "grad_norm": 1.1274527726358345, "learning_rate": 1.4587612001622078e-05, "loss": 0.8098, "num_input_tokens_seen": 1049844544, "step": 5816 }, { "epoch": 0.6367990366457758, "grad_norm": 1.3101317974940974, "learning_rate": 1.4579795285345105e-05, "loss": 0.8392, "num_input_tokens_seen": 1050024416, "step": 5817 }, { "epoch": 0.6369085087167137, "grad_norm": 1.097170033132515, "learning_rate": 1.457197980175843e-05, "loss": 0.6096, "num_input_tokens_seen": 1050204064, "step": 5818 }, { "epoch": 0.6370179807876516, "grad_norm": 1.3812906544083585, "learning_rate": 1.4564165551786608e-05, "loss": 1.0148, "num_input_tokens_seen": 1050365120, "step": 5819 }, { "epoch": 0.6371274528585894, "grad_norm": 1.3156371544210814, "learning_rate": 1.455635253635404e-05, "loss": 0.9983, "num_input_tokens_seen": 1050573440, "step": 5820 }, { "epoch": 0.6372369249295273, "grad_norm": 1.212679489850876, "learning_rate": 1.454854075638502e-05, "loss": 0.6563, "num_input_tokens_seen": 1050714560, "step": 5821 }, { "epoch": 0.6373463970004652, "grad_norm": 1.2563746034687795, "learning_rate": 1.4540730212803633e-05, "loss": 0.7213, "num_input_tokens_seen": 1050869568, "step": 5822 }, { "epoch": 0.6374558690714032, "grad_norm": 1.222837392045252, "learning_rate": 1.4532920906533875e-05, "loss": 0.5928, "num_input_tokens_seen": 1051051456, "step": 5823 }, { "epoch": 0.6375653411423411, "grad_norm": 1.0475135108604474, "learning_rate": 1.4525112838499567e-05, "loss": 0.6504, "num_input_tokens_seen": 1051251488, "step": 5824 }, { "epoch": 0.637674813213279, "grad_norm": 1.3203796730785815, "learning_rate": 1.4517306009624382e-05, "loss": 0.7007, "num_input_tokens_seen": 1051432928, "step": 5825 }, { "epoch": 0.6377842852842168, "grad_norm": 1.2940517626466395, "learning_rate": 1.450950042083186e-05, "loss": 0.7974, "num_input_tokens_seen": 1051589280, "step": 5826 }, { "epoch": 0.6378937573551547, "grad_norm": 1.398779076145336, "learning_rate": 1.4501696073045382e-05, "loss": 0.9087, "num_input_tokens_seen": 1051791776, "step": 5827 }, { "epoch": 0.6380032294260927, "grad_norm": 1.1576376140678744, "learning_rate": 1.4493892967188188e-05, "loss": 0.67, "num_input_tokens_seen": 1052000096, "step": 5828 }, { "epoch": 0.6381127014970306, "grad_norm": 1.2179919599728115, "learning_rate": 1.4486091104183364e-05, "loss": 0.7275, "num_input_tokens_seen": 1052157344, "step": 5829 }, { "epoch": 0.6382221735679685, "grad_norm": 1.2370246263148175, "learning_rate": 1.4478290484953871e-05, "loss": 0.6996, "num_input_tokens_seen": 1052323776, "step": 5830 }, { "epoch": 0.6383316456389063, "grad_norm": 1.3053246957959437, "learning_rate": 1.4470491110422502e-05, "loss": 0.6478, "num_input_tokens_seen": 1052471392, "step": 5831 }, { "epoch": 0.6384411177098442, "grad_norm": 1.1521939455063646, "learning_rate": 1.4462692981511906e-05, "loss": 0.6081, "num_input_tokens_seen": 1052624608, "step": 5832 }, { "epoch": 0.6385505897807822, "grad_norm": 1.1667311627861967, "learning_rate": 1.4454896099144583e-05, "loss": 0.8397, "num_input_tokens_seen": 1052821952, "step": 5833 }, { "epoch": 0.6386600618517201, "grad_norm": 1.1435961239390484, "learning_rate": 1.4447100464242894e-05, "loss": 0.8159, "num_input_tokens_seen": 1053036544, "step": 5834 }, { "epoch": 0.638769533922658, "grad_norm": 1.265745510439188, "learning_rate": 1.4439306077729048e-05, "loss": 0.8028, "num_input_tokens_seen": 1053226720, "step": 5835 }, { "epoch": 0.6388790059935959, "grad_norm": 1.2972605028886994, "learning_rate": 1.4431512940525102e-05, "loss": 0.8351, "num_input_tokens_seen": 1053427424, "step": 5836 }, { "epoch": 0.6389884780645337, "grad_norm": 1.4747386345199864, "learning_rate": 1.4423721053552963e-05, "loss": 0.8371, "num_input_tokens_seen": 1053582432, "step": 5837 }, { "epoch": 0.6390979501354717, "grad_norm": 1.3678554973855628, "learning_rate": 1.4415930417734414e-05, "loss": 0.8339, "num_input_tokens_seen": 1053770592, "step": 5838 }, { "epoch": 0.6392074222064096, "grad_norm": 1.1669471891657912, "learning_rate": 1.4408141033991064e-05, "loss": 0.808, "num_input_tokens_seen": 1053956288, "step": 5839 }, { "epoch": 0.6393168942773475, "grad_norm": 1.1809726308708546, "learning_rate": 1.4400352903244382e-05, "loss": 0.8097, "num_input_tokens_seen": 1054167072, "step": 5840 }, { "epoch": 0.6394263663482854, "grad_norm": 1.374148321813891, "learning_rate": 1.4392566026415688e-05, "loss": 0.7692, "num_input_tokens_seen": 1054323200, "step": 5841 }, { "epoch": 0.6395358384192233, "grad_norm": 1.3075974044841727, "learning_rate": 1.4384780404426157e-05, "loss": 0.7929, "num_input_tokens_seen": 1054516064, "step": 5842 }, { "epoch": 0.6396453104901612, "grad_norm": 1.2488080063369567, "learning_rate": 1.4376996038196807e-05, "loss": 0.6091, "num_input_tokens_seen": 1054682272, "step": 5843 }, { "epoch": 0.6397547825610991, "grad_norm": 1.3214064360572564, "learning_rate": 1.4369212928648513e-05, "loss": 0.8387, "num_input_tokens_seen": 1054862816, "step": 5844 }, { "epoch": 0.639864254632037, "grad_norm": 1.356919669141288, "learning_rate": 1.4361431076702019e-05, "loss": 0.8011, "num_input_tokens_seen": 1055047616, "step": 5845 }, { "epoch": 0.6399737267029749, "grad_norm": 1.2311079998719174, "learning_rate": 1.4353650483277881e-05, "loss": 0.7982, "num_input_tokens_seen": 1055242272, "step": 5846 }, { "epoch": 0.6400831987739128, "grad_norm": 1.2069080376313406, "learning_rate": 1.4345871149296552e-05, "loss": 0.5904, "num_input_tokens_seen": 1055424384, "step": 5847 }, { "epoch": 0.6401926708448507, "grad_norm": 1.4556038335667398, "learning_rate": 1.433809307567828e-05, "loss": 0.8858, "num_input_tokens_seen": 1055622400, "step": 5848 }, { "epoch": 0.6403021429157886, "grad_norm": 1.2168957714469093, "learning_rate": 1.4330316263343224e-05, "loss": 0.6447, "num_input_tokens_seen": 1055786144, "step": 5849 }, { "epoch": 0.6404116149867265, "grad_norm": 1.1622020011339764, "learning_rate": 1.432254071321136e-05, "loss": 0.6204, "num_input_tokens_seen": 1055974976, "step": 5850 }, { "epoch": 0.6405210870576644, "grad_norm": 1.3624539638756312, "learning_rate": 1.4314766426202507e-05, "loss": 0.7833, "num_input_tokens_seen": 1056128416, "step": 5851 }, { "epoch": 0.6406305591286023, "grad_norm": 1.1557488773149884, "learning_rate": 1.430699340323638e-05, "loss": 0.753, "num_input_tokens_seen": 1056310976, "step": 5852 }, { "epoch": 0.6407400311995403, "grad_norm": 1.2608778191379235, "learning_rate": 1.429922164523247e-05, "loss": 0.6965, "num_input_tokens_seen": 1056462176, "step": 5853 }, { "epoch": 0.6408495032704781, "grad_norm": 1.2169816319279232, "learning_rate": 1.4291451153110202e-05, "loss": 0.6152, "num_input_tokens_seen": 1056637344, "step": 5854 }, { "epoch": 0.640958975341416, "grad_norm": 1.3473165775086426, "learning_rate": 1.4283681927788772e-05, "loss": 0.7361, "num_input_tokens_seen": 1056802432, "step": 5855 }, { "epoch": 0.6410684474123539, "grad_norm": 1.420469406743769, "learning_rate": 1.4275913970187305e-05, "loss": 0.9905, "num_input_tokens_seen": 1057023072, "step": 5856 }, { "epoch": 0.6411779194832918, "grad_norm": 1.3142218875269298, "learning_rate": 1.4268147281224695e-05, "loss": 0.7575, "num_input_tokens_seen": 1057219296, "step": 5857 }, { "epoch": 0.6412873915542298, "grad_norm": 1.1142880889496984, "learning_rate": 1.4260381861819755e-05, "loss": 0.7153, "num_input_tokens_seen": 1057403872, "step": 5858 }, { "epoch": 0.6413968636251677, "grad_norm": 1.2423060138715583, "learning_rate": 1.4252617712891109e-05, "loss": 0.6355, "num_input_tokens_seen": 1057575232, "step": 5859 }, { "epoch": 0.6415063356961055, "grad_norm": 1.2629043158306188, "learning_rate": 1.424485483535724e-05, "loss": 0.8774, "num_input_tokens_seen": 1057750176, "step": 5860 }, { "epoch": 0.6416158077670434, "grad_norm": 1.2548644780700997, "learning_rate": 1.4237093230136489e-05, "loss": 0.7405, "num_input_tokens_seen": 1057904736, "step": 5861 }, { "epoch": 0.6417252798379813, "grad_norm": 1.2981082613604615, "learning_rate": 1.4229332898147022e-05, "loss": 0.7912, "num_input_tokens_seen": 1058098272, "step": 5862 }, { "epoch": 0.6418347519089193, "grad_norm": 1.354545168675875, "learning_rate": 1.4221573840306902e-05, "loss": 1.1416, "num_input_tokens_seen": 1058305248, "step": 5863 }, { "epoch": 0.6419442239798572, "grad_norm": 1.3110154702752697, "learning_rate": 1.421381605753397e-05, "loss": 0.8632, "num_input_tokens_seen": 1058452416, "step": 5864 }, { "epoch": 0.642053696050795, "grad_norm": 1.2927381880409965, "learning_rate": 1.4206059550745993e-05, "loss": 0.6574, "num_input_tokens_seen": 1058604736, "step": 5865 }, { "epoch": 0.6421631681217329, "grad_norm": 1.2601703791999863, "learning_rate": 1.4198304320860534e-05, "loss": 0.6998, "num_input_tokens_seen": 1058768256, "step": 5866 }, { "epoch": 0.6422726401926708, "grad_norm": 1.2613370808235882, "learning_rate": 1.4190550368795024e-05, "loss": 0.6786, "num_input_tokens_seen": 1058956864, "step": 5867 }, { "epoch": 0.6423821122636088, "grad_norm": 1.3043195468317945, "learning_rate": 1.4182797695466743e-05, "loss": 0.9602, "num_input_tokens_seen": 1059140768, "step": 5868 }, { "epoch": 0.6424915843345467, "grad_norm": 1.2381866611491157, "learning_rate": 1.4175046301792816e-05, "loss": 0.7441, "num_input_tokens_seen": 1059326240, "step": 5869 }, { "epoch": 0.6426010564054846, "grad_norm": 1.3656541345487008, "learning_rate": 1.4167296188690204e-05, "loss": 0.8584, "num_input_tokens_seen": 1059486624, "step": 5870 }, { "epoch": 0.6427105284764224, "grad_norm": 1.3963358766789578, "learning_rate": 1.4159547357075759e-05, "loss": 0.8884, "num_input_tokens_seen": 1059651040, "step": 5871 }, { "epoch": 0.6428200005473603, "grad_norm": 1.264570008427125, "learning_rate": 1.4151799807866135e-05, "loss": 0.8569, "num_input_tokens_seen": 1059816352, "step": 5872 }, { "epoch": 0.6429294726182982, "grad_norm": 1.1195246119279116, "learning_rate": 1.4144053541977855e-05, "loss": 0.5537, "num_input_tokens_seen": 1059978752, "step": 5873 }, { "epoch": 0.6430389446892362, "grad_norm": 1.21660479013747, "learning_rate": 1.4136308560327288e-05, "loss": 0.6876, "num_input_tokens_seen": 1060146304, "step": 5874 }, { "epoch": 0.6431484167601741, "grad_norm": 1.4759353839924523, "learning_rate": 1.4128564863830655e-05, "loss": 0.8544, "num_input_tokens_seen": 1060300416, "step": 5875 }, { "epoch": 0.643257888831112, "grad_norm": 1.3719527985013233, "learning_rate": 1.4120822453404011e-05, "loss": 0.8103, "num_input_tokens_seen": 1060483648, "step": 5876 }, { "epoch": 0.6433673609020498, "grad_norm": 1.2986114091697165, "learning_rate": 1.4113081329963265e-05, "loss": 0.6578, "num_input_tokens_seen": 1060632160, "step": 5877 }, { "epoch": 0.6434768329729877, "grad_norm": 1.447200058083773, "learning_rate": 1.4105341494424206e-05, "loss": 0.9817, "num_input_tokens_seen": 1060810464, "step": 5878 }, { "epoch": 0.6435863050439257, "grad_norm": 1.348138872056666, "learning_rate": 1.40976029477024e-05, "loss": 0.8496, "num_input_tokens_seen": 1061007136, "step": 5879 }, { "epoch": 0.6436957771148636, "grad_norm": 1.2685781804141982, "learning_rate": 1.4089865690713337e-05, "loss": 0.7507, "num_input_tokens_seen": 1061191488, "step": 5880 }, { "epoch": 0.6438052491858015, "grad_norm": 1.1787382942823974, "learning_rate": 1.40821297243723e-05, "loss": 0.8594, "num_input_tokens_seen": 1061390176, "step": 5881 }, { "epoch": 0.6439147212567393, "grad_norm": 1.182270985337023, "learning_rate": 1.407439504959445e-05, "loss": 0.8612, "num_input_tokens_seen": 1061596704, "step": 5882 }, { "epoch": 0.6440241933276772, "grad_norm": 1.1214451255826088, "learning_rate": 1.4066661667294779e-05, "loss": 0.7493, "num_input_tokens_seen": 1061791136, "step": 5883 }, { "epoch": 0.6441336653986152, "grad_norm": 1.4437190163990976, "learning_rate": 1.405892957838812e-05, "loss": 0.9063, "num_input_tokens_seen": 1061930688, "step": 5884 }, { "epoch": 0.6442431374695531, "grad_norm": 1.1778708611348525, "learning_rate": 1.4051198783789196e-05, "loss": 0.6925, "num_input_tokens_seen": 1062122432, "step": 5885 }, { "epoch": 0.644352609540491, "grad_norm": 1.203821302779941, "learning_rate": 1.4043469284412509e-05, "loss": 1.0481, "num_input_tokens_seen": 1062326496, "step": 5886 }, { "epoch": 0.6444620816114289, "grad_norm": 1.176006561692415, "learning_rate": 1.4035741081172476e-05, "loss": 0.6791, "num_input_tokens_seen": 1062526976, "step": 5887 }, { "epoch": 0.6445715536823667, "grad_norm": 1.266718827395015, "learning_rate": 1.4028014174983295e-05, "loss": 0.7345, "num_input_tokens_seen": 1062695424, "step": 5888 }, { "epoch": 0.6446810257533047, "grad_norm": 1.3524908945478709, "learning_rate": 1.402028856675908e-05, "loss": 0.8422, "num_input_tokens_seen": 1062898592, "step": 5889 }, { "epoch": 0.6447904978242426, "grad_norm": 1.059218106425093, "learning_rate": 1.4012564257413718e-05, "loss": 0.693, "num_input_tokens_seen": 1063102656, "step": 5890 }, { "epoch": 0.6448999698951805, "grad_norm": 1.2034618352045674, "learning_rate": 1.4004841247861011e-05, "loss": 0.6337, "num_input_tokens_seen": 1063289248, "step": 5891 }, { "epoch": 0.6450094419661184, "grad_norm": 1.242593231157317, "learning_rate": 1.3997119539014566e-05, "loss": 0.6774, "num_input_tokens_seen": 1063457248, "step": 5892 }, { "epoch": 0.6451189140370563, "grad_norm": 1.216334957907517, "learning_rate": 1.3989399131787836e-05, "loss": 0.8856, "num_input_tokens_seen": 1063633536, "step": 5893 }, { "epoch": 0.6452283861079942, "grad_norm": 1.3076395228033184, "learning_rate": 1.398168002709416e-05, "loss": 0.7736, "num_input_tokens_seen": 1063819904, "step": 5894 }, { "epoch": 0.6453378581789321, "grad_norm": 1.2830074631436006, "learning_rate": 1.3973962225846654e-05, "loss": 0.8029, "num_input_tokens_seen": 1063976032, "step": 5895 }, { "epoch": 0.64544733024987, "grad_norm": 1.1229348300934583, "learning_rate": 1.3966245728958355e-05, "loss": 0.805, "num_input_tokens_seen": 1064201376, "step": 5896 }, { "epoch": 0.6455568023208079, "grad_norm": 1.2358290883971192, "learning_rate": 1.3958530537342075e-05, "loss": 1.0171, "num_input_tokens_seen": 1064413056, "step": 5897 }, { "epoch": 0.6456662743917458, "grad_norm": 1.0234463523889572, "learning_rate": 1.3950816651910537e-05, "loss": 0.6528, "num_input_tokens_seen": 1064597184, "step": 5898 }, { "epoch": 0.6457757464626837, "grad_norm": 1.2488346866916145, "learning_rate": 1.3943104073576263e-05, "loss": 0.7325, "num_input_tokens_seen": 1064786688, "step": 5899 }, { "epoch": 0.6458852185336216, "grad_norm": 1.2229151832445928, "learning_rate": 1.393539280325164e-05, "loss": 0.5231, "num_input_tokens_seen": 1064958944, "step": 5900 }, { "epoch": 0.6459946906045595, "grad_norm": 1.2216737840053955, "learning_rate": 1.3927682841848899e-05, "loss": 0.7512, "num_input_tokens_seen": 1065131872, "step": 5901 }, { "epoch": 0.6461041626754974, "grad_norm": 1.3330871781976554, "learning_rate": 1.391997419028011e-05, "loss": 0.7698, "num_input_tokens_seen": 1065284192, "step": 5902 }, { "epoch": 0.6462136347464353, "grad_norm": 1.1618325809411933, "learning_rate": 1.3912266849457195e-05, "loss": 0.7973, "num_input_tokens_seen": 1065424640, "step": 5903 }, { "epoch": 0.6463231068173733, "grad_norm": 1.1213008676002851, "learning_rate": 1.3904560820291902e-05, "loss": 0.7326, "num_input_tokens_seen": 1065624224, "step": 5904 }, { "epoch": 0.6464325788883111, "grad_norm": 1.0976819887231954, "learning_rate": 1.3896856103695866e-05, "loss": 0.5482, "num_input_tokens_seen": 1065808576, "step": 5905 }, { "epoch": 0.646542050959249, "grad_norm": 1.0995455735230302, "learning_rate": 1.3889152700580527e-05, "loss": 0.6962, "num_input_tokens_seen": 1065998304, "step": 5906 }, { "epoch": 0.6466515230301869, "grad_norm": 1.2281005837534695, "learning_rate": 1.3881450611857181e-05, "loss": 0.9826, "num_input_tokens_seen": 1066199232, "step": 5907 }, { "epoch": 0.6467609951011248, "grad_norm": 1.222586047804861, "learning_rate": 1.3873749838436972e-05, "loss": 0.7421, "num_input_tokens_seen": 1066403296, "step": 5908 }, { "epoch": 0.6468704671720628, "grad_norm": 1.3765223485349722, "learning_rate": 1.386605038123089e-05, "loss": 0.8185, "num_input_tokens_seen": 1066569728, "step": 5909 }, { "epoch": 0.6469799392430007, "grad_norm": 1.1870975816397575, "learning_rate": 1.3858352241149763e-05, "loss": 0.6906, "num_input_tokens_seen": 1066757216, "step": 5910 }, { "epoch": 0.6470894113139385, "grad_norm": 1.2475045567584095, "learning_rate": 1.3850655419104267e-05, "loss": 1.0453, "num_input_tokens_seen": 1066946944, "step": 5911 }, { "epoch": 0.6471988833848764, "grad_norm": 1.2485838020166145, "learning_rate": 1.3842959916004911e-05, "loss": 0.7292, "num_input_tokens_seen": 1067116512, "step": 5912 }, { "epoch": 0.6473083554558143, "grad_norm": 1.3344675643941002, "learning_rate": 1.3835265732762076e-05, "loss": 0.7697, "num_input_tokens_seen": 1067283392, "step": 5913 }, { "epoch": 0.6474178275267523, "grad_norm": 1.4170309701629675, "learning_rate": 1.3827572870285963e-05, "loss": 0.8917, "num_input_tokens_seen": 1067448928, "step": 5914 }, { "epoch": 0.6475272995976902, "grad_norm": 1.120173910977921, "learning_rate": 1.3819881329486622e-05, "loss": 0.6908, "num_input_tokens_seen": 1067642912, "step": 5915 }, { "epoch": 0.647636771668628, "grad_norm": 1.0576467747657647, "learning_rate": 1.3812191111273944e-05, "loss": 0.5815, "num_input_tokens_seen": 1067812704, "step": 5916 }, { "epoch": 0.6477462437395659, "grad_norm": 1.360968740741929, "learning_rate": 1.3804502216557675e-05, "loss": 0.8752, "num_input_tokens_seen": 1068014080, "step": 5917 }, { "epoch": 0.6478557158105038, "grad_norm": 6.305811898748058, "learning_rate": 1.3796814646247385e-05, "loss": 0.7348, "num_input_tokens_seen": 1068196192, "step": 5918 }, { "epoch": 0.6479651878814418, "grad_norm": 1.2116503186458882, "learning_rate": 1.3789128401252502e-05, "loss": 0.772, "num_input_tokens_seen": 1068367104, "step": 5919 }, { "epoch": 0.6480746599523797, "grad_norm": 1.298242652127404, "learning_rate": 1.3781443482482314e-05, "loss": 0.8221, "num_input_tokens_seen": 1068547424, "step": 5920 }, { "epoch": 0.6481841320233176, "grad_norm": 1.2091423757222104, "learning_rate": 1.37737598908459e-05, "loss": 0.8589, "num_input_tokens_seen": 1068746560, "step": 5921 }, { "epoch": 0.6482936040942554, "grad_norm": 1.2384358980378796, "learning_rate": 1.3766077627252233e-05, "loss": 0.7561, "num_input_tokens_seen": 1068929568, "step": 5922 }, { "epoch": 0.6484030761651933, "grad_norm": 1.2456174316788822, "learning_rate": 1.3758396692610112e-05, "loss": 0.7756, "num_input_tokens_seen": 1069092864, "step": 5923 }, { "epoch": 0.6485125482361312, "grad_norm": 1.1999059441581452, "learning_rate": 1.3750717087828172e-05, "loss": 0.633, "num_input_tokens_seen": 1069268928, "step": 5924 }, { "epoch": 0.6486220203070692, "grad_norm": 1.1296489872573785, "learning_rate": 1.3743038813814896e-05, "loss": 0.6591, "num_input_tokens_seen": 1069491584, "step": 5925 }, { "epoch": 0.6487314923780071, "grad_norm": 1.2634659235245869, "learning_rate": 1.3735361871478597e-05, "loss": 0.8802, "num_input_tokens_seen": 1069716256, "step": 5926 }, { "epoch": 0.648840964448945, "grad_norm": 1.2033891264079482, "learning_rate": 1.3727686261727474e-05, "loss": 0.9045, "num_input_tokens_seen": 1069909120, "step": 5927 }, { "epoch": 0.6489504365198828, "grad_norm": 1.1823822468524356, "learning_rate": 1.3720011985469494e-05, "loss": 0.8696, "num_input_tokens_seen": 1070080704, "step": 5928 }, { "epoch": 0.6490599085908207, "grad_norm": 1.3032929053869877, "learning_rate": 1.371233904361256e-05, "loss": 0.897, "num_input_tokens_seen": 1070278496, "step": 5929 }, { "epoch": 0.6491693806617587, "grad_norm": 1.280996157924063, "learning_rate": 1.370466743706431e-05, "loss": 0.8333, "num_input_tokens_seen": 1070482560, "step": 5930 }, { "epoch": 0.6492788527326966, "grad_norm": 1.2545811628265677, "learning_rate": 1.3696997166732328e-05, "loss": 0.6972, "num_input_tokens_seen": 1070661312, "step": 5931 }, { "epoch": 0.6493883248036345, "grad_norm": 1.2103260096065147, "learning_rate": 1.3689328233523968e-05, "loss": 0.8837, "num_input_tokens_seen": 1070833120, "step": 5932 }, { "epoch": 0.6494977968745723, "grad_norm": 1.2130865408742815, "learning_rate": 1.3681660638346455e-05, "loss": 0.724, "num_input_tokens_seen": 1070995296, "step": 5933 }, { "epoch": 0.6496072689455102, "grad_norm": 1.2038207581382743, "learning_rate": 1.3673994382106856e-05, "loss": 0.8572, "num_input_tokens_seen": 1071181216, "step": 5934 }, { "epoch": 0.6497167410164482, "grad_norm": 1.3730453556318274, "learning_rate": 1.3666329465712058e-05, "loss": 0.9961, "num_input_tokens_seen": 1071372960, "step": 5935 }, { "epoch": 0.6498262130873861, "grad_norm": 1.3205470491901286, "learning_rate": 1.3658665890068836e-05, "loss": 0.9336, "num_input_tokens_seen": 1071568960, "step": 5936 }, { "epoch": 0.649935685158324, "grad_norm": 1.1788969106538096, "learning_rate": 1.3651003656083742e-05, "loss": 0.8987, "num_input_tokens_seen": 1071787136, "step": 5937 }, { "epoch": 0.6500451572292619, "grad_norm": 1.2233069397697598, "learning_rate": 1.3643342764663225e-05, "loss": 0.906, "num_input_tokens_seen": 1071986048, "step": 5938 }, { "epoch": 0.6501546293001997, "grad_norm": 1.2274848725732794, "learning_rate": 1.3635683216713551e-05, "loss": 0.9414, "num_input_tokens_seen": 1072163456, "step": 5939 }, { "epoch": 0.6502641013711377, "grad_norm": 1.1856706985088352, "learning_rate": 1.362802501314083e-05, "loss": 0.6553, "num_input_tokens_seen": 1072376928, "step": 5940 }, { "epoch": 0.6503735734420756, "grad_norm": 1.257969106082584, "learning_rate": 1.3620368154851008e-05, "loss": 0.6666, "num_input_tokens_seen": 1072534400, "step": 5941 }, { "epoch": 0.6504830455130135, "grad_norm": 1.2695914386273115, "learning_rate": 1.361271264274988e-05, "loss": 0.7073, "num_input_tokens_seen": 1072709120, "step": 5942 }, { "epoch": 0.6505925175839514, "grad_norm": 1.1739252592954657, "learning_rate": 1.3605058477743077e-05, "loss": 0.809, "num_input_tokens_seen": 1072937824, "step": 5943 }, { "epoch": 0.6507019896548893, "grad_norm": 1.2123793355146661, "learning_rate": 1.3597405660736074e-05, "loss": 0.6617, "num_input_tokens_seen": 1073138304, "step": 5944 }, { "epoch": 0.6508114617258272, "grad_norm": 1.160956161574939, "learning_rate": 1.3589754192634168e-05, "loss": 0.6744, "num_input_tokens_seen": 1073324224, "step": 5945 }, { "epoch": 0.6509209337967651, "grad_norm": 1.6682580344133715, "learning_rate": 1.3582104074342544e-05, "loss": 1.1437, "num_input_tokens_seen": 1073459744, "step": 5946 }, { "epoch": 0.651030405867703, "grad_norm": 1.281672245322704, "learning_rate": 1.3574455306766179e-05, "loss": 0.8746, "num_input_tokens_seen": 1073674112, "step": 5947 }, { "epoch": 0.6511398779386409, "grad_norm": 1.322783375831054, "learning_rate": 1.3566807890809907e-05, "loss": 0.7593, "num_input_tokens_seen": 1073821952, "step": 5948 }, { "epoch": 0.6512493500095788, "grad_norm": 1.3140587859946076, "learning_rate": 1.3559161827378409e-05, "loss": 0.9731, "num_input_tokens_seen": 1074013248, "step": 5949 }, { "epoch": 0.6513588220805167, "grad_norm": 1.3432212055271626, "learning_rate": 1.3551517117376195e-05, "loss": 0.736, "num_input_tokens_seen": 1074175648, "step": 5950 }, { "epoch": 0.6514682941514546, "grad_norm": 1.3684095316326623, "learning_rate": 1.3543873761707617e-05, "loss": 0.7391, "num_input_tokens_seen": 1074358880, "step": 5951 }, { "epoch": 0.6515777662223925, "grad_norm": 1.350854191925958, "learning_rate": 1.3536231761276866e-05, "loss": 0.795, "num_input_tokens_seen": 1074542784, "step": 5952 }, { "epoch": 0.6516872382933304, "grad_norm": 1.2889979490821686, "learning_rate": 1.3528591116988e-05, "loss": 0.7072, "num_input_tokens_seen": 1074717056, "step": 5953 }, { "epoch": 0.6517967103642683, "grad_norm": 1.1720125753392046, "learning_rate": 1.3520951829744857e-05, "loss": 0.8962, "num_input_tokens_seen": 1074873856, "step": 5954 }, { "epoch": 0.6519061824352063, "grad_norm": 1.1682661819659739, "learning_rate": 1.351331390045118e-05, "loss": 0.592, "num_input_tokens_seen": 1075040512, "step": 5955 }, { "epoch": 0.6520156545061441, "grad_norm": 1.3259784180969119, "learning_rate": 1.3505677330010505e-05, "loss": 0.5933, "num_input_tokens_seen": 1075179616, "step": 5956 }, { "epoch": 0.652125126577082, "grad_norm": 1.2187199222081038, "learning_rate": 1.3498042119326232e-05, "loss": 0.726, "num_input_tokens_seen": 1075324096, "step": 5957 }, { "epoch": 0.6522345986480199, "grad_norm": 1.2246651035995213, "learning_rate": 1.3490408269301585e-05, "loss": 0.8596, "num_input_tokens_seen": 1075498368, "step": 5958 }, { "epoch": 0.6523440707189578, "grad_norm": 1.2972771354332346, "learning_rate": 1.3482775780839632e-05, "loss": 0.8312, "num_input_tokens_seen": 1075679360, "step": 5959 }, { "epoch": 0.6524535427898958, "grad_norm": 1.3919338110775155, "learning_rate": 1.3475144654843302e-05, "loss": 1.0448, "num_input_tokens_seen": 1075882304, "step": 5960 }, { "epoch": 0.6525630148608337, "grad_norm": 1.3021138098646876, "learning_rate": 1.346751489221531e-05, "loss": 1.1132, "num_input_tokens_seen": 1076095552, "step": 5961 }, { "epoch": 0.6526724869317715, "grad_norm": 1.2784051713979727, "learning_rate": 1.3459886493858282e-05, "loss": 0.8572, "num_input_tokens_seen": 1076272512, "step": 5962 }, { "epoch": 0.6527819590027094, "grad_norm": 1.1876787753805234, "learning_rate": 1.3452259460674599e-05, "loss": 0.8037, "num_input_tokens_seen": 1076486656, "step": 5963 }, { "epoch": 0.6528914310736473, "grad_norm": 1.2842319492395569, "learning_rate": 1.3444633793566556e-05, "loss": 0.6937, "num_input_tokens_seen": 1076696320, "step": 5964 }, { "epoch": 0.6530009031445853, "grad_norm": 1.2545451500355749, "learning_rate": 1.3437009493436243e-05, "loss": 1.0418, "num_input_tokens_seen": 1076897920, "step": 5965 }, { "epoch": 0.6531103752155232, "grad_norm": 1.218899839922536, "learning_rate": 1.3429386561185606e-05, "loss": 0.8658, "num_input_tokens_seen": 1077070176, "step": 5966 }, { "epoch": 0.653219847286461, "grad_norm": 1.225006379869936, "learning_rate": 1.3421764997716418e-05, "loss": 0.6483, "num_input_tokens_seen": 1077226080, "step": 5967 }, { "epoch": 0.6533293193573989, "grad_norm": 1.2097366285190154, "learning_rate": 1.3414144803930284e-05, "loss": 0.6413, "num_input_tokens_seen": 1077416256, "step": 5968 }, { "epoch": 0.6534387914283368, "grad_norm": 1.186655999458959, "learning_rate": 1.3406525980728697e-05, "loss": 0.6472, "num_input_tokens_seen": 1077597920, "step": 5969 }, { "epoch": 0.6535482634992748, "grad_norm": 1.11462706483036, "learning_rate": 1.3398908529012899e-05, "loss": 0.6306, "num_input_tokens_seen": 1077780928, "step": 5970 }, { "epoch": 0.6536577355702127, "grad_norm": 1.303789023586133, "learning_rate": 1.3391292449684067e-05, "loss": 0.9286, "num_input_tokens_seen": 1077981856, "step": 5971 }, { "epoch": 0.6537672076411506, "grad_norm": 1.2487877212493823, "learning_rate": 1.3383677743643126e-05, "loss": 0.6376, "num_input_tokens_seen": 1078189728, "step": 5972 }, { "epoch": 0.6538766797120884, "grad_norm": 1.1479216474550795, "learning_rate": 1.3376064411790909e-05, "loss": 0.8157, "num_input_tokens_seen": 1078378784, "step": 5973 }, { "epoch": 0.6539861517830263, "grad_norm": 1.1378708077828816, "learning_rate": 1.3368452455028052e-05, "loss": 0.5301, "num_input_tokens_seen": 1078562016, "step": 5974 }, { "epoch": 0.6540956238539642, "grad_norm": 1.194519278427022, "learning_rate": 1.3360841874255034e-05, "loss": 0.7936, "num_input_tokens_seen": 1078744128, "step": 5975 }, { "epoch": 0.6542050959249022, "grad_norm": 1.159357431695064, "learning_rate": 1.3353232670372173e-05, "loss": 0.6272, "num_input_tokens_seen": 1078921984, "step": 5976 }, { "epoch": 0.6543145679958401, "grad_norm": 1.3200698719894353, "learning_rate": 1.3345624844279611e-05, "loss": 0.9028, "num_input_tokens_seen": 1079100960, "step": 5977 }, { "epoch": 0.654424040066778, "grad_norm": 1.2203271393624393, "learning_rate": 1.3338018396877371e-05, "loss": 0.7619, "num_input_tokens_seen": 1079241184, "step": 5978 }, { "epoch": 0.6545335121377158, "grad_norm": 1.2776019567425672, "learning_rate": 1.3330413329065238e-05, "loss": 0.8663, "num_input_tokens_seen": 1079394400, "step": 5979 }, { "epoch": 0.6546429842086537, "grad_norm": 1.2492438413427347, "learning_rate": 1.3322809641742917e-05, "loss": 0.6069, "num_input_tokens_seen": 1079540672, "step": 5980 }, { "epoch": 0.6547524562795917, "grad_norm": 1.341391289726997, "learning_rate": 1.3315207335809888e-05, "loss": 0.9535, "num_input_tokens_seen": 1079689856, "step": 5981 }, { "epoch": 0.6548619283505296, "grad_norm": 1.3518613804568962, "learning_rate": 1.3307606412165491e-05, "loss": 0.8527, "num_input_tokens_seen": 1079883616, "step": 5982 }, { "epoch": 0.6549714004214675, "grad_norm": 1.4806991035593193, "learning_rate": 1.3300006871708905e-05, "loss": 0.8021, "num_input_tokens_seen": 1080059904, "step": 5983 }, { "epoch": 0.6550808724924054, "grad_norm": 1.3216547717168075, "learning_rate": 1.3292408715339141e-05, "loss": 0.7427, "num_input_tokens_seen": 1080251648, "step": 5984 }, { "epoch": 0.6551903445633432, "grad_norm": 1.318891132630994, "learning_rate": 1.3284811943955045e-05, "loss": 0.7652, "num_input_tokens_seen": 1080447424, "step": 5985 }, { "epoch": 0.6552998166342812, "grad_norm": 1.298322765730102, "learning_rate": 1.32772165584553e-05, "loss": 0.8264, "num_input_tokens_seen": 1080598176, "step": 5986 }, { "epoch": 0.6554092887052191, "grad_norm": 1.2640510002150682, "learning_rate": 1.3269622559738416e-05, "loss": 0.9441, "num_input_tokens_seen": 1080771552, "step": 5987 }, { "epoch": 0.655518760776157, "grad_norm": 1.21174245352468, "learning_rate": 1.3262029948702766e-05, "loss": 0.7342, "num_input_tokens_seen": 1080969120, "step": 5988 }, { "epoch": 0.6556282328470949, "grad_norm": 1.3455618242953753, "learning_rate": 1.3254438726246537e-05, "loss": 0.8244, "num_input_tokens_seen": 1081140704, "step": 5989 }, { "epoch": 0.6557377049180327, "grad_norm": 1.1570772356351837, "learning_rate": 1.3246848893267749e-05, "loss": 0.6652, "num_input_tokens_seen": 1081303104, "step": 5990 }, { "epoch": 0.6558471769889707, "grad_norm": 1.2416212602714767, "learning_rate": 1.3239260450664275e-05, "loss": 0.7169, "num_input_tokens_seen": 1081493280, "step": 5991 }, { "epoch": 0.6559566490599086, "grad_norm": 1.2303407148323913, "learning_rate": 1.3231673399333802e-05, "loss": 0.6045, "num_input_tokens_seen": 1081690848, "step": 5992 }, { "epoch": 0.6560661211308465, "grad_norm": 1.3573983828480884, "learning_rate": 1.3224087740173871e-05, "loss": 0.7478, "num_input_tokens_seen": 1081861088, "step": 5993 }, { "epoch": 0.6561755932017844, "grad_norm": 1.320639846220033, "learning_rate": 1.3216503474081838e-05, "loss": 0.6801, "num_input_tokens_seen": 1081991232, "step": 5994 }, { "epoch": 0.6562850652727223, "grad_norm": 1.1987102312605609, "learning_rate": 1.3208920601954938e-05, "loss": 0.7745, "num_input_tokens_seen": 1082214112, "step": 5995 }, { "epoch": 0.6563945373436602, "grad_norm": 1.077475033977544, "learning_rate": 1.320133912469017e-05, "loss": 0.88, "num_input_tokens_seen": 1082385472, "step": 5996 }, { "epoch": 0.6565040094145981, "grad_norm": 1.227582276766441, "learning_rate": 1.3193759043184437e-05, "loss": 0.7173, "num_input_tokens_seen": 1082553472, "step": 5997 }, { "epoch": 0.656613481485536, "grad_norm": 1.3032151097307496, "learning_rate": 1.318618035833444e-05, "loss": 0.8104, "num_input_tokens_seen": 1082733568, "step": 5998 }, { "epoch": 0.6567229535564739, "grad_norm": 1.3838130542341724, "learning_rate": 1.317860307103672e-05, "loss": 0.8061, "num_input_tokens_seen": 1082915904, "step": 5999 }, { "epoch": 0.6568324256274118, "grad_norm": 1.0571408031049678, "learning_rate": 1.3171027182187665e-05, "loss": 0.6742, "num_input_tokens_seen": 1083104288, "step": 6000 }, { "epoch": 0.6569418976983498, "grad_norm": 1.3140549758169555, "learning_rate": 1.3163452692683465e-05, "loss": 0.6619, "num_input_tokens_seen": 1083305888, "step": 6001 }, { "epoch": 0.6570513697692876, "grad_norm": 1.2318447109700497, "learning_rate": 1.3155879603420207e-05, "loss": 0.6062, "num_input_tokens_seen": 1083458208, "step": 6002 }, { "epoch": 0.6571608418402255, "grad_norm": 1.2162525312626717, "learning_rate": 1.3148307915293728e-05, "loss": 0.6019, "num_input_tokens_seen": 1083650176, "step": 6003 }, { "epoch": 0.6572703139111634, "grad_norm": 1.3725396164837806, "learning_rate": 1.3140737629199787e-05, "loss": 0.7772, "num_input_tokens_seen": 1083837216, "step": 6004 }, { "epoch": 0.6573797859821013, "grad_norm": 1.2959097061874647, "learning_rate": 1.3133168746033895e-05, "loss": 0.8285, "num_input_tokens_seen": 1083997152, "step": 6005 }, { "epoch": 0.6574892580530393, "grad_norm": 1.2703406937251736, "learning_rate": 1.3125601266691462e-05, "loss": 0.8265, "num_input_tokens_seen": 1084179264, "step": 6006 }, { "epoch": 0.6575987301239771, "grad_norm": 1.3769394461310809, "learning_rate": 1.3118035192067702e-05, "loss": 0.8466, "num_input_tokens_seen": 1084351968, "step": 6007 }, { "epoch": 0.657708202194915, "grad_norm": 1.3415270316080232, "learning_rate": 1.311047052305766e-05, "loss": 0.7496, "num_input_tokens_seen": 1084554464, "step": 6008 }, { "epoch": 0.6578176742658529, "grad_norm": 1.0897471067176212, "learning_rate": 1.310290726055623e-05, "loss": 0.5124, "num_input_tokens_seen": 1084725152, "step": 6009 }, { "epoch": 0.6579271463367908, "grad_norm": 1.3452282248937468, "learning_rate": 1.3095345405458115e-05, "loss": 0.6117, "num_input_tokens_seen": 1084877248, "step": 6010 }, { "epoch": 0.6580366184077288, "grad_norm": 1.3123560781134873, "learning_rate": 1.30877849586579e-05, "loss": 0.7874, "num_input_tokens_seen": 1085038752, "step": 6011 }, { "epoch": 0.6581460904786667, "grad_norm": 1.453314573986411, "learning_rate": 1.3080225921049938e-05, "loss": 0.6582, "num_input_tokens_seen": 1085184800, "step": 6012 }, { "epoch": 0.6582555625496045, "grad_norm": 1.331459884933135, "learning_rate": 1.3072668293528467e-05, "loss": 1.0005, "num_input_tokens_seen": 1085387968, "step": 6013 }, { "epoch": 0.6583650346205424, "grad_norm": 1.2052094868638241, "learning_rate": 1.306511207698754e-05, "loss": 0.7605, "num_input_tokens_seen": 1085574112, "step": 6014 }, { "epoch": 0.6584745066914803, "grad_norm": 1.2179147841736666, "learning_rate": 1.3057557272321036e-05, "loss": 0.7655, "num_input_tokens_seen": 1085742112, "step": 6015 }, { "epoch": 0.6585839787624183, "grad_norm": 1.2426153441835748, "learning_rate": 1.3050003880422676e-05, "loss": 0.7383, "num_input_tokens_seen": 1085933408, "step": 6016 }, { "epoch": 0.6586934508333562, "grad_norm": 1.338027167487777, "learning_rate": 1.3042451902186012e-05, "loss": 0.9374, "num_input_tokens_seen": 1086089312, "step": 6017 }, { "epoch": 0.6588029229042941, "grad_norm": 1.3662619809587777, "learning_rate": 1.303490133850443e-05, "loss": 0.7916, "num_input_tokens_seen": 1086225952, "step": 6018 }, { "epoch": 0.6589123949752319, "grad_norm": 1.3161560635860379, "learning_rate": 1.3027352190271136e-05, "loss": 0.8897, "num_input_tokens_seen": 1086415008, "step": 6019 }, { "epoch": 0.6590218670461698, "grad_norm": 1.2335113691154702, "learning_rate": 1.3019804458379204e-05, "loss": 0.6153, "num_input_tokens_seen": 1086553888, "step": 6020 }, { "epoch": 0.6591313391171078, "grad_norm": 1.2128122891055098, "learning_rate": 1.3012258143721499e-05, "loss": 0.7156, "num_input_tokens_seen": 1086725248, "step": 6021 }, { "epoch": 0.6592408111880457, "grad_norm": 1.2286341478094742, "learning_rate": 1.3004713247190736e-05, "loss": 0.8381, "num_input_tokens_seen": 1086923936, "step": 6022 }, { "epoch": 0.6593502832589836, "grad_norm": 1.2545974110033422, "learning_rate": 1.2997169769679468e-05, "loss": 0.6207, "num_input_tokens_seen": 1087081408, "step": 6023 }, { "epoch": 0.6594597553299214, "grad_norm": 1.4368485342859687, "learning_rate": 1.298962771208007e-05, "loss": 0.8838, "num_input_tokens_seen": 1087266208, "step": 6024 }, { "epoch": 0.6595692274008593, "grad_norm": 1.3622240049472558, "learning_rate": 1.2982087075284754e-05, "loss": 0.8405, "num_input_tokens_seen": 1087421440, "step": 6025 }, { "epoch": 0.6596786994717972, "grad_norm": 1.1599400893722582, "learning_rate": 1.2974547860185566e-05, "loss": 0.6128, "num_input_tokens_seen": 1087608928, "step": 6026 }, { "epoch": 0.6597881715427352, "grad_norm": 1.2293342325251415, "learning_rate": 1.2967010067674362e-05, "loss": 0.888, "num_input_tokens_seen": 1087778048, "step": 6027 }, { "epoch": 0.6598976436136731, "grad_norm": 1.155568875919702, "learning_rate": 1.2959473698642888e-05, "loss": 0.7787, "num_input_tokens_seen": 1088002944, "step": 6028 }, { "epoch": 0.660007115684611, "grad_norm": 1.2506664406898906, "learning_rate": 1.2951938753982634e-05, "loss": 0.8233, "num_input_tokens_seen": 1088193344, "step": 6029 }, { "epoch": 0.6601165877555488, "grad_norm": 1.2627910863868066, "learning_rate": 1.2944405234585005e-05, "loss": 0.7897, "num_input_tokens_seen": 1088376352, "step": 6030 }, { "epoch": 0.6602260598264867, "grad_norm": 1.3035288708602268, "learning_rate": 1.293687314134119e-05, "loss": 0.9002, "num_input_tokens_seen": 1088548832, "step": 6031 }, { "epoch": 0.6603355318974247, "grad_norm": 1.177714738318365, "learning_rate": 1.2929342475142225e-05, "loss": 0.7395, "num_input_tokens_seen": 1088729824, "step": 6032 }, { "epoch": 0.6604450039683626, "grad_norm": 1.1640131895130796, "learning_rate": 1.2921813236878965e-05, "loss": 0.5672, "num_input_tokens_seen": 1088920896, "step": 6033 }, { "epoch": 0.6605544760393005, "grad_norm": 1.2242642879232675, "learning_rate": 1.2914285427442102e-05, "loss": 0.6098, "num_input_tokens_seen": 1089099424, "step": 6034 }, { "epoch": 0.6606639481102384, "grad_norm": 1.3217232745596312, "learning_rate": 1.290675904772219e-05, "loss": 0.6151, "num_input_tokens_seen": 1089247712, "step": 6035 }, { "epoch": 0.6607734201811762, "grad_norm": 1.272570074733817, "learning_rate": 1.2899234098609541e-05, "loss": 0.8356, "num_input_tokens_seen": 1089447072, "step": 6036 }, { "epoch": 0.6608828922521142, "grad_norm": 1.2325674898963355, "learning_rate": 1.2891710580994387e-05, "loss": 0.8525, "num_input_tokens_seen": 1089637472, "step": 6037 }, { "epoch": 0.6609923643230521, "grad_norm": 1.2938333714845642, "learning_rate": 1.2884188495766709e-05, "loss": 0.7664, "num_input_tokens_seen": 1089835488, "step": 6038 }, { "epoch": 0.66110183639399, "grad_norm": 1.2951644279045993, "learning_rate": 1.2876667843816373e-05, "loss": 1.2275, "num_input_tokens_seen": 1090055680, "step": 6039 }, { "epoch": 0.6612113084649279, "grad_norm": 1.268398824488884, "learning_rate": 1.2869148626033059e-05, "loss": 0.8112, "num_input_tokens_seen": 1090246752, "step": 6040 }, { "epoch": 0.6613207805358657, "grad_norm": 1.2608088469076317, "learning_rate": 1.2861630843306271e-05, "loss": 0.7249, "num_input_tokens_seen": 1090412960, "step": 6041 }, { "epoch": 0.6614302526068037, "grad_norm": 1.2658563567263716, "learning_rate": 1.285411449652535e-05, "loss": 0.7935, "num_input_tokens_seen": 1090557216, "step": 6042 }, { "epoch": 0.6615397246777416, "grad_norm": 1.2173631063965886, "learning_rate": 1.2846599586579456e-05, "loss": 0.9119, "num_input_tokens_seen": 1090734176, "step": 6043 }, { "epoch": 0.6616491967486795, "grad_norm": 1.2584106255260357, "learning_rate": 1.2839086114357617e-05, "loss": 0.7617, "num_input_tokens_seen": 1090902848, "step": 6044 }, { "epoch": 0.6617586688196174, "grad_norm": 1.3422373752408812, "learning_rate": 1.2831574080748621e-05, "loss": 0.9162, "num_input_tokens_seen": 1091081824, "step": 6045 }, { "epoch": 0.6618681408905553, "grad_norm": 1.3405929561600103, "learning_rate": 1.2824063486641172e-05, "loss": 0.7773, "num_input_tokens_seen": 1091254528, "step": 6046 }, { "epoch": 0.6619776129614932, "grad_norm": 1.1417445084331395, "learning_rate": 1.2816554332923714e-05, "loss": 0.6688, "num_input_tokens_seen": 1091445152, "step": 6047 }, { "epoch": 0.6620870850324311, "grad_norm": 1.3374588986792952, "learning_rate": 1.28090466204846e-05, "loss": 0.7595, "num_input_tokens_seen": 1091625024, "step": 6048 }, { "epoch": 0.662196557103369, "grad_norm": 1.2764603946337714, "learning_rate": 1.2801540350211963e-05, "loss": 0.8138, "num_input_tokens_seen": 1091793248, "step": 6049 }, { "epoch": 0.6623060291743069, "grad_norm": 1.2365253887724494, "learning_rate": 1.2794035522993785e-05, "loss": 0.7339, "num_input_tokens_seen": 1091982752, "step": 6050 }, { "epoch": 0.6624155012452448, "grad_norm": 1.265494210349426, "learning_rate": 1.2786532139717872e-05, "loss": 0.7657, "num_input_tokens_seen": 1092163072, "step": 6051 }, { "epoch": 0.6625249733161828, "grad_norm": 1.1508625921129716, "learning_rate": 1.2779030201271846e-05, "loss": 0.6465, "num_input_tokens_seen": 1092331744, "step": 6052 }, { "epoch": 0.6626344453871206, "grad_norm": 1.3471504920967918, "learning_rate": 1.2771529708543203e-05, "loss": 0.7728, "num_input_tokens_seen": 1092510496, "step": 6053 }, { "epoch": 0.6627439174580585, "grad_norm": 1.246154305352926, "learning_rate": 1.2764030662419201e-05, "loss": 0.8976, "num_input_tokens_seen": 1092694400, "step": 6054 }, { "epoch": 0.6628533895289964, "grad_norm": 1.2367911533890596, "learning_rate": 1.2756533063786991e-05, "loss": 0.6845, "num_input_tokens_seen": 1092833280, "step": 6055 }, { "epoch": 0.6629628615999343, "grad_norm": 1.2199880718210905, "learning_rate": 1.2749036913533514e-05, "loss": 0.8025, "num_input_tokens_seen": 1092973728, "step": 6056 }, { "epoch": 0.6630723336708723, "grad_norm": 1.265387477485768, "learning_rate": 1.2741542212545549e-05, "loss": 0.6519, "num_input_tokens_seen": 1093153152, "step": 6057 }, { "epoch": 0.6631818057418101, "grad_norm": 1.2105664084558172, "learning_rate": 1.2734048961709707e-05, "loss": 1.0345, "num_input_tokens_seen": 1093359232, "step": 6058 }, { "epoch": 0.663291277812748, "grad_norm": 1.3233054733626715, "learning_rate": 1.2726557161912425e-05, "loss": 0.6562, "num_input_tokens_seen": 1093529248, "step": 6059 }, { "epoch": 0.6634007498836859, "grad_norm": 1.1867138619538846, "learning_rate": 1.2719066814039957e-05, "loss": 0.8941, "num_input_tokens_seen": 1093752352, "step": 6060 }, { "epoch": 0.6635102219546238, "grad_norm": 1.279845048943113, "learning_rate": 1.2711577918978417e-05, "loss": 0.7756, "num_input_tokens_seen": 1093933792, "step": 6061 }, { "epoch": 0.6636196940255618, "grad_norm": 1.2804736599877502, "learning_rate": 1.270409047761372e-05, "loss": 0.7306, "num_input_tokens_seen": 1094094624, "step": 6062 }, { "epoch": 0.6637291660964997, "grad_norm": 1.2777162061359895, "learning_rate": 1.2696604490831609e-05, "loss": 0.7176, "num_input_tokens_seen": 1094254336, "step": 6063 }, { "epoch": 0.6638386381674375, "grad_norm": 1.1581343168819567, "learning_rate": 1.268911995951767e-05, "loss": 0.6973, "num_input_tokens_seen": 1094463552, "step": 6064 }, { "epoch": 0.6639481102383754, "grad_norm": 1.2507721900016138, "learning_rate": 1.2681636884557307e-05, "loss": 0.9368, "num_input_tokens_seen": 1094674784, "step": 6065 }, { "epoch": 0.6640575823093133, "grad_norm": 1.1992057277705523, "learning_rate": 1.2674155266835753e-05, "loss": 0.5613, "num_input_tokens_seen": 1094836960, "step": 6066 }, { "epoch": 0.6641670543802513, "grad_norm": 1.2821273126126653, "learning_rate": 1.2666675107238052e-05, "loss": 0.9068, "num_input_tokens_seen": 1094999136, "step": 6067 }, { "epoch": 0.6642765264511892, "grad_norm": 1.2077264851369727, "learning_rate": 1.2659196406649132e-05, "loss": 0.904, "num_input_tokens_seen": 1095162432, "step": 6068 }, { "epoch": 0.6643859985221271, "grad_norm": 1.3221289875424456, "learning_rate": 1.2651719165953666e-05, "loss": 0.7755, "num_input_tokens_seen": 1095314528, "step": 6069 }, { "epoch": 0.6644954705930649, "grad_norm": 1.3138527483917222, "learning_rate": 1.2644243386036234e-05, "loss": 0.9319, "num_input_tokens_seen": 1095510080, "step": 6070 }, { "epoch": 0.6646049426640028, "grad_norm": 1.208977768591855, "learning_rate": 1.2636769067781173e-05, "loss": 0.6669, "num_input_tokens_seen": 1095724896, "step": 6071 }, { "epoch": 0.6647144147349408, "grad_norm": 1.181055450565375, "learning_rate": 1.2629296212072703e-05, "loss": 0.7786, "num_input_tokens_seen": 1095908128, "step": 6072 }, { "epoch": 0.6648238868058787, "grad_norm": 1.4493012520474828, "learning_rate": 1.2621824819794845e-05, "loss": 0.9956, "num_input_tokens_seen": 1096087328, "step": 6073 }, { "epoch": 0.6649333588768166, "grad_norm": 1.150592054468335, "learning_rate": 1.2614354891831437e-05, "loss": 0.6956, "num_input_tokens_seen": 1096290272, "step": 6074 }, { "epoch": 0.6650428309477544, "grad_norm": 1.3019414544691221, "learning_rate": 1.2606886429066186e-05, "loss": 0.7308, "num_input_tokens_seen": 1096457152, "step": 6075 }, { "epoch": 0.6651523030186923, "grad_norm": 1.310183296993549, "learning_rate": 1.2599419432382561e-05, "loss": 1.0721, "num_input_tokens_seen": 1096669952, "step": 6076 }, { "epoch": 0.6652617750896302, "grad_norm": 1.1720009799634201, "learning_rate": 1.259195390266393e-05, "loss": 0.8268, "num_input_tokens_seen": 1096871776, "step": 6077 }, { "epoch": 0.6653712471605682, "grad_norm": 1.1663363477299642, "learning_rate": 1.2584489840793414e-05, "loss": 0.7313, "num_input_tokens_seen": 1097084128, "step": 6078 }, { "epoch": 0.6654807192315061, "grad_norm": 1.429739112130851, "learning_rate": 1.2577027247654033e-05, "loss": 0.8143, "num_input_tokens_seen": 1097262880, "step": 6079 }, { "epoch": 0.665590191302444, "grad_norm": 1.3119700133804588, "learning_rate": 1.2569566124128563e-05, "loss": 0.8541, "num_input_tokens_seen": 1097426400, "step": 6080 }, { "epoch": 0.6656996633733818, "grad_norm": 1.2070427557634518, "learning_rate": 1.2562106471099667e-05, "loss": 0.7082, "num_input_tokens_seen": 1097608064, "step": 6081 }, { "epoch": 0.6658091354443197, "grad_norm": 1.0871439756588546, "learning_rate": 1.2554648289449798e-05, "loss": 0.6727, "num_input_tokens_seen": 1097815264, "step": 6082 }, { "epoch": 0.6659186075152577, "grad_norm": 1.1854105184403434, "learning_rate": 1.254719158006124e-05, "loss": 0.653, "num_input_tokens_seen": 1097969824, "step": 6083 }, { "epoch": 0.6660280795861956, "grad_norm": 1.3791215852743386, "learning_rate": 1.253973634381612e-05, "loss": 0.8874, "num_input_tokens_seen": 1098132224, "step": 6084 }, { "epoch": 0.6661375516571335, "grad_norm": 1.058126874495061, "learning_rate": 1.2532282581596355e-05, "loss": 0.5278, "num_input_tokens_seen": 1098320832, "step": 6085 }, { "epoch": 0.6662470237280714, "grad_norm": 1.2422933473415203, "learning_rate": 1.2524830294283748e-05, "loss": 0.9085, "num_input_tokens_seen": 1098487040, "step": 6086 }, { "epoch": 0.6663564957990092, "grad_norm": 1.2297062524628357, "learning_rate": 1.251737948275985e-05, "loss": 0.8951, "num_input_tokens_seen": 1098690208, "step": 6087 }, { "epoch": 0.6664659678699472, "grad_norm": 1.375952780675759, "learning_rate": 1.2509930147906107e-05, "loss": 0.9142, "num_input_tokens_seen": 1098881728, "step": 6088 }, { "epoch": 0.6665754399408851, "grad_norm": 1.233174379948385, "learning_rate": 1.2502482290603748e-05, "loss": 0.8937, "num_input_tokens_seen": 1099098336, "step": 6089 }, { "epoch": 0.666684912011823, "grad_norm": 1.2842881537635071, "learning_rate": 1.2495035911733844e-05, "loss": 0.9336, "num_input_tokens_seen": 1099292544, "step": 6090 }, { "epoch": 0.6667943840827609, "grad_norm": 1.2520973253446854, "learning_rate": 1.2487591012177285e-05, "loss": 0.911, "num_input_tokens_seen": 1099497952, "step": 6091 }, { "epoch": 0.6669038561536987, "grad_norm": 1.3112685920990044, "learning_rate": 1.2480147592814791e-05, "loss": 0.8022, "num_input_tokens_seen": 1099647584, "step": 6092 }, { "epoch": 0.6670133282246367, "grad_norm": 1.2437883534101022, "learning_rate": 1.2472705654526904e-05, "loss": 0.635, "num_input_tokens_seen": 1099802144, "step": 6093 }, { "epoch": 0.6671228002955746, "grad_norm": 1.4404262115953823, "learning_rate": 1.2465265198193977e-05, "loss": 1.0026, "num_input_tokens_seen": 1099975744, "step": 6094 }, { "epoch": 0.6672322723665125, "grad_norm": 1.29005920717854, "learning_rate": 1.2457826224696225e-05, "loss": 0.7759, "num_input_tokens_seen": 1100165696, "step": 6095 }, { "epoch": 0.6673417444374504, "grad_norm": 1.2079423841984729, "learning_rate": 1.2450388734913657e-05, "loss": 0.983, "num_input_tokens_seen": 1100363040, "step": 6096 }, { "epoch": 0.6674512165083883, "grad_norm": 1.2023154668722298, "learning_rate": 1.2442952729726109e-05, "loss": 0.6779, "num_input_tokens_seen": 1100553888, "step": 6097 }, { "epoch": 0.6675606885793262, "grad_norm": 1.2394430684890834, "learning_rate": 1.2435518210013248e-05, "loss": 0.7319, "num_input_tokens_seen": 1100712032, "step": 6098 }, { "epoch": 0.6676701606502641, "grad_norm": 1.2238368954848018, "learning_rate": 1.2428085176654563e-05, "loss": 0.7459, "num_input_tokens_seen": 1100894144, "step": 6099 }, { "epoch": 0.667779632721202, "grad_norm": 1.2999024042761846, "learning_rate": 1.2420653630529369e-05, "loss": 0.9332, "num_input_tokens_seen": 1101069088, "step": 6100 }, { "epoch": 0.6678891047921399, "grad_norm": 1.1566557816553, "learning_rate": 1.2413223572516802e-05, "loss": 0.6245, "num_input_tokens_seen": 1101256352, "step": 6101 }, { "epoch": 0.6679985768630778, "grad_norm": 1.436623845328877, "learning_rate": 1.2405795003495819e-05, "loss": 0.7321, "num_input_tokens_seen": 1101434208, "step": 6102 }, { "epoch": 0.6681080489340158, "grad_norm": 1.2254703786407002, "learning_rate": 1.2398367924345213e-05, "loss": 0.8827, "num_input_tokens_seen": 1101591232, "step": 6103 }, { "epoch": 0.6682175210049536, "grad_norm": 1.3029770289143312, "learning_rate": 1.2390942335943597e-05, "loss": 0.7978, "num_input_tokens_seen": 1101766624, "step": 6104 }, { "epoch": 0.6683269930758915, "grad_norm": 1.1296791677840412, "learning_rate": 1.2383518239169397e-05, "loss": 0.7489, "num_input_tokens_seen": 1101974496, "step": 6105 }, { "epoch": 0.6684364651468294, "grad_norm": 1.1989404311888758, "learning_rate": 1.2376095634900872e-05, "loss": 0.7039, "num_input_tokens_seen": 1102181696, "step": 6106 }, { "epoch": 0.6685459372177673, "grad_norm": 1.1530387205187973, "learning_rate": 1.2368674524016099e-05, "loss": 0.6993, "num_input_tokens_seen": 1102364032, "step": 6107 }, { "epoch": 0.6686554092887053, "grad_norm": 1.4089332412650626, "learning_rate": 1.2361254907392983e-05, "loss": 0.8255, "num_input_tokens_seen": 1102528000, "step": 6108 }, { "epoch": 0.6687648813596431, "grad_norm": 1.2422048219636639, "learning_rate": 1.235383678590924e-05, "loss": 0.7664, "num_input_tokens_seen": 1102719296, "step": 6109 }, { "epoch": 0.668874353430581, "grad_norm": 1.222399602406622, "learning_rate": 1.2346420160442449e-05, "loss": 0.8007, "num_input_tokens_seen": 1102883040, "step": 6110 }, { "epoch": 0.6689838255015189, "grad_norm": 1.3788797521881913, "learning_rate": 1.2339005031869947e-05, "loss": 0.7223, "num_input_tokens_seen": 1103021248, "step": 6111 }, { "epoch": 0.6690932975724568, "grad_norm": 1.382804094709293, "learning_rate": 1.2331591401068961e-05, "loss": 0.8199, "num_input_tokens_seen": 1103195296, "step": 6112 }, { "epoch": 0.6692027696433948, "grad_norm": 1.2870598235020279, "learning_rate": 1.2324179268916478e-05, "loss": 0.7567, "num_input_tokens_seen": 1103384576, "step": 6113 }, { "epoch": 0.6693122417143327, "grad_norm": 1.3276172340481278, "learning_rate": 1.2316768636289364e-05, "loss": 0.8462, "num_input_tokens_seen": 1103582368, "step": 6114 }, { "epoch": 0.6694217137852705, "grad_norm": 1.1556458828134262, "learning_rate": 1.2309359504064274e-05, "loss": 0.7676, "num_input_tokens_seen": 1103787552, "step": 6115 }, { "epoch": 0.6695311858562084, "grad_norm": 1.245642505605047, "learning_rate": 1.2301951873117687e-05, "loss": 0.7054, "num_input_tokens_seen": 1103959360, "step": 6116 }, { "epoch": 0.6696406579271463, "grad_norm": 1.2175831711107388, "learning_rate": 1.2294545744325935e-05, "loss": 0.8366, "num_input_tokens_seen": 1104164992, "step": 6117 }, { "epoch": 0.6697501299980843, "grad_norm": 1.2701466169354518, "learning_rate": 1.2287141118565116e-05, "loss": 0.7889, "num_input_tokens_seen": 1104357408, "step": 6118 }, { "epoch": 0.6698596020690222, "grad_norm": 1.269734574688587, "learning_rate": 1.2279737996711216e-05, "loss": 0.7905, "num_input_tokens_seen": 1104539744, "step": 6119 }, { "epoch": 0.6699690741399601, "grad_norm": 1.2012623822609099, "learning_rate": 1.2272336379639978e-05, "loss": 0.6631, "num_input_tokens_seen": 1104711776, "step": 6120 }, { "epoch": 0.6700785462108979, "grad_norm": 1.2144473002414644, "learning_rate": 1.2264936268227032e-05, "loss": 0.9891, "num_input_tokens_seen": 1104939584, "step": 6121 }, { "epoch": 0.6701880182818358, "grad_norm": 1.3759603614476053, "learning_rate": 1.2257537663347763e-05, "loss": 0.8852, "num_input_tokens_seen": 1105110720, "step": 6122 }, { "epoch": 0.6702974903527738, "grad_norm": 1.0801706671087177, "learning_rate": 1.2250140565877438e-05, "loss": 0.6681, "num_input_tokens_seen": 1105286112, "step": 6123 }, { "epoch": 0.6704069624237117, "grad_norm": 1.3071673462053024, "learning_rate": 1.2242744976691109e-05, "loss": 0.9507, "num_input_tokens_seen": 1105461728, "step": 6124 }, { "epoch": 0.6705164344946496, "grad_norm": 1.2096618331910023, "learning_rate": 1.2235350896663662e-05, "loss": 0.5057, "num_input_tokens_seen": 1105647648, "step": 6125 }, { "epoch": 0.6706259065655874, "grad_norm": 1.2688967483666438, "learning_rate": 1.2227958326669803e-05, "loss": 1.0572, "num_input_tokens_seen": 1105846784, "step": 6126 }, { "epoch": 0.6707353786365253, "grad_norm": 1.1178981414825881, "learning_rate": 1.2220567267584048e-05, "loss": 0.7698, "num_input_tokens_seen": 1106015904, "step": 6127 }, { "epoch": 0.6708448507074632, "grad_norm": 1.2246389187742843, "learning_rate": 1.2213177720280764e-05, "loss": 0.7536, "num_input_tokens_seen": 1106208992, "step": 6128 }, { "epoch": 0.6709543227784012, "grad_norm": 1.0438742832039314, "learning_rate": 1.2205789685634112e-05, "loss": 0.7414, "num_input_tokens_seen": 1106425152, "step": 6129 }, { "epoch": 0.6710637948493391, "grad_norm": 1.1813661871389558, "learning_rate": 1.2198403164518083e-05, "loss": 0.8358, "num_input_tokens_seen": 1106597408, "step": 6130 }, { "epoch": 0.671173266920277, "grad_norm": 1.1537885498682185, "learning_rate": 1.2191018157806485e-05, "loss": 0.807, "num_input_tokens_seen": 1106776832, "step": 6131 }, { "epoch": 0.6712827389912148, "grad_norm": 1.3581503251405278, "learning_rate": 1.2183634666372954e-05, "loss": 0.9014, "num_input_tokens_seen": 1106939680, "step": 6132 }, { "epoch": 0.6713922110621527, "grad_norm": 1.333169445257339, "learning_rate": 1.2176252691090939e-05, "loss": 0.676, "num_input_tokens_seen": 1107104544, "step": 6133 }, { "epoch": 0.6715016831330907, "grad_norm": 1.2134284210847848, "learning_rate": 1.2168872232833717e-05, "loss": 0.6751, "num_input_tokens_seen": 1107294720, "step": 6134 }, { "epoch": 0.6716111552040286, "grad_norm": 1.2483512017856084, "learning_rate": 1.216149329247437e-05, "loss": 0.7461, "num_input_tokens_seen": 1107487584, "step": 6135 }, { "epoch": 0.6717206272749665, "grad_norm": 1.2494515646392443, "learning_rate": 1.2154115870885838e-05, "loss": 0.7174, "num_input_tokens_seen": 1107647072, "step": 6136 }, { "epoch": 0.6718300993459044, "grad_norm": 1.2628465932101502, "learning_rate": 1.2146739968940838e-05, "loss": 0.6933, "num_input_tokens_seen": 1107805664, "step": 6137 }, { "epoch": 0.6719395714168422, "grad_norm": 1.3051942309535873, "learning_rate": 1.2139365587511927e-05, "loss": 0.7058, "num_input_tokens_seen": 1107973664, "step": 6138 }, { "epoch": 0.6720490434877802, "grad_norm": 1.3453651745770672, "learning_rate": 1.2131992727471484e-05, "loss": 0.6256, "num_input_tokens_seen": 1108133376, "step": 6139 }, { "epoch": 0.6721585155587181, "grad_norm": 1.3198000122063376, "learning_rate": 1.2124621389691702e-05, "loss": 0.9088, "num_input_tokens_seen": 1108321760, "step": 6140 }, { "epoch": 0.672267987629656, "grad_norm": 1.2859808876254781, "learning_rate": 1.2117251575044594e-05, "loss": 0.7775, "num_input_tokens_seen": 1108490432, "step": 6141 }, { "epoch": 0.6723774597005939, "grad_norm": 1.241794644018079, "learning_rate": 1.2109883284401987e-05, "loss": 0.7001, "num_input_tokens_seen": 1108668960, "step": 6142 }, { "epoch": 0.6724869317715317, "grad_norm": 1.08463934279056, "learning_rate": 1.2102516518635568e-05, "loss": 0.686, "num_input_tokens_seen": 1108865408, "step": 6143 }, { "epoch": 0.6725964038424697, "grad_norm": 1.119086245934941, "learning_rate": 1.2095151278616768e-05, "loss": 0.6469, "num_input_tokens_seen": 1109015936, "step": 6144 }, { "epoch": 0.6727058759134076, "grad_norm": 1.2946371510572543, "learning_rate": 1.208778756521691e-05, "loss": 1.0076, "num_input_tokens_seen": 1109195136, "step": 6145 }, { "epoch": 0.6728153479843455, "grad_norm": 1.163919119997394, "learning_rate": 1.20804253793071e-05, "loss": 0.7697, "num_input_tokens_seen": 1109403008, "step": 6146 }, { "epoch": 0.6729248200552834, "grad_norm": 1.1830473793873622, "learning_rate": 1.207306472175827e-05, "loss": 0.6655, "num_input_tokens_seen": 1109590944, "step": 6147 }, { "epoch": 0.6730342921262213, "grad_norm": 1.0754485702561187, "learning_rate": 1.2065705593441174e-05, "loss": 0.6112, "num_input_tokens_seen": 1109775296, "step": 6148 }, { "epoch": 0.6731437641971592, "grad_norm": 1.2490038240158878, "learning_rate": 1.2058347995226365e-05, "loss": 0.9071, "num_input_tokens_seen": 1109936576, "step": 6149 }, { "epoch": 0.6732532362680971, "grad_norm": 1.3905832948753079, "learning_rate": 1.2050991927984273e-05, "loss": 0.9468, "num_input_tokens_seen": 1110130784, "step": 6150 }, { "epoch": 0.673362708339035, "grad_norm": 1.213368983391267, "learning_rate": 1.2043637392585059e-05, "loss": 0.6709, "num_input_tokens_seen": 1110298560, "step": 6151 }, { "epoch": 0.6734721804099729, "grad_norm": 1.2657803098729015, "learning_rate": 1.2036284389898791e-05, "loss": 0.6217, "num_input_tokens_seen": 1110473056, "step": 6152 }, { "epoch": 0.6735816524809108, "grad_norm": 1.237857252629918, "learning_rate": 1.2028932920795284e-05, "loss": 0.6576, "num_input_tokens_seen": 1110640384, "step": 6153 }, { "epoch": 0.6736911245518488, "grad_norm": 1.153129749236626, "learning_rate": 1.2021582986144229e-05, "loss": 0.8934, "num_input_tokens_seen": 1110826304, "step": 6154 }, { "epoch": 0.6738005966227866, "grad_norm": 1.2765177818132663, "learning_rate": 1.2014234586815079e-05, "loss": 0.8256, "num_input_tokens_seen": 1111006624, "step": 6155 }, { "epoch": 0.6739100686937245, "grad_norm": 1.1327715636124431, "learning_rate": 1.2006887723677162e-05, "loss": 0.6484, "num_input_tokens_seen": 1111193216, "step": 6156 }, { "epoch": 0.6740195407646624, "grad_norm": 1.2671176914690339, "learning_rate": 1.1999542397599589e-05, "loss": 0.6988, "num_input_tokens_seen": 1111359424, "step": 6157 }, { "epoch": 0.6741290128356003, "grad_norm": 1.463998372598171, "learning_rate": 1.1992198609451288e-05, "loss": 0.6204, "num_input_tokens_seen": 1111534816, "step": 6158 }, { "epoch": 0.6742384849065383, "grad_norm": 1.2645784454418827, "learning_rate": 1.1984856360101045e-05, "loss": 0.8895, "num_input_tokens_seen": 1111714464, "step": 6159 }, { "epoch": 0.6743479569774761, "grad_norm": 1.1329005766204543, "learning_rate": 1.1977515650417396e-05, "loss": 0.7933, "num_input_tokens_seen": 1111899040, "step": 6160 }, { "epoch": 0.674457429048414, "grad_norm": 1.3195571092835068, "learning_rate": 1.1970176481268766e-05, "loss": 0.9438, "num_input_tokens_seen": 1112076448, "step": 6161 }, { "epoch": 0.6745669011193519, "grad_norm": 1.293868382884122, "learning_rate": 1.1962838853523335e-05, "loss": 0.7082, "num_input_tokens_seen": 1112230784, "step": 6162 }, { "epoch": 0.6746763731902898, "grad_norm": 1.2771058867279652, "learning_rate": 1.195550276804915e-05, "loss": 0.698, "num_input_tokens_seen": 1112420512, "step": 6163 }, { "epoch": 0.6747858452612278, "grad_norm": 1.1847258337566329, "learning_rate": 1.1948168225714051e-05, "loss": 0.6488, "num_input_tokens_seen": 1112602400, "step": 6164 }, { "epoch": 0.6748953173321657, "grad_norm": 1.3913649401350325, "learning_rate": 1.1940835227385702e-05, "loss": 0.7395, "num_input_tokens_seen": 1112771744, "step": 6165 }, { "epoch": 0.6750047894031035, "grad_norm": 1.0816182474154594, "learning_rate": 1.1933503773931581e-05, "loss": 0.9476, "num_input_tokens_seen": 1112982304, "step": 6166 }, { "epoch": 0.6751142614740414, "grad_norm": 1.3087060225420395, "learning_rate": 1.1926173866218984e-05, "loss": 0.8011, "num_input_tokens_seen": 1113165984, "step": 6167 }, { "epoch": 0.6752237335449793, "grad_norm": 1.2350247278710917, "learning_rate": 1.1918845505115025e-05, "loss": 0.947, "num_input_tokens_seen": 1113344736, "step": 6168 }, { "epoch": 0.6753332056159173, "grad_norm": 1.4382088613450128, "learning_rate": 1.1911518691486626e-05, "loss": 0.8513, "num_input_tokens_seen": 1113500416, "step": 6169 }, { "epoch": 0.6754426776868552, "grad_norm": 1.237903525326488, "learning_rate": 1.1904193426200554e-05, "loss": 0.866, "num_input_tokens_seen": 1113691040, "step": 6170 }, { "epoch": 0.6755521497577931, "grad_norm": 1.1838120363086637, "learning_rate": 1.1896869710123368e-05, "loss": 0.9745, "num_input_tokens_seen": 1113865984, "step": 6171 }, { "epoch": 0.6756616218287309, "grad_norm": 1.1875751371203667, "learning_rate": 1.1889547544121443e-05, "loss": 0.6319, "num_input_tokens_seen": 1114042720, "step": 6172 }, { "epoch": 0.6757710938996688, "grad_norm": 1.2620300286917512, "learning_rate": 1.1882226929060982e-05, "loss": 0.9203, "num_input_tokens_seen": 1114234688, "step": 6173 }, { "epoch": 0.6758805659706068, "grad_norm": 1.4976539544141168, "learning_rate": 1.1874907865808e-05, "loss": 0.7321, "num_input_tokens_seen": 1114382528, "step": 6174 }, { "epoch": 0.6759900380415447, "grad_norm": 1.2586318954474378, "learning_rate": 1.1867590355228326e-05, "loss": 0.8856, "num_input_tokens_seen": 1114561056, "step": 6175 }, { "epoch": 0.6760995101124826, "grad_norm": 1.099706614990264, "learning_rate": 1.1860274398187605e-05, "loss": 0.8121, "num_input_tokens_seen": 1114727936, "step": 6176 }, { "epoch": 0.6762089821834204, "grad_norm": 1.1558496967927654, "learning_rate": 1.1852959995551297e-05, "loss": 0.6789, "num_input_tokens_seen": 1114938048, "step": 6177 }, { "epoch": 0.6763184542543583, "grad_norm": 1.1812101730818032, "learning_rate": 1.1845647148184696e-05, "loss": 0.9061, "num_input_tokens_seen": 1115113888, "step": 6178 }, { "epoch": 0.6764279263252962, "grad_norm": 1.2257832280976362, "learning_rate": 1.1838335856952893e-05, "loss": 0.55, "num_input_tokens_seen": 1115291296, "step": 6179 }, { "epoch": 0.6765373983962342, "grad_norm": 1.196851101629242, "learning_rate": 1.1831026122720795e-05, "loss": 0.8134, "num_input_tokens_seen": 1115462880, "step": 6180 }, { "epoch": 0.6766468704671721, "grad_norm": 1.2342427822718713, "learning_rate": 1.1823717946353134e-05, "loss": 0.8201, "num_input_tokens_seen": 1115627968, "step": 6181 }, { "epoch": 0.67675634253811, "grad_norm": 1.2074233223873412, "learning_rate": 1.181641132871445e-05, "loss": 0.8666, "num_input_tokens_seen": 1115833824, "step": 6182 }, { "epoch": 0.6768658146090478, "grad_norm": 1.1289934170871463, "learning_rate": 1.1809106270669104e-05, "loss": 0.8388, "num_input_tokens_seen": 1116063200, "step": 6183 }, { "epoch": 0.6769752866799857, "grad_norm": 1.1555361219345153, "learning_rate": 1.1801802773081258e-05, "loss": 0.6171, "num_input_tokens_seen": 1116205888, "step": 6184 }, { "epoch": 0.6770847587509237, "grad_norm": 1.3524083043320467, "learning_rate": 1.1794500836814933e-05, "loss": 0.879, "num_input_tokens_seen": 1116367168, "step": 6185 }, { "epoch": 0.6771942308218616, "grad_norm": 1.1894653520127922, "learning_rate": 1.1787200462733897e-05, "loss": 0.6222, "num_input_tokens_seen": 1116566976, "step": 6186 }, { "epoch": 0.6773037028927995, "grad_norm": 1.196770122106297, "learning_rate": 1.1779901651701796e-05, "loss": 0.7331, "num_input_tokens_seen": 1116739904, "step": 6187 }, { "epoch": 0.6774131749637374, "grad_norm": 1.1758481637157792, "learning_rate": 1.1772604404582057e-05, "loss": 0.8141, "num_input_tokens_seen": 1116943072, "step": 6188 }, { "epoch": 0.6775226470346752, "grad_norm": 1.2475906517254634, "learning_rate": 1.176530872223793e-05, "loss": 0.6077, "num_input_tokens_seen": 1117128096, "step": 6189 }, { "epoch": 0.6776321191056132, "grad_norm": 1.3573642197035163, "learning_rate": 1.1758014605532483e-05, "loss": 0.9152, "num_input_tokens_seen": 1117299008, "step": 6190 }, { "epoch": 0.6777415911765511, "grad_norm": 1.286593482863128, "learning_rate": 1.1750722055328581e-05, "loss": 0.6609, "num_input_tokens_seen": 1117484704, "step": 6191 }, { "epoch": 0.677851063247489, "grad_norm": 1.2917396258186222, "learning_rate": 1.1743431072488952e-05, "loss": 0.7009, "num_input_tokens_seen": 1117683840, "step": 6192 }, { "epoch": 0.6779605353184269, "grad_norm": 1.3694784969216873, "learning_rate": 1.1736141657876068e-05, "loss": 1.1124, "num_input_tokens_seen": 1117873120, "step": 6193 }, { "epoch": 0.6780700073893647, "grad_norm": 1.3298979649805927, "learning_rate": 1.1728853812352286e-05, "loss": 0.7856, "num_input_tokens_seen": 1118009536, "step": 6194 }, { "epoch": 0.6781794794603027, "grad_norm": 1.1784302030310632, "learning_rate": 1.172156753677971e-05, "loss": 0.6702, "num_input_tokens_seen": 1118185600, "step": 6195 }, { "epoch": 0.6782889515312406, "grad_norm": 1.249137342328086, "learning_rate": 1.1714282832020318e-05, "loss": 1.0739, "num_input_tokens_seen": 1118387872, "step": 6196 }, { "epoch": 0.6783984236021785, "grad_norm": 1.2104355294644942, "learning_rate": 1.170699969893587e-05, "loss": 0.674, "num_input_tokens_seen": 1118593504, "step": 6197 }, { "epoch": 0.6785078956731164, "grad_norm": 1.2776001839070914, "learning_rate": 1.1699718138387947e-05, "loss": 0.8084, "num_input_tokens_seen": 1118769568, "step": 6198 }, { "epoch": 0.6786173677440543, "grad_norm": 1.3135815206235002, "learning_rate": 1.1692438151237942e-05, "loss": 0.8557, "num_input_tokens_seen": 1118948544, "step": 6199 }, { "epoch": 0.6787268398149922, "grad_norm": 1.363200975038511, "learning_rate": 1.1685159738347054e-05, "loss": 0.9654, "num_input_tokens_seen": 1119095712, "step": 6200 }, { "epoch": 0.6788363118859301, "grad_norm": 1.3156093369953774, "learning_rate": 1.1677882900576334e-05, "loss": 0.7937, "num_input_tokens_seen": 1119273568, "step": 6201 }, { "epoch": 0.678945783956868, "grad_norm": 1.2977706390250563, "learning_rate": 1.1670607638786579e-05, "loss": 0.925, "num_input_tokens_seen": 1119467552, "step": 6202 }, { "epoch": 0.6790552560278059, "grad_norm": 1.4018538530500528, "learning_rate": 1.166333395383847e-05, "loss": 0.7855, "num_input_tokens_seen": 1119625920, "step": 6203 }, { "epoch": 0.6791647280987438, "grad_norm": 1.3064494770090318, "learning_rate": 1.1656061846592458e-05, "loss": 0.6556, "num_input_tokens_seen": 1119830432, "step": 6204 }, { "epoch": 0.6792742001696818, "grad_norm": 1.2618042057729717, "learning_rate": 1.1648791317908822e-05, "loss": 0.8691, "num_input_tokens_seen": 1119971552, "step": 6205 }, { "epoch": 0.6793836722406196, "grad_norm": 1.3277016357623264, "learning_rate": 1.164152236864765e-05, "loss": 0.8292, "num_input_tokens_seen": 1120157248, "step": 6206 }, { "epoch": 0.6794931443115575, "grad_norm": 1.1811805523030665, "learning_rate": 1.1634254999668842e-05, "loss": 0.6862, "num_input_tokens_seen": 1120350112, "step": 6207 }, { "epoch": 0.6796026163824954, "grad_norm": 1.2083270083733246, "learning_rate": 1.162698921183212e-05, "loss": 0.8896, "num_input_tokens_seen": 1120559104, "step": 6208 }, { "epoch": 0.6797120884534333, "grad_norm": 1.1282324974854532, "learning_rate": 1.1619725005997007e-05, "loss": 0.7586, "num_input_tokens_seen": 1120738304, "step": 6209 }, { "epoch": 0.6798215605243713, "grad_norm": 1.3420611125516242, "learning_rate": 1.1612462383022838e-05, "loss": 0.9708, "num_input_tokens_seen": 1120937440, "step": 6210 }, { "epoch": 0.6799310325953091, "grad_norm": 1.0344191319942226, "learning_rate": 1.1605201343768787e-05, "loss": 0.5127, "num_input_tokens_seen": 1121113056, "step": 6211 }, { "epoch": 0.680040504666247, "grad_norm": 1.1803996986779164, "learning_rate": 1.1597941889093808e-05, "loss": 0.7639, "num_input_tokens_seen": 1121317344, "step": 6212 }, { "epoch": 0.6801499767371849, "grad_norm": 1.2346263098513297, "learning_rate": 1.1590684019856687e-05, "loss": 0.6956, "num_input_tokens_seen": 1121484672, "step": 6213 }, { "epoch": 0.6802594488081228, "grad_norm": 1.3694662232093315, "learning_rate": 1.1583427736916008e-05, "loss": 0.866, "num_input_tokens_seen": 1121647744, "step": 6214 }, { "epoch": 0.6803689208790608, "grad_norm": 1.1350836250822538, "learning_rate": 1.1576173041130184e-05, "loss": 0.6814, "num_input_tokens_seen": 1121851136, "step": 6215 }, { "epoch": 0.6804783929499987, "grad_norm": 1.234138417810373, "learning_rate": 1.1568919933357423e-05, "loss": 0.8806, "num_input_tokens_seen": 1122040640, "step": 6216 }, { "epoch": 0.6805878650209365, "grad_norm": 1.2636771577845527, "learning_rate": 1.1561668414455751e-05, "loss": 0.7274, "num_input_tokens_seen": 1122231712, "step": 6217 }, { "epoch": 0.6806973370918744, "grad_norm": 1.2575164351707004, "learning_rate": 1.1554418485283033e-05, "loss": 0.7445, "num_input_tokens_seen": 1122389632, "step": 6218 }, { "epoch": 0.6808068091628123, "grad_norm": 1.3016576249072749, "learning_rate": 1.1547170146696887e-05, "loss": 0.6946, "num_input_tokens_seen": 1122603776, "step": 6219 }, { "epoch": 0.6809162812337503, "grad_norm": 1.327869884447717, "learning_rate": 1.1539923399554805e-05, "loss": 0.9101, "num_input_tokens_seen": 1122769760, "step": 6220 }, { "epoch": 0.6810257533046882, "grad_norm": 1.244559512443062, "learning_rate": 1.1532678244714055e-05, "loss": 0.7528, "num_input_tokens_seen": 1122940896, "step": 6221 }, { "epoch": 0.6811352253756261, "grad_norm": 1.3500705930125463, "learning_rate": 1.1525434683031718e-05, "loss": 0.691, "num_input_tokens_seen": 1123101504, "step": 6222 }, { "epoch": 0.6812446974465639, "grad_norm": 1.2328528664752667, "learning_rate": 1.1518192715364704e-05, "loss": 0.7209, "num_input_tokens_seen": 1123290336, "step": 6223 }, { "epoch": 0.6813541695175018, "grad_norm": 1.2962650565263418, "learning_rate": 1.1510952342569708e-05, "loss": 0.8309, "num_input_tokens_seen": 1123474912, "step": 6224 }, { "epoch": 0.6814636415884398, "grad_norm": 1.2878655627620506, "learning_rate": 1.1503713565503282e-05, "loss": 0.7177, "num_input_tokens_seen": 1123648288, "step": 6225 }, { "epoch": 0.6815731136593777, "grad_norm": 1.439112877973551, "learning_rate": 1.1496476385021723e-05, "loss": 0.7875, "num_input_tokens_seen": 1123829728, "step": 6226 }, { "epoch": 0.6816825857303156, "grad_norm": 1.2708064321370172, "learning_rate": 1.148924080198121e-05, "loss": 0.7414, "num_input_tokens_seen": 1123987872, "step": 6227 }, { "epoch": 0.6817920578012534, "grad_norm": 1.354433835401564, "learning_rate": 1.1482006817237665e-05, "loss": 0.7983, "num_input_tokens_seen": 1124173792, "step": 6228 }, { "epoch": 0.6819015298721913, "grad_norm": 1.2103248276834944, "learning_rate": 1.1474774431646878e-05, "loss": 0.7431, "num_input_tokens_seen": 1124341792, "step": 6229 }, { "epoch": 0.6820110019431292, "grad_norm": 1.2289847492578523, "learning_rate": 1.1467543646064424e-05, "loss": 0.6668, "num_input_tokens_seen": 1124510912, "step": 6230 }, { "epoch": 0.6821204740140672, "grad_norm": 1.1676717016830445, "learning_rate": 1.1460314461345684e-05, "loss": 0.843, "num_input_tokens_seen": 1124711616, "step": 6231 }, { "epoch": 0.6822299460850051, "grad_norm": 1.1564947333242754, "learning_rate": 1.1453086878345862e-05, "loss": 0.5774, "num_input_tokens_seen": 1124856992, "step": 6232 }, { "epoch": 0.682339418155943, "grad_norm": 1.2962519516373807, "learning_rate": 1.1445860897919951e-05, "loss": 0.7167, "num_input_tokens_seen": 1125007296, "step": 6233 }, { "epoch": 0.6824488902268808, "grad_norm": 1.2494155471122668, "learning_rate": 1.1438636520922807e-05, "loss": 1.0314, "num_input_tokens_seen": 1125205088, "step": 6234 }, { "epoch": 0.6825583622978187, "grad_norm": 1.0938264002683196, "learning_rate": 1.1431413748209021e-05, "loss": 0.594, "num_input_tokens_seen": 1125390112, "step": 6235 }, { "epoch": 0.6826678343687567, "grad_norm": 1.2471503677900453, "learning_rate": 1.1424192580633067e-05, "loss": 0.6734, "num_input_tokens_seen": 1125586560, "step": 6236 }, { "epoch": 0.6827773064396946, "grad_norm": 1.1027432870048686, "learning_rate": 1.1416973019049156e-05, "loss": 0.6722, "num_input_tokens_seen": 1125789728, "step": 6237 }, { "epoch": 0.6828867785106325, "grad_norm": 1.3034469280025416, "learning_rate": 1.1409755064311384e-05, "loss": 0.7834, "num_input_tokens_seen": 1125974304, "step": 6238 }, { "epoch": 0.6829962505815704, "grad_norm": 1.297447262171787, "learning_rate": 1.1402538717273605e-05, "loss": 0.6759, "num_input_tokens_seen": 1126190464, "step": 6239 }, { "epoch": 0.6831057226525082, "grad_norm": 1.3187675239881154, "learning_rate": 1.1395323978789504e-05, "loss": 0.789, "num_input_tokens_seen": 1126371232, "step": 6240 }, { "epoch": 0.6832151947234462, "grad_norm": 1.2579584451674333, "learning_rate": 1.138811084971257e-05, "loss": 0.8276, "num_input_tokens_seen": 1126569696, "step": 6241 }, { "epoch": 0.6833246667943841, "grad_norm": 1.3054054249460965, "learning_rate": 1.1380899330896086e-05, "loss": 0.7347, "num_input_tokens_seen": 1126769280, "step": 6242 }, { "epoch": 0.683434138865322, "grad_norm": 1.1681736795029232, "learning_rate": 1.13736894231932e-05, "loss": 0.7242, "num_input_tokens_seen": 1126942880, "step": 6243 }, { "epoch": 0.6835436109362599, "grad_norm": 1.1291646166755864, "learning_rate": 1.1366481127456785e-05, "loss": 0.8621, "num_input_tokens_seen": 1127129024, "step": 6244 }, { "epoch": 0.6836530830071977, "grad_norm": 1.4827178538981003, "learning_rate": 1.1359274444539596e-05, "loss": 0.8514, "num_input_tokens_seen": 1127304864, "step": 6245 }, { "epoch": 0.6837625550781357, "grad_norm": 1.185616756686047, "learning_rate": 1.1352069375294169e-05, "loss": 0.7992, "num_input_tokens_seen": 1127475776, "step": 6246 }, { "epoch": 0.6838720271490736, "grad_norm": 1.440421163242786, "learning_rate": 1.1344865920572842e-05, "loss": 0.9037, "num_input_tokens_seen": 1127643552, "step": 6247 }, { "epoch": 0.6839814992200115, "grad_norm": 1.1129199596219683, "learning_rate": 1.1337664081227773e-05, "loss": 0.6161, "num_input_tokens_seen": 1127804832, "step": 6248 }, { "epoch": 0.6840909712909494, "grad_norm": 1.1741671207572397, "learning_rate": 1.1330463858110927e-05, "loss": 0.952, "num_input_tokens_seen": 1127989856, "step": 6249 }, { "epoch": 0.6842004433618873, "grad_norm": 1.2864597747835844, "learning_rate": 1.132326525207406e-05, "loss": 0.8606, "num_input_tokens_seen": 1128178464, "step": 6250 }, { "epoch": 0.6843099154328252, "grad_norm": 1.3026776851918846, "learning_rate": 1.1316068263968793e-05, "loss": 0.9025, "num_input_tokens_seen": 1128336384, "step": 6251 }, { "epoch": 0.6844193875037631, "grad_norm": 1.0648196088219193, "learning_rate": 1.130887289464647e-05, "loss": 0.7587, "num_input_tokens_seen": 1128537536, "step": 6252 }, { "epoch": 0.684528859574701, "grad_norm": 1.1324288606510453, "learning_rate": 1.1301679144958318e-05, "loss": 0.6645, "num_input_tokens_seen": 1128690080, "step": 6253 }, { "epoch": 0.6846383316456389, "grad_norm": 1.2369323025576358, "learning_rate": 1.1294487015755339e-05, "loss": 0.7183, "num_input_tokens_seen": 1128860096, "step": 6254 }, { "epoch": 0.6847478037165768, "grad_norm": 1.1412220267704205, "learning_rate": 1.1287296507888345e-05, "loss": 0.8811, "num_input_tokens_seen": 1129056096, "step": 6255 }, { "epoch": 0.6848572757875148, "grad_norm": 1.267500347460938, "learning_rate": 1.1280107622207962e-05, "loss": 0.8225, "num_input_tokens_seen": 1129250752, "step": 6256 }, { "epoch": 0.6849667478584526, "grad_norm": 1.2298493779232622, "learning_rate": 1.1272920359564607e-05, "loss": 0.8315, "num_input_tokens_seen": 1129418528, "step": 6257 }, { "epoch": 0.6850762199293905, "grad_norm": 1.4306199831893371, "learning_rate": 1.1265734720808549e-05, "loss": 0.8788, "num_input_tokens_seen": 1129597728, "step": 6258 }, { "epoch": 0.6851856920003284, "grad_norm": 1.403800016957495, "learning_rate": 1.1258550706789803e-05, "loss": 1.1224, "num_input_tokens_seen": 1129783872, "step": 6259 }, { "epoch": 0.6852951640712663, "grad_norm": 1.2849600505927268, "learning_rate": 1.1251368318358254e-05, "loss": 0.7769, "num_input_tokens_seen": 1129941792, "step": 6260 }, { "epoch": 0.6854046361422043, "grad_norm": 1.2529189361325055, "learning_rate": 1.1244187556363536e-05, "loss": 0.8395, "num_input_tokens_seen": 1130145408, "step": 6261 }, { "epoch": 0.6855141082131421, "grad_norm": 1.047121275216771, "learning_rate": 1.123700842165514e-05, "loss": 0.6023, "num_input_tokens_seen": 1130351712, "step": 6262 }, { "epoch": 0.68562358028408, "grad_norm": 1.245253611551932, "learning_rate": 1.1229830915082337e-05, "loss": 0.8637, "num_input_tokens_seen": 1130546592, "step": 6263 }, { "epoch": 0.6857330523550179, "grad_norm": 1.342295413847106, "learning_rate": 1.1222655037494215e-05, "loss": 0.838, "num_input_tokens_seen": 1130727584, "step": 6264 }, { "epoch": 0.6858425244259558, "grad_norm": 1.3024235344551796, "learning_rate": 1.1215480789739662e-05, "loss": 0.8799, "num_input_tokens_seen": 1130896480, "step": 6265 }, { "epoch": 0.6859519964968938, "grad_norm": 1.261178340861026, "learning_rate": 1.120830817266737e-05, "loss": 0.7976, "num_input_tokens_seen": 1131084640, "step": 6266 }, { "epoch": 0.6860614685678317, "grad_norm": 1.1530644636106957, "learning_rate": 1.1201137187125876e-05, "loss": 0.8153, "num_input_tokens_seen": 1131271008, "step": 6267 }, { "epoch": 0.6861709406387695, "grad_norm": 1.263779224393657, "learning_rate": 1.1193967833963455e-05, "loss": 0.7176, "num_input_tokens_seen": 1131432960, "step": 6268 }, { "epoch": 0.6862804127097074, "grad_norm": 1.2892592308893074, "learning_rate": 1.1186800114028268e-05, "loss": 0.7403, "num_input_tokens_seen": 1131625152, "step": 6269 }, { "epoch": 0.6863898847806453, "grad_norm": 1.2653198345232721, "learning_rate": 1.1179634028168198e-05, "loss": 0.7302, "num_input_tokens_seen": 1131808384, "step": 6270 }, { "epoch": 0.6864993568515833, "grad_norm": 1.3097604589363243, "learning_rate": 1.1172469577231016e-05, "loss": 0.6424, "num_input_tokens_seen": 1131938080, "step": 6271 }, { "epoch": 0.6866088289225212, "grad_norm": 1.0621321546294062, "learning_rate": 1.1165306762064246e-05, "loss": 0.6995, "num_input_tokens_seen": 1132131616, "step": 6272 }, { "epoch": 0.6867183009934591, "grad_norm": 1.3317389293315776, "learning_rate": 1.115814558351524e-05, "loss": 0.7676, "num_input_tokens_seen": 1132302752, "step": 6273 }, { "epoch": 0.6868277730643969, "grad_norm": 1.449629897535399, "learning_rate": 1.1150986042431147e-05, "loss": 0.9329, "num_input_tokens_seen": 1132481504, "step": 6274 }, { "epoch": 0.6869372451353348, "grad_norm": 1.4289044179430146, "learning_rate": 1.1143828139658924e-05, "loss": 0.979, "num_input_tokens_seen": 1132662048, "step": 6275 }, { "epoch": 0.6870467172062728, "grad_norm": 1.2824474414667104, "learning_rate": 1.1136671876045363e-05, "loss": 0.7348, "num_input_tokens_seen": 1132854688, "step": 6276 }, { "epoch": 0.6871561892772107, "grad_norm": 1.1942059749532379, "learning_rate": 1.1129517252436996e-05, "loss": 0.5928, "num_input_tokens_seen": 1133035456, "step": 6277 }, { "epoch": 0.6872656613481486, "grad_norm": 1.2258046232466508, "learning_rate": 1.1122364269680236e-05, "loss": 0.8864, "num_input_tokens_seen": 1133244448, "step": 6278 }, { "epoch": 0.6873751334190864, "grad_norm": 1.4249351185534, "learning_rate": 1.1115212928621249e-05, "loss": 0.7834, "num_input_tokens_seen": 1133388480, "step": 6279 }, { "epoch": 0.6874846054900243, "grad_norm": 1.3499203888060203, "learning_rate": 1.1108063230106031e-05, "loss": 0.7376, "num_input_tokens_seen": 1133576416, "step": 6280 }, { "epoch": 0.6875940775609622, "grad_norm": 1.2496555330576402, "learning_rate": 1.1100915174980375e-05, "loss": 0.8915, "num_input_tokens_seen": 1133771072, "step": 6281 }, { "epoch": 0.6877035496319002, "grad_norm": 1.1659272893674788, "learning_rate": 1.1093768764089881e-05, "loss": 0.814, "num_input_tokens_seen": 1133956096, "step": 6282 }, { "epoch": 0.6878130217028381, "grad_norm": 1.270530753254767, "learning_rate": 1.1086623998279963e-05, "loss": 0.7482, "num_input_tokens_seen": 1134137088, "step": 6283 }, { "epoch": 0.687922493773776, "grad_norm": 1.1375821429765163, "learning_rate": 1.1079480878395817e-05, "loss": 0.8329, "num_input_tokens_seen": 1134311584, "step": 6284 }, { "epoch": 0.6880319658447138, "grad_norm": 1.174227223013576, "learning_rate": 1.107233940528248e-05, "loss": 0.7921, "num_input_tokens_seen": 1134512064, "step": 6285 }, { "epoch": 0.6881414379156517, "grad_norm": 1.2774911964546147, "learning_rate": 1.1065199579784767e-05, "loss": 0.7124, "num_input_tokens_seen": 1134682080, "step": 6286 }, { "epoch": 0.6882509099865897, "grad_norm": 1.215569723877945, "learning_rate": 1.1058061402747305e-05, "loss": 0.7193, "num_input_tokens_seen": 1134857696, "step": 6287 }, { "epoch": 0.6883603820575276, "grad_norm": 1.1306778361815437, "learning_rate": 1.1050924875014527e-05, "loss": 0.6807, "num_input_tokens_seen": 1135041824, "step": 6288 }, { "epoch": 0.6884698541284655, "grad_norm": 1.1596173919891997, "learning_rate": 1.1043789997430668e-05, "loss": 0.7353, "num_input_tokens_seen": 1135229760, "step": 6289 }, { "epoch": 0.6885793261994034, "grad_norm": 1.0499983923763503, "learning_rate": 1.1036656770839774e-05, "loss": 0.6339, "num_input_tokens_seen": 1135401792, "step": 6290 }, { "epoch": 0.6886887982703412, "grad_norm": 1.232028952955041, "learning_rate": 1.1029525196085691e-05, "loss": 0.7396, "num_input_tokens_seen": 1135569568, "step": 6291 }, { "epoch": 0.6887982703412792, "grad_norm": 1.2619026451615765, "learning_rate": 1.102239527401206e-05, "loss": 0.8012, "num_input_tokens_seen": 1135728384, "step": 6292 }, { "epoch": 0.6889077424122171, "grad_norm": 1.235854891440886, "learning_rate": 1.1015267005462366e-05, "loss": 0.9246, "num_input_tokens_seen": 1135907136, "step": 6293 }, { "epoch": 0.689017214483155, "grad_norm": 1.234703792772518, "learning_rate": 1.1008140391279834e-05, "loss": 0.8184, "num_input_tokens_seen": 1136100000, "step": 6294 }, { "epoch": 0.6891266865540929, "grad_norm": 1.269986209043582, "learning_rate": 1.1001015432307554e-05, "loss": 0.8115, "num_input_tokens_seen": 1136301376, "step": 6295 }, { "epoch": 0.6892361586250307, "grad_norm": 1.3984032738615642, "learning_rate": 1.0993892129388385e-05, "loss": 0.8367, "num_input_tokens_seen": 1136484608, "step": 6296 }, { "epoch": 0.6893456306959687, "grad_norm": 1.3888855674492009, "learning_rate": 1.0986770483365005e-05, "loss": 0.8339, "num_input_tokens_seen": 1136664480, "step": 6297 }, { "epoch": 0.6894551027669066, "grad_norm": 1.3618800879108122, "learning_rate": 1.0979650495079888e-05, "loss": 0.8491, "num_input_tokens_seen": 1136812320, "step": 6298 }, { "epoch": 0.6895645748378445, "grad_norm": 1.153166448163536, "learning_rate": 1.0972532165375305e-05, "loss": 0.7752, "num_input_tokens_seen": 1137021312, "step": 6299 }, { "epoch": 0.6896740469087824, "grad_norm": 1.1073740792430509, "learning_rate": 1.0965415495093368e-05, "loss": 0.8371, "num_input_tokens_seen": 1137206336, "step": 6300 }, { "epoch": 0.6897835189797203, "grad_norm": 1.3367617111392214, "learning_rate": 1.0958300485075931e-05, "loss": 0.9483, "num_input_tokens_seen": 1137359104, "step": 6301 }, { "epoch": 0.6898929910506582, "grad_norm": 1.2855123009287905, "learning_rate": 1.095118713616472e-05, "loss": 0.6081, "num_input_tokens_seen": 1137535392, "step": 6302 }, { "epoch": 0.6900024631215961, "grad_norm": 1.3657397643167821, "learning_rate": 1.09440754492012e-05, "loss": 0.6693, "num_input_tokens_seen": 1137717280, "step": 6303 }, { "epoch": 0.690111935192534, "grad_norm": 1.2946780336233272, "learning_rate": 1.093696542502669e-05, "loss": 0.8012, "num_input_tokens_seen": 1137894016, "step": 6304 }, { "epoch": 0.6902214072634719, "grad_norm": 1.2326779832446009, "learning_rate": 1.0929857064482285e-05, "loss": 0.8489, "num_input_tokens_seen": 1138088448, "step": 6305 }, { "epoch": 0.6903308793344098, "grad_norm": 1.0727624628058277, "learning_rate": 1.0922750368408896e-05, "loss": 0.6417, "num_input_tokens_seen": 1138257344, "step": 6306 }, { "epoch": 0.6904403514053478, "grad_norm": 1.3577537076137707, "learning_rate": 1.0915645337647224e-05, "loss": 0.7816, "num_input_tokens_seen": 1138405632, "step": 6307 }, { "epoch": 0.6905498234762856, "grad_norm": 1.2047889894317974, "learning_rate": 1.0908541973037775e-05, "loss": 0.762, "num_input_tokens_seen": 1138571168, "step": 6308 }, { "epoch": 0.6906592955472235, "grad_norm": 1.2209297152979655, "learning_rate": 1.090144027542089e-05, "loss": 0.7214, "num_input_tokens_seen": 1138761344, "step": 6309 }, { "epoch": 0.6907687676181614, "grad_norm": 1.0738840587448357, "learning_rate": 1.0894340245636652e-05, "loss": 0.731, "num_input_tokens_seen": 1138947712, "step": 6310 }, { "epoch": 0.6908782396890993, "grad_norm": 1.421941231662719, "learning_rate": 1.0887241884525014e-05, "loss": 0.6828, "num_input_tokens_seen": 1139087040, "step": 6311 }, { "epoch": 0.6909877117600373, "grad_norm": 1.3963739098027372, "learning_rate": 1.0880145192925666e-05, "loss": 1.0169, "num_input_tokens_seen": 1139290432, "step": 6312 }, { "epoch": 0.6910971838309751, "grad_norm": 1.2675528736861539, "learning_rate": 1.087305017167816e-05, "loss": 1.306, "num_input_tokens_seen": 1139510624, "step": 6313 }, { "epoch": 0.691206655901913, "grad_norm": 1.1727052417867307, "learning_rate": 1.0865956821621808e-05, "loss": 0.6743, "num_input_tokens_seen": 1139683552, "step": 6314 }, { "epoch": 0.6913161279728509, "grad_norm": 1.081723827111555, "learning_rate": 1.0858865143595749e-05, "loss": 0.6142, "num_input_tokens_seen": 1139876640, "step": 6315 }, { "epoch": 0.6914256000437888, "grad_norm": 1.369298297330477, "learning_rate": 1.0851775138438906e-05, "loss": 0.9913, "num_input_tokens_seen": 1140058528, "step": 6316 }, { "epoch": 0.6915350721147268, "grad_norm": 1.0724219258663343, "learning_rate": 1.084468680699001e-05, "loss": 0.5566, "num_input_tokens_seen": 1140255872, "step": 6317 }, { "epoch": 0.6916445441856647, "grad_norm": 1.170009858098029, "learning_rate": 1.0837600150087612e-05, "loss": 0.7737, "num_input_tokens_seen": 1140482560, "step": 6318 }, { "epoch": 0.6917540162566025, "grad_norm": 1.4034475164028408, "learning_rate": 1.0830515168570043e-05, "loss": 0.889, "num_input_tokens_seen": 1140664224, "step": 6319 }, { "epoch": 0.6918634883275404, "grad_norm": 1.2628528234155807, "learning_rate": 1.0823431863275443e-05, "loss": 0.8727, "num_input_tokens_seen": 1140848352, "step": 6320 }, { "epoch": 0.6919729603984783, "grad_norm": 1.3197498364080815, "learning_rate": 1.081635023504175e-05, "loss": 0.8677, "num_input_tokens_seen": 1141030912, "step": 6321 }, { "epoch": 0.6920824324694163, "grad_norm": 1.1344760650087056, "learning_rate": 1.0809270284706713e-05, "loss": 0.6294, "num_input_tokens_seen": 1141190400, "step": 6322 }, { "epoch": 0.6921919045403542, "grad_norm": 1.1226953636369308, "learning_rate": 1.0802192013107873e-05, "loss": 0.7092, "num_input_tokens_seen": 1141365344, "step": 6323 }, { "epoch": 0.6923013766112921, "grad_norm": 1.3041506822152904, "learning_rate": 1.0795115421082574e-05, "loss": 0.8443, "num_input_tokens_seen": 1141551040, "step": 6324 }, { "epoch": 0.6924108486822299, "grad_norm": 1.250818413796462, "learning_rate": 1.0788040509467958e-05, "loss": 0.7903, "num_input_tokens_seen": 1141755104, "step": 6325 }, { "epoch": 0.6925203207531678, "grad_norm": 1.2801832084373381, "learning_rate": 1.078096727910099e-05, "loss": 0.8951, "num_input_tokens_seen": 1141941920, "step": 6326 }, { "epoch": 0.6926297928241057, "grad_norm": 1.2857874100190232, "learning_rate": 1.0773895730818409e-05, "loss": 0.7337, "num_input_tokens_seen": 1142111040, "step": 6327 }, { "epoch": 0.6927392648950437, "grad_norm": 1.2890517263251249, "learning_rate": 1.076682586545677e-05, "loss": 0.8593, "num_input_tokens_seen": 1142306368, "step": 6328 }, { "epoch": 0.6928487369659816, "grad_norm": 1.2800310430636692, "learning_rate": 1.075975768385242e-05, "loss": 0.7259, "num_input_tokens_seen": 1142488928, "step": 6329 }, { "epoch": 0.6929582090369194, "grad_norm": 1.1387153211520178, "learning_rate": 1.0752691186841516e-05, "loss": 0.6954, "num_input_tokens_seen": 1142641248, "step": 6330 }, { "epoch": 0.6930676811078573, "grad_norm": 1.2000287827377762, "learning_rate": 1.0745626375260004e-05, "loss": 0.8957, "num_input_tokens_seen": 1142809472, "step": 6331 }, { "epoch": 0.6931771531787952, "grad_norm": 1.2698177073152672, "learning_rate": 1.0738563249943637e-05, "loss": 0.6792, "num_input_tokens_seen": 1142945888, "step": 6332 }, { "epoch": 0.6932866252497332, "grad_norm": 1.2696527041512693, "learning_rate": 1.073150181172799e-05, "loss": 0.7611, "num_input_tokens_seen": 1143118368, "step": 6333 }, { "epoch": 0.6933960973206711, "grad_norm": 1.4366166261571482, "learning_rate": 1.0724442061448383e-05, "loss": 0.8674, "num_input_tokens_seen": 1143291744, "step": 6334 }, { "epoch": 0.693505569391609, "grad_norm": 1.1938540468259429, "learning_rate": 1.0717383999940011e-05, "loss": 0.8049, "num_input_tokens_seen": 1143481024, "step": 6335 }, { "epoch": 0.6936150414625468, "grad_norm": 1.0934369865475146, "learning_rate": 1.0710327628037788e-05, "loss": 0.7893, "num_input_tokens_seen": 1143651488, "step": 6336 }, { "epoch": 0.6937245135334847, "grad_norm": 1.1979921510659004, "learning_rate": 1.0703272946576501e-05, "loss": 0.8885, "num_input_tokens_seen": 1143840768, "step": 6337 }, { "epoch": 0.6938339856044227, "grad_norm": 1.3749558701802895, "learning_rate": 1.069621995639069e-05, "loss": 0.9201, "num_input_tokens_seen": 1144006752, "step": 6338 }, { "epoch": 0.6939434576753606, "grad_norm": 1.1940453987723953, "learning_rate": 1.0689168658314708e-05, "loss": 0.7376, "num_input_tokens_seen": 1144179680, "step": 6339 }, { "epoch": 0.6940529297462985, "grad_norm": 1.2611730948707358, "learning_rate": 1.0682119053182731e-05, "loss": 0.9221, "num_input_tokens_seen": 1144360448, "step": 6340 }, { "epoch": 0.6941624018172364, "grad_norm": 1.2599323014585855, "learning_rate": 1.0675071141828682e-05, "loss": 1.0042, "num_input_tokens_seen": 1144523968, "step": 6341 }, { "epoch": 0.6942718738881742, "grad_norm": 1.261802174553091, "learning_rate": 1.0668024925086353e-05, "loss": 1.052, "num_input_tokens_seen": 1144714368, "step": 6342 }, { "epoch": 0.6943813459591122, "grad_norm": 1.1032290466345027, "learning_rate": 1.0660980403789256e-05, "loss": 0.5977, "num_input_tokens_seen": 1144902976, "step": 6343 }, { "epoch": 0.6944908180300501, "grad_norm": 1.0149563275417404, "learning_rate": 1.0653937578770787e-05, "loss": 0.6321, "num_input_tokens_seen": 1145082176, "step": 6344 }, { "epoch": 0.694600290100988, "grad_norm": 0.9778046299638093, "learning_rate": 1.0646896450864056e-05, "loss": 0.6477, "num_input_tokens_seen": 1145253984, "step": 6345 }, { "epoch": 0.6947097621719259, "grad_norm": 1.2187145912699844, "learning_rate": 1.0639857020902048e-05, "loss": 0.7436, "num_input_tokens_seen": 1145411904, "step": 6346 }, { "epoch": 0.6948192342428637, "grad_norm": 1.2588697456444902, "learning_rate": 1.0632819289717499e-05, "loss": 0.7433, "num_input_tokens_seen": 1145597152, "step": 6347 }, { "epoch": 0.6949287063138017, "grad_norm": 1.1145195749650818, "learning_rate": 1.0625783258142965e-05, "loss": 0.7622, "num_input_tokens_seen": 1145787552, "step": 6348 }, { "epoch": 0.6950381783847396, "grad_norm": 1.379195661134378, "learning_rate": 1.0618748927010794e-05, "loss": 1.0516, "num_input_tokens_seen": 1145996544, "step": 6349 }, { "epoch": 0.6951476504556775, "grad_norm": 1.187943676833594, "learning_rate": 1.0611716297153121e-05, "loss": 0.7735, "num_input_tokens_seen": 1146165664, "step": 6350 }, { "epoch": 0.6952571225266154, "grad_norm": 1.1729432387021297, "learning_rate": 1.0604685369401923e-05, "loss": 0.4957, "num_input_tokens_seen": 1146351808, "step": 6351 }, { "epoch": 0.6953665945975533, "grad_norm": 1.3662601922886182, "learning_rate": 1.059765614458891e-05, "loss": 0.7416, "num_input_tokens_seen": 1146524736, "step": 6352 }, { "epoch": 0.6954760666684912, "grad_norm": 1.1541282156221173, "learning_rate": 1.059062862354565e-05, "loss": 0.7054, "num_input_tokens_seen": 1146733056, "step": 6353 }, { "epoch": 0.6955855387394291, "grad_norm": 1.3091518293851585, "learning_rate": 1.0583602807103479e-05, "loss": 1.0509, "num_input_tokens_seen": 1146908896, "step": 6354 }, { "epoch": 0.695695010810367, "grad_norm": 1.1283133774818792, "learning_rate": 1.0576578696093536e-05, "loss": 0.7343, "num_input_tokens_seen": 1147107808, "step": 6355 }, { "epoch": 0.6958044828813049, "grad_norm": 1.1441398185604714, "learning_rate": 1.0569556291346761e-05, "loss": 0.7191, "num_input_tokens_seen": 1147302688, "step": 6356 }, { "epoch": 0.6959139549522428, "grad_norm": 1.2619563962503983, "learning_rate": 1.056253559369389e-05, "loss": 0.7672, "num_input_tokens_seen": 1147474944, "step": 6357 }, { "epoch": 0.6960234270231808, "grad_norm": 1.3844927907737432, "learning_rate": 1.0555516603965457e-05, "loss": 0.7266, "num_input_tokens_seen": 1147623008, "step": 6358 }, { "epoch": 0.6961328990941186, "grad_norm": 1.3650461484632626, "learning_rate": 1.0548499322991789e-05, "loss": 0.9644, "num_input_tokens_seen": 1147806464, "step": 6359 }, { "epoch": 0.6962423711650565, "grad_norm": 1.4442845168384322, "learning_rate": 1.0541483751603031e-05, "loss": 0.6975, "num_input_tokens_seen": 1147986112, "step": 6360 }, { "epoch": 0.6963518432359944, "grad_norm": 1.3702391218767331, "learning_rate": 1.0534469890629109e-05, "loss": 0.8517, "num_input_tokens_seen": 1148167776, "step": 6361 }, { "epoch": 0.6964613153069323, "grad_norm": 1.1045538685218081, "learning_rate": 1.0527457740899744e-05, "loss": 0.825, "num_input_tokens_seen": 1148389088, "step": 6362 }, { "epoch": 0.6965707873778703, "grad_norm": 1.2725868426294888, "learning_rate": 1.0520447303244463e-05, "loss": 0.7819, "num_input_tokens_seen": 1148592928, "step": 6363 }, { "epoch": 0.6966802594488081, "grad_norm": 1.2393794439700696, "learning_rate": 1.0513438578492582e-05, "loss": 0.6988, "num_input_tokens_seen": 1148741216, "step": 6364 }, { "epoch": 0.696789731519746, "grad_norm": 1.1734255895505417, "learning_rate": 1.0506431567473226e-05, "loss": 0.7086, "num_input_tokens_seen": 1148943264, "step": 6365 }, { "epoch": 0.6968992035906839, "grad_norm": 1.1068147478554176, "learning_rate": 1.049942627101531e-05, "loss": 0.5867, "num_input_tokens_seen": 1149144640, "step": 6366 }, { "epoch": 0.6970086756616218, "grad_norm": 1.170498357257562, "learning_rate": 1.0492422689947534e-05, "loss": 0.8374, "num_input_tokens_seen": 1149319808, "step": 6367 }, { "epoch": 0.6971181477325598, "grad_norm": 1.3674629152001279, "learning_rate": 1.048542082509843e-05, "loss": 0.8275, "num_input_tokens_seen": 1149503040, "step": 6368 }, { "epoch": 0.6972276198034977, "grad_norm": 1.2478893102960045, "learning_rate": 1.0478420677296297e-05, "loss": 0.7418, "num_input_tokens_seen": 1149674400, "step": 6369 }, { "epoch": 0.6973370918744355, "grad_norm": 1.3521307043036523, "learning_rate": 1.0471422247369233e-05, "loss": 0.8802, "num_input_tokens_seen": 1149838368, "step": 6370 }, { "epoch": 0.6974465639453734, "grad_norm": 1.2463881763622187, "learning_rate": 1.0464425536145148e-05, "loss": 0.9948, "num_input_tokens_seen": 1150051616, "step": 6371 }, { "epoch": 0.6975560360163113, "grad_norm": 1.1032629753471797, "learning_rate": 1.0457430544451733e-05, "loss": 0.7484, "num_input_tokens_seen": 1150233728, "step": 6372 }, { "epoch": 0.6976655080872493, "grad_norm": 1.3032489086492678, "learning_rate": 1.0450437273116484e-05, "loss": 0.857, "num_input_tokens_seen": 1150407104, "step": 6373 }, { "epoch": 0.6977749801581872, "grad_norm": 1.1758733567568729, "learning_rate": 1.044344572296668e-05, "loss": 0.6776, "num_input_tokens_seen": 1150579584, "step": 6374 }, { "epoch": 0.6978844522291251, "grad_norm": 1.4150311434927654, "learning_rate": 1.0436455894829442e-05, "loss": 0.9439, "num_input_tokens_seen": 1150759904, "step": 6375 }, { "epoch": 0.6979939243000629, "grad_norm": 1.145121399469777, "learning_rate": 1.0429467789531608e-05, "loss": 0.8197, "num_input_tokens_seen": 1150913344, "step": 6376 }, { "epoch": 0.6981033963710008, "grad_norm": 1.383912807146284, "learning_rate": 1.04224814078999e-05, "loss": 0.6964, "num_input_tokens_seen": 1151088512, "step": 6377 }, { "epoch": 0.6982128684419387, "grad_norm": 1.1752016787224213, "learning_rate": 1.041549675076076e-05, "loss": 0.5428, "num_input_tokens_seen": 1151246432, "step": 6378 }, { "epoch": 0.6983223405128767, "grad_norm": 1.13390970697148, "learning_rate": 1.0408513818940477e-05, "loss": 0.8796, "num_input_tokens_seen": 1151439744, "step": 6379 }, { "epoch": 0.6984318125838146, "grad_norm": 1.181850294947314, "learning_rate": 1.040153261326512e-05, "loss": 0.7313, "num_input_tokens_seen": 1151627456, "step": 6380 }, { "epoch": 0.6985412846547524, "grad_norm": 1.2696667408167412, "learning_rate": 1.0394553134560533e-05, "loss": 0.8966, "num_input_tokens_seen": 1151844736, "step": 6381 }, { "epoch": 0.6986507567256903, "grad_norm": 1.1505423220758528, "learning_rate": 1.0387575383652411e-05, "loss": 0.7179, "num_input_tokens_seen": 1152041408, "step": 6382 }, { "epoch": 0.6987602287966282, "grad_norm": 1.1801382555221704, "learning_rate": 1.0380599361366169e-05, "loss": 0.7517, "num_input_tokens_seen": 1152227776, "step": 6383 }, { "epoch": 0.6988697008675662, "grad_norm": 1.1204986587850023, "learning_rate": 1.037362506852709e-05, "loss": 0.6359, "num_input_tokens_seen": 1152395776, "step": 6384 }, { "epoch": 0.6989791729385041, "grad_norm": 1.244309633953272, "learning_rate": 1.036665250596019e-05, "loss": 0.8278, "num_input_tokens_seen": 1152586400, "step": 6385 }, { "epoch": 0.699088645009442, "grad_norm": 1.2203199593954654, "learning_rate": 1.0359681674490332e-05, "loss": 0.8987, "num_input_tokens_seen": 1152802336, "step": 6386 }, { "epoch": 0.6991981170803798, "grad_norm": 1.21148816785651, "learning_rate": 1.0352712574942144e-05, "loss": 0.734, "num_input_tokens_seen": 1152998112, "step": 6387 }, { "epoch": 0.6993075891513177, "grad_norm": 1.208936305161206, "learning_rate": 1.0345745208140056e-05, "loss": 0.6399, "num_input_tokens_seen": 1153192320, "step": 6388 }, { "epoch": 0.6994170612222557, "grad_norm": 1.4039427585344544, "learning_rate": 1.03387795749083e-05, "loss": 0.8757, "num_input_tokens_seen": 1153385856, "step": 6389 }, { "epoch": 0.6995265332931936, "grad_norm": 1.3540002084785425, "learning_rate": 1.0331815676070888e-05, "loss": 0.9226, "num_input_tokens_seen": 1153544672, "step": 6390 }, { "epoch": 0.6996360053641315, "grad_norm": 1.32800723283016, "learning_rate": 1.0324853512451643e-05, "loss": 0.9723, "num_input_tokens_seen": 1153737088, "step": 6391 }, { "epoch": 0.6997454774350694, "grad_norm": 1.5248257302940305, "learning_rate": 1.0317893084874167e-05, "loss": 1.0053, "num_input_tokens_seen": 1153938688, "step": 6392 }, { "epoch": 0.6998549495060072, "grad_norm": 1.2051556282531488, "learning_rate": 1.0310934394161875e-05, "loss": 0.7724, "num_input_tokens_seen": 1154154400, "step": 6393 }, { "epoch": 0.6999644215769452, "grad_norm": 1.2229802004130799, "learning_rate": 1.0303977441137968e-05, "loss": 0.7241, "num_input_tokens_seen": 1154311648, "step": 6394 }, { "epoch": 0.7000738936478831, "grad_norm": 1.1955464950607784, "learning_rate": 1.0297022226625434e-05, "loss": 0.7457, "num_input_tokens_seen": 1154482560, "step": 6395 }, { "epoch": 0.700183365718821, "grad_norm": 1.2391401134297746, "learning_rate": 1.0290068751447062e-05, "loss": 0.8359, "num_input_tokens_seen": 1154640032, "step": 6396 }, { "epoch": 0.7002928377897589, "grad_norm": 1.1946359851695716, "learning_rate": 1.0283117016425439e-05, "loss": 0.6436, "num_input_tokens_seen": 1154833120, "step": 6397 }, { "epoch": 0.7004023098606967, "grad_norm": 1.2117238106735675, "learning_rate": 1.0276167022382937e-05, "loss": 0.7168, "num_input_tokens_seen": 1155027104, "step": 6398 }, { "epoch": 0.7005117819316347, "grad_norm": 1.1405834033644877, "learning_rate": 1.0269218770141728e-05, "loss": 0.628, "num_input_tokens_seen": 1155222208, "step": 6399 }, { "epoch": 0.7006212540025726, "grad_norm": 1.2298963396074523, "learning_rate": 1.0262272260523772e-05, "loss": 0.777, "num_input_tokens_seen": 1155424032, "step": 6400 }, { "epoch": 0.7007307260735105, "grad_norm": 1.2746289068485581, "learning_rate": 1.0255327494350841e-05, "loss": 0.7518, "num_input_tokens_seen": 1155594496, "step": 6401 }, { "epoch": 0.7008401981444484, "grad_norm": 1.216428648951307, "learning_rate": 1.0248384472444481e-05, "loss": 0.6944, "num_input_tokens_seen": 1155737856, "step": 6402 }, { "epoch": 0.7009496702153863, "grad_norm": 1.3237030465477362, "learning_rate": 1.0241443195626038e-05, "loss": 0.8583, "num_input_tokens_seen": 1155930048, "step": 6403 }, { "epoch": 0.7010591422863242, "grad_norm": 1.2908948417639121, "learning_rate": 1.0234503664716649e-05, "loss": 0.7252, "num_input_tokens_seen": 1156068704, "step": 6404 }, { "epoch": 0.7011686143572621, "grad_norm": 1.466970298866875, "learning_rate": 1.0227565880537252e-05, "loss": 0.9761, "num_input_tokens_seen": 1156262016, "step": 6405 }, { "epoch": 0.7012780864282, "grad_norm": 1.1970013424794916, "learning_rate": 1.0220629843908572e-05, "loss": 0.704, "num_input_tokens_seen": 1156424640, "step": 6406 }, { "epoch": 0.7013875584991379, "grad_norm": 1.1825488422783135, "learning_rate": 1.0213695555651118e-05, "loss": 0.9158, "num_input_tokens_seen": 1156626464, "step": 6407 }, { "epoch": 0.7014970305700758, "grad_norm": 1.3241275992291008, "learning_rate": 1.020676301658523e-05, "loss": 0.7378, "num_input_tokens_seen": 1156800960, "step": 6408 }, { "epoch": 0.7016065026410138, "grad_norm": 1.290828119878582, "learning_rate": 1.0199832227530979e-05, "loss": 0.9443, "num_input_tokens_seen": 1156966048, "step": 6409 }, { "epoch": 0.7017159747119516, "grad_norm": 1.164154356334118, "learning_rate": 1.0192903189308293e-05, "loss": 0.9808, "num_input_tokens_seen": 1157192288, "step": 6410 }, { "epoch": 0.7018254467828895, "grad_norm": 1.2202836681676985, "learning_rate": 1.0185975902736853e-05, "loss": 0.9313, "num_input_tokens_seen": 1157391200, "step": 6411 }, { "epoch": 0.7019349188538274, "grad_norm": 1.0346293709987417, "learning_rate": 1.0179050368636146e-05, "loss": 0.604, "num_input_tokens_seen": 1157568608, "step": 6412 }, { "epoch": 0.7020443909247653, "grad_norm": 1.2646641321570766, "learning_rate": 1.0172126587825442e-05, "loss": 0.8807, "num_input_tokens_seen": 1157750944, "step": 6413 }, { "epoch": 0.7021538629957033, "grad_norm": 1.0881659606200575, "learning_rate": 1.0165204561123811e-05, "loss": 0.6767, "num_input_tokens_seen": 1157974944, "step": 6414 }, { "epoch": 0.7022633350666411, "grad_norm": 1.4184305579231387, "learning_rate": 1.015828428935014e-05, "loss": 1.1053, "num_input_tokens_seen": 1158170272, "step": 6415 }, { "epoch": 0.702372807137579, "grad_norm": 1.364606546490881, "learning_rate": 1.0151365773323046e-05, "loss": 1.0652, "num_input_tokens_seen": 1158357760, "step": 6416 }, { "epoch": 0.7024822792085169, "grad_norm": 1.1859447936385359, "learning_rate": 1.0144449013861013e-05, "loss": 0.7868, "num_input_tokens_seen": 1158538528, "step": 6417 }, { "epoch": 0.7025917512794548, "grad_norm": 1.1386033523382906, "learning_rate": 1.0137534011782246e-05, "loss": 0.7195, "num_input_tokens_seen": 1158718848, "step": 6418 }, { "epoch": 0.7027012233503928, "grad_norm": 1.2484169472541151, "learning_rate": 1.013062076790481e-05, "loss": 0.9056, "num_input_tokens_seen": 1158915968, "step": 6419 }, { "epoch": 0.7028106954213307, "grad_norm": 1.3028132516296518, "learning_rate": 1.0123709283046495e-05, "loss": 0.819, "num_input_tokens_seen": 1159096288, "step": 6420 }, { "epoch": 0.7029201674922685, "grad_norm": 1.0870968235729175, "learning_rate": 1.011679955802494e-05, "loss": 0.7292, "num_input_tokens_seen": 1159284000, "step": 6421 }, { "epoch": 0.7030296395632064, "grad_norm": 1.1530255902867375, "learning_rate": 1.0109891593657547e-05, "loss": 0.7629, "num_input_tokens_seen": 1159471488, "step": 6422 }, { "epoch": 0.7031391116341443, "grad_norm": 1.382172905372466, "learning_rate": 1.0102985390761505e-05, "loss": 1.0206, "num_input_tokens_seen": 1159622688, "step": 6423 }, { "epoch": 0.7032485837050823, "grad_norm": 1.1966825624347046, "learning_rate": 1.009608095015383e-05, "loss": 0.5952, "num_input_tokens_seen": 1159820032, "step": 6424 }, { "epoch": 0.7033580557760202, "grad_norm": 1.4892421233804733, "learning_rate": 1.0089178272651267e-05, "loss": 0.9418, "num_input_tokens_seen": 1160023872, "step": 6425 }, { "epoch": 0.7034675278469581, "grad_norm": 1.0770031717797401, "learning_rate": 1.008227735907043e-05, "loss": 0.7461, "num_input_tokens_seen": 1160201056, "step": 6426 }, { "epoch": 0.7035769999178959, "grad_norm": 1.1205116914226863, "learning_rate": 1.0075378210227645e-05, "loss": 0.7075, "num_input_tokens_seen": 1160381600, "step": 6427 }, { "epoch": 0.7036864719888338, "grad_norm": 1.1716807395140563, "learning_rate": 1.0068480826939097e-05, "loss": 0.689, "num_input_tokens_seen": 1160564160, "step": 6428 }, { "epoch": 0.7037959440597717, "grad_norm": 1.1131554783499058, "learning_rate": 1.006158521002072e-05, "loss": 0.751, "num_input_tokens_seen": 1160759712, "step": 6429 }, { "epoch": 0.7039054161307097, "grad_norm": 1.0962348504580137, "learning_rate": 1.005469136028826e-05, "loss": 0.7435, "num_input_tokens_seen": 1160925920, "step": 6430 }, { "epoch": 0.7040148882016476, "grad_norm": 1.332484551316934, "learning_rate": 1.0047799278557238e-05, "loss": 0.864, "num_input_tokens_seen": 1161122592, "step": 6431 }, { "epoch": 0.7041243602725854, "grad_norm": 1.1724428962515416, "learning_rate": 1.0040908965642979e-05, "loss": 0.9053, "num_input_tokens_seen": 1161308288, "step": 6432 }, { "epoch": 0.7042338323435233, "grad_norm": 1.1923247252116145, "learning_rate": 1.0034020422360591e-05, "loss": 0.7465, "num_input_tokens_seen": 1161485024, "step": 6433 }, { "epoch": 0.7043433044144612, "grad_norm": 1.254955340118944, "learning_rate": 1.002713364952497e-05, "loss": 1.0233, "num_input_tokens_seen": 1161649888, "step": 6434 }, { "epoch": 0.7044527764853992, "grad_norm": 1.1581616965180972, "learning_rate": 1.0020248647950822e-05, "loss": 0.7132, "num_input_tokens_seen": 1161826176, "step": 6435 }, { "epoch": 0.7045622485563371, "grad_norm": 1.274705314240541, "learning_rate": 1.0013365418452625e-05, "loss": 0.7798, "num_input_tokens_seen": 1162030464, "step": 6436 }, { "epoch": 0.704671720627275, "grad_norm": 1.1963205512437396, "learning_rate": 1.0006483961844645e-05, "loss": 0.8542, "num_input_tokens_seen": 1162197568, "step": 6437 }, { "epoch": 0.7047811926982128, "grad_norm": 1.1570551363007475, "learning_rate": 9.999604278940956e-06, "loss": 0.6759, "num_input_tokens_seen": 1162371392, "step": 6438 }, { "epoch": 0.7048906647691507, "grad_norm": 1.2254812249060982, "learning_rate": 9.9927263705554e-06, "loss": 0.8491, "num_input_tokens_seen": 1162552384, "step": 6439 }, { "epoch": 0.7050001368400887, "grad_norm": 1.2036434889401502, "learning_rate": 9.985850237501618e-06, "loss": 0.8174, "num_input_tokens_seen": 1162740768, "step": 6440 }, { "epoch": 0.7051096089110266, "grad_norm": 1.151678222887179, "learning_rate": 9.978975880593067e-06, "loss": 0.9848, "num_input_tokens_seen": 1162915040, "step": 6441 }, { "epoch": 0.7052190809819645, "grad_norm": 1.2608323896009128, "learning_rate": 9.972103300642937e-06, "loss": 0.6803, "num_input_tokens_seen": 1163070496, "step": 6442 }, { "epoch": 0.7053285530529024, "grad_norm": 1.3007208549523355, "learning_rate": 9.965232498464266e-06, "loss": 0.8383, "num_input_tokens_seen": 1163247904, "step": 6443 }, { "epoch": 0.7054380251238402, "grad_norm": 1.2935939326115127, "learning_rate": 9.958363474869853e-06, "loss": 0.7422, "num_input_tokens_seen": 1163432480, "step": 6444 }, { "epoch": 0.7055474971947782, "grad_norm": 1.2127145341457857, "learning_rate": 9.951496230672283e-06, "loss": 0.932, "num_input_tokens_seen": 1163645728, "step": 6445 }, { "epoch": 0.7056569692657161, "grad_norm": 1.2929467144612563, "learning_rate": 9.94463076668394e-06, "loss": 0.8108, "num_input_tokens_seen": 1163845536, "step": 6446 }, { "epoch": 0.705766441336654, "grad_norm": 1.377367495690756, "learning_rate": 9.937767083716989e-06, "loss": 0.7338, "num_input_tokens_seen": 1163994496, "step": 6447 }, { "epoch": 0.7058759134075919, "grad_norm": 1.095580884673124, "learning_rate": 9.930905182583417e-06, "loss": 0.7519, "num_input_tokens_seen": 1164164288, "step": 6448 }, { "epoch": 0.7059853854785297, "grad_norm": 1.3480476142839268, "learning_rate": 9.924045064094934e-06, "loss": 0.6995, "num_input_tokens_seen": 1164315712, "step": 6449 }, { "epoch": 0.7060948575494677, "grad_norm": 1.2611429418186888, "learning_rate": 9.917186729063118e-06, "loss": 0.9028, "num_input_tokens_seen": 1164506336, "step": 6450 }, { "epoch": 0.7062043296204056, "grad_norm": 1.168095457418835, "learning_rate": 9.910330178299262e-06, "loss": 0.6168, "num_input_tokens_seen": 1164650816, "step": 6451 }, { "epoch": 0.7063138016913435, "grad_norm": 1.2423621960521434, "learning_rate": 9.903475412614507e-06, "loss": 0.9615, "num_input_tokens_seen": 1164866304, "step": 6452 }, { "epoch": 0.7064232737622814, "grad_norm": 1.2076916909344755, "learning_rate": 9.896622432819753e-06, "loss": 0.8257, "num_input_tokens_seen": 1165058272, "step": 6453 }, { "epoch": 0.7065327458332193, "grad_norm": 1.1940985605145416, "learning_rate": 9.889771239725693e-06, "loss": 0.6831, "num_input_tokens_seen": 1165240384, "step": 6454 }, { "epoch": 0.7066422179041572, "grad_norm": 1.269595851721714, "learning_rate": 9.882921834142806e-06, "loss": 0.7427, "num_input_tokens_seen": 1165418016, "step": 6455 }, { "epoch": 0.7067516899750951, "grad_norm": 1.1638021497996014, "learning_rate": 9.876074216881359e-06, "loss": 0.8651, "num_input_tokens_seen": 1165595424, "step": 6456 }, { "epoch": 0.706861162046033, "grad_norm": 1.2098371859636847, "learning_rate": 9.86922838875144e-06, "loss": 0.7917, "num_input_tokens_seen": 1165781568, "step": 6457 }, { "epoch": 0.7069706341169709, "grad_norm": 1.2713431706227134, "learning_rate": 9.86238435056286e-06, "loss": 0.9143, "num_input_tokens_seen": 1165971744, "step": 6458 }, { "epoch": 0.7070801061879088, "grad_norm": 1.0098677386185597, "learning_rate": 9.855542103125286e-06, "loss": 0.6487, "num_input_tokens_seen": 1166159904, "step": 6459 }, { "epoch": 0.7071895782588468, "grad_norm": 1.1564830854393628, "learning_rate": 9.848701647248118e-06, "loss": 0.8435, "num_input_tokens_seen": 1166335520, "step": 6460 }, { "epoch": 0.7072990503297846, "grad_norm": 1.1517617000302693, "learning_rate": 9.841862983740584e-06, "loss": 0.8041, "num_input_tokens_seen": 1166534208, "step": 6461 }, { "epoch": 0.7074085224007225, "grad_norm": 1.1495063115959236, "learning_rate": 9.835026113411685e-06, "loss": 0.7024, "num_input_tokens_seen": 1166728416, "step": 6462 }, { "epoch": 0.7075179944716604, "grad_norm": 1.3473928442903307, "learning_rate": 9.828191037070208e-06, "loss": 0.9265, "num_input_tokens_seen": 1166874016, "step": 6463 }, { "epoch": 0.7076274665425983, "grad_norm": 1.253081623953393, "learning_rate": 9.821357755524727e-06, "loss": 0.8755, "num_input_tokens_seen": 1167041568, "step": 6464 }, { "epoch": 0.7077369386135363, "grad_norm": 1.0393772652349602, "learning_rate": 9.814526269583596e-06, "loss": 0.5768, "num_input_tokens_seen": 1167231296, "step": 6465 }, { "epoch": 0.7078464106844741, "grad_norm": 1.1037526104076445, "learning_rate": 9.807696580054994e-06, "loss": 0.7677, "num_input_tokens_seen": 1167423936, "step": 6466 }, { "epoch": 0.707955882755412, "grad_norm": 1.2091427700819566, "learning_rate": 9.800868687746832e-06, "loss": 1.0006, "num_input_tokens_seen": 1167621728, "step": 6467 }, { "epoch": 0.7080653548263499, "grad_norm": 1.3393910702637124, "learning_rate": 9.794042593466851e-06, "loss": 0.7862, "num_input_tokens_seen": 1167780768, "step": 6468 }, { "epoch": 0.7081748268972878, "grad_norm": 1.1984925199011858, "learning_rate": 9.787218298022565e-06, "loss": 0.791, "num_input_tokens_seen": 1167939584, "step": 6469 }, { "epoch": 0.7082842989682258, "grad_norm": 1.4085476648257234, "learning_rate": 9.780395802221274e-06, "loss": 1.1715, "num_input_tokens_seen": 1168136256, "step": 6470 }, { "epoch": 0.7083937710391637, "grad_norm": 1.2967379394170433, "learning_rate": 9.773575106870061e-06, "loss": 0.6974, "num_input_tokens_seen": 1168294624, "step": 6471 }, { "epoch": 0.7085032431101015, "grad_norm": 1.1663871441634999, "learning_rate": 9.766756212775807e-06, "loss": 0.9761, "num_input_tokens_seen": 1168475616, "step": 6472 }, { "epoch": 0.7086127151810394, "grad_norm": 1.1576302512358405, "learning_rate": 9.759939120745171e-06, "loss": 0.7774, "num_input_tokens_seen": 1168679232, "step": 6473 }, { "epoch": 0.7087221872519773, "grad_norm": 1.1102140235766986, "learning_rate": 9.753123831584604e-06, "loss": 0.9091, "num_input_tokens_seen": 1168881952, "step": 6474 }, { "epoch": 0.7088316593229153, "grad_norm": 1.0985409575316027, "learning_rate": 9.746310346100331e-06, "loss": 0.7007, "num_input_tokens_seen": 1169085792, "step": 6475 }, { "epoch": 0.7089411313938532, "grad_norm": 1.010623353615896, "learning_rate": 9.739498665098395e-06, "loss": 0.6447, "num_input_tokens_seen": 1169272160, "step": 6476 }, { "epoch": 0.7090506034647911, "grad_norm": 1.188514426763692, "learning_rate": 9.732688789384593e-06, "loss": 0.753, "num_input_tokens_seen": 1169463680, "step": 6477 }, { "epoch": 0.7091600755357289, "grad_norm": 1.1786478779508573, "learning_rate": 9.725880719764519e-06, "loss": 0.7734, "num_input_tokens_seen": 1169657888, "step": 6478 }, { "epoch": 0.7092695476066668, "grad_norm": 1.2401249874865932, "learning_rate": 9.71907445704356e-06, "loss": 0.9554, "num_input_tokens_seen": 1169844032, "step": 6479 }, { "epoch": 0.7093790196776047, "grad_norm": 1.276823865451988, "learning_rate": 9.712270002026877e-06, "loss": 0.9312, "num_input_tokens_seen": 1170018304, "step": 6480 }, { "epoch": 0.7094884917485427, "grad_norm": 1.2314544120773039, "learning_rate": 9.705467355519428e-06, "loss": 0.8453, "num_input_tokens_seen": 1170192352, "step": 6481 }, { "epoch": 0.7095979638194806, "grad_norm": 1.2959253442036422, "learning_rate": 9.698666518325943e-06, "loss": 0.9522, "num_input_tokens_seen": 1170342208, "step": 6482 }, { "epoch": 0.7097074358904184, "grad_norm": 1.1578378857340579, "learning_rate": 9.69186749125098e-06, "loss": 0.7133, "num_input_tokens_seen": 1170519392, "step": 6483 }, { "epoch": 0.7098169079613563, "grad_norm": 1.198762688912707, "learning_rate": 9.685070275098806e-06, "loss": 0.7641, "num_input_tokens_seen": 1170706432, "step": 6484 }, { "epoch": 0.7099263800322942, "grad_norm": 1.2656156280547342, "learning_rate": 9.678274870673555e-06, "loss": 0.8149, "num_input_tokens_seen": 1170879808, "step": 6485 }, { "epoch": 0.7100358521032322, "grad_norm": 1.1536538645491323, "learning_rate": 9.671481278779094e-06, "loss": 0.6595, "num_input_tokens_seen": 1171057664, "step": 6486 }, { "epoch": 0.7101453241741701, "grad_norm": 1.172906955297095, "learning_rate": 9.664689500219092e-06, "loss": 0.8191, "num_input_tokens_seen": 1171255904, "step": 6487 }, { "epoch": 0.710254796245108, "grad_norm": 1.2167320662823389, "learning_rate": 9.65789953579701e-06, "loss": 0.5721, "num_input_tokens_seen": 1171434432, "step": 6488 }, { "epoch": 0.7103642683160458, "grad_norm": 1.1905259178765761, "learning_rate": 9.651111386316072e-06, "loss": 0.6667, "num_input_tokens_seen": 1171603552, "step": 6489 }, { "epoch": 0.7104737403869837, "grad_norm": 1.1714403490810268, "learning_rate": 9.644325052579333e-06, "loss": 0.7402, "num_input_tokens_seen": 1171805600, "step": 6490 }, { "epoch": 0.7105832124579217, "grad_norm": 1.348912502182861, "learning_rate": 9.63754053538957e-06, "loss": 0.696, "num_input_tokens_seen": 1171934400, "step": 6491 }, { "epoch": 0.7106926845288596, "grad_norm": 1.187585927966356, "learning_rate": 9.630757835549412e-06, "loss": 0.791, "num_input_tokens_seen": 1172109568, "step": 6492 }, { "epoch": 0.7108021565997975, "grad_norm": 1.0957063889072376, "learning_rate": 9.623976953861199e-06, "loss": 0.6513, "num_input_tokens_seen": 1172289440, "step": 6493 }, { "epoch": 0.7109116286707354, "grad_norm": 1.3727221260915938, "learning_rate": 9.617197891127131e-06, "loss": 0.8444, "num_input_tokens_seen": 1172480960, "step": 6494 }, { "epoch": 0.7110211007416732, "grad_norm": 1.088302561708054, "learning_rate": 9.610420648149144e-06, "loss": 0.556, "num_input_tokens_seen": 1172656352, "step": 6495 }, { "epoch": 0.7111305728126112, "grad_norm": 1.4014178436374767, "learning_rate": 9.603645225728975e-06, "loss": 1.0299, "num_input_tokens_seen": 1172854368, "step": 6496 }, { "epoch": 0.7112400448835491, "grad_norm": 1.2741914635127571, "learning_rate": 9.59687162466814e-06, "loss": 0.9438, "num_input_tokens_seen": 1173058880, "step": 6497 }, { "epoch": 0.711349516954487, "grad_norm": 1.3004911631951621, "learning_rate": 9.590099845767941e-06, "loss": 0.7718, "num_input_tokens_seen": 1173198432, "step": 6498 }, { "epoch": 0.7114589890254249, "grad_norm": 1.0521760213580993, "learning_rate": 9.583329889829486e-06, "loss": 0.6266, "num_input_tokens_seen": 1173377184, "step": 6499 }, { "epoch": 0.7115684610963627, "grad_norm": 1.2398927233037194, "learning_rate": 9.576561757653618e-06, "loss": 0.9927, "num_input_tokens_seen": 1173543616, "step": 6500 }, { "epoch": 0.7116779331673007, "grad_norm": 1.3363333221664548, "learning_rate": 9.569795450041028e-06, "loss": 0.8954, "num_input_tokens_seen": 1173708256, "step": 6501 }, { "epoch": 0.7117874052382386, "grad_norm": 1.1556136983728358, "learning_rate": 9.563030967792119e-06, "loss": 0.8229, "num_input_tokens_seen": 1173914560, "step": 6502 }, { "epoch": 0.7118968773091765, "grad_norm": 1.458694776521053, "learning_rate": 9.556268311707145e-06, "loss": 0.8115, "num_input_tokens_seen": 1174079872, "step": 6503 }, { "epoch": 0.7120063493801144, "grad_norm": 1.160862973928569, "learning_rate": 9.549507482586107e-06, "loss": 0.6053, "num_input_tokens_seen": 1174231744, "step": 6504 }, { "epoch": 0.7121158214510523, "grad_norm": 1.1863713170875536, "learning_rate": 9.542748481228796e-06, "loss": 0.7187, "num_input_tokens_seen": 1174430656, "step": 6505 }, { "epoch": 0.7122252935219902, "grad_norm": 1.2706398686384828, "learning_rate": 9.535991308434795e-06, "loss": 1.0123, "num_input_tokens_seen": 1174636288, "step": 6506 }, { "epoch": 0.7123347655929281, "grad_norm": 1.0982699053389129, "learning_rate": 9.529235965003447e-06, "loss": 0.8889, "num_input_tokens_seen": 1174825568, "step": 6507 }, { "epoch": 0.712444237663866, "grad_norm": 1.3021940972582406, "learning_rate": 9.52248245173392e-06, "loss": 0.8864, "num_input_tokens_seen": 1174979680, "step": 6508 }, { "epoch": 0.7125537097348039, "grad_norm": 1.2457471024557825, "learning_rate": 9.51573076942513e-06, "loss": 0.785, "num_input_tokens_seen": 1175151040, "step": 6509 }, { "epoch": 0.7126631818057418, "grad_norm": 1.2678542108329167, "learning_rate": 9.508980918875787e-06, "loss": 0.9181, "num_input_tokens_seen": 1175339424, "step": 6510 }, { "epoch": 0.7127726538766798, "grad_norm": 1.151950833850961, "learning_rate": 9.50223290088439e-06, "loss": 0.8352, "num_input_tokens_seen": 1175518176, "step": 6511 }, { "epoch": 0.7128821259476176, "grad_norm": 1.1707104300524662, "learning_rate": 9.495486716249213e-06, "loss": 0.9036, "num_input_tokens_seen": 1175688192, "step": 6512 }, { "epoch": 0.7129915980185555, "grad_norm": 1.5116888629807665, "learning_rate": 9.48874236576832e-06, "loss": 0.9446, "num_input_tokens_seen": 1175863808, "step": 6513 }, { "epoch": 0.7131010700894934, "grad_norm": 1.1614673540024498, "learning_rate": 9.48199985023955e-06, "loss": 0.7969, "num_input_tokens_seen": 1176081088, "step": 6514 }, { "epoch": 0.7132105421604313, "grad_norm": 1.0644735631817972, "learning_rate": 9.475259170460527e-06, "loss": 0.5672, "num_input_tokens_seen": 1176278208, "step": 6515 }, { "epoch": 0.7133200142313693, "grad_norm": 1.3238657703151762, "learning_rate": 9.468520327228681e-06, "loss": 0.6445, "num_input_tokens_seen": 1176446432, "step": 6516 }, { "epoch": 0.7134294863023071, "grad_norm": 1.2037409902296852, "learning_rate": 9.46178332134117e-06, "loss": 0.6244, "num_input_tokens_seen": 1176598976, "step": 6517 }, { "epoch": 0.713538958373245, "grad_norm": 1.3206750496128568, "learning_rate": 9.455048153594998e-06, "loss": 0.772, "num_input_tokens_seen": 1176727552, "step": 6518 }, { "epoch": 0.7136484304441829, "grad_norm": 1.2502671909870475, "learning_rate": 9.448314824786913e-06, "loss": 0.7132, "num_input_tokens_seen": 1176926016, "step": 6519 }, { "epoch": 0.7137579025151208, "grad_norm": 1.30302830801637, "learning_rate": 9.441583335713455e-06, "loss": 0.9246, "num_input_tokens_seen": 1177139264, "step": 6520 }, { "epoch": 0.7138673745860588, "grad_norm": 1.157723750575317, "learning_rate": 9.434853687170947e-06, "loss": 0.696, "num_input_tokens_seen": 1177325856, "step": 6521 }, { "epoch": 0.7139768466569967, "grad_norm": 1.126674624298329, "learning_rate": 9.42812587995548e-06, "loss": 0.8583, "num_input_tokens_seen": 1177524096, "step": 6522 }, { "epoch": 0.7140863187279345, "grad_norm": 1.2208128368666453, "learning_rate": 9.421399914862975e-06, "loss": 0.7706, "num_input_tokens_seen": 1177697024, "step": 6523 }, { "epoch": 0.7141957907988724, "grad_norm": 1.3899123691132786, "learning_rate": 9.414675792689056e-06, "loss": 0.8262, "num_input_tokens_seen": 1177872416, "step": 6524 }, { "epoch": 0.7143052628698103, "grad_norm": 1.2284166936051246, "learning_rate": 9.407953514229218e-06, "loss": 0.751, "num_input_tokens_seen": 1178070656, "step": 6525 }, { "epoch": 0.7144147349407483, "grad_norm": 1.142399909311713, "learning_rate": 9.401233080278655e-06, "loss": 0.8409, "num_input_tokens_seen": 1178272256, "step": 6526 }, { "epoch": 0.7145242070116862, "grad_norm": 1.2850011482678934, "learning_rate": 9.394514491632406e-06, "loss": 1.0876, "num_input_tokens_seen": 1178485280, "step": 6527 }, { "epoch": 0.7146336790826241, "grad_norm": 1.2383372297178625, "learning_rate": 9.38779774908526e-06, "loss": 0.7362, "num_input_tokens_seen": 1178662240, "step": 6528 }, { "epoch": 0.7147431511535619, "grad_norm": 1.2556084698148644, "learning_rate": 9.381082853431795e-06, "loss": 0.7825, "num_input_tokens_seen": 1178811200, "step": 6529 }, { "epoch": 0.7148526232244998, "grad_norm": 1.4074738157557192, "learning_rate": 9.374369805466369e-06, "loss": 0.847, "num_input_tokens_seen": 1179000480, "step": 6530 }, { "epoch": 0.7149620952954377, "grad_norm": 1.3882752615349219, "learning_rate": 9.367658605983117e-06, "loss": 1.0344, "num_input_tokens_seen": 1179169600, "step": 6531 }, { "epoch": 0.7150715673663757, "grad_norm": 1.2889213715950796, "learning_rate": 9.360949255775986e-06, "loss": 0.9918, "num_input_tokens_seen": 1179371648, "step": 6532 }, { "epoch": 0.7151810394373136, "grad_norm": 1.2515665251896932, "learning_rate": 9.354241755638641e-06, "loss": 0.7314, "num_input_tokens_seen": 1179583328, "step": 6533 }, { "epoch": 0.7152905115082514, "grad_norm": 1.2321913030431708, "learning_rate": 9.347536106364607e-06, "loss": 0.7905, "num_input_tokens_seen": 1179756928, "step": 6534 }, { "epoch": 0.7153999835791893, "grad_norm": 1.2982419175385467, "learning_rate": 9.34083230874711e-06, "loss": 0.7886, "num_input_tokens_seen": 1179916416, "step": 6535 }, { "epoch": 0.7155094556501272, "grad_norm": 1.26269484045694, "learning_rate": 9.334130363579224e-06, "loss": 0.5847, "num_input_tokens_seen": 1180097184, "step": 6536 }, { "epoch": 0.7156189277210652, "grad_norm": 1.2395935325309821, "learning_rate": 9.32743027165377e-06, "loss": 0.67, "num_input_tokens_seen": 1180249952, "step": 6537 }, { "epoch": 0.7157283997920031, "grad_norm": 1.1594847203079057, "learning_rate": 9.320732033763351e-06, "loss": 0.7916, "num_input_tokens_seen": 1180444608, "step": 6538 }, { "epoch": 0.715837871862941, "grad_norm": 1.262583905443964, "learning_rate": 9.314035650700361e-06, "loss": 0.7682, "num_input_tokens_seen": 1180609024, "step": 6539 }, { "epoch": 0.7159473439338788, "grad_norm": 1.236210918445197, "learning_rate": 9.307341123256957e-06, "loss": 0.9032, "num_input_tokens_seen": 1180815552, "step": 6540 }, { "epoch": 0.7160568160048167, "grad_norm": 1.1579347656599865, "learning_rate": 9.300648452225119e-06, "loss": 0.7888, "num_input_tokens_seen": 1181021408, "step": 6541 }, { "epoch": 0.7161662880757547, "grad_norm": 1.1402062993188595, "learning_rate": 9.293957638396535e-06, "loss": 0.6452, "num_input_tokens_seen": 1181194336, "step": 6542 }, { "epoch": 0.7162757601466926, "grad_norm": 1.1383679673067815, "learning_rate": 9.28726868256275e-06, "loss": 0.7183, "num_input_tokens_seen": 1181378688, "step": 6543 }, { "epoch": 0.7163852322176305, "grad_norm": 1.1233379484378028, "learning_rate": 9.280581585515042e-06, "loss": 1.0053, "num_input_tokens_seen": 1181557216, "step": 6544 }, { "epoch": 0.7164947042885684, "grad_norm": 1.3168635720081654, "learning_rate": 9.273896348044481e-06, "loss": 0.9864, "num_input_tokens_seen": 1181766656, "step": 6545 }, { "epoch": 0.7166041763595062, "grad_norm": 1.1252654610135064, "learning_rate": 9.267212970941919e-06, "loss": 0.7048, "num_input_tokens_seen": 1181931296, "step": 6546 }, { "epoch": 0.7167136484304442, "grad_norm": 1.1617913844907637, "learning_rate": 9.260531454997987e-06, "loss": 0.7509, "num_input_tokens_seen": 1182099296, "step": 6547 }, { "epoch": 0.7168231205013821, "grad_norm": 1.208345654304214, "learning_rate": 9.253851801003094e-06, "loss": 0.8319, "num_input_tokens_seen": 1182291264, "step": 6548 }, { "epoch": 0.71693259257232, "grad_norm": 1.1556341747953351, "learning_rate": 9.247174009747422e-06, "loss": 1.0581, "num_input_tokens_seen": 1182467552, "step": 6549 }, { "epoch": 0.7170420646432579, "grad_norm": 1.2285858764939512, "learning_rate": 9.240498082020962e-06, "loss": 0.7627, "num_input_tokens_seen": 1182653472, "step": 6550 }, { "epoch": 0.7171515367141957, "grad_norm": 1.314281707514703, "learning_rate": 9.23382401861345e-06, "loss": 0.6753, "num_input_tokens_seen": 1182831552, "step": 6551 }, { "epoch": 0.7172610087851337, "grad_norm": 1.29290633036524, "learning_rate": 9.227151820314417e-06, "loss": 1.2305, "num_input_tokens_seen": 1183056896, "step": 6552 }, { "epoch": 0.7173704808560716, "grad_norm": 1.271380582871264, "learning_rate": 9.22048148791317e-06, "loss": 0.768, "num_input_tokens_seen": 1183232288, "step": 6553 }, { "epoch": 0.7174799529270095, "grad_norm": 1.3195810492668143, "learning_rate": 9.2138130221988e-06, "loss": 0.8484, "num_input_tokens_seen": 1183384160, "step": 6554 }, { "epoch": 0.7175894249979474, "grad_norm": 1.2333343078420416, "learning_rate": 9.20714642396017e-06, "loss": 0.9022, "num_input_tokens_seen": 1183568736, "step": 6555 }, { "epoch": 0.7176988970688853, "grad_norm": 1.3529925856432936, "learning_rate": 9.200481693985928e-06, "loss": 0.8375, "num_input_tokens_seen": 1183730912, "step": 6556 }, { "epoch": 0.7178083691398232, "grad_norm": 1.2633205674280503, "learning_rate": 9.193818833064489e-06, "loss": 0.757, "num_input_tokens_seen": 1183927584, "step": 6557 }, { "epoch": 0.7179178412107611, "grad_norm": 1.2472124489219836, "learning_rate": 9.187157841984082e-06, "loss": 0.6658, "num_input_tokens_seen": 1184099392, "step": 6558 }, { "epoch": 0.718027313281699, "grad_norm": 1.2325711167498894, "learning_rate": 9.180498721532657e-06, "loss": 0.7983, "num_input_tokens_seen": 1184320480, "step": 6559 }, { "epoch": 0.7181367853526369, "grad_norm": 1.487031026048354, "learning_rate": 9.173841472498001e-06, "loss": 0.9572, "num_input_tokens_seen": 1184491616, "step": 6560 }, { "epoch": 0.7182462574235748, "grad_norm": 1.1557098879874463, "learning_rate": 9.167186095667643e-06, "loss": 0.7051, "num_input_tokens_seen": 1184654464, "step": 6561 }, { "epoch": 0.7183557294945128, "grad_norm": 1.181330566080812, "learning_rate": 9.160532591828902e-06, "loss": 0.6713, "num_input_tokens_seen": 1184837472, "step": 6562 }, { "epoch": 0.7184652015654506, "grad_norm": 1.157460790180144, "learning_rate": 9.153880961768877e-06, "loss": 0.7939, "num_input_tokens_seen": 1185054080, "step": 6563 }, { "epoch": 0.7185746736363885, "grad_norm": 1.3452600821142238, "learning_rate": 9.147231206274431e-06, "loss": 0.7731, "num_input_tokens_seen": 1185227232, "step": 6564 }, { "epoch": 0.7186841457073264, "grad_norm": 1.2054602018833243, "learning_rate": 9.140583326132249e-06, "loss": 0.7692, "num_input_tokens_seen": 1185370368, "step": 6565 }, { "epoch": 0.7187936177782643, "grad_norm": 1.1817164373116698, "learning_rate": 9.133937322128722e-06, "loss": 0.6939, "num_input_tokens_seen": 1185543744, "step": 6566 }, { "epoch": 0.7189030898492023, "grad_norm": 1.3826687409410825, "learning_rate": 9.127293195050096e-06, "loss": 1.0002, "num_input_tokens_seen": 1185722720, "step": 6567 }, { "epoch": 0.7190125619201401, "grad_norm": 1.0877336974451133, "learning_rate": 9.120650945682325e-06, "loss": 0.5654, "num_input_tokens_seen": 1185913792, "step": 6568 }, { "epoch": 0.719122033991078, "grad_norm": 1.2380590871320665, "learning_rate": 9.114010574811197e-06, "loss": 0.6868, "num_input_tokens_seen": 1186092768, "step": 6569 }, { "epoch": 0.7192315060620159, "grad_norm": 1.2451626159757985, "learning_rate": 9.107372083222251e-06, "loss": 0.8304, "num_input_tokens_seen": 1186254720, "step": 6570 }, { "epoch": 0.7193409781329538, "grad_norm": 1.2583633543479629, "learning_rate": 9.100735471700805e-06, "loss": 0.8053, "num_input_tokens_seen": 1186450944, "step": 6571 }, { "epoch": 0.7194504502038918, "grad_norm": 1.0885486085417886, "learning_rate": 9.094100741031961e-06, "loss": 0.5698, "num_input_tokens_seen": 1186634400, "step": 6572 }, { "epoch": 0.7195599222748297, "grad_norm": 1.3494616371036057, "learning_rate": 9.087467892000582e-06, "loss": 0.6843, "num_input_tokens_seen": 1186778432, "step": 6573 }, { "epoch": 0.7196693943457675, "grad_norm": 1.120691313603234, "learning_rate": 9.08083692539135e-06, "loss": 0.7123, "num_input_tokens_seen": 1186964128, "step": 6574 }, { "epoch": 0.7197788664167054, "grad_norm": 1.2031637408472002, "learning_rate": 9.07420784198866e-06, "loss": 0.8055, "num_input_tokens_seen": 1187123392, "step": 6575 }, { "epoch": 0.7198883384876433, "grad_norm": 1.317440543525163, "learning_rate": 9.067580642576746e-06, "loss": 0.7627, "num_input_tokens_seen": 1187283552, "step": 6576 }, { "epoch": 0.7199978105585813, "grad_norm": 1.4554506785017725, "learning_rate": 9.060955327939582e-06, "loss": 0.9206, "num_input_tokens_seen": 1187473952, "step": 6577 }, { "epoch": 0.7201072826295192, "grad_norm": 1.3321586838786148, "learning_rate": 9.054331898860935e-06, "loss": 0.7725, "num_input_tokens_seen": 1187635456, "step": 6578 }, { "epoch": 0.7202167547004571, "grad_norm": 1.225821204230291, "learning_rate": 9.047710356124342e-06, "loss": 0.9809, "num_input_tokens_seen": 1187807712, "step": 6579 }, { "epoch": 0.7203262267713949, "grad_norm": 1.1687547469425188, "learning_rate": 9.041090700513117e-06, "loss": 0.6504, "num_input_tokens_seen": 1187987360, "step": 6580 }, { "epoch": 0.7204356988423328, "grad_norm": 1.19390731693655, "learning_rate": 9.034472932810354e-06, "loss": 0.7909, "num_input_tokens_seen": 1188194112, "step": 6581 }, { "epoch": 0.7205451709132707, "grad_norm": 1.1702826298771225, "learning_rate": 9.027857053798913e-06, "loss": 0.9215, "num_input_tokens_seen": 1188406912, "step": 6582 }, { "epoch": 0.7206546429842087, "grad_norm": 1.2674236467978421, "learning_rate": 9.02124306426146e-06, "loss": 1.0656, "num_input_tokens_seen": 1188616576, "step": 6583 }, { "epoch": 0.7207641150551466, "grad_norm": 1.217481637851493, "learning_rate": 9.014630964980404e-06, "loss": 0.9511, "num_input_tokens_seen": 1188824448, "step": 6584 }, { "epoch": 0.7208735871260844, "grad_norm": 1.1204178530477644, "learning_rate": 9.008020756737945e-06, "loss": 0.8529, "num_input_tokens_seen": 1189011936, "step": 6585 }, { "epoch": 0.7209830591970223, "grad_norm": 1.3666433504875901, "learning_rate": 9.001412440316059e-06, "loss": 0.9814, "num_input_tokens_seen": 1189202336, "step": 6586 }, { "epoch": 0.7210925312679602, "grad_norm": 1.216096336560989, "learning_rate": 8.994806016496499e-06, "loss": 0.7476, "num_input_tokens_seen": 1189381536, "step": 6587 }, { "epoch": 0.7212020033388982, "grad_norm": 1.0799802766870987, "learning_rate": 8.988201486060791e-06, "loss": 0.7003, "num_input_tokens_seen": 1189580224, "step": 6588 }, { "epoch": 0.7213114754098361, "grad_norm": 1.2473073090404505, "learning_rate": 8.981598849790238e-06, "loss": 0.6302, "num_input_tokens_seen": 1189753376, "step": 6589 }, { "epoch": 0.721420947480774, "grad_norm": 1.2307285579508231, "learning_rate": 8.974998108465907e-06, "loss": 0.9024, "num_input_tokens_seen": 1189929664, "step": 6590 }, { "epoch": 0.7215304195517118, "grad_norm": 1.2281234430589587, "learning_rate": 8.968399262868677e-06, "loss": 0.6359, "num_input_tokens_seen": 1190118720, "step": 6591 }, { "epoch": 0.7216398916226497, "grad_norm": 1.284439675589189, "learning_rate": 8.961802313779166e-06, "loss": 0.7358, "num_input_tokens_seen": 1190302624, "step": 6592 }, { "epoch": 0.7217493636935877, "grad_norm": 1.1162100297151547, "learning_rate": 8.955207261977783e-06, "loss": 0.768, "num_input_tokens_seen": 1190510720, "step": 6593 }, { "epoch": 0.7218588357645256, "grad_norm": 1.1462461363749143, "learning_rate": 8.948614108244705e-06, "loss": 0.83, "num_input_tokens_seen": 1190721952, "step": 6594 }, { "epoch": 0.7219683078354635, "grad_norm": 1.118491099145267, "learning_rate": 8.942022853359896e-06, "loss": 0.6181, "num_input_tokens_seen": 1190887264, "step": 6595 }, { "epoch": 0.7220777799064014, "grad_norm": 1.1399304090976121, "learning_rate": 8.935433498103086e-06, "loss": 0.837, "num_input_tokens_seen": 1191047872, "step": 6596 }, { "epoch": 0.7221872519773392, "grad_norm": 1.151489096052825, "learning_rate": 8.928846043253772e-06, "loss": 0.5566, "num_input_tokens_seen": 1191261344, "step": 6597 }, { "epoch": 0.7222967240482772, "grad_norm": 1.2235145846621203, "learning_rate": 8.922260489591266e-06, "loss": 0.7768, "num_input_tokens_seen": 1191432032, "step": 6598 }, { "epoch": 0.7224061961192151, "grad_norm": 1.264475594863898, "learning_rate": 8.915676837894593e-06, "loss": 0.7494, "num_input_tokens_seen": 1191609216, "step": 6599 }, { "epoch": 0.722515668190153, "grad_norm": 1.0767910763124209, "learning_rate": 8.909095088942617e-06, "loss": 0.624, "num_input_tokens_seen": 1191799392, "step": 6600 }, { "epoch": 0.7226251402610909, "grad_norm": 1.137895479725553, "learning_rate": 8.902515243513918e-06, "loss": 0.658, "num_input_tokens_seen": 1191994048, "step": 6601 }, { "epoch": 0.7227346123320287, "grad_norm": 1.2106294301611213, "learning_rate": 8.895937302386898e-06, "loss": 0.6592, "num_input_tokens_seen": 1192150176, "step": 6602 }, { "epoch": 0.7228440844029667, "grad_norm": 1.230984582709941, "learning_rate": 8.88936126633971e-06, "loss": 0.8872, "num_input_tokens_seen": 1192355136, "step": 6603 }, { "epoch": 0.7229535564739046, "grad_norm": 1.138999459990791, "learning_rate": 8.882787136150275e-06, "loss": 0.8755, "num_input_tokens_seen": 1192540832, "step": 6604 }, { "epoch": 0.7230630285448425, "grad_norm": 1.1846559498298073, "learning_rate": 8.876214912596331e-06, "loss": 0.6953, "num_input_tokens_seen": 1192746240, "step": 6605 }, { "epoch": 0.7231725006157804, "grad_norm": 1.211233878013625, "learning_rate": 8.869644596455324e-06, "loss": 0.7222, "num_input_tokens_seen": 1192951648, "step": 6606 }, { "epoch": 0.7232819726867183, "grad_norm": 1.1030110243577516, "learning_rate": 8.863076188504537e-06, "loss": 0.7606, "num_input_tokens_seen": 1193149440, "step": 6607 }, { "epoch": 0.7233914447576562, "grad_norm": 1.1957899648442774, "learning_rate": 8.856509689520976e-06, "loss": 0.7701, "num_input_tokens_seen": 1193323488, "step": 6608 }, { "epoch": 0.7235009168285941, "grad_norm": 1.1398362345771291, "learning_rate": 8.849945100281474e-06, "loss": 0.6302, "num_input_tokens_seen": 1193487456, "step": 6609 }, { "epoch": 0.723610388899532, "grad_norm": 1.243306789310867, "learning_rate": 8.843382421562573e-06, "loss": 0.7829, "num_input_tokens_seen": 1193662400, "step": 6610 }, { "epoch": 0.7237198609704699, "grad_norm": 1.279250934736914, "learning_rate": 8.836821654140656e-06, "loss": 1.2094, "num_input_tokens_seen": 1193868704, "step": 6611 }, { "epoch": 0.7238293330414078, "grad_norm": 1.1509247917573668, "learning_rate": 8.830262798791838e-06, "loss": 0.8877, "num_input_tokens_seen": 1194062464, "step": 6612 }, { "epoch": 0.7239388051123458, "grad_norm": 1.259115503804588, "learning_rate": 8.823705856292019e-06, "loss": 0.644, "num_input_tokens_seen": 1194226432, "step": 6613 }, { "epoch": 0.7240482771832836, "grad_norm": 1.218631298446852, "learning_rate": 8.817150827416876e-06, "loss": 0.7796, "num_input_tokens_seen": 1194406976, "step": 6614 }, { "epoch": 0.7241577492542215, "grad_norm": 1.1601765948736962, "learning_rate": 8.810597712941843e-06, "loss": 0.8061, "num_input_tokens_seen": 1194581696, "step": 6615 }, { "epoch": 0.7242672213251594, "grad_norm": 1.1959537954942203, "learning_rate": 8.80404651364217e-06, "loss": 0.9857, "num_input_tokens_seen": 1194760000, "step": 6616 }, { "epoch": 0.7243766933960973, "grad_norm": 1.2571814239302022, "learning_rate": 8.797497230292814e-06, "loss": 0.9998, "num_input_tokens_seen": 1194905600, "step": 6617 }, { "epoch": 0.7244861654670353, "grad_norm": 1.1618379676666954, "learning_rate": 8.790949863668571e-06, "loss": 1.0156, "num_input_tokens_seen": 1195112352, "step": 6618 }, { "epoch": 0.7245956375379731, "grad_norm": 1.2097274148515476, "learning_rate": 8.784404414543973e-06, "loss": 0.8343, "num_input_tokens_seen": 1195259296, "step": 6619 }, { "epoch": 0.724705109608911, "grad_norm": 1.224734213274696, "learning_rate": 8.777860883693335e-06, "loss": 0.9077, "num_input_tokens_seen": 1195444544, "step": 6620 }, { "epoch": 0.7248145816798489, "grad_norm": 1.3274238979846626, "learning_rate": 8.771319271890741e-06, "loss": 0.7657, "num_input_tokens_seen": 1195644576, "step": 6621 }, { "epoch": 0.7249240537507868, "grad_norm": 1.2860973124430999, "learning_rate": 8.764779579910054e-06, "loss": 0.6545, "num_input_tokens_seen": 1195815040, "step": 6622 }, { "epoch": 0.7250335258217248, "grad_norm": 1.358716257022027, "learning_rate": 8.758241808524906e-06, "loss": 0.9177, "num_input_tokens_seen": 1195992224, "step": 6623 }, { "epoch": 0.7251429978926627, "grad_norm": 1.3144597454386118, "learning_rate": 8.751705958508697e-06, "loss": 1.0859, "num_input_tokens_seen": 1196187328, "step": 6624 }, { "epoch": 0.7252524699636005, "grad_norm": 1.1752731391302085, "learning_rate": 8.745172030634616e-06, "loss": 0.9452, "num_input_tokens_seen": 1196350624, "step": 6625 }, { "epoch": 0.7253619420345384, "grad_norm": 1.244393171851205, "learning_rate": 8.738640025675612e-06, "loss": 0.9741, "num_input_tokens_seen": 1196531616, "step": 6626 }, { "epoch": 0.7254714141054763, "grad_norm": 1.163812495273471, "learning_rate": 8.732109944404408e-06, "loss": 0.6813, "num_input_tokens_seen": 1196722240, "step": 6627 }, { "epoch": 0.7255808861764143, "grad_norm": 1.2197933132943437, "learning_rate": 8.725581787593496e-06, "loss": 0.9786, "num_input_tokens_seen": 1196941312, "step": 6628 }, { "epoch": 0.7256903582473522, "grad_norm": 1.3848204608600092, "learning_rate": 8.719055556015149e-06, "loss": 0.9005, "num_input_tokens_seen": 1197095872, "step": 6629 }, { "epoch": 0.7257998303182901, "grad_norm": 1.2048826878374925, "learning_rate": 8.712531250441394e-06, "loss": 0.8307, "num_input_tokens_seen": 1197283808, "step": 6630 }, { "epoch": 0.7259093023892279, "grad_norm": 1.2437617564963608, "learning_rate": 8.706008871644075e-06, "loss": 0.8498, "num_input_tokens_seen": 1197463904, "step": 6631 }, { "epoch": 0.7260187744601658, "grad_norm": 1.245839921217291, "learning_rate": 8.699488420394741e-06, "loss": 0.6923, "num_input_tokens_seen": 1197631904, "step": 6632 }, { "epoch": 0.7261282465311037, "grad_norm": 1.2706097056926802, "learning_rate": 8.692969897464775e-06, "loss": 0.7962, "num_input_tokens_seen": 1197826560, "step": 6633 }, { "epoch": 0.7262377186020417, "grad_norm": 1.2774821915103787, "learning_rate": 8.686453303625294e-06, "loss": 0.8388, "num_input_tokens_seen": 1198005088, "step": 6634 }, { "epoch": 0.7263471906729796, "grad_norm": 1.375614245804689, "learning_rate": 8.679938639647203e-06, "loss": 0.9238, "num_input_tokens_seen": 1198174656, "step": 6635 }, { "epoch": 0.7264566627439174, "grad_norm": 1.1035028777609992, "learning_rate": 8.673425906301171e-06, "loss": 0.7076, "num_input_tokens_seen": 1198376704, "step": 6636 }, { "epoch": 0.7265661348148553, "grad_norm": 1.3342351049485617, "learning_rate": 8.666915104357637e-06, "loss": 0.8841, "num_input_tokens_seen": 1198542016, "step": 6637 }, { "epoch": 0.7266756068857932, "grad_norm": 1.3761613016537162, "learning_rate": 8.660406234586838e-06, "loss": 0.7606, "num_input_tokens_seen": 1198709568, "step": 6638 }, { "epoch": 0.7267850789567312, "grad_norm": 1.0854964973150714, "learning_rate": 8.653899297758728e-06, "loss": 0.6159, "num_input_tokens_seen": 1198902432, "step": 6639 }, { "epoch": 0.7268945510276691, "grad_norm": 1.1897486427653683, "learning_rate": 8.647394294643099e-06, "loss": 0.8114, "num_input_tokens_seen": 1199097088, "step": 6640 }, { "epoch": 0.727004023098607, "grad_norm": 1.3017380778025158, "learning_rate": 8.640891226009449e-06, "loss": 0.7512, "num_input_tokens_seen": 1199253664, "step": 6641 }, { "epoch": 0.7271134951695448, "grad_norm": 1.2244242112929813, "learning_rate": 8.63439009262711e-06, "loss": 0.7364, "num_input_tokens_seen": 1199444064, "step": 6642 }, { "epoch": 0.7272229672404827, "grad_norm": 1.369325720537141, "learning_rate": 8.62789089526512e-06, "loss": 0.8733, "num_input_tokens_seen": 1199642304, "step": 6643 }, { "epoch": 0.7273324393114207, "grad_norm": 1.2503425128884216, "learning_rate": 8.621393634692346e-06, "loss": 0.9894, "num_input_tokens_seen": 1199855104, "step": 6644 }, { "epoch": 0.7274419113823586, "grad_norm": 1.1934375893392277, "learning_rate": 8.614898311677397e-06, "loss": 0.7521, "num_input_tokens_seen": 1200036320, "step": 6645 }, { "epoch": 0.7275513834532965, "grad_norm": 1.2593361292441656, "learning_rate": 8.608404926988644e-06, "loss": 0.8166, "num_input_tokens_seen": 1200191776, "step": 6646 }, { "epoch": 0.7276608555242344, "grad_norm": 1.174190420899119, "learning_rate": 8.601913481394273e-06, "loss": 0.5999, "num_input_tokens_seen": 1200319904, "step": 6647 }, { "epoch": 0.7277703275951722, "grad_norm": 1.2455571852013663, "learning_rate": 8.595423975662168e-06, "loss": 0.6633, "num_input_tokens_seen": 1200505376, "step": 6648 }, { "epoch": 0.7278797996661102, "grad_norm": 1.1604605638475447, "learning_rate": 8.588936410560065e-06, "loss": 0.7045, "num_input_tokens_seen": 1200663296, "step": 6649 }, { "epoch": 0.7279892717370481, "grad_norm": 1.2133102305452033, "learning_rate": 8.582450786855394e-06, "loss": 0.8198, "num_input_tokens_seen": 1200840704, "step": 6650 }, { "epoch": 0.728098743807986, "grad_norm": 1.1293218567023733, "learning_rate": 8.575967105315422e-06, "loss": 0.6319, "num_input_tokens_seen": 1200995488, "step": 6651 }, { "epoch": 0.7282082158789239, "grad_norm": 1.2459389521674311, "learning_rate": 8.569485366707142e-06, "loss": 0.9258, "num_input_tokens_seen": 1201162144, "step": 6652 }, { "epoch": 0.7283176879498617, "grad_norm": 1.3339295941522369, "learning_rate": 8.563005571797334e-06, "loss": 0.7841, "num_input_tokens_seen": 1201322528, "step": 6653 }, { "epoch": 0.7284271600207997, "grad_norm": 1.1364452649188852, "learning_rate": 8.556527721352542e-06, "loss": 0.932, "num_input_tokens_seen": 1201507104, "step": 6654 }, { "epoch": 0.7285366320917376, "grad_norm": 1.1523635086693222, "learning_rate": 8.550051816139088e-06, "loss": 0.9286, "num_input_tokens_seen": 1201679808, "step": 6655 }, { "epoch": 0.7286461041626755, "grad_norm": 1.3971108478567602, "learning_rate": 8.543577856923058e-06, "loss": 0.9688, "num_input_tokens_seen": 1201852288, "step": 6656 }, { "epoch": 0.7287555762336134, "grad_norm": 1.1180903914390574, "learning_rate": 8.537105844470297e-06, "loss": 0.8774, "num_input_tokens_seen": 1202016032, "step": 6657 }, { "epoch": 0.7288650483045513, "grad_norm": 1.4452251511031011, "learning_rate": 8.530635779546453e-06, "loss": 0.9674, "num_input_tokens_seen": 1202197920, "step": 6658 }, { "epoch": 0.7289745203754892, "grad_norm": 1.327389816553535, "learning_rate": 8.524167662916913e-06, "loss": 0.8467, "num_input_tokens_seen": 1202364128, "step": 6659 }, { "epoch": 0.7290839924464271, "grad_norm": 1.3141634254842947, "learning_rate": 8.517701495346842e-06, "loss": 0.8004, "num_input_tokens_seen": 1202526976, "step": 6660 }, { "epoch": 0.729193464517365, "grad_norm": 1.1968939797114169, "learning_rate": 8.511237277601174e-06, "loss": 0.7838, "num_input_tokens_seen": 1202679296, "step": 6661 }, { "epoch": 0.7293029365883029, "grad_norm": 1.2644506115633567, "learning_rate": 8.504775010444616e-06, "loss": 0.6777, "num_input_tokens_seen": 1202850880, "step": 6662 }, { "epoch": 0.7294124086592408, "grad_norm": 1.376322370492135, "learning_rate": 8.49831469464164e-06, "loss": 0.9366, "num_input_tokens_seen": 1203017536, "step": 6663 }, { "epoch": 0.7295218807301788, "grad_norm": 1.3642430451453351, "learning_rate": 8.491856330956491e-06, "loss": 1.0457, "num_input_tokens_seen": 1203173888, "step": 6664 }, { "epoch": 0.7296313528011166, "grad_norm": 1.302032189954251, "learning_rate": 8.48539992015317e-06, "loss": 0.8974, "num_input_tokens_seen": 1203339872, "step": 6665 }, { "epoch": 0.7297408248720545, "grad_norm": 1.2006001918598754, "learning_rate": 8.478945462995477e-06, "loss": 0.6084, "num_input_tokens_seen": 1203505632, "step": 6666 }, { "epoch": 0.7298502969429924, "grad_norm": 1.1983055589204588, "learning_rate": 8.472492960246953e-06, "loss": 0.8562, "num_input_tokens_seen": 1203686848, "step": 6667 }, { "epoch": 0.7299597690139303, "grad_norm": 1.1954718589517925, "learning_rate": 8.466042412670916e-06, "loss": 0.7483, "num_input_tokens_seen": 1203832448, "step": 6668 }, { "epoch": 0.7300692410848683, "grad_norm": 1.0362928376586853, "learning_rate": 8.459593821030454e-06, "loss": 0.5752, "num_input_tokens_seen": 1204020608, "step": 6669 }, { "epoch": 0.7301787131558061, "grad_norm": 1.1127672045990413, "learning_rate": 8.453147186088423e-06, "loss": 0.7345, "num_input_tokens_seen": 1204195104, "step": 6670 }, { "epoch": 0.730288185226744, "grad_norm": 1.2594275678922904, "learning_rate": 8.446702508607449e-06, "loss": 1.0612, "num_input_tokens_seen": 1204366688, "step": 6671 }, { "epoch": 0.7303976572976819, "grad_norm": 1.190658634942217, "learning_rate": 8.440259789349913e-06, "loss": 0.6685, "num_input_tokens_seen": 1204555744, "step": 6672 }, { "epoch": 0.7305071293686198, "grad_norm": 1.1772117657207297, "learning_rate": 8.433819029078005e-06, "loss": 0.9997, "num_input_tokens_seen": 1204745920, "step": 6673 }, { "epoch": 0.7306166014395578, "grad_norm": 1.185778172662783, "learning_rate": 8.42738022855362e-06, "loss": 0.6609, "num_input_tokens_seen": 1204930496, "step": 6674 }, { "epoch": 0.7307260735104957, "grad_norm": 1.2847068487857043, "learning_rate": 8.42094338853848e-06, "loss": 0.8714, "num_input_tokens_seen": 1205102528, "step": 6675 }, { "epoch": 0.7308355455814335, "grad_norm": 1.1881529869864926, "learning_rate": 8.414508509794044e-06, "loss": 0.9924, "num_input_tokens_seen": 1205309952, "step": 6676 }, { "epoch": 0.7309450176523714, "grad_norm": 1.1711847434088545, "learning_rate": 8.408075593081546e-06, "loss": 0.6985, "num_input_tokens_seen": 1205486240, "step": 6677 }, { "epoch": 0.7310544897233093, "grad_norm": 1.328271745540735, "learning_rate": 8.401644639161987e-06, "loss": 1.0486, "num_input_tokens_seen": 1205669024, "step": 6678 }, { "epoch": 0.7311639617942473, "grad_norm": 1.1868310349102038, "learning_rate": 8.39521564879613e-06, "loss": 0.7749, "num_input_tokens_seen": 1205823808, "step": 6679 }, { "epoch": 0.7312734338651852, "grad_norm": 1.3479984457137206, "learning_rate": 8.38878862274453e-06, "loss": 0.8423, "num_input_tokens_seen": 1205988000, "step": 6680 }, { "epoch": 0.7313829059361231, "grad_norm": 1.2239053214206141, "learning_rate": 8.382363561767467e-06, "loss": 0.7935, "num_input_tokens_seen": 1206185792, "step": 6681 }, { "epoch": 0.7314923780070609, "grad_norm": 1.4119660237468805, "learning_rate": 8.375940466625047e-06, "loss": 0.9047, "num_input_tokens_seen": 1206380448, "step": 6682 }, { "epoch": 0.7316018500779988, "grad_norm": 1.1864415521191294, "learning_rate": 8.369519338077067e-06, "loss": 0.7655, "num_input_tokens_seen": 1206564800, "step": 6683 }, { "epoch": 0.7317113221489367, "grad_norm": 1.1595447097324054, "learning_rate": 8.36310017688318e-06, "loss": 0.7353, "num_input_tokens_seen": 1206716448, "step": 6684 }, { "epoch": 0.7318207942198747, "grad_norm": 1.1493478916408588, "learning_rate": 8.356682983802717e-06, "loss": 0.9517, "num_input_tokens_seen": 1206926560, "step": 6685 }, { "epoch": 0.7319302662908126, "grad_norm": 1.1673057996656946, "learning_rate": 8.35026775959485e-06, "loss": 0.7143, "num_input_tokens_seen": 1207103296, "step": 6686 }, { "epoch": 0.7320397383617505, "grad_norm": 1.3634877745272966, "learning_rate": 8.343854505018477e-06, "loss": 0.8815, "num_input_tokens_seen": 1207278912, "step": 6687 }, { "epoch": 0.7321492104326883, "grad_norm": 1.158789912239096, "learning_rate": 8.337443220832267e-06, "loss": 0.8753, "num_input_tokens_seen": 1207440864, "step": 6688 }, { "epoch": 0.7322586825036262, "grad_norm": 1.1550606461493522, "learning_rate": 8.331033907794689e-06, "loss": 0.7774, "num_input_tokens_seen": 1207619168, "step": 6689 }, { "epoch": 0.7323681545745642, "grad_norm": 1.2312416676660527, "learning_rate": 8.324626566663914e-06, "loss": 0.9379, "num_input_tokens_seen": 1207806880, "step": 6690 }, { "epoch": 0.7324776266455021, "grad_norm": 1.2900262639972386, "learning_rate": 8.31822119819796e-06, "loss": 0.8858, "num_input_tokens_seen": 1207997952, "step": 6691 }, { "epoch": 0.73258709871644, "grad_norm": 1.049204611381202, "learning_rate": 8.311817803154525e-06, "loss": 0.7763, "num_input_tokens_seen": 1208164384, "step": 6692 }, { "epoch": 0.7326965707873778, "grad_norm": 1.1116643038490575, "learning_rate": 8.305416382291157e-06, "loss": 0.9212, "num_input_tokens_seen": 1208371360, "step": 6693 }, { "epoch": 0.7328060428583157, "grad_norm": 1.2508166506521186, "learning_rate": 8.299016936365111e-06, "loss": 0.7534, "num_input_tokens_seen": 1208562432, "step": 6694 }, { "epoch": 0.7329155149292537, "grad_norm": 1.288509828404936, "learning_rate": 8.292619466133437e-06, "loss": 0.9269, "num_input_tokens_seen": 1208761792, "step": 6695 }, { "epoch": 0.7330249870001916, "grad_norm": 1.1330320474379996, "learning_rate": 8.286223972352939e-06, "loss": 0.7216, "num_input_tokens_seen": 1208947936, "step": 6696 }, { "epoch": 0.7331344590711295, "grad_norm": 1.3580234868486332, "learning_rate": 8.279830455780196e-06, "loss": 1.0054, "num_input_tokens_seen": 1209122656, "step": 6697 }, { "epoch": 0.7332439311420674, "grad_norm": 1.3711882555960526, "learning_rate": 8.273438917171536e-06, "loss": 0.9168, "num_input_tokens_seen": 1209277440, "step": 6698 }, { "epoch": 0.7333534032130052, "grad_norm": 1.2507018979674356, "learning_rate": 8.267049357283088e-06, "loss": 0.739, "num_input_tokens_seen": 1209453952, "step": 6699 }, { "epoch": 0.7334628752839432, "grad_norm": 1.5128073703148577, "learning_rate": 8.26066177687071e-06, "loss": 0.8182, "num_input_tokens_seen": 1209643904, "step": 6700 }, { "epoch": 0.7335723473548811, "grad_norm": 1.2659901692609885, "learning_rate": 8.254276176690045e-06, "loss": 0.7026, "num_input_tokens_seen": 1209818624, "step": 6701 }, { "epoch": 0.733681819425819, "grad_norm": 1.177525642585559, "learning_rate": 8.247892557496495e-06, "loss": 0.6626, "num_input_tokens_seen": 1209992224, "step": 6702 }, { "epoch": 0.7337912914967569, "grad_norm": 1.0599522056241015, "learning_rate": 8.241510920045232e-06, "loss": 0.959, "num_input_tokens_seen": 1210194944, "step": 6703 }, { "epoch": 0.7339007635676948, "grad_norm": 1.1399192194175363, "learning_rate": 8.235131265091189e-06, "loss": 0.8675, "num_input_tokens_seen": 1210379072, "step": 6704 }, { "epoch": 0.7340102356386327, "grad_norm": 1.2366057893176094, "learning_rate": 8.22875359338906e-06, "loss": 0.8466, "num_input_tokens_seen": 1210559392, "step": 6705 }, { "epoch": 0.7341197077095706, "grad_norm": 1.1808244897533233, "learning_rate": 8.222377905693338e-06, "loss": 0.5915, "num_input_tokens_seen": 1210756512, "step": 6706 }, { "epoch": 0.7342291797805085, "grad_norm": 1.2781917078942098, "learning_rate": 8.21600420275822e-06, "loss": 1.0757, "num_input_tokens_seen": 1210969312, "step": 6707 }, { "epoch": 0.7343386518514464, "grad_norm": 1.235812304071978, "learning_rate": 8.209632485337727e-06, "loss": 0.9046, "num_input_tokens_seen": 1211147840, "step": 6708 }, { "epoch": 0.7344481239223843, "grad_norm": 1.1723884983361461, "learning_rate": 8.203262754185611e-06, "loss": 0.822, "num_input_tokens_seen": 1211362208, "step": 6709 }, { "epoch": 0.7345575959933222, "grad_norm": 1.1965534027144318, "learning_rate": 8.196895010055403e-06, "loss": 0.6619, "num_input_tokens_seen": 1211544992, "step": 6710 }, { "epoch": 0.7346670680642601, "grad_norm": 1.1957297002212544, "learning_rate": 8.190529253700393e-06, "loss": 0.6354, "num_input_tokens_seen": 1211710080, "step": 6711 }, { "epoch": 0.734776540135198, "grad_norm": 1.288610066865293, "learning_rate": 8.184165485873633e-06, "loss": 1.0866, "num_input_tokens_seen": 1211886368, "step": 6712 }, { "epoch": 0.7348860122061359, "grad_norm": 1.2434058782022859, "learning_rate": 8.177803707327961e-06, "loss": 0.8596, "num_input_tokens_seen": 1212058848, "step": 6713 }, { "epoch": 0.7349954842770738, "grad_norm": 1.163171619298882, "learning_rate": 8.171443918815939e-06, "loss": 0.8312, "num_input_tokens_seen": 1212255744, "step": 6714 }, { "epoch": 0.7351049563480118, "grad_norm": 1.20577577153457, "learning_rate": 8.165086121089944e-06, "loss": 0.7439, "num_input_tokens_seen": 1212444352, "step": 6715 }, { "epoch": 0.7352144284189496, "grad_norm": 1.4364302427972218, "learning_rate": 8.158730314902063e-06, "loss": 0.8844, "num_input_tokens_seen": 1212627136, "step": 6716 }, { "epoch": 0.7353239004898875, "grad_norm": 1.2963554076815538, "learning_rate": 8.152376501004199e-06, "loss": 0.8864, "num_input_tokens_seen": 1212820896, "step": 6717 }, { "epoch": 0.7354333725608254, "grad_norm": 1.329314563060718, "learning_rate": 8.146024680147987e-06, "loss": 0.8577, "num_input_tokens_seen": 1213006368, "step": 6718 }, { "epoch": 0.7355428446317633, "grad_norm": 1.1237947047319115, "learning_rate": 8.139674853084838e-06, "loss": 0.9356, "num_input_tokens_seen": 1213188480, "step": 6719 }, { "epoch": 0.7356523167027013, "grad_norm": 1.3230824053813688, "learning_rate": 8.13332702056592e-06, "loss": 0.857, "num_input_tokens_seen": 1213347744, "step": 6720 }, { "epoch": 0.7357617887736392, "grad_norm": 1.0636787607654463, "learning_rate": 8.126981183342167e-06, "loss": 0.6646, "num_input_tokens_seen": 1213526496, "step": 6721 }, { "epoch": 0.735871260844577, "grad_norm": 1.1246232885551426, "learning_rate": 8.120637342164298e-06, "loss": 1.0344, "num_input_tokens_seen": 1213721600, "step": 6722 }, { "epoch": 0.7359807329155149, "grad_norm": 1.2608868953945076, "learning_rate": 8.114295497782748e-06, "loss": 0.7786, "num_input_tokens_seen": 1213906176, "step": 6723 }, { "epoch": 0.7360902049864528, "grad_norm": 1.2624522341766344, "learning_rate": 8.107955650947777e-06, "loss": 0.8771, "num_input_tokens_seen": 1214069248, "step": 6724 }, { "epoch": 0.7361996770573908, "grad_norm": 1.2209346948322441, "learning_rate": 8.101617802409343e-06, "loss": 0.9971, "num_input_tokens_seen": 1214238368, "step": 6725 }, { "epoch": 0.7363091491283287, "grad_norm": 1.3521112639956814, "learning_rate": 8.095281952917227e-06, "loss": 0.6737, "num_input_tokens_seen": 1214379712, "step": 6726 }, { "epoch": 0.7364186211992665, "grad_norm": 1.2216657825291561, "learning_rate": 8.088948103220942e-06, "loss": 0.758, "num_input_tokens_seen": 1214520384, "step": 6727 }, { "epoch": 0.7365280932702044, "grad_norm": 1.1423514377657242, "learning_rate": 8.082616254069767e-06, "loss": 0.9206, "num_input_tokens_seen": 1214699136, "step": 6728 }, { "epoch": 0.7366375653411423, "grad_norm": 1.1493290146158717, "learning_rate": 8.076286406212747e-06, "loss": 0.8122, "num_input_tokens_seen": 1214875648, "step": 6729 }, { "epoch": 0.7367470374120803, "grad_norm": 1.155809731044226, "learning_rate": 8.069958560398686e-06, "loss": 0.8615, "num_input_tokens_seen": 1215072544, "step": 6730 }, { "epoch": 0.7368565094830182, "grad_norm": 1.0963517443463286, "learning_rate": 8.063632717376177e-06, "loss": 0.7409, "num_input_tokens_seen": 1215232032, "step": 6731 }, { "epoch": 0.7369659815539561, "grad_norm": 1.1810144704308518, "learning_rate": 8.057308877893524e-06, "loss": 0.7533, "num_input_tokens_seen": 1215415040, "step": 6732 }, { "epoch": 0.7370754536248939, "grad_norm": 1.0565142600039987, "learning_rate": 8.050987042698852e-06, "loss": 0.5853, "num_input_tokens_seen": 1215594016, "step": 6733 }, { "epoch": 0.7371849256958318, "grad_norm": 1.1437516926403757, "learning_rate": 8.04466721254001e-06, "loss": 0.7491, "num_input_tokens_seen": 1215788000, "step": 6734 }, { "epoch": 0.7372943977667697, "grad_norm": 1.1673104973347088, "learning_rate": 8.038349388164627e-06, "loss": 0.8814, "num_input_tokens_seen": 1215961600, "step": 6735 }, { "epoch": 0.7374038698377077, "grad_norm": 1.2747351932417568, "learning_rate": 8.032033570320083e-06, "loss": 0.8221, "num_input_tokens_seen": 1216098688, "step": 6736 }, { "epoch": 0.7375133419086456, "grad_norm": 1.1336673010764795, "learning_rate": 8.025719759753531e-06, "loss": 0.66, "num_input_tokens_seen": 1216277216, "step": 6737 }, { "epoch": 0.7376228139795835, "grad_norm": 1.24532689142467, "learning_rate": 8.019407957211883e-06, "loss": 1.0393, "num_input_tokens_seen": 1216462688, "step": 6738 }, { "epoch": 0.7377322860505213, "grad_norm": 1.2759790158624098, "learning_rate": 8.013098163441813e-06, "loss": 0.9223, "num_input_tokens_seen": 1216653984, "step": 6739 }, { "epoch": 0.7378417581214592, "grad_norm": 1.4001894584253496, "learning_rate": 8.006790379189746e-06, "loss": 1.2815, "num_input_tokens_seen": 1216865440, "step": 6740 }, { "epoch": 0.7379512301923972, "grad_norm": 1.179972229000394, "learning_rate": 8.000484605201902e-06, "loss": 1.0093, "num_input_tokens_seen": 1217068160, "step": 6741 }, { "epoch": 0.7380607022633351, "grad_norm": 1.5316234055484887, "learning_rate": 7.99418084222423e-06, "loss": 0.9698, "num_input_tokens_seen": 1217218912, "step": 6742 }, { "epoch": 0.738170174334273, "grad_norm": 1.2296121673581364, "learning_rate": 7.987879091002456e-06, "loss": 0.7231, "num_input_tokens_seen": 1217382656, "step": 6743 }, { "epoch": 0.7382796464052108, "grad_norm": 1.3972075183912938, "learning_rate": 7.981579352282064e-06, "loss": 0.7381, "num_input_tokens_seen": 1217549312, "step": 6744 }, { "epoch": 0.7383891184761487, "grad_norm": 1.1382353847377222, "learning_rate": 7.9752816268083e-06, "loss": 0.7621, "num_input_tokens_seen": 1217738144, "step": 6745 }, { "epoch": 0.7384985905470867, "grad_norm": 1.24983877095889, "learning_rate": 7.968985915326175e-06, "loss": 0.978, "num_input_tokens_seen": 1217935936, "step": 6746 }, { "epoch": 0.7386080626180246, "grad_norm": 1.3131698533921772, "learning_rate": 7.962692218580451e-06, "loss": 0.8273, "num_input_tokens_seen": 1218120064, "step": 6747 }, { "epoch": 0.7387175346889625, "grad_norm": 1.2170462314566644, "learning_rate": 7.956400537315681e-06, "loss": 1.0233, "num_input_tokens_seen": 1218276864, "step": 6748 }, { "epoch": 0.7388270067599004, "grad_norm": 1.1968361611700513, "learning_rate": 7.950110872276131e-06, "loss": 0.6925, "num_input_tokens_seen": 1218446880, "step": 6749 }, { "epoch": 0.7389364788308382, "grad_norm": 1.3050939417967862, "learning_rate": 7.943823224205879e-06, "loss": 0.7661, "num_input_tokens_seen": 1218612192, "step": 6750 }, { "epoch": 0.7390459509017762, "grad_norm": 1.1550820096117993, "learning_rate": 7.937537593848734e-06, "loss": 0.9334, "num_input_tokens_seen": 1218814240, "step": 6751 }, { "epoch": 0.7391554229727141, "grad_norm": 1.2103221191055524, "learning_rate": 7.931253981948275e-06, "loss": 0.6535, "num_input_tokens_seen": 1218999264, "step": 6752 }, { "epoch": 0.739264895043652, "grad_norm": 1.1496686499528672, "learning_rate": 7.924972389247836e-06, "loss": 0.6463, "num_input_tokens_seen": 1219163904, "step": 6753 }, { "epoch": 0.7393743671145899, "grad_norm": 1.255544715012157, "learning_rate": 7.918692816490517e-06, "loss": 1.0891, "num_input_tokens_seen": 1219355424, "step": 6754 }, { "epoch": 0.7394838391855278, "grad_norm": 1.0772247425764885, "learning_rate": 7.912415264419198e-06, "loss": 0.8389, "num_input_tokens_seen": 1219561056, "step": 6755 }, { "epoch": 0.7395933112564657, "grad_norm": 1.1728772264606022, "learning_rate": 7.906139733776474e-06, "loss": 0.7436, "num_input_tokens_seen": 1219731520, "step": 6756 }, { "epoch": 0.7397027833274036, "grad_norm": 1.15400271209451, "learning_rate": 7.899866225304756e-06, "loss": 0.9019, "num_input_tokens_seen": 1219912736, "step": 6757 }, { "epoch": 0.7398122553983415, "grad_norm": 1.0945841197192665, "learning_rate": 7.893594739746157e-06, "loss": 0.6783, "num_input_tokens_seen": 1220117024, "step": 6758 }, { "epoch": 0.7399217274692794, "grad_norm": 1.2546205001557287, "learning_rate": 7.887325277842605e-06, "loss": 0.7174, "num_input_tokens_seen": 1220292416, "step": 6759 }, { "epoch": 0.7400311995402173, "grad_norm": 1.3410218956231514, "learning_rate": 7.881057840335762e-06, "loss": 0.9755, "num_input_tokens_seen": 1220511264, "step": 6760 }, { "epoch": 0.7401406716111552, "grad_norm": 0.9704050876837856, "learning_rate": 7.874792427967048e-06, "loss": 0.6291, "num_input_tokens_seen": 1220711296, "step": 6761 }, { "epoch": 0.7402501436820931, "grad_norm": 1.2340439944160901, "learning_rate": 7.868529041477654e-06, "loss": 0.8917, "num_input_tokens_seen": 1220884896, "step": 6762 }, { "epoch": 0.740359615753031, "grad_norm": 1.2779764830614997, "learning_rate": 7.862267681608514e-06, "loss": 0.7946, "num_input_tokens_seen": 1221047520, "step": 6763 }, { "epoch": 0.7404690878239689, "grad_norm": 1.3414905981473007, "learning_rate": 7.856008349100366e-06, "loss": 0.8309, "num_input_tokens_seen": 1221237696, "step": 6764 }, { "epoch": 0.7405785598949068, "grad_norm": 1.2886072453078603, "learning_rate": 7.849751044693637e-06, "loss": 0.8358, "num_input_tokens_seen": 1221433024, "step": 6765 }, { "epoch": 0.7406880319658448, "grad_norm": 1.2237309129607765, "learning_rate": 7.843495769128584e-06, "loss": 0.73, "num_input_tokens_seen": 1221616256, "step": 6766 }, { "epoch": 0.7407975040367826, "grad_norm": 1.154694361272721, "learning_rate": 7.83724252314518e-06, "loss": 0.8665, "num_input_tokens_seen": 1221827712, "step": 6767 }, { "epoch": 0.7409069761077205, "grad_norm": 1.415233232801473, "learning_rate": 7.830991307483179e-06, "loss": 1.1668, "num_input_tokens_seen": 1221996608, "step": 6768 }, { "epoch": 0.7410164481786584, "grad_norm": 1.3268024816115513, "learning_rate": 7.824742122882083e-06, "loss": 0.73, "num_input_tokens_seen": 1222167744, "step": 6769 }, { "epoch": 0.7411259202495963, "grad_norm": 1.2597127740700307, "learning_rate": 7.818494970081161e-06, "loss": 0.8455, "num_input_tokens_seen": 1222335072, "step": 6770 }, { "epoch": 0.7412353923205343, "grad_norm": 1.1968998560304034, "learning_rate": 7.812249849819439e-06, "loss": 0.6304, "num_input_tokens_seen": 1222520544, "step": 6771 }, { "epoch": 0.7413448643914722, "grad_norm": 1.2467436814531474, "learning_rate": 7.806006762835696e-06, "loss": 0.7753, "num_input_tokens_seen": 1222695040, "step": 6772 }, { "epoch": 0.74145433646241, "grad_norm": 1.4461898202994967, "learning_rate": 7.79976570986849e-06, "loss": 0.9039, "num_input_tokens_seen": 1222873568, "step": 6773 }, { "epoch": 0.7415638085333479, "grad_norm": 1.2808307333882405, "learning_rate": 7.793526691656117e-06, "loss": 0.8444, "num_input_tokens_seen": 1223068896, "step": 6774 }, { "epoch": 0.7416732806042858, "grad_norm": 1.198815094549785, "learning_rate": 7.787289708936645e-06, "loss": 0.7585, "num_input_tokens_seen": 1223273184, "step": 6775 }, { "epoch": 0.7417827526752238, "grad_norm": 1.2317872748645313, "learning_rate": 7.781054762447898e-06, "loss": 0.7415, "num_input_tokens_seen": 1223463584, "step": 6776 }, { "epoch": 0.7418922247461617, "grad_norm": 1.188176464348808, "learning_rate": 7.774821852927453e-06, "loss": 0.8687, "num_input_tokens_seen": 1223660032, "step": 6777 }, { "epoch": 0.7420016968170995, "grad_norm": 1.222258189182699, "learning_rate": 7.768590981112654e-06, "loss": 0.7623, "num_input_tokens_seen": 1223858496, "step": 6778 }, { "epoch": 0.7421111688880374, "grad_norm": 1.316962240739394, "learning_rate": 7.762362147740601e-06, "loss": 0.8207, "num_input_tokens_seen": 1224017088, "step": 6779 }, { "epoch": 0.7422206409589753, "grad_norm": 0.9792964964212751, "learning_rate": 7.756135353548145e-06, "loss": 0.6315, "num_input_tokens_seen": 1224211296, "step": 6780 }, { "epoch": 0.7423301130299133, "grad_norm": 1.0611575565581168, "learning_rate": 7.749910599271928e-06, "loss": 0.7878, "num_input_tokens_seen": 1224394080, "step": 6781 }, { "epoch": 0.7424395851008512, "grad_norm": 1.4328288602570944, "learning_rate": 7.743687885648293e-06, "loss": 0.7693, "num_input_tokens_seen": 1224540800, "step": 6782 }, { "epoch": 0.7425490571717891, "grad_norm": 1.1048837468543038, "learning_rate": 7.737467213413405e-06, "loss": 0.9423, "num_input_tokens_seen": 1224720224, "step": 6783 }, { "epoch": 0.7426585292427269, "grad_norm": 1.2150530864750624, "learning_rate": 7.731248583303142e-06, "loss": 0.8774, "num_input_tokens_seen": 1224890016, "step": 6784 }, { "epoch": 0.7427680013136648, "grad_norm": 1.178048264410132, "learning_rate": 7.725031996053159e-06, "loss": 1.0115, "num_input_tokens_seen": 1225078848, "step": 6785 }, { "epoch": 0.7428774733846027, "grad_norm": 1.2717069322669936, "learning_rate": 7.718817452398869e-06, "loss": 0.8412, "num_input_tokens_seen": 1225271936, "step": 6786 }, { "epoch": 0.7429869454555407, "grad_norm": 1.2322726635449848, "learning_rate": 7.712604953075428e-06, "loss": 0.9265, "num_input_tokens_seen": 1225457632, "step": 6787 }, { "epoch": 0.7430964175264786, "grad_norm": 1.1325834371554422, "learning_rate": 7.70639449881779e-06, "loss": 0.621, "num_input_tokens_seen": 1225619360, "step": 6788 }, { "epoch": 0.7432058895974165, "grad_norm": 1.3092468909182444, "learning_rate": 7.700186090360609e-06, "loss": 0.8448, "num_input_tokens_seen": 1225774144, "step": 6789 }, { "epoch": 0.7433153616683543, "grad_norm": 1.2479383155757806, "learning_rate": 7.693979728438355e-06, "loss": 0.997, "num_input_tokens_seen": 1225961184, "step": 6790 }, { "epoch": 0.7434248337392922, "grad_norm": 1.1050636438881958, "learning_rate": 7.687775413785201e-06, "loss": 0.6648, "num_input_tokens_seen": 1226130304, "step": 6791 }, { "epoch": 0.7435343058102302, "grad_norm": 1.3921483300860673, "learning_rate": 7.681573147135126e-06, "loss": 0.8546, "num_input_tokens_seen": 1226317792, "step": 6792 }, { "epoch": 0.7436437778811681, "grad_norm": 1.3398481986191635, "learning_rate": 7.675372929221844e-06, "loss": 1.0252, "num_input_tokens_seen": 1226485568, "step": 6793 }, { "epoch": 0.743753249952106, "grad_norm": 1.3068974674433533, "learning_rate": 7.669174760778825e-06, "loss": 0.6643, "num_input_tokens_seen": 1226661408, "step": 6794 }, { "epoch": 0.7438627220230438, "grad_norm": 1.0661890835818746, "learning_rate": 7.662978642539298e-06, "loss": 0.6442, "num_input_tokens_seen": 1226851584, "step": 6795 }, { "epoch": 0.7439721940939817, "grad_norm": 1.215079870306751, "learning_rate": 7.65678457523625e-06, "loss": 0.7448, "num_input_tokens_seen": 1227038400, "step": 6796 }, { "epoch": 0.7440816661649197, "grad_norm": 1.3395166470474658, "learning_rate": 7.650592559602446e-06, "loss": 0.6665, "num_input_tokens_seen": 1227194528, "step": 6797 }, { "epoch": 0.7441911382358576, "grad_norm": 1.268198763608909, "learning_rate": 7.644402596370361e-06, "loss": 1.1128, "num_input_tokens_seen": 1227379104, "step": 6798 }, { "epoch": 0.7443006103067955, "grad_norm": 1.1589429271295053, "learning_rate": 7.638214686272285e-06, "loss": 0.6007, "num_input_tokens_seen": 1227529856, "step": 6799 }, { "epoch": 0.7444100823777334, "grad_norm": 1.11620810734481, "learning_rate": 7.632028830040208e-06, "loss": 0.8588, "num_input_tokens_seen": 1227724512, "step": 6800 }, { "epoch": 0.7445195544486712, "grad_norm": 1.067116357713611, "learning_rate": 7.6258450284059255e-06, "loss": 0.8388, "num_input_tokens_seen": 1227891168, "step": 6801 }, { "epoch": 0.7446290265196092, "grad_norm": 1.0743844199308556, "learning_rate": 7.619663282100961e-06, "loss": 0.6731, "num_input_tokens_seen": 1228069696, "step": 6802 }, { "epoch": 0.7447384985905471, "grad_norm": 1.2763063376879296, "learning_rate": 7.613483591856605e-06, "loss": 0.8809, "num_input_tokens_seen": 1228255840, "step": 6803 }, { "epoch": 0.744847970661485, "grad_norm": 1.2315830574168696, "learning_rate": 7.607305958403904e-06, "loss": 0.7567, "num_input_tokens_seen": 1228453408, "step": 6804 }, { "epoch": 0.7449574427324229, "grad_norm": 1.2341619380127966, "learning_rate": 7.601130382473651e-06, "loss": 0.8681, "num_input_tokens_seen": 1228635296, "step": 6805 }, { "epoch": 0.7450669148033608, "grad_norm": 1.1477378738284556, "learning_rate": 7.5949568647964265e-06, "loss": 0.8736, "num_input_tokens_seen": 1228836224, "step": 6806 }, { "epoch": 0.7451763868742987, "grad_norm": 1.2785717955038483, "learning_rate": 7.58878540610252e-06, "loss": 0.8627, "num_input_tokens_seen": 1229037152, "step": 6807 }, { "epoch": 0.7452858589452366, "grad_norm": 1.097766802776249, "learning_rate": 7.58261600712202e-06, "loss": 0.7538, "num_input_tokens_seen": 1229217248, "step": 6808 }, { "epoch": 0.7453953310161745, "grad_norm": 1.1850457679988111, "learning_rate": 7.576448668584752e-06, "loss": 0.6557, "num_input_tokens_seen": 1229405184, "step": 6809 }, { "epoch": 0.7455048030871124, "grad_norm": 1.0737487874673697, "learning_rate": 7.570283391220295e-06, "loss": 0.8186, "num_input_tokens_seen": 1229609920, "step": 6810 }, { "epoch": 0.7456142751580503, "grad_norm": 1.25482391337943, "learning_rate": 7.564120175757996e-06, "loss": 0.7368, "num_input_tokens_seen": 1229770528, "step": 6811 }, { "epoch": 0.7457237472289882, "grad_norm": 1.3579112536094315, "learning_rate": 7.557959022926947e-06, "loss": 0.883, "num_input_tokens_seen": 1229946368, "step": 6812 }, { "epoch": 0.7458332192999261, "grad_norm": 1.1994048232067607, "learning_rate": 7.551799933456003e-06, "loss": 0.8674, "num_input_tokens_seen": 1230145728, "step": 6813 }, { "epoch": 0.745942691370864, "grad_norm": 1.105292585914517, "learning_rate": 7.5456429080737635e-06, "loss": 0.7522, "num_input_tokens_seen": 1230323136, "step": 6814 }, { "epoch": 0.7460521634418019, "grad_norm": 1.1668300116948105, "learning_rate": 7.5394879475086085e-06, "loss": 0.5962, "num_input_tokens_seen": 1230479040, "step": 6815 }, { "epoch": 0.7461616355127398, "grad_norm": 1.276226943701683, "learning_rate": 7.533335052488652e-06, "loss": 0.6823, "num_input_tokens_seen": 1230657344, "step": 6816 }, { "epoch": 0.7462711075836778, "grad_norm": 1.2122871998723592, "learning_rate": 7.527184223741765e-06, "loss": 0.5537, "num_input_tokens_seen": 1230829152, "step": 6817 }, { "epoch": 0.7463805796546156, "grad_norm": 1.3118770574269147, "learning_rate": 7.521035461995585e-06, "loss": 0.8846, "num_input_tokens_seen": 1230977440, "step": 6818 }, { "epoch": 0.7464900517255535, "grad_norm": 1.3209251472178931, "learning_rate": 7.514888767977493e-06, "loss": 0.9078, "num_input_tokens_seen": 1231151712, "step": 6819 }, { "epoch": 0.7465995237964914, "grad_norm": 1.2876226533409743, "learning_rate": 7.508744142414629e-06, "loss": 0.6792, "num_input_tokens_seen": 1231312768, "step": 6820 }, { "epoch": 0.7467089958674293, "grad_norm": 1.2198755495508167, "learning_rate": 7.502601586033908e-06, "loss": 0.701, "num_input_tokens_seen": 1231495776, "step": 6821 }, { "epoch": 0.7468184679383673, "grad_norm": 1.3165150945473973, "learning_rate": 7.496461099561958e-06, "loss": 0.9869, "num_input_tokens_seen": 1231663104, "step": 6822 }, { "epoch": 0.7469279400093052, "grad_norm": 1.1727438185575074, "learning_rate": 7.490322683725204e-06, "loss": 0.8417, "num_input_tokens_seen": 1231853952, "step": 6823 }, { "epoch": 0.747037412080243, "grad_norm": 1.1815759057851074, "learning_rate": 7.484186339249804e-06, "loss": 0.9362, "num_input_tokens_seen": 1232032480, "step": 6824 }, { "epoch": 0.7471468841511809, "grad_norm": 1.2469420221428473, "learning_rate": 7.4780520668616765e-06, "loss": 0.8023, "num_input_tokens_seen": 1232213920, "step": 6825 }, { "epoch": 0.7472563562221188, "grad_norm": 1.3139315019372746, "learning_rate": 7.471919867286492e-06, "loss": 0.7166, "num_input_tokens_seen": 1232398272, "step": 6826 }, { "epoch": 0.7473658282930568, "grad_norm": 1.113141449298581, "learning_rate": 7.465789741249671e-06, "loss": 0.8036, "num_input_tokens_seen": 1232577920, "step": 6827 }, { "epoch": 0.7474753003639947, "grad_norm": 1.3251659415355859, "learning_rate": 7.4596616894764215e-06, "loss": 0.8175, "num_input_tokens_seen": 1232743680, "step": 6828 }, { "epoch": 0.7475847724349325, "grad_norm": 1.2927151341942058, "learning_rate": 7.4535357126916446e-06, "loss": 0.8717, "num_input_tokens_seen": 1232926688, "step": 6829 }, { "epoch": 0.7476942445058704, "grad_norm": 1.4272465948938566, "learning_rate": 7.447411811620067e-06, "loss": 0.6184, "num_input_tokens_seen": 1233110368, "step": 6830 }, { "epoch": 0.7478037165768083, "grad_norm": 1.0965011329036205, "learning_rate": 7.441289986986102e-06, "loss": 0.6001, "num_input_tokens_seen": 1233260896, "step": 6831 }, { "epoch": 0.7479131886477463, "grad_norm": 1.2308902079244246, "learning_rate": 7.43517023951398e-06, "loss": 0.8753, "num_input_tokens_seen": 1233459808, "step": 6832 }, { "epoch": 0.7480226607186842, "grad_norm": 1.3158436373805869, "learning_rate": 7.429052569927625e-06, "loss": 1.1509, "num_input_tokens_seen": 1233681120, "step": 6833 }, { "epoch": 0.7481321327896221, "grad_norm": 1.3080153824050447, "learning_rate": 7.4229369789507706e-06, "loss": 0.9619, "num_input_tokens_seen": 1233870624, "step": 6834 }, { "epoch": 0.7482416048605599, "grad_norm": 1.3071237990771538, "learning_rate": 7.416823467306866e-06, "loss": 1.0941, "num_input_tokens_seen": 1234074912, "step": 6835 }, { "epoch": 0.7483510769314978, "grad_norm": 1.2857539865252845, "learning_rate": 7.410712035719133e-06, "loss": 0.9603, "num_input_tokens_seen": 1234270464, "step": 6836 }, { "epoch": 0.7484605490024357, "grad_norm": 1.3768011780358638, "learning_rate": 7.4046026849105445e-06, "loss": 1.1083, "num_input_tokens_seen": 1234458624, "step": 6837 }, { "epoch": 0.7485700210733737, "grad_norm": 1.3389531948920292, "learning_rate": 7.3984954156038095e-06, "loss": 0.8416, "num_input_tokens_seen": 1234628416, "step": 6838 }, { "epoch": 0.7486794931443116, "grad_norm": 1.3158787426029706, "learning_rate": 7.392390228521437e-06, "loss": 0.6634, "num_input_tokens_seen": 1234793504, "step": 6839 }, { "epoch": 0.7487889652152495, "grad_norm": 1.3869580559360046, "learning_rate": 7.386287124385624e-06, "loss": 0.8972, "num_input_tokens_seen": 1234967104, "step": 6840 }, { "epoch": 0.7488984372861873, "grad_norm": 1.1983992171358828, "learning_rate": 7.3801861039183796e-06, "loss": 0.8314, "num_input_tokens_seen": 1235159520, "step": 6841 }, { "epoch": 0.7490079093571252, "grad_norm": 1.2568029297093386, "learning_rate": 7.374087167841437e-06, "loss": 0.9452, "num_input_tokens_seen": 1235341856, "step": 6842 }, { "epoch": 0.7491173814280632, "grad_norm": 1.2174077590058403, "learning_rate": 7.367990316876286e-06, "loss": 0.8712, "num_input_tokens_seen": 1235501120, "step": 6843 }, { "epoch": 0.7492268534990011, "grad_norm": 1.1297633207632816, "learning_rate": 7.361895551744175e-06, "loss": 0.6424, "num_input_tokens_seen": 1235711456, "step": 6844 }, { "epoch": 0.749336325569939, "grad_norm": 1.2364096471536632, "learning_rate": 7.355802873166101e-06, "loss": 1.0514, "num_input_tokens_seen": 1235903648, "step": 6845 }, { "epoch": 0.7494457976408768, "grad_norm": 1.3763768498501219, "learning_rate": 7.349712281862817e-06, "loss": 1.1335, "num_input_tokens_seen": 1236106144, "step": 6846 }, { "epoch": 0.7495552697118147, "grad_norm": 1.222513452545602, "learning_rate": 7.34362377855482e-06, "loss": 0.6468, "num_input_tokens_seen": 1236276160, "step": 6847 }, { "epoch": 0.7496647417827527, "grad_norm": 1.3341411090780033, "learning_rate": 7.3375373639623876e-06, "loss": 0.7023, "num_input_tokens_seen": 1236434304, "step": 6848 }, { "epoch": 0.7497742138536906, "grad_norm": 1.107508687917418, "learning_rate": 7.331453038805517e-06, "loss": 0.5547, "num_input_tokens_seen": 1236612160, "step": 6849 }, { "epoch": 0.7498836859246285, "grad_norm": 1.1561298307915324, "learning_rate": 7.325370803803977e-06, "loss": 0.8195, "num_input_tokens_seen": 1236782624, "step": 6850 }, { "epoch": 0.7499931579955664, "grad_norm": 1.233240741183828, "learning_rate": 7.319290659677283e-06, "loss": 0.7762, "num_input_tokens_seen": 1236971904, "step": 6851 }, { "epoch": 0.7501026300665042, "grad_norm": 1.3132165587659104, "learning_rate": 7.313212607144704e-06, "loss": 0.7099, "num_input_tokens_seen": 1237136992, "step": 6852 }, { "epoch": 0.7502121021374422, "grad_norm": 1.1768746598983169, "learning_rate": 7.307136646925261e-06, "loss": 0.7497, "num_input_tokens_seen": 1237317760, "step": 6853 }, { "epoch": 0.7503215742083801, "grad_norm": 1.2867218499299757, "learning_rate": 7.30106277973773e-06, "loss": 0.6882, "num_input_tokens_seen": 1237500992, "step": 6854 }, { "epoch": 0.750431046279318, "grad_norm": 1.1524356611419113, "learning_rate": 7.294991006300631e-06, "loss": 0.6981, "num_input_tokens_seen": 1237671008, "step": 6855 }, { "epoch": 0.7505405183502559, "grad_norm": 1.238311093328766, "learning_rate": 7.288921327332254e-06, "loss": 0.7309, "num_input_tokens_seen": 1237856032, "step": 6856 }, { "epoch": 0.7506499904211938, "grad_norm": 1.0656463374801124, "learning_rate": 7.28285374355063e-06, "loss": 0.6799, "num_input_tokens_seen": 1238044864, "step": 6857 }, { "epoch": 0.7507594624921317, "grad_norm": 1.3607329678048594, "learning_rate": 7.276788255673539e-06, "loss": 0.9884, "num_input_tokens_seen": 1238226528, "step": 6858 }, { "epoch": 0.7508689345630696, "grad_norm": 1.1825160291202321, "learning_rate": 7.270724864418513e-06, "loss": 0.7491, "num_input_tokens_seen": 1238429024, "step": 6859 }, { "epoch": 0.7509784066340075, "grad_norm": 1.2806522556753768, "learning_rate": 7.264663570502844e-06, "loss": 1.0001, "num_input_tokens_seen": 1238642720, "step": 6860 }, { "epoch": 0.7510878787049454, "grad_norm": 1.1952642792593002, "learning_rate": 7.258604374643571e-06, "loss": 1.0472, "num_input_tokens_seen": 1238817664, "step": 6861 }, { "epoch": 0.7511973507758833, "grad_norm": 1.2841519312886798, "learning_rate": 7.252547277557478e-06, "loss": 0.8808, "num_input_tokens_seen": 1238987904, "step": 6862 }, { "epoch": 0.7513068228468212, "grad_norm": 1.1343898971882522, "learning_rate": 7.246492279961129e-06, "loss": 0.6106, "num_input_tokens_seen": 1239161280, "step": 6863 }, { "epoch": 0.7514162949177591, "grad_norm": 1.199884633638098, "learning_rate": 7.24043938257079e-06, "loss": 0.7211, "num_input_tokens_seen": 1239359520, "step": 6864 }, { "epoch": 0.751525766988697, "grad_norm": 1.211601124704468, "learning_rate": 7.234388586102528e-06, "loss": 0.7162, "num_input_tokens_seen": 1239512960, "step": 6865 }, { "epoch": 0.7516352390596349, "grad_norm": 1.1977479345932465, "learning_rate": 7.228339891272135e-06, "loss": 0.9926, "num_input_tokens_seen": 1239712992, "step": 6866 }, { "epoch": 0.7517447111305728, "grad_norm": 1.391064392280517, "learning_rate": 7.222293298795158e-06, "loss": 0.8644, "num_input_tokens_seen": 1239866208, "step": 6867 }, { "epoch": 0.7518541832015108, "grad_norm": 1.1921460465690006, "learning_rate": 7.216248809386899e-06, "loss": 1.0343, "num_input_tokens_seen": 1240059968, "step": 6868 }, { "epoch": 0.7519636552724486, "grad_norm": 1.2309011517029593, "learning_rate": 7.210206423762403e-06, "loss": 0.7215, "num_input_tokens_seen": 1240262016, "step": 6869 }, { "epoch": 0.7520731273433865, "grad_norm": 1.283706403770203, "learning_rate": 7.2041661426364925e-06, "loss": 0.8203, "num_input_tokens_seen": 1240452640, "step": 6870 }, { "epoch": 0.7521825994143244, "grad_norm": 1.1366283996960778, "learning_rate": 7.198127966723692e-06, "loss": 0.8787, "num_input_tokens_seen": 1240643040, "step": 6871 }, { "epoch": 0.7522920714852623, "grad_norm": 1.2307133991502992, "learning_rate": 7.192091896738337e-06, "loss": 0.7175, "num_input_tokens_seen": 1240845760, "step": 6872 }, { "epoch": 0.7524015435562003, "grad_norm": 1.3220922900230219, "learning_rate": 7.1860579333944525e-06, "loss": 0.9158, "num_input_tokens_seen": 1241020032, "step": 6873 }, { "epoch": 0.7525110156271382, "grad_norm": 1.2414748348738287, "learning_rate": 7.180026077405877e-06, "loss": 0.738, "num_input_tokens_seen": 1241201920, "step": 6874 }, { "epoch": 0.752620487698076, "grad_norm": 1.1680715254151681, "learning_rate": 7.1739963294861325e-06, "loss": 0.9236, "num_input_tokens_seen": 1241411584, "step": 6875 }, { "epoch": 0.7527299597690139, "grad_norm": 1.166446625303928, "learning_rate": 7.167968690348554e-06, "loss": 0.8722, "num_input_tokens_seen": 1241601536, "step": 6876 }, { "epoch": 0.7528394318399518, "grad_norm": 1.1262928633413567, "learning_rate": 7.161943160706189e-06, "loss": 0.8053, "num_input_tokens_seen": 1241787232, "step": 6877 }, { "epoch": 0.7529489039108898, "grad_norm": 1.2731873904387287, "learning_rate": 7.155919741271849e-06, "loss": 0.9984, "num_input_tokens_seen": 1241959040, "step": 6878 }, { "epoch": 0.7530583759818277, "grad_norm": 1.1633190879022444, "learning_rate": 7.149898432758093e-06, "loss": 0.6845, "num_input_tokens_seen": 1242146304, "step": 6879 }, { "epoch": 0.7531678480527655, "grad_norm": 1.2993785638287851, "learning_rate": 7.143879235877218e-06, "loss": 0.7861, "num_input_tokens_seen": 1242342976, "step": 6880 }, { "epoch": 0.7532773201237034, "grad_norm": 1.17085486196543, "learning_rate": 7.13786215134131e-06, "loss": 0.7791, "num_input_tokens_seen": 1242525536, "step": 6881 }, { "epoch": 0.7533867921946413, "grad_norm": 1.2159613470064068, "learning_rate": 7.131847179862148e-06, "loss": 1.0009, "num_input_tokens_seen": 1242710784, "step": 6882 }, { "epoch": 0.7534962642655793, "grad_norm": 1.2321386238483503, "learning_rate": 7.125834322151315e-06, "loss": 0.8608, "num_input_tokens_seen": 1242918208, "step": 6883 }, { "epoch": 0.7536057363365172, "grad_norm": 1.3269745724763682, "learning_rate": 7.119823578920112e-06, "loss": 0.7602, "num_input_tokens_seen": 1243107936, "step": 6884 }, { "epoch": 0.7537152084074551, "grad_norm": 1.1348127398008747, "learning_rate": 7.113814950879596e-06, "loss": 0.7868, "num_input_tokens_seen": 1243276384, "step": 6885 }, { "epoch": 0.7538246804783929, "grad_norm": 1.3039738879251475, "learning_rate": 7.1078084387405815e-06, "loss": 0.7359, "num_input_tokens_seen": 1243439456, "step": 6886 }, { "epoch": 0.7539341525493308, "grad_norm": 1.133133415279291, "learning_rate": 7.101804043213625e-06, "loss": 0.7925, "num_input_tokens_seen": 1243644416, "step": 6887 }, { "epoch": 0.7540436246202687, "grad_norm": 1.323135157687155, "learning_rate": 7.0958017650090245e-06, "loss": 0.8946, "num_input_tokens_seen": 1243808160, "step": 6888 }, { "epoch": 0.7541530966912067, "grad_norm": 1.134109964226281, "learning_rate": 7.089801604836857e-06, "loss": 0.8432, "num_input_tokens_seen": 1243992288, "step": 6889 }, { "epoch": 0.7542625687621446, "grad_norm": 1.0762042752077334, "learning_rate": 7.083803563406924e-06, "loss": 0.709, "num_input_tokens_seen": 1244214720, "step": 6890 }, { "epoch": 0.7543720408330825, "grad_norm": 1.324607049506308, "learning_rate": 7.077807641428777e-06, "loss": 0.8934, "num_input_tokens_seen": 1244378016, "step": 6891 }, { "epoch": 0.7544815129040203, "grad_norm": 1.2721363277165232, "learning_rate": 7.071813839611724e-06, "loss": 0.7872, "num_input_tokens_seen": 1244565504, "step": 6892 }, { "epoch": 0.7545909849749582, "grad_norm": 1.1948271121172058, "learning_rate": 7.0658221586648195e-06, "loss": 0.8506, "num_input_tokens_seen": 1244752320, "step": 6893 }, { "epoch": 0.7547004570458962, "grad_norm": 1.217469055753837, "learning_rate": 7.059832599296873e-06, "loss": 0.7866, "num_input_tokens_seen": 1244909120, "step": 6894 }, { "epoch": 0.7548099291168341, "grad_norm": 1.2979976436934515, "learning_rate": 7.053845162216424e-06, "loss": 0.6671, "num_input_tokens_seen": 1245084736, "step": 6895 }, { "epoch": 0.754919401187772, "grad_norm": 1.298884340374737, "learning_rate": 7.047859848131802e-06, "loss": 0.8998, "num_input_tokens_seen": 1245269984, "step": 6896 }, { "epoch": 0.7550288732587098, "grad_norm": 1.326395546839294, "learning_rate": 7.041876657751023e-06, "loss": 0.7954, "num_input_tokens_seen": 1245409760, "step": 6897 }, { "epoch": 0.7551383453296477, "grad_norm": 1.247643395096704, "learning_rate": 7.035895591781916e-06, "loss": 1.0313, "num_input_tokens_seen": 1245605984, "step": 6898 }, { "epoch": 0.7552478174005857, "grad_norm": 1.3647373151411535, "learning_rate": 7.0299166509320194e-06, "loss": 0.7462, "num_input_tokens_seen": 1245775328, "step": 6899 }, { "epoch": 0.7553572894715236, "grad_norm": 1.248720420128124, "learning_rate": 7.023939835908627e-06, "loss": 0.7666, "num_input_tokens_seen": 1245912192, "step": 6900 }, { "epoch": 0.7554667615424615, "grad_norm": 1.2004938956967008, "learning_rate": 7.0179651474187895e-06, "loss": 0.8747, "num_input_tokens_seen": 1246109984, "step": 6901 }, { "epoch": 0.7555762336133994, "grad_norm": 1.1673508352361557, "learning_rate": 7.011992586169291e-06, "loss": 0.6244, "num_input_tokens_seen": 1246277536, "step": 6902 }, { "epoch": 0.7556857056843372, "grad_norm": 1.314764566921466, "learning_rate": 7.006022152866698e-06, "loss": 0.7359, "num_input_tokens_seen": 1246459424, "step": 6903 }, { "epoch": 0.7557951777552752, "grad_norm": 1.293430487942542, "learning_rate": 7.000053848217272e-06, "loss": 0.9037, "num_input_tokens_seen": 1246654528, "step": 6904 }, { "epoch": 0.7559046498262131, "grad_norm": 1.0954950308432412, "learning_rate": 6.99408767292708e-06, "loss": 0.7057, "num_input_tokens_seen": 1246832384, "step": 6905 }, { "epoch": 0.756014121897151, "grad_norm": 1.2357155968385856, "learning_rate": 6.988123627701879e-06, "loss": 0.6909, "num_input_tokens_seen": 1247007104, "step": 6906 }, { "epoch": 0.7561235939680889, "grad_norm": 1.2221812827786835, "learning_rate": 6.982161713247226e-06, "loss": 0.708, "num_input_tokens_seen": 1247153152, "step": 6907 }, { "epoch": 0.7562330660390268, "grad_norm": 1.2403371693221121, "learning_rate": 6.9762019302684e-06, "loss": 0.7185, "num_input_tokens_seen": 1247310400, "step": 6908 }, { "epoch": 0.7563425381099647, "grad_norm": 1.2019839139360236, "learning_rate": 6.970244279470431e-06, "loss": 0.7195, "num_input_tokens_seen": 1247493184, "step": 6909 }, { "epoch": 0.7564520101809026, "grad_norm": 1.2180652161788663, "learning_rate": 6.964288761558094e-06, "loss": 0.8429, "num_input_tokens_seen": 1247666336, "step": 6910 }, { "epoch": 0.7565614822518405, "grad_norm": 1.1898155224082372, "learning_rate": 6.958335377235911e-06, "loss": 0.7443, "num_input_tokens_seen": 1247817312, "step": 6911 }, { "epoch": 0.7566709543227784, "grad_norm": 1.2126653342244058, "learning_rate": 6.952384127208181e-06, "loss": 0.7569, "num_input_tokens_seen": 1248025408, "step": 6912 }, { "epoch": 0.7567804263937163, "grad_norm": 1.263104271508663, "learning_rate": 6.94643501217889e-06, "loss": 1.1645, "num_input_tokens_seen": 1248225216, "step": 6913 }, { "epoch": 0.7568898984646542, "grad_norm": 1.1138831937289702, "learning_rate": 6.940488032851839e-06, "loss": 0.8654, "num_input_tokens_seen": 1248390976, "step": 6914 }, { "epoch": 0.7569993705355921, "grad_norm": 1.2787751742930595, "learning_rate": 6.934543189930515e-06, "loss": 0.9939, "num_input_tokens_seen": 1248560992, "step": 6915 }, { "epoch": 0.75710884260653, "grad_norm": 1.318034537527093, "learning_rate": 6.928600484118206e-06, "loss": 1.0641, "num_input_tokens_seen": 1248731232, "step": 6916 }, { "epoch": 0.7572183146774679, "grad_norm": 1.1616919016092546, "learning_rate": 6.92265991611791e-06, "loss": 0.7616, "num_input_tokens_seen": 1248906400, "step": 6917 }, { "epoch": 0.7573277867484058, "grad_norm": 1.1325377032338553, "learning_rate": 6.916721486632391e-06, "loss": 0.826, "num_input_tokens_seen": 1249075072, "step": 6918 }, { "epoch": 0.7574372588193438, "grad_norm": 1.2699936714164868, "learning_rate": 6.9107851963641505e-06, "loss": 0.8093, "num_input_tokens_seen": 1249240384, "step": 6919 }, { "epoch": 0.7575467308902816, "grad_norm": 1.0729278020682136, "learning_rate": 6.9048510460154315e-06, "loss": 0.6396, "num_input_tokens_seen": 1249422944, "step": 6920 }, { "epoch": 0.7576562029612195, "grad_norm": 1.2900873445172072, "learning_rate": 6.8989190362882565e-06, "loss": 1.0675, "num_input_tokens_seen": 1249618496, "step": 6921 }, { "epoch": 0.7577656750321574, "grad_norm": 1.0023215287704557, "learning_rate": 6.892989167884342e-06, "loss": 0.603, "num_input_tokens_seen": 1249805088, "step": 6922 }, { "epoch": 0.7578751471030953, "grad_norm": 1.1081275911510657, "learning_rate": 6.887061441505202e-06, "loss": 0.8421, "num_input_tokens_seen": 1250003776, "step": 6923 }, { "epoch": 0.7579846191740333, "grad_norm": 1.0680239079260405, "learning_rate": 6.881135857852067e-06, "loss": 0.6441, "num_input_tokens_seen": 1250176928, "step": 6924 }, { "epoch": 0.7580940912449712, "grad_norm": 1.3067074061549866, "learning_rate": 6.87521241762592e-06, "loss": 0.9146, "num_input_tokens_seen": 1250367776, "step": 6925 }, { "epoch": 0.758203563315909, "grad_norm": 1.377179196243753, "learning_rate": 6.869291121527499e-06, "loss": 0.8882, "num_input_tokens_seen": 1250518976, "step": 6926 }, { "epoch": 0.7583130353868469, "grad_norm": 1.2250846658369914, "learning_rate": 6.863371970257276e-06, "loss": 0.9045, "num_input_tokens_seen": 1250728640, "step": 6927 }, { "epoch": 0.7584225074577848, "grad_norm": 1.2154130491024806, "learning_rate": 6.857454964515481e-06, "loss": 0.6783, "num_input_tokens_seen": 1250921952, "step": 6928 }, { "epoch": 0.7585319795287228, "grad_norm": 1.4282767417643765, "learning_rate": 6.851540105002077e-06, "loss": 0.9814, "num_input_tokens_seen": 1251121088, "step": 6929 }, { "epoch": 0.7586414515996607, "grad_norm": 1.1809571867021367, "learning_rate": 6.845627392416779e-06, "loss": 0.745, "num_input_tokens_seen": 1251289984, "step": 6930 }, { "epoch": 0.7587509236705985, "grad_norm": 1.3452016397094715, "learning_rate": 6.839716827459064e-06, "loss": 1.0792, "num_input_tokens_seen": 1251471872, "step": 6931 }, { "epoch": 0.7588603957415364, "grad_norm": 1.253418444286772, "learning_rate": 6.83380841082813e-06, "loss": 0.7691, "num_input_tokens_seen": 1251646592, "step": 6932 }, { "epoch": 0.7589698678124743, "grad_norm": 1.2811349026225347, "learning_rate": 6.827902143222933e-06, "loss": 0.9978, "num_input_tokens_seen": 1251850208, "step": 6933 }, { "epoch": 0.7590793398834123, "grad_norm": 1.2314961337463675, "learning_rate": 6.821998025342172e-06, "loss": 0.7411, "num_input_tokens_seen": 1252013280, "step": 6934 }, { "epoch": 0.7591888119543502, "grad_norm": 1.2398253720128707, "learning_rate": 6.816096057884297e-06, "loss": 0.8251, "num_input_tokens_seen": 1252196512, "step": 6935 }, { "epoch": 0.7592982840252881, "grad_norm": 1.2364777629625132, "learning_rate": 6.810196241547495e-06, "loss": 0.9841, "num_input_tokens_seen": 1252335392, "step": 6936 }, { "epoch": 0.7594077560962259, "grad_norm": 1.2592995897911443, "learning_rate": 6.804298577029697e-06, "loss": 0.8199, "num_input_tokens_seen": 1252529376, "step": 6937 }, { "epoch": 0.7595172281671638, "grad_norm": 1.2781887234422722, "learning_rate": 6.798403065028611e-06, "loss": 0.8666, "num_input_tokens_seen": 1252733440, "step": 6938 }, { "epoch": 0.7596267002381017, "grad_norm": 1.1816022882469093, "learning_rate": 6.792509706241629e-06, "loss": 1.1484, "num_input_tokens_seen": 1252940416, "step": 6939 }, { "epoch": 0.7597361723090397, "grad_norm": 1.3573113046524834, "learning_rate": 6.786618501365949e-06, "loss": 1.1212, "num_input_tokens_seen": 1253130368, "step": 6940 }, { "epoch": 0.7598456443799776, "grad_norm": 1.2342401744752007, "learning_rate": 6.780729451098483e-06, "loss": 0.8732, "num_input_tokens_seen": 1253337568, "step": 6941 }, { "epoch": 0.7599551164509155, "grad_norm": 1.2046188394298531, "learning_rate": 6.7748425561358934e-06, "loss": 0.9424, "num_input_tokens_seen": 1253526624, "step": 6942 }, { "epoch": 0.7600645885218533, "grad_norm": 1.326021301081178, "learning_rate": 6.76895781717459e-06, "loss": 0.9154, "num_input_tokens_seen": 1253709632, "step": 6943 }, { "epoch": 0.7601740605927912, "grad_norm": 1.236098619224111, "learning_rate": 6.763075234910715e-06, "loss": 0.8865, "num_input_tokens_seen": 1253914144, "step": 6944 }, { "epoch": 0.7602835326637292, "grad_norm": 1.3581436104690918, "learning_rate": 6.757194810040193e-06, "loss": 0.9831, "num_input_tokens_seen": 1254094464, "step": 6945 }, { "epoch": 0.7603930047346671, "grad_norm": 1.3008941051568057, "learning_rate": 6.751316543258637e-06, "loss": 0.8722, "num_input_tokens_seen": 1254283744, "step": 6946 }, { "epoch": 0.760502476805605, "grad_norm": 1.1629304950146617, "learning_rate": 6.745440435261463e-06, "loss": 0.9107, "num_input_tokens_seen": 1254500128, "step": 6947 }, { "epoch": 0.7606119488765428, "grad_norm": 1.2440345515331659, "learning_rate": 6.739566486743773e-06, "loss": 0.8474, "num_input_tokens_seen": 1254681792, "step": 6948 }, { "epoch": 0.7607214209474807, "grad_norm": 1.1618260655350703, "learning_rate": 6.733694698400467e-06, "loss": 0.8152, "num_input_tokens_seen": 1254854944, "step": 6949 }, { "epoch": 0.7608308930184187, "grad_norm": 1.2860227869426601, "learning_rate": 6.727825070926158e-06, "loss": 0.7308, "num_input_tokens_seen": 1255035040, "step": 6950 }, { "epoch": 0.7609403650893566, "grad_norm": 1.2648679564847545, "learning_rate": 6.721957605015214e-06, "loss": 0.9711, "num_input_tokens_seen": 1255208864, "step": 6951 }, { "epoch": 0.7610498371602945, "grad_norm": 1.165684229596194, "learning_rate": 6.716092301361743e-06, "loss": 0.808, "num_input_tokens_seen": 1255393664, "step": 6952 }, { "epoch": 0.7611593092312324, "grad_norm": 1.3124655764470627, "learning_rate": 6.710229160659593e-06, "loss": 0.969, "num_input_tokens_seen": 1255591680, "step": 6953 }, { "epoch": 0.7612687813021702, "grad_norm": 1.1300786672064083, "learning_rate": 6.704368183602386e-06, "loss": 0.7616, "num_input_tokens_seen": 1255765504, "step": 6954 }, { "epoch": 0.7613782533731082, "grad_norm": 1.097637027015562, "learning_rate": 6.698509370883429e-06, "loss": 0.7269, "num_input_tokens_seen": 1255965760, "step": 6955 }, { "epoch": 0.7614877254440461, "grad_norm": 1.2143096240278226, "learning_rate": 6.692652723195836e-06, "loss": 0.9119, "num_input_tokens_seen": 1256157056, "step": 6956 }, { "epoch": 0.761597197514984, "grad_norm": 1.1634559331105325, "learning_rate": 6.686798241232428e-06, "loss": 0.8523, "num_input_tokens_seen": 1256359328, "step": 6957 }, { "epoch": 0.7617066695859219, "grad_norm": 1.1661746429433, "learning_rate": 6.680945925685778e-06, "loss": 0.665, "num_input_tokens_seen": 1256545248, "step": 6958 }, { "epoch": 0.7618161416568598, "grad_norm": 1.2161233424762476, "learning_rate": 6.675095777248208e-06, "loss": 0.7031, "num_input_tokens_seen": 1256688160, "step": 6959 }, { "epoch": 0.7619256137277977, "grad_norm": 1.3170202617433526, "learning_rate": 6.669247796611774e-06, "loss": 0.6921, "num_input_tokens_seen": 1256820768, "step": 6960 }, { "epoch": 0.7620350857987356, "grad_norm": 1.2187074018027466, "learning_rate": 6.663401984468281e-06, "loss": 0.8674, "num_input_tokens_seen": 1257016320, "step": 6961 }, { "epoch": 0.7621445578696735, "grad_norm": 1.1357689523864245, "learning_rate": 6.657558341509276e-06, "loss": 0.8157, "num_input_tokens_seen": 1257197088, "step": 6962 }, { "epoch": 0.7622540299406114, "grad_norm": 1.3966283763987057, "learning_rate": 6.651716868426061e-06, "loss": 1.2146, "num_input_tokens_seen": 1257392864, "step": 6963 }, { "epoch": 0.7623635020115493, "grad_norm": 1.2050829721906668, "learning_rate": 6.645877565909664e-06, "loss": 0.9292, "num_input_tokens_seen": 1257574976, "step": 6964 }, { "epoch": 0.7624729740824872, "grad_norm": 1.2337851261754278, "learning_rate": 6.6400404346508625e-06, "loss": 0.6703, "num_input_tokens_seen": 1257756416, "step": 6965 }, { "epoch": 0.7625824461534251, "grad_norm": 1.2319874428536437, "learning_rate": 6.634205475340182e-06, "loss": 0.8921, "num_input_tokens_seen": 1257921056, "step": 6966 }, { "epoch": 0.762691918224363, "grad_norm": 1.1461608534230943, "learning_rate": 6.628372688667883e-06, "loss": 0.8477, "num_input_tokens_seen": 1258098016, "step": 6967 }, { "epoch": 0.7628013902953009, "grad_norm": 1.049877643268036, "learning_rate": 6.622542075323973e-06, "loss": 0.8869, "num_input_tokens_seen": 1258296480, "step": 6968 }, { "epoch": 0.7629108623662388, "grad_norm": 1.3323299486964943, "learning_rate": 6.6167136359982064e-06, "loss": 0.9329, "num_input_tokens_seen": 1258457312, "step": 6969 }, { "epoch": 0.7630203344371768, "grad_norm": 1.1760048384372281, "learning_rate": 6.610887371380064e-06, "loss": 0.8995, "num_input_tokens_seen": 1258649504, "step": 6970 }, { "epoch": 0.7631298065081146, "grad_norm": 1.1667830148083767, "learning_rate": 6.605063282158808e-06, "loss": 0.8872, "num_input_tokens_seen": 1258846624, "step": 6971 }, { "epoch": 0.7632392785790525, "grad_norm": 1.2292268813734564, "learning_rate": 6.599241369023385e-06, "loss": 0.8379, "num_input_tokens_seen": 1259029632, "step": 6972 }, { "epoch": 0.7633487506499904, "grad_norm": 1.2185065197431715, "learning_rate": 6.593421632662539e-06, "loss": 0.9019, "num_input_tokens_seen": 1259207712, "step": 6973 }, { "epoch": 0.7634582227209283, "grad_norm": 1.157189940826784, "learning_rate": 6.587604073764728e-06, "loss": 0.7225, "num_input_tokens_seen": 1259388928, "step": 6974 }, { "epoch": 0.7635676947918663, "grad_norm": 1.196862555854985, "learning_rate": 6.581788693018154e-06, "loss": 0.9872, "num_input_tokens_seen": 1259575072, "step": 6975 }, { "epoch": 0.7636771668628042, "grad_norm": 1.1716624257882333, "learning_rate": 6.575975491110769e-06, "loss": 0.7461, "num_input_tokens_seen": 1259744416, "step": 6976 }, { "epoch": 0.763786638933742, "grad_norm": 1.0412056411325092, "learning_rate": 6.570164468730258e-06, "loss": 0.7605, "num_input_tokens_seen": 1259931008, "step": 6977 }, { "epoch": 0.7638961110046799, "grad_norm": 1.1698039254927322, "learning_rate": 6.56435562656407e-06, "loss": 0.6357, "num_input_tokens_seen": 1260091616, "step": 6978 }, { "epoch": 0.7640055830756178, "grad_norm": 1.2888310080079384, "learning_rate": 6.558548965299355e-06, "loss": 1.0362, "num_input_tokens_seen": 1260286272, "step": 6979 }, { "epoch": 0.7641150551465558, "grad_norm": 1.3623058688250926, "learning_rate": 6.552744485623058e-06, "loss": 0.967, "num_input_tokens_seen": 1260465248, "step": 6980 }, { "epoch": 0.7642245272174937, "grad_norm": 1.0484787343042046, "learning_rate": 6.5469421882218075e-06, "loss": 0.6375, "num_input_tokens_seen": 1260670208, "step": 6981 }, { "epoch": 0.7643339992884315, "grad_norm": 1.5081114793749208, "learning_rate": 6.541142073782028e-06, "loss": 0.9057, "num_input_tokens_seen": 1260809760, "step": 6982 }, { "epoch": 0.7644434713593694, "grad_norm": 1.1941210724262794, "learning_rate": 6.535344142989852e-06, "loss": 1.0672, "num_input_tokens_seen": 1260990976, "step": 6983 }, { "epoch": 0.7645529434303073, "grad_norm": 1.1383204236748374, "learning_rate": 6.529548396531168e-06, "loss": 0.7156, "num_input_tokens_seen": 1261149344, "step": 6984 }, { "epoch": 0.7646624155012453, "grad_norm": 1.1845834956178523, "learning_rate": 6.523754835091597e-06, "loss": 0.909, "num_input_tokens_seen": 1261351840, "step": 6985 }, { "epoch": 0.7647718875721832, "grad_norm": 1.2157664336863316, "learning_rate": 6.517963459356502e-06, "loss": 0.7079, "num_input_tokens_seen": 1261511552, "step": 6986 }, { "epoch": 0.7648813596431211, "grad_norm": 1.3056900832123401, "learning_rate": 6.512174270011015e-06, "loss": 0.9208, "num_input_tokens_seen": 1261675296, "step": 6987 }, { "epoch": 0.7649908317140589, "grad_norm": 1.3306889188867401, "learning_rate": 6.5063872677399525e-06, "loss": 1.0308, "num_input_tokens_seen": 1261855392, "step": 6988 }, { "epoch": 0.7651003037849968, "grad_norm": 1.1950721748454798, "learning_rate": 6.500602453227936e-06, "loss": 0.8272, "num_input_tokens_seen": 1262009056, "step": 6989 }, { "epoch": 0.7652097758559347, "grad_norm": 1.1952449804446779, "learning_rate": 6.494819827159271e-06, "loss": 0.801, "num_input_tokens_seen": 1262213568, "step": 6990 }, { "epoch": 0.7653192479268727, "grad_norm": 1.225874981478472, "learning_rate": 6.489039390218052e-06, "loss": 0.6824, "num_input_tokens_seen": 1262375520, "step": 6991 }, { "epoch": 0.7654287199978106, "grad_norm": 1.2916350155715903, "learning_rate": 6.483261143088084e-06, "loss": 0.7836, "num_input_tokens_seen": 1262536352, "step": 6992 }, { "epoch": 0.7655381920687485, "grad_norm": 1.3313516354156867, "learning_rate": 6.477485086452928e-06, "loss": 1.2439, "num_input_tokens_seen": 1262738624, "step": 6993 }, { "epoch": 0.7656476641396863, "grad_norm": 1.1299885772026457, "learning_rate": 6.471711220995877e-06, "loss": 0.7429, "num_input_tokens_seen": 1262928800, "step": 6994 }, { "epoch": 0.7657571362106242, "grad_norm": 1.0650870299262585, "learning_rate": 6.46593954739996e-06, "loss": 0.7205, "num_input_tokens_seen": 1263103520, "step": 6995 }, { "epoch": 0.7658666082815622, "grad_norm": 1.1783507902281212, "learning_rate": 6.460170066347979e-06, "loss": 1.137, "num_input_tokens_seen": 1263319904, "step": 6996 }, { "epoch": 0.7659760803525001, "grad_norm": 1.3050386789954986, "learning_rate": 6.4544027785224195e-06, "loss": 0.7584, "num_input_tokens_seen": 1263523520, "step": 6997 }, { "epoch": 0.766085552423438, "grad_norm": 1.1487695642077593, "learning_rate": 6.448637684605569e-06, "loss": 0.7762, "num_input_tokens_seen": 1263737216, "step": 6998 }, { "epoch": 0.7661950244943758, "grad_norm": 1.228062532500933, "learning_rate": 6.442874785279415e-06, "loss": 0.8547, "num_input_tokens_seen": 1263916416, "step": 6999 }, { "epoch": 0.7663044965653137, "grad_norm": 1.277207860222012, "learning_rate": 6.437114081225698e-06, "loss": 0.7911, "num_input_tokens_seen": 1264121600, "step": 7000 }, { "epoch": 0.7664139686362517, "grad_norm": 1.1454881957472869, "learning_rate": 6.431355573125899e-06, "loss": 0.5821, "num_input_tokens_seen": 1264298560, "step": 7001 }, { "epoch": 0.7665234407071896, "grad_norm": 1.1530508167419942, "learning_rate": 6.4255992616612385e-06, "loss": 0.8606, "num_input_tokens_seen": 1264478432, "step": 7002 }, { "epoch": 0.7666329127781275, "grad_norm": 1.2410805046165252, "learning_rate": 6.419845147512679e-06, "loss": 0.8312, "num_input_tokens_seen": 1264644640, "step": 7003 }, { "epoch": 0.7667423848490654, "grad_norm": 1.0884764924075878, "learning_rate": 6.4140932313609096e-06, "loss": 0.8881, "num_input_tokens_seen": 1264820256, "step": 7004 }, { "epoch": 0.7668518569200032, "grad_norm": 1.2405923642110626, "learning_rate": 6.408343513886389e-06, "loss": 0.7918, "num_input_tokens_seen": 1265003264, "step": 7005 }, { "epoch": 0.7669613289909412, "grad_norm": 1.1543872881753199, "learning_rate": 6.402595995769289e-06, "loss": 0.8011, "num_input_tokens_seen": 1265193888, "step": 7006 }, { "epoch": 0.7670708010618791, "grad_norm": 1.174167882207093, "learning_rate": 6.396850677689531e-06, "loss": 0.6789, "num_input_tokens_seen": 1265360768, "step": 7007 }, { "epoch": 0.767180273132817, "grad_norm": 1.1433097909407202, "learning_rate": 6.391107560326776e-06, "loss": 0.844, "num_input_tokens_seen": 1265537952, "step": 7008 }, { "epoch": 0.7672897452037549, "grad_norm": 1.3030304122004688, "learning_rate": 6.385366644360419e-06, "loss": 0.969, "num_input_tokens_seen": 1265738880, "step": 7009 }, { "epoch": 0.7673992172746928, "grad_norm": 1.1056187935628548, "learning_rate": 6.379627930469598e-06, "loss": 0.7987, "num_input_tokens_seen": 1265912704, "step": 7010 }, { "epoch": 0.7675086893456307, "grad_norm": 1.2308274972286999, "learning_rate": 6.373891419333211e-06, "loss": 0.7798, "num_input_tokens_seen": 1266097952, "step": 7011 }, { "epoch": 0.7676181614165686, "grad_norm": 1.1554606552820594, "learning_rate": 6.368157111629846e-06, "loss": 0.6739, "num_input_tokens_seen": 1266272896, "step": 7012 }, { "epoch": 0.7677276334875065, "grad_norm": 1.033305172126309, "learning_rate": 6.362425008037895e-06, "loss": 0.6608, "num_input_tokens_seen": 1266457024, "step": 7013 }, { "epoch": 0.7678371055584444, "grad_norm": 1.4252050620319705, "learning_rate": 6.35669510923542e-06, "loss": 0.9824, "num_input_tokens_seen": 1266645632, "step": 7014 }, { "epoch": 0.7679465776293823, "grad_norm": 1.3566540639152924, "learning_rate": 6.35096741590028e-06, "loss": 0.819, "num_input_tokens_seen": 1266784960, "step": 7015 }, { "epoch": 0.7680560497003202, "grad_norm": 1.1452512071557752, "learning_rate": 6.345241928710044e-06, "loss": 0.8471, "num_input_tokens_seen": 1266954080, "step": 7016 }, { "epoch": 0.7681655217712581, "grad_norm": 1.1043227223413208, "learning_rate": 6.339518648342019e-06, "loss": 0.6178, "num_input_tokens_seen": 1267128576, "step": 7017 }, { "epoch": 0.768274993842196, "grad_norm": 1.5018900249109053, "learning_rate": 6.33379757547328e-06, "loss": 1.0199, "num_input_tokens_seen": 1267295456, "step": 7018 }, { "epoch": 0.7683844659131339, "grad_norm": 1.3495639291004125, "learning_rate": 6.328078710780588e-06, "loss": 1.1263, "num_input_tokens_seen": 1267465024, "step": 7019 }, { "epoch": 0.7684939379840718, "grad_norm": 1.158361620294622, "learning_rate": 6.322362054940506e-06, "loss": 0.7976, "num_input_tokens_seen": 1267633472, "step": 7020 }, { "epoch": 0.7686034100550098, "grad_norm": 1.0885946026696471, "learning_rate": 6.316647608629272e-06, "loss": 0.7009, "num_input_tokens_seen": 1267799232, "step": 7021 }, { "epoch": 0.7687128821259476, "grad_norm": 1.1638114197581937, "learning_rate": 6.310935372522925e-06, "loss": 0.596, "num_input_tokens_seen": 1267963872, "step": 7022 }, { "epoch": 0.7688223541968855, "grad_norm": 1.1263198527733331, "learning_rate": 6.305225347297181e-06, "loss": 0.811, "num_input_tokens_seen": 1268163456, "step": 7023 }, { "epoch": 0.7689318262678234, "grad_norm": 1.2502468819002486, "learning_rate": 6.299517533627547e-06, "loss": 0.8374, "num_input_tokens_seen": 1268354752, "step": 7024 }, { "epoch": 0.7690412983387613, "grad_norm": 1.2542371937046854, "learning_rate": 6.293811932189239e-06, "loss": 1.1124, "num_input_tokens_seen": 1268526112, "step": 7025 }, { "epoch": 0.7691507704096993, "grad_norm": 1.1294474640880057, "learning_rate": 6.28810854365722e-06, "loss": 0.707, "num_input_tokens_seen": 1268702400, "step": 7026 }, { "epoch": 0.7692602424806372, "grad_norm": 1.2661245796473322, "learning_rate": 6.282407368706189e-06, "loss": 1.0515, "num_input_tokens_seen": 1268887200, "step": 7027 }, { "epoch": 0.769369714551575, "grad_norm": 1.1864946024338308, "learning_rate": 6.276708408010576e-06, "loss": 0.6614, "num_input_tokens_seen": 1269044000, "step": 7028 }, { "epoch": 0.7694791866225129, "grad_norm": 1.2832201719966183, "learning_rate": 6.27101166224458e-06, "loss": 0.9788, "num_input_tokens_seen": 1269220512, "step": 7029 }, { "epoch": 0.7695886586934508, "grad_norm": 1.2762445509118931, "learning_rate": 6.265317132082088e-06, "loss": 0.7799, "num_input_tokens_seen": 1269404192, "step": 7030 }, { "epoch": 0.7696981307643888, "grad_norm": 1.1166841133140193, "learning_rate": 6.259624818196772e-06, "loss": 0.8535, "num_input_tokens_seen": 1269595040, "step": 7031 }, { "epoch": 0.7698076028353267, "grad_norm": 0.9651365260658894, "learning_rate": 6.253934721262014e-06, "loss": 0.5934, "num_input_tokens_seen": 1269758336, "step": 7032 }, { "epoch": 0.7699170749062645, "grad_norm": 1.2679405693452916, "learning_rate": 6.248246841950942e-06, "loss": 0.9774, "num_input_tokens_seen": 1269940000, "step": 7033 }, { "epoch": 0.7700265469772024, "grad_norm": 1.2354311710083044, "learning_rate": 6.242561180936421e-06, "loss": 0.7631, "num_input_tokens_seen": 1270098144, "step": 7034 }, { "epoch": 0.7701360190481403, "grad_norm": 1.342938155735062, "learning_rate": 6.236877738891053e-06, "loss": 0.8519, "num_input_tokens_seen": 1270312288, "step": 7035 }, { "epoch": 0.7702454911190783, "grad_norm": 1.326385211218262, "learning_rate": 6.231196516487181e-06, "loss": 0.9846, "num_input_tokens_seen": 1270495520, "step": 7036 }, { "epoch": 0.7703549631900162, "grad_norm": 1.141064389397076, "learning_rate": 6.225517514396873e-06, "loss": 0.8724, "num_input_tokens_seen": 1270685248, "step": 7037 }, { "epoch": 0.7704644352609541, "grad_norm": 1.0125795921784202, "learning_rate": 6.219840733291959e-06, "loss": 0.5854, "num_input_tokens_seen": 1270857952, "step": 7038 }, { "epoch": 0.7705739073318919, "grad_norm": 1.0653924839366928, "learning_rate": 6.2141661738439884e-06, "loss": 0.8448, "num_input_tokens_seen": 1271030208, "step": 7039 }, { "epoch": 0.7706833794028298, "grad_norm": 1.1659289763953686, "learning_rate": 6.208493836724244e-06, "loss": 0.6917, "num_input_tokens_seen": 1271230016, "step": 7040 }, { "epoch": 0.7707928514737677, "grad_norm": 1.1376439160701486, "learning_rate": 6.202823722603757e-06, "loss": 0.6913, "num_input_tokens_seen": 1271426240, "step": 7041 }, { "epoch": 0.7709023235447057, "grad_norm": 1.1277462500491042, "learning_rate": 6.197155832153287e-06, "loss": 1.1706, "num_input_tokens_seen": 1271620448, "step": 7042 }, { "epoch": 0.7710117956156436, "grad_norm": 1.0874347930300958, "learning_rate": 6.191490166043337e-06, "loss": 1.0093, "num_input_tokens_seen": 1271820928, "step": 7043 }, { "epoch": 0.7711212676865815, "grad_norm": 1.1824174329550121, "learning_rate": 6.185826724944146e-06, "loss": 0.6871, "num_input_tokens_seen": 1271972576, "step": 7044 }, { "epoch": 0.7712307397575193, "grad_norm": 1.2114412305985345, "learning_rate": 6.180165509525682e-06, "loss": 0.7507, "num_input_tokens_seen": 1272135872, "step": 7045 }, { "epoch": 0.7713402118284572, "grad_norm": 1.0534884383187724, "learning_rate": 6.174506520457665e-06, "loss": 0.7419, "num_input_tokens_seen": 1272308352, "step": 7046 }, { "epoch": 0.7714496838993952, "grad_norm": 1.2466010612592215, "learning_rate": 6.168849758409539e-06, "loss": 0.7314, "num_input_tokens_seen": 1272460224, "step": 7047 }, { "epoch": 0.7715591559703331, "grad_norm": 1.198652748960276, "learning_rate": 6.163195224050488e-06, "loss": 0.997, "num_input_tokens_seen": 1272653984, "step": 7048 }, { "epoch": 0.771668628041271, "grad_norm": 1.1594454453445775, "learning_rate": 6.157542918049433e-06, "loss": 0.8242, "num_input_tokens_seen": 1272813248, "step": 7049 }, { "epoch": 0.7717781001122088, "grad_norm": 1.204167397486763, "learning_rate": 6.151892841075027e-06, "loss": 0.9673, "num_input_tokens_seen": 1273024032, "step": 7050 }, { "epoch": 0.7718875721831467, "grad_norm": 1.0747445224321568, "learning_rate": 6.146244993795669e-06, "loss": 0.7719, "num_input_tokens_seen": 1273196736, "step": 7051 }, { "epoch": 0.7719970442540847, "grad_norm": 1.1557246380729136, "learning_rate": 6.14059937687948e-06, "loss": 0.8508, "num_input_tokens_seen": 1273373920, "step": 7052 }, { "epoch": 0.7721065163250226, "grad_norm": 1.288896353446969, "learning_rate": 6.1349559909943425e-06, "loss": 1.0682, "num_input_tokens_seen": 1273554240, "step": 7053 }, { "epoch": 0.7722159883959605, "grad_norm": 1.4451893522018668, "learning_rate": 6.129314836807834e-06, "loss": 0.7695, "num_input_tokens_seen": 1273749344, "step": 7054 }, { "epoch": 0.7723254604668984, "grad_norm": 1.1389076682354202, "learning_rate": 6.123675914987323e-06, "loss": 0.7262, "num_input_tokens_seen": 1273951840, "step": 7055 }, { "epoch": 0.7724349325378362, "grad_norm": 1.2676376540396261, "learning_rate": 6.1180392261998484e-06, "loss": 0.9653, "num_input_tokens_seen": 1274122080, "step": 7056 }, { "epoch": 0.7725444046087742, "grad_norm": 1.2455100483161679, "learning_rate": 6.112404771112246e-06, "loss": 0.7795, "num_input_tokens_seen": 1274315392, "step": 7057 }, { "epoch": 0.7726538766797121, "grad_norm": 1.2979126421446068, "learning_rate": 6.106772550391052e-06, "loss": 1.023, "num_input_tokens_seen": 1274510944, "step": 7058 }, { "epoch": 0.77276334875065, "grad_norm": 1.2132181655980248, "learning_rate": 6.101142564702539e-06, "loss": 0.7885, "num_input_tokens_seen": 1274704032, "step": 7059 }, { "epoch": 0.7728728208215879, "grad_norm": 1.2115810038068877, "learning_rate": 6.095514814712747e-06, "loss": 0.7571, "num_input_tokens_seen": 1274912128, "step": 7060 }, { "epoch": 0.7729822928925258, "grad_norm": 1.284983892970683, "learning_rate": 6.089889301087398e-06, "loss": 0.8818, "num_input_tokens_seen": 1275083040, "step": 7061 }, { "epoch": 0.7730917649634637, "grad_norm": 1.1864593865331587, "learning_rate": 6.084266024492011e-06, "loss": 0.9021, "num_input_tokens_seen": 1275254848, "step": 7062 }, { "epoch": 0.7732012370344016, "grad_norm": 1.2066307514511618, "learning_rate": 6.078644985591778e-06, "loss": 0.6819, "num_input_tokens_seen": 1275415232, "step": 7063 }, { "epoch": 0.7733107091053395, "grad_norm": 1.073057899932271, "learning_rate": 6.0730261850516865e-06, "loss": 0.6285, "num_input_tokens_seen": 1275602720, "step": 7064 }, { "epoch": 0.7734201811762774, "grad_norm": 1.1312032911236596, "learning_rate": 6.067409623536399e-06, "loss": 0.8252, "num_input_tokens_seen": 1275782816, "step": 7065 }, { "epoch": 0.7735296532472153, "grad_norm": 1.2534810709031907, "learning_rate": 6.061795301710368e-06, "loss": 0.7356, "num_input_tokens_seen": 1275976800, "step": 7066 }, { "epoch": 0.7736391253181532, "grad_norm": 1.2162426318487471, "learning_rate": 6.056183220237749e-06, "loss": 1.1747, "num_input_tokens_seen": 1276187360, "step": 7067 }, { "epoch": 0.7737485973890911, "grad_norm": 1.26806245787795, "learning_rate": 6.05057337978244e-06, "loss": 0.9811, "num_input_tokens_seen": 1276390752, "step": 7068 }, { "epoch": 0.773858069460029, "grad_norm": 1.2655334439540162, "learning_rate": 6.044965781008077e-06, "loss": 0.7985, "num_input_tokens_seen": 1276588544, "step": 7069 }, { "epoch": 0.7739675415309669, "grad_norm": 1.1765349244309586, "learning_rate": 6.039360424578017e-06, "loss": 0.7445, "num_input_tokens_seen": 1276759008, "step": 7070 }, { "epoch": 0.7740770136019048, "grad_norm": 1.3554684861592412, "learning_rate": 6.033757311155386e-06, "loss": 1.134, "num_input_tokens_seen": 1276952992, "step": 7071 }, { "epoch": 0.7741864856728428, "grad_norm": 1.2299333164313164, "learning_rate": 6.028156441402996e-06, "loss": 0.7267, "num_input_tokens_seen": 1277110016, "step": 7072 }, { "epoch": 0.7742959577437806, "grad_norm": 1.2207396967177888, "learning_rate": 6.022557815983437e-06, "loss": 0.7452, "num_input_tokens_seen": 1277310048, "step": 7073 }, { "epoch": 0.7744054298147185, "grad_norm": 1.1748898190723414, "learning_rate": 6.01696143555901e-06, "loss": 0.7344, "num_input_tokens_seen": 1277491936, "step": 7074 }, { "epoch": 0.7745149018856564, "grad_norm": 1.275192549134747, "learning_rate": 6.011367300791754e-06, "loss": 0.8633, "num_input_tokens_seen": 1277665088, "step": 7075 }, { "epoch": 0.7746243739565943, "grad_norm": 1.3043473765469205, "learning_rate": 6.005775412343448e-06, "loss": 1.0539, "num_input_tokens_seen": 1277852800, "step": 7076 }, { "epoch": 0.7747338460275323, "grad_norm": 1.1537785277967492, "learning_rate": 6.0001857708755996e-06, "loss": 0.8265, "num_input_tokens_seen": 1278046784, "step": 7077 }, { "epoch": 0.7748433180984702, "grad_norm": 1.0949119390498885, "learning_rate": 5.994598377049446e-06, "loss": 0.5865, "num_input_tokens_seen": 1278240768, "step": 7078 }, { "epoch": 0.774952790169408, "grad_norm": 1.2708817092359714, "learning_rate": 5.989013231525978e-06, "loss": 0.7526, "num_input_tokens_seen": 1278422880, "step": 7079 }, { "epoch": 0.7750622622403459, "grad_norm": 1.0849631899201697, "learning_rate": 5.983430334965903e-06, "loss": 0.7515, "num_input_tokens_seen": 1278627392, "step": 7080 }, { "epoch": 0.7751717343112838, "grad_norm": 1.2051042402457965, "learning_rate": 5.977849688029666e-06, "loss": 0.804, "num_input_tokens_seen": 1278802336, "step": 7081 }, { "epoch": 0.7752812063822218, "grad_norm": 1.1516238275188877, "learning_rate": 5.972271291377446e-06, "loss": 0.9395, "num_input_tokens_seen": 1278980192, "step": 7082 }, { "epoch": 0.7753906784531597, "grad_norm": 1.3492462950580162, "learning_rate": 5.96669514566916e-06, "loss": 1.0355, "num_input_tokens_seen": 1279157376, "step": 7083 }, { "epoch": 0.7755001505240975, "grad_norm": 1.1861372707816449, "learning_rate": 5.96112125156445e-06, "loss": 0.9748, "num_input_tokens_seen": 1279368832, "step": 7084 }, { "epoch": 0.7756096225950354, "grad_norm": 1.2026595168870364, "learning_rate": 5.9555496097226934e-06, "loss": 0.8729, "num_input_tokens_seen": 1279528768, "step": 7085 }, { "epoch": 0.7757190946659733, "grad_norm": 1.108677765859701, "learning_rate": 5.949980220803025e-06, "loss": 0.871, "num_input_tokens_seen": 1279718944, "step": 7086 }, { "epoch": 0.7758285667369113, "grad_norm": 1.2159838953237945, "learning_rate": 5.944413085464265e-06, "loss": 1.0069, "num_input_tokens_seen": 1279898368, "step": 7087 }, { "epoch": 0.7759380388078492, "grad_norm": 1.2337772515561791, "learning_rate": 5.938848204365016e-06, "loss": 0.894, "num_input_tokens_seen": 1280114528, "step": 7088 }, { "epoch": 0.7760475108787871, "grad_norm": 1.0914794241992458, "learning_rate": 5.933285578163586e-06, "loss": 0.7342, "num_input_tokens_seen": 1280276480, "step": 7089 }, { "epoch": 0.7761569829497249, "grad_norm": 1.071171715011401, "learning_rate": 5.927725207518023e-06, "loss": 0.7912, "num_input_tokens_seen": 1280450528, "step": 7090 }, { "epoch": 0.7762664550206628, "grad_norm": 1.1421807013647216, "learning_rate": 5.922167093086107e-06, "loss": 0.7341, "num_input_tokens_seen": 1280652576, "step": 7091 }, { "epoch": 0.7763759270916007, "grad_norm": 1.1265345809376381, "learning_rate": 5.916611235525346e-06, "loss": 0.6909, "num_input_tokens_seen": 1280861120, "step": 7092 }, { "epoch": 0.7764853991625387, "grad_norm": 1.2048701720518455, "learning_rate": 5.9110576354930085e-06, "loss": 0.7855, "num_input_tokens_seen": 1281064512, "step": 7093 }, { "epoch": 0.7765948712334766, "grad_norm": 1.229646971445492, "learning_rate": 5.9055062936460484e-06, "loss": 0.8154, "num_input_tokens_seen": 1281263872, "step": 7094 }, { "epoch": 0.7767043433044145, "grad_norm": 1.1850187580217468, "learning_rate": 5.899957210641205e-06, "loss": 0.9646, "num_input_tokens_seen": 1281441280, "step": 7095 }, { "epoch": 0.7768138153753523, "grad_norm": 1.3770973941625693, "learning_rate": 5.894410387134896e-06, "loss": 0.757, "num_input_tokens_seen": 1281597184, "step": 7096 }, { "epoch": 0.7769232874462902, "grad_norm": 1.1558716644737324, "learning_rate": 5.888865823783329e-06, "loss": 0.7303, "num_input_tokens_seen": 1281771456, "step": 7097 }, { "epoch": 0.7770327595172282, "grad_norm": 1.1975097905119136, "learning_rate": 5.883323521242387e-06, "loss": 0.9309, "num_input_tokens_seen": 1281940800, "step": 7098 }, { "epoch": 0.7771422315881661, "grad_norm": 1.273916049275303, "learning_rate": 5.877783480167734e-06, "loss": 0.8703, "num_input_tokens_seen": 1282097152, "step": 7099 }, { "epoch": 0.777251703659104, "grad_norm": 1.2430515283321637, "learning_rate": 5.872245701214741e-06, "loss": 0.869, "num_input_tokens_seen": 1282274560, "step": 7100 }, { "epoch": 0.7773611757300418, "grad_norm": 1.0899735787015463, "learning_rate": 5.8667101850385045e-06, "loss": 0.7411, "num_input_tokens_seen": 1282458912, "step": 7101 }, { "epoch": 0.7774706478009797, "grad_norm": 1.2463887980467014, "learning_rate": 5.861176932293894e-06, "loss": 0.7192, "num_input_tokens_seen": 1282643040, "step": 7102 }, { "epoch": 0.7775801198719177, "grad_norm": 1.1485511567842184, "learning_rate": 5.855645943635449e-06, "loss": 0.9403, "num_input_tokens_seen": 1282827392, "step": 7103 }, { "epoch": 0.7776895919428556, "grad_norm": 1.1990404027474029, "learning_rate": 5.850117219717507e-06, "loss": 0.7367, "num_input_tokens_seen": 1282984640, "step": 7104 }, { "epoch": 0.7777990640137935, "grad_norm": 1.3780098397716125, "learning_rate": 5.8445907611940745e-06, "loss": 0.9722, "num_input_tokens_seen": 1283152864, "step": 7105 }, { "epoch": 0.7779085360847314, "grad_norm": 1.1600772304147398, "learning_rate": 5.839066568718946e-06, "loss": 0.7742, "num_input_tokens_seen": 1283334080, "step": 7106 }, { "epoch": 0.7780180081556692, "grad_norm": 1.2877019464572736, "learning_rate": 5.83354464294561e-06, "loss": 0.8517, "num_input_tokens_seen": 1283527616, "step": 7107 }, { "epoch": 0.7781274802266072, "grad_norm": 1.2991194090079188, "learning_rate": 5.8280249845273025e-06, "loss": 1.0476, "num_input_tokens_seen": 1283724288, "step": 7108 }, { "epoch": 0.7782369522975451, "grad_norm": 1.1442738526758367, "learning_rate": 5.822507594116988e-06, "loss": 0.7596, "num_input_tokens_seen": 1283916704, "step": 7109 }, { "epoch": 0.778346424368483, "grad_norm": 1.1345756227247858, "learning_rate": 5.816992472367366e-06, "loss": 0.659, "num_input_tokens_seen": 1284100832, "step": 7110 }, { "epoch": 0.7784558964394209, "grad_norm": 1.2776396988931125, "learning_rate": 5.811479619930862e-06, "loss": 0.8332, "num_input_tokens_seen": 1284288320, "step": 7111 }, { "epoch": 0.7785653685103588, "grad_norm": 1.207150746575288, "learning_rate": 5.80596903745963e-06, "loss": 1.1182, "num_input_tokens_seen": 1284474464, "step": 7112 }, { "epoch": 0.7786748405812967, "grad_norm": 1.2678198914456997, "learning_rate": 5.800460725605575e-06, "loss": 0.8664, "num_input_tokens_seen": 1284657920, "step": 7113 }, { "epoch": 0.7787843126522346, "grad_norm": 1.1681665871750273, "learning_rate": 5.794954685020312e-06, "loss": 0.8599, "num_input_tokens_seen": 1284817184, "step": 7114 }, { "epoch": 0.7788937847231725, "grad_norm": 1.2244374034297987, "learning_rate": 5.7894509163551995e-06, "loss": 0.8686, "num_input_tokens_seen": 1284976896, "step": 7115 }, { "epoch": 0.7790032567941104, "grad_norm": 1.017904101270918, "learning_rate": 5.783949420261315e-06, "loss": 0.9102, "num_input_tokens_seen": 1285169088, "step": 7116 }, { "epoch": 0.7791127288650483, "grad_norm": 1.2582938653343605, "learning_rate": 5.778450197389481e-06, "loss": 0.9065, "num_input_tokens_seen": 1285357248, "step": 7117 }, { "epoch": 0.7792222009359862, "grad_norm": 1.0363221134912985, "learning_rate": 5.772953248390242e-06, "loss": 0.6077, "num_input_tokens_seen": 1285534432, "step": 7118 }, { "epoch": 0.7793316730069241, "grad_norm": 1.1797639430459554, "learning_rate": 5.767458573913881e-06, "loss": 0.8168, "num_input_tokens_seen": 1285741856, "step": 7119 }, { "epoch": 0.779441145077862, "grad_norm": 1.280352115032192, "learning_rate": 5.761966174610395e-06, "loss": 0.8357, "num_input_tokens_seen": 1285939872, "step": 7120 }, { "epoch": 0.7795506171487999, "grad_norm": 1.1848633758235507, "learning_rate": 5.756476051129542e-06, "loss": 0.6239, "num_input_tokens_seen": 1286115936, "step": 7121 }, { "epoch": 0.7796600892197378, "grad_norm": 1.3258827579732022, "learning_rate": 5.750988204120783e-06, "loss": 1.2033, "num_input_tokens_seen": 1286327840, "step": 7122 }, { "epoch": 0.7797695612906758, "grad_norm": 1.0981083274969514, "learning_rate": 5.7455026342333215e-06, "loss": 0.796, "num_input_tokens_seen": 1286504576, "step": 7123 }, { "epoch": 0.7798790333616136, "grad_norm": 1.193770217724301, "learning_rate": 5.74001934211609e-06, "loss": 0.69, "num_input_tokens_seen": 1286691616, "step": 7124 }, { "epoch": 0.7799885054325515, "grad_norm": 1.1112293809623432, "learning_rate": 5.734538328417754e-06, "loss": 0.6984, "num_input_tokens_seen": 1286876192, "step": 7125 }, { "epoch": 0.7800979775034894, "grad_norm": 1.261442455356672, "learning_rate": 5.729059593786701e-06, "loss": 0.824, "num_input_tokens_seen": 1287077792, "step": 7126 }, { "epoch": 0.7802074495744273, "grad_norm": 1.172147998800198, "learning_rate": 5.7235831388710524e-06, "loss": 1.001, "num_input_tokens_seen": 1287267520, "step": 7127 }, { "epoch": 0.7803169216453653, "grad_norm": 1.0617691218965122, "learning_rate": 5.718108964318683e-06, "loss": 0.6067, "num_input_tokens_seen": 1287458592, "step": 7128 }, { "epoch": 0.7804263937163032, "grad_norm": 1.1524235584618354, "learning_rate": 5.7126370707771495e-06, "loss": 0.888, "num_input_tokens_seen": 1287629056, "step": 7129 }, { "epoch": 0.780535865787241, "grad_norm": 1.1265634163735059, "learning_rate": 5.707167458893786e-06, "loss": 0.8089, "num_input_tokens_seen": 1287772640, "step": 7130 }, { "epoch": 0.7806453378581789, "grad_norm": 1.1409709875980218, "learning_rate": 5.701700129315629e-06, "loss": 0.7197, "num_input_tokens_seen": 1287960576, "step": 7131 }, { "epoch": 0.7807548099291168, "grad_norm": 1.1981154843521133, "learning_rate": 5.696235082689455e-06, "loss": 0.7743, "num_input_tokens_seen": 1288129472, "step": 7132 }, { "epoch": 0.7808642820000548, "grad_norm": 1.4542238736126432, "learning_rate": 5.690772319661769e-06, "loss": 1.0666, "num_input_tokens_seen": 1288305312, "step": 7133 }, { "epoch": 0.7809737540709927, "grad_norm": 1.13373496545078, "learning_rate": 5.685311840878796e-06, "loss": 0.8718, "num_input_tokens_seen": 1288510720, "step": 7134 }, { "epoch": 0.7810832261419305, "grad_norm": 1.1351571384329315, "learning_rate": 5.679853646986524e-06, "loss": 0.7753, "num_input_tokens_seen": 1288684096, "step": 7135 }, { "epoch": 0.7811926982128684, "grad_norm": 1.2084045992037982, "learning_rate": 5.674397738630619e-06, "loss": 0.9181, "num_input_tokens_seen": 1288864864, "step": 7136 }, { "epoch": 0.7813021702838063, "grad_norm": 1.1613635322416023, "learning_rate": 5.668944116456529e-06, "loss": 1.0108, "num_input_tokens_seen": 1289059968, "step": 7137 }, { "epoch": 0.7814116423547443, "grad_norm": 1.271232474552308, "learning_rate": 5.663492781109381e-06, "loss": 1.027, "num_input_tokens_seen": 1289279264, "step": 7138 }, { "epoch": 0.7815211144256822, "grad_norm": 1.3479004127351815, "learning_rate": 5.658043733234081e-06, "loss": 0.7495, "num_input_tokens_seen": 1289470336, "step": 7139 }, { "epoch": 0.7816305864966201, "grad_norm": 1.3306826479470695, "learning_rate": 5.65259697347523e-06, "loss": 1.0545, "num_input_tokens_seen": 1289658048, "step": 7140 }, { "epoch": 0.7817400585675579, "grad_norm": 1.1381057719199006, "learning_rate": 5.647152502477171e-06, "loss": 0.7799, "num_input_tokens_seen": 1289839488, "step": 7141 }, { "epoch": 0.7818495306384958, "grad_norm": 1.1959106843412555, "learning_rate": 5.641710320883975e-06, "loss": 0.8366, "num_input_tokens_seen": 1290011744, "step": 7142 }, { "epoch": 0.7819590027094337, "grad_norm": 1.1607599710549301, "learning_rate": 5.636270429339436e-06, "loss": 0.8165, "num_input_tokens_seen": 1290148832, "step": 7143 }, { "epoch": 0.7820684747803717, "grad_norm": 1.068569908807634, "learning_rate": 5.630832828487101e-06, "loss": 0.7619, "num_input_tokens_seen": 1290331168, "step": 7144 }, { "epoch": 0.7821779468513096, "grad_norm": 1.1192189666862868, "learning_rate": 5.625397518970199e-06, "loss": 0.9793, "num_input_tokens_seen": 1290560544, "step": 7145 }, { "epoch": 0.7822874189222475, "grad_norm": 1.2029838974684406, "learning_rate": 5.619964501431743e-06, "loss": 0.8817, "num_input_tokens_seen": 1290773344, "step": 7146 }, { "epoch": 0.7823968909931853, "grad_norm": 1.27232433882837, "learning_rate": 5.614533776514436e-06, "loss": 0.7341, "num_input_tokens_seen": 1290940224, "step": 7147 }, { "epoch": 0.7825063630641232, "grad_norm": 1.1058961291523364, "learning_rate": 5.609105344860724e-06, "loss": 1.0079, "num_input_tokens_seen": 1291160416, "step": 7148 }, { "epoch": 0.7826158351350612, "grad_norm": 1.0933251236882613, "learning_rate": 5.603679207112781e-06, "loss": 0.8004, "num_input_tokens_seen": 1291334016, "step": 7149 }, { "epoch": 0.7827253072059991, "grad_norm": 1.2036035750115732, "learning_rate": 5.598255363912508e-06, "loss": 0.8687, "num_input_tokens_seen": 1291542560, "step": 7150 }, { "epoch": 0.782834779276937, "grad_norm": 1.1414240298495422, "learning_rate": 5.592833815901538e-06, "loss": 0.8457, "num_input_tokens_seen": 1291735648, "step": 7151 }, { "epoch": 0.7829442513478748, "grad_norm": 1.1772216389290424, "learning_rate": 5.5874145637212245e-06, "loss": 0.8753, "num_input_tokens_seen": 1291952480, "step": 7152 }, { "epoch": 0.7830537234188127, "grad_norm": 1.1352415678933006, "learning_rate": 5.581997608012651e-06, "loss": 0.7886, "num_input_tokens_seen": 1292151616, "step": 7153 }, { "epoch": 0.7831631954897507, "grad_norm": 1.1389741839798042, "learning_rate": 5.576582949416648e-06, "loss": 0.9653, "num_input_tokens_seen": 1292345824, "step": 7154 }, { "epoch": 0.7832726675606886, "grad_norm": 1.2715438149148817, "learning_rate": 5.571170588573751e-06, "loss": 0.8671, "num_input_tokens_seen": 1292519872, "step": 7155 }, { "epoch": 0.7833821396316265, "grad_norm": 1.086383810197007, "learning_rate": 5.56576052612423e-06, "loss": 0.9059, "num_input_tokens_seen": 1292719680, "step": 7156 }, { "epoch": 0.7834916117025644, "grad_norm": 1.251749721434882, "learning_rate": 5.560352762708088e-06, "loss": 0.9734, "num_input_tokens_seen": 1292913440, "step": 7157 }, { "epoch": 0.7836010837735022, "grad_norm": 1.3495325268121818, "learning_rate": 5.554947298965052e-06, "loss": 0.8488, "num_input_tokens_seen": 1293044480, "step": 7158 }, { "epoch": 0.7837105558444402, "grad_norm": 1.1819476272643752, "learning_rate": 5.5495441355345766e-06, "loss": 0.8337, "num_input_tokens_seen": 1293223232, "step": 7159 }, { "epoch": 0.7838200279153781, "grad_norm": 1.3272023754934443, "learning_rate": 5.54414327305584e-06, "loss": 0.7503, "num_input_tokens_seen": 1293377568, "step": 7160 }, { "epoch": 0.783929499986316, "grad_norm": 1.095754040845748, "learning_rate": 5.538744712167776e-06, "loss": 0.605, "num_input_tokens_seen": 1293554528, "step": 7161 }, { "epoch": 0.7840389720572539, "grad_norm": 1.2472474309237613, "learning_rate": 5.533348453508996e-06, "loss": 0.8616, "num_input_tokens_seen": 1293730816, "step": 7162 }, { "epoch": 0.7841484441281918, "grad_norm": 1.1643618000488354, "learning_rate": 5.527954497717886e-06, "loss": 0.9879, "num_input_tokens_seen": 1293921216, "step": 7163 }, { "epoch": 0.7842579161991297, "grad_norm": 1.221311615919138, "learning_rate": 5.522562845432533e-06, "loss": 0.9222, "num_input_tokens_seen": 1294078016, "step": 7164 }, { "epoch": 0.7843673882700676, "grad_norm": 1.2682917721104974, "learning_rate": 5.517173497290762e-06, "loss": 0.9044, "num_input_tokens_seen": 1294277376, "step": 7165 }, { "epoch": 0.7844768603410055, "grad_norm": 1.1879729784612174, "learning_rate": 5.511786453930124e-06, "loss": 0.8362, "num_input_tokens_seen": 1294487712, "step": 7166 }, { "epoch": 0.7845863324119434, "grad_norm": 1.214318999267036, "learning_rate": 5.5064017159878826e-06, "loss": 0.9422, "num_input_tokens_seen": 1294690432, "step": 7167 }, { "epoch": 0.7846958044828813, "grad_norm": 1.2723898761665509, "learning_rate": 5.501019284101067e-06, "loss": 0.9463, "num_input_tokens_seen": 1294843648, "step": 7168 }, { "epoch": 0.7848052765538192, "grad_norm": 1.277674267672023, "learning_rate": 5.495639158906382e-06, "loss": 0.8343, "num_input_tokens_seen": 1294992832, "step": 7169 }, { "epoch": 0.7849147486247571, "grad_norm": 1.3052061043872762, "learning_rate": 5.490261341040312e-06, "loss": 1.1909, "num_input_tokens_seen": 1295171584, "step": 7170 }, { "epoch": 0.785024220695695, "grad_norm": 1.2557331217520646, "learning_rate": 5.4848858311390165e-06, "loss": 0.8948, "num_input_tokens_seen": 1295358400, "step": 7171 }, { "epoch": 0.7851336927666329, "grad_norm": 1.3391514538270985, "learning_rate": 5.479512629838426e-06, "loss": 0.9567, "num_input_tokens_seen": 1295507360, "step": 7172 }, { "epoch": 0.7852431648375708, "grad_norm": 1.150036690997404, "learning_rate": 5.4741417377741745e-06, "loss": 0.9231, "num_input_tokens_seen": 1295699104, "step": 7173 }, { "epoch": 0.7853526369085088, "grad_norm": 1.2708347143363745, "learning_rate": 5.468773155581627e-06, "loss": 0.6667, "num_input_tokens_seen": 1295878752, "step": 7174 }, { "epoch": 0.7854621089794466, "grad_norm": 1.0782321931928387, "learning_rate": 5.46340688389588e-06, "loss": 0.8825, "num_input_tokens_seen": 1296097376, "step": 7175 }, { "epoch": 0.7855715810503845, "grad_norm": 1.2602187177259978, "learning_rate": 5.458042923351744e-06, "loss": 0.8169, "num_input_tokens_seen": 1296296736, "step": 7176 }, { "epoch": 0.7856810531213224, "grad_norm": 1.0374733381407473, "learning_rate": 5.452681274583784e-06, "loss": 0.7495, "num_input_tokens_seen": 1296479520, "step": 7177 }, { "epoch": 0.7857905251922603, "grad_norm": 1.0416221927049703, "learning_rate": 5.447321938226249e-06, "loss": 0.5854, "num_input_tokens_seen": 1296658496, "step": 7178 }, { "epoch": 0.7858999972631983, "grad_norm": 1.2913108868936665, "learning_rate": 5.441964914913164e-06, "loss": 0.8077, "num_input_tokens_seen": 1296829408, "step": 7179 }, { "epoch": 0.7860094693341362, "grad_norm": 1.0083231851892032, "learning_rate": 5.436610205278228e-06, "loss": 0.6957, "num_input_tokens_seen": 1297037504, "step": 7180 }, { "epoch": 0.786118941405074, "grad_norm": 1.2049787038290423, "learning_rate": 5.4312578099549125e-06, "loss": 0.7868, "num_input_tokens_seen": 1297217824, "step": 7181 }, { "epoch": 0.7862284134760119, "grad_norm": 1.154985612886997, "learning_rate": 5.425907729576388e-06, "loss": 0.7377, "num_input_tokens_seen": 1297409792, "step": 7182 }, { "epoch": 0.7863378855469498, "grad_norm": 1.2170565161296636, "learning_rate": 5.42055996477556e-06, "loss": 1.0815, "num_input_tokens_seen": 1297616096, "step": 7183 }, { "epoch": 0.7864473576178878, "grad_norm": 1.115908123602869, "learning_rate": 5.415214516185061e-06, "loss": 0.7193, "num_input_tokens_seen": 1297822848, "step": 7184 }, { "epoch": 0.7865568296888257, "grad_norm": 1.251070517853934, "learning_rate": 5.409871384437234e-06, "loss": 0.7032, "num_input_tokens_seen": 1298005184, "step": 7185 }, { "epoch": 0.7866663017597635, "grad_norm": 1.1934889802619775, "learning_rate": 5.404530570164187e-06, "loss": 0.6884, "num_input_tokens_seen": 1298186624, "step": 7186 }, { "epoch": 0.7867757738307014, "grad_norm": 1.239396805179474, "learning_rate": 5.399192073997703e-06, "loss": 0.7827, "num_input_tokens_seen": 1298337376, "step": 7187 }, { "epoch": 0.7868852459016393, "grad_norm": 1.1817922951957631, "learning_rate": 5.39385589656933e-06, "loss": 0.6941, "num_input_tokens_seen": 1298523520, "step": 7188 }, { "epoch": 0.7869947179725773, "grad_norm": 1.1723398939514398, "learning_rate": 5.3885220385103245e-06, "loss": 0.7276, "num_input_tokens_seen": 1298726464, "step": 7189 }, { "epoch": 0.7871041900435152, "grad_norm": 1.1332503952271407, "learning_rate": 5.383190500451671e-06, "loss": 0.9199, "num_input_tokens_seen": 1298916192, "step": 7190 }, { "epoch": 0.7872136621144531, "grad_norm": 1.2270858522830792, "learning_rate": 5.3778612830240795e-06, "loss": 0.6858, "num_input_tokens_seen": 1299055296, "step": 7191 }, { "epoch": 0.7873231341853909, "grad_norm": 1.2024162979497302, "learning_rate": 5.372534386857988e-06, "loss": 0.8372, "num_input_tokens_seen": 1299217920, "step": 7192 }, { "epoch": 0.7874326062563288, "grad_norm": 1.1896891242362644, "learning_rate": 5.367209812583557e-06, "loss": 0.7408, "num_input_tokens_seen": 1299400032, "step": 7193 }, { "epoch": 0.7875420783272667, "grad_norm": 1.0831185396929606, "learning_rate": 5.361887560830675e-06, "loss": 0.9779, "num_input_tokens_seen": 1299587296, "step": 7194 }, { "epoch": 0.7876515503982047, "grad_norm": 1.1307599644952495, "learning_rate": 5.356567632228943e-06, "loss": 0.782, "num_input_tokens_seen": 1299755520, "step": 7195 }, { "epoch": 0.7877610224691426, "grad_norm": 1.3175658147685234, "learning_rate": 5.351250027407717e-06, "loss": 1.1653, "num_input_tokens_seen": 1299918144, "step": 7196 }, { "epoch": 0.7878704945400805, "grad_norm": 1.2238662143968955, "learning_rate": 5.345934746996051e-06, "loss": 0.8483, "num_input_tokens_seen": 1300093088, "step": 7197 }, { "epoch": 0.7879799666110183, "grad_norm": 1.1520348603649782, "learning_rate": 5.340621791622733e-06, "loss": 0.7346, "num_input_tokens_seen": 1300265792, "step": 7198 }, { "epoch": 0.7880894386819562, "grad_norm": 1.1835515987331133, "learning_rate": 5.335311161916273e-06, "loss": 0.6802, "num_input_tokens_seen": 1300451712, "step": 7199 }, { "epoch": 0.7881989107528942, "grad_norm": 1.0792470428306107, "learning_rate": 5.330002858504904e-06, "loss": 0.6344, "num_input_tokens_seen": 1300640768, "step": 7200 }, { "epoch": 0.7883083828238321, "grad_norm": 1.2088956784943186, "learning_rate": 5.324696882016606e-06, "loss": 1.0618, "num_input_tokens_seen": 1300811680, "step": 7201 }, { "epoch": 0.78841785489477, "grad_norm": 1.177455230072366, "learning_rate": 5.319393233079042e-06, "loss": 0.768, "num_input_tokens_seen": 1300986848, "step": 7202 }, { "epoch": 0.7885273269657078, "grad_norm": 1.3539761286959797, "learning_rate": 5.314091912319649e-06, "loss": 0.8251, "num_input_tokens_seen": 1301154400, "step": 7203 }, { "epoch": 0.7886367990366457, "grad_norm": 1.2505148304269962, "learning_rate": 5.3087929203655375e-06, "loss": 0.9199, "num_input_tokens_seen": 1301341216, "step": 7204 }, { "epoch": 0.7887462711075837, "grad_norm": 1.0358972164739366, "learning_rate": 5.303496257843585e-06, "loss": 0.5308, "num_input_tokens_seen": 1301513472, "step": 7205 }, { "epoch": 0.7888557431785216, "grad_norm": 1.122998523500581, "learning_rate": 5.2982019253803725e-06, "loss": 0.812, "num_input_tokens_seen": 1301702752, "step": 7206 }, { "epoch": 0.7889652152494595, "grad_norm": 1.090720750163412, "learning_rate": 5.29290992360221e-06, "loss": 0.8791, "num_input_tokens_seen": 1301909504, "step": 7207 }, { "epoch": 0.7890746873203974, "grad_norm": 1.1250555766471, "learning_rate": 5.2876202531351285e-06, "loss": 0.792, "num_input_tokens_seen": 1302091840, "step": 7208 }, { "epoch": 0.7891841593913352, "grad_norm": 1.1904864152968773, "learning_rate": 5.2823329146048815e-06, "loss": 0.8927, "num_input_tokens_seen": 1302268128, "step": 7209 }, { "epoch": 0.7892936314622732, "grad_norm": 1.2805117829721702, "learning_rate": 5.27704790863697e-06, "loss": 0.8556, "num_input_tokens_seen": 1302402528, "step": 7210 }, { "epoch": 0.7894031035332111, "grad_norm": 1.2177359935446024, "learning_rate": 5.271765235856574e-06, "loss": 0.9642, "num_input_tokens_seen": 1302564480, "step": 7211 }, { "epoch": 0.789512575604149, "grad_norm": 1.137471739449363, "learning_rate": 5.266484896888649e-06, "loss": 0.7, "num_input_tokens_seen": 1302734720, "step": 7212 }, { "epoch": 0.7896220476750869, "grad_norm": 0.9872212511495655, "learning_rate": 5.261206892357825e-06, "loss": 0.6713, "num_input_tokens_seen": 1302937664, "step": 7213 }, { "epoch": 0.7897315197460248, "grad_norm": 1.1557425854754728, "learning_rate": 5.255931222888497e-06, "loss": 0.7201, "num_input_tokens_seen": 1303122016, "step": 7214 }, { "epoch": 0.7898409918169627, "grad_norm": 1.2087277831883068, "learning_rate": 5.25065788910476e-06, "loss": 1.0483, "num_input_tokens_seen": 1303321152, "step": 7215 }, { "epoch": 0.7899504638879006, "grad_norm": 1.2299016705418608, "learning_rate": 5.245386891630441e-06, "loss": 0.9278, "num_input_tokens_seen": 1303507520, "step": 7216 }, { "epoch": 0.7900599359588385, "grad_norm": 1.4197444673763917, "learning_rate": 5.240118231089089e-06, "loss": 1.0411, "num_input_tokens_seen": 1303677312, "step": 7217 }, { "epoch": 0.7901694080297764, "grad_norm": 1.13981207528399, "learning_rate": 5.234851908103969e-06, "loss": 0.8597, "num_input_tokens_seen": 1303861888, "step": 7218 }, { "epoch": 0.7902788801007143, "grad_norm": 1.298357518345872, "learning_rate": 5.229587923298099e-06, "loss": 0.689, "num_input_tokens_seen": 1304040192, "step": 7219 }, { "epoch": 0.7903883521716522, "grad_norm": 1.2299074376176107, "learning_rate": 5.224326277294167e-06, "loss": 0.8445, "num_input_tokens_seen": 1304228128, "step": 7220 }, { "epoch": 0.7904978242425901, "grad_norm": 1.2772787001725303, "learning_rate": 5.219066970714639e-06, "loss": 0.9796, "num_input_tokens_seen": 1304409568, "step": 7221 }, { "epoch": 0.790607296313528, "grad_norm": 1.0862701784636761, "learning_rate": 5.2138100041816736e-06, "loss": 0.7517, "num_input_tokens_seen": 1304597728, "step": 7222 }, { "epoch": 0.7907167683844659, "grad_norm": 1.135060257341442, "learning_rate": 5.208555378317159e-06, "loss": 0.8287, "num_input_tokens_seen": 1304768416, "step": 7223 }, { "epoch": 0.7908262404554038, "grad_norm": 1.3490959990766898, "learning_rate": 5.203303093742712e-06, "loss": 0.7679, "num_input_tokens_seen": 1304953888, "step": 7224 }, { "epoch": 0.7909357125263418, "grad_norm": 1.2276888492698967, "learning_rate": 5.1980531510796595e-06, "loss": 0.879, "num_input_tokens_seen": 1305138240, "step": 7225 }, { "epoch": 0.7910451845972796, "grad_norm": 1.1766392311719625, "learning_rate": 5.192805550949068e-06, "loss": 0.9277, "num_input_tokens_seen": 1305310496, "step": 7226 }, { "epoch": 0.7911546566682175, "grad_norm": 1.3808971437633388, "learning_rate": 5.187560293971705e-06, "loss": 1.0396, "num_input_tokens_seen": 1305491264, "step": 7227 }, { "epoch": 0.7912641287391554, "grad_norm": 1.17792606849728, "learning_rate": 5.182317380768092e-06, "loss": 0.6558, "num_input_tokens_seen": 1305661056, "step": 7228 }, { "epoch": 0.7913736008100933, "grad_norm": 1.2130627103101048, "learning_rate": 5.177076811958451e-06, "loss": 0.7057, "num_input_tokens_seen": 1305797920, "step": 7229 }, { "epoch": 0.7914830728810313, "grad_norm": 1.2377632573942747, "learning_rate": 5.171838588162725e-06, "loss": 0.788, "num_input_tokens_seen": 1305939264, "step": 7230 }, { "epoch": 0.7915925449519692, "grad_norm": 1.1410092268427792, "learning_rate": 5.16660271000059e-06, "loss": 0.6852, "num_input_tokens_seen": 1306124512, "step": 7231 }, { "epoch": 0.791702017022907, "grad_norm": 1.109258457563414, "learning_rate": 5.161369178091438e-06, "loss": 0.804, "num_input_tokens_seen": 1306303936, "step": 7232 }, { "epoch": 0.7918114890938449, "grad_norm": 1.2613002685419563, "learning_rate": 5.1561379930543885e-06, "loss": 0.9677, "num_input_tokens_seen": 1306499936, "step": 7233 }, { "epoch": 0.7919209611647828, "grad_norm": 1.2233943965719307, "learning_rate": 5.1509091555082794e-06, "loss": 0.8378, "num_input_tokens_seen": 1306706688, "step": 7234 }, { "epoch": 0.7920304332357208, "grad_norm": 1.18454349295626, "learning_rate": 5.145682666071663e-06, "loss": 1.0767, "num_input_tokens_seen": 1306913888, "step": 7235 }, { "epoch": 0.7921399053066587, "grad_norm": 1.2475596448442245, "learning_rate": 5.140458525362848e-06, "loss": 0.7659, "num_input_tokens_seen": 1307046944, "step": 7236 }, { "epoch": 0.7922493773775965, "grad_norm": 1.2756800184482309, "learning_rate": 5.135236733999813e-06, "loss": 0.7439, "num_input_tokens_seen": 1307246528, "step": 7237 }, { "epoch": 0.7923588494485344, "grad_norm": 1.3470495212715357, "learning_rate": 5.1300172926003e-06, "loss": 0.9518, "num_input_tokens_seen": 1307440064, "step": 7238 }, { "epoch": 0.7924683215194723, "grad_norm": 1.3776881643645944, "learning_rate": 5.1248002017817596e-06, "loss": 0.9752, "num_input_tokens_seen": 1307623744, "step": 7239 }, { "epoch": 0.7925777935904103, "grad_norm": 1.163598909168119, "learning_rate": 5.119585462161358e-06, "loss": 0.7972, "num_input_tokens_seen": 1307824448, "step": 7240 }, { "epoch": 0.7926872656613482, "grad_norm": 1.178101844458856, "learning_rate": 5.114373074355994e-06, "loss": 1.0068, "num_input_tokens_seen": 1308033888, "step": 7241 }, { "epoch": 0.7927967377322861, "grad_norm": 1.0976551096628584, "learning_rate": 5.10916303898227e-06, "loss": 0.9101, "num_input_tokens_seen": 1308208832, "step": 7242 }, { "epoch": 0.7929062098032239, "grad_norm": 1.1875387988528303, "learning_rate": 5.1039553566565505e-06, "loss": 0.9809, "num_input_tokens_seen": 1308407072, "step": 7243 }, { "epoch": 0.7930156818741618, "grad_norm": 1.1190977504433213, "learning_rate": 5.098750027994862e-06, "loss": 0.6347, "num_input_tokens_seen": 1308583808, "step": 7244 }, { "epoch": 0.7931251539450997, "grad_norm": 1.1704225312725787, "learning_rate": 5.0935470536130155e-06, "loss": 0.6307, "num_input_tokens_seen": 1308761440, "step": 7245 }, { "epoch": 0.7932346260160377, "grad_norm": 1.351172109682714, "learning_rate": 5.088346434126481e-06, "loss": 0.8108, "num_input_tokens_seen": 1308919808, "step": 7246 }, { "epoch": 0.7933440980869756, "grad_norm": 1.1828914862883386, "learning_rate": 5.083148170150509e-06, "loss": 0.7119, "num_input_tokens_seen": 1309096320, "step": 7247 }, { "epoch": 0.7934535701579135, "grad_norm": 1.3096133686585758, "learning_rate": 5.0779522623000345e-06, "loss": 0.8216, "num_input_tokens_seen": 1309264768, "step": 7248 }, { "epoch": 0.7935630422288513, "grad_norm": 1.1897321602385025, "learning_rate": 5.072758711189721e-06, "loss": 1.054, "num_input_tokens_seen": 1309460768, "step": 7249 }, { "epoch": 0.7936725142997892, "grad_norm": 1.1854246978398322, "learning_rate": 5.067567517433958e-06, "loss": 0.7784, "num_input_tokens_seen": 1309654752, "step": 7250 }, { "epoch": 0.7937819863707272, "grad_norm": 1.228231570058395, "learning_rate": 5.062378681646845e-06, "loss": 0.8269, "num_input_tokens_seen": 1309837536, "step": 7251 }, { "epoch": 0.7938914584416651, "grad_norm": 1.1795162556632963, "learning_rate": 5.057192204442235e-06, "loss": 0.8039, "num_input_tokens_seen": 1310024576, "step": 7252 }, { "epoch": 0.794000930512603, "grad_norm": 1.2026896989732208, "learning_rate": 5.052008086433649e-06, "loss": 0.7508, "num_input_tokens_seen": 1310167264, "step": 7253 }, { "epoch": 0.7941104025835408, "grad_norm": 1.31607585801978, "learning_rate": 5.046826328234386e-06, "loss": 0.8507, "num_input_tokens_seen": 1310328320, "step": 7254 }, { "epoch": 0.7942198746544787, "grad_norm": 1.277792615504891, "learning_rate": 5.041646930457411e-06, "loss": 0.8398, "num_input_tokens_seen": 1310496992, "step": 7255 }, { "epoch": 0.7943293467254167, "grad_norm": 1.0564682797040397, "learning_rate": 5.0364698937154565e-06, "loss": 0.5934, "num_input_tokens_seen": 1310669920, "step": 7256 }, { "epoch": 0.7944388187963546, "grad_norm": 1.117988459362047, "learning_rate": 5.031295218620952e-06, "loss": 0.9749, "num_input_tokens_seen": 1310882944, "step": 7257 }, { "epoch": 0.7945482908672925, "grad_norm": 1.1177887799031425, "learning_rate": 5.026122905786046e-06, "loss": 0.7317, "num_input_tokens_seen": 1311019360, "step": 7258 }, { "epoch": 0.7946577629382304, "grad_norm": 1.1681559230901646, "learning_rate": 5.020952955822619e-06, "loss": 0.8124, "num_input_tokens_seen": 1311194752, "step": 7259 }, { "epoch": 0.7947672350091682, "grad_norm": 1.2787773649955076, "learning_rate": 5.015785369342255e-06, "loss": 0.9262, "num_input_tokens_seen": 1311364992, "step": 7260 }, { "epoch": 0.7948767070801062, "grad_norm": 1.1326955274501522, "learning_rate": 5.010620146956293e-06, "loss": 0.8334, "num_input_tokens_seen": 1311524928, "step": 7261 }, { "epoch": 0.7949861791510441, "grad_norm": 1.1547081435736595, "learning_rate": 5.0054572892757416e-06, "loss": 0.9194, "num_input_tokens_seen": 1311721600, "step": 7262 }, { "epoch": 0.795095651221982, "grad_norm": 1.1970265384686414, "learning_rate": 5.000296796911377e-06, "loss": 0.8079, "num_input_tokens_seen": 1311903040, "step": 7263 }, { "epoch": 0.7952051232929199, "grad_norm": 1.190346017827803, "learning_rate": 4.995138670473667e-06, "loss": 0.7586, "num_input_tokens_seen": 1312098592, "step": 7264 }, { "epoch": 0.7953145953638578, "grad_norm": 1.2240287706384634, "learning_rate": 4.98998291057281e-06, "loss": 0.947, "num_input_tokens_seen": 1312314080, "step": 7265 }, { "epoch": 0.7954240674347957, "grad_norm": 1.170978050293343, "learning_rate": 4.984829517818723e-06, "loss": 0.8285, "num_input_tokens_seen": 1312511200, "step": 7266 }, { "epoch": 0.7955335395057336, "grad_norm": 1.1886200892625494, "learning_rate": 4.979678492821041e-06, "loss": 0.9699, "num_input_tokens_seen": 1312689056, "step": 7267 }, { "epoch": 0.7956430115766715, "grad_norm": 1.151702961319878, "learning_rate": 4.974529836189113e-06, "loss": 0.9445, "num_input_tokens_seen": 1312887968, "step": 7268 }, { "epoch": 0.7957524836476094, "grad_norm": 1.1022789971942855, "learning_rate": 4.969383548532031e-06, "loss": 0.841, "num_input_tokens_seen": 1313066048, "step": 7269 }, { "epoch": 0.7958619557185473, "grad_norm": 1.2762889648377684, "learning_rate": 4.9642396304585834e-06, "loss": 0.692, "num_input_tokens_seen": 1313217920, "step": 7270 }, { "epoch": 0.7959714277894852, "grad_norm": 1.2305448478297922, "learning_rate": 4.959098082577284e-06, "loss": 0.8613, "num_input_tokens_seen": 1313387264, "step": 7271 }, { "epoch": 0.7960808998604231, "grad_norm": 1.2147188275740466, "learning_rate": 4.953958905496372e-06, "loss": 0.8678, "num_input_tokens_seen": 1313579008, "step": 7272 }, { "epoch": 0.796190371931361, "grad_norm": 1.0573783738736022, "learning_rate": 4.948822099823797e-06, "loss": 0.7518, "num_input_tokens_seen": 1313761792, "step": 7273 }, { "epoch": 0.7962998440022989, "grad_norm": 1.0888887001273149, "learning_rate": 4.943687666167238e-06, "loss": 0.7532, "num_input_tokens_seen": 1313943904, "step": 7274 }, { "epoch": 0.7964093160732368, "grad_norm": 1.225333114018661, "learning_rate": 4.938555605134082e-06, "loss": 0.9277, "num_input_tokens_seen": 1314101824, "step": 7275 }, { "epoch": 0.7965187881441748, "grad_norm": 1.4659303424362025, "learning_rate": 4.9334259173314575e-06, "loss": 0.8782, "num_input_tokens_seen": 1314267584, "step": 7276 }, { "epoch": 0.7966282602151126, "grad_norm": 1.1556015773976336, "learning_rate": 4.928298603366174e-06, "loss": 0.8843, "num_input_tokens_seen": 1314460672, "step": 7277 }, { "epoch": 0.7967377322860505, "grad_norm": 1.0641937219759692, "learning_rate": 4.92317366384481e-06, "loss": 0.6677, "num_input_tokens_seen": 1314642112, "step": 7278 }, { "epoch": 0.7968472043569884, "grad_norm": 1.2174573058175993, "learning_rate": 4.918051099373605e-06, "loss": 0.7358, "num_input_tokens_seen": 1314856256, "step": 7279 }, { "epoch": 0.7969566764279263, "grad_norm": 1.0962823706863898, "learning_rate": 4.912930910558572e-06, "loss": 0.6461, "num_input_tokens_seen": 1315023136, "step": 7280 }, { "epoch": 0.7970661484988643, "grad_norm": 1.1668203570541553, "learning_rate": 4.907813098005415e-06, "loss": 0.7788, "num_input_tokens_seen": 1315220704, "step": 7281 }, { "epoch": 0.7971756205698022, "grad_norm": 1.5085853611985982, "learning_rate": 4.90269766231955e-06, "loss": 0.9316, "num_input_tokens_seen": 1315414240, "step": 7282 }, { "epoch": 0.79728509264074, "grad_norm": 1.1651473142944493, "learning_rate": 4.897584604106145e-06, "loss": 0.7114, "num_input_tokens_seen": 1315555136, "step": 7283 }, { "epoch": 0.7973945647116779, "grad_norm": 1.0704022669216493, "learning_rate": 4.89247392397004e-06, "loss": 1.1238, "num_input_tokens_seen": 1315754496, "step": 7284 }, { "epoch": 0.7975040367826158, "grad_norm": 1.2657739998977457, "learning_rate": 4.8873656225158405e-06, "loss": 0.8349, "num_input_tokens_seen": 1315921824, "step": 7285 }, { "epoch": 0.7976135088535538, "grad_norm": 1.2464653106924326, "learning_rate": 4.88225970034783e-06, "loss": 0.8369, "num_input_tokens_seen": 1316085344, "step": 7286 }, { "epoch": 0.7977229809244917, "grad_norm": 1.4335841872392656, "learning_rate": 4.8771561580700505e-06, "loss": 1.3288, "num_input_tokens_seen": 1316281120, "step": 7287 }, { "epoch": 0.7978324529954295, "grad_norm": 1.1743891893700023, "learning_rate": 4.872054996286216e-06, "loss": 0.7825, "num_input_tokens_seen": 1316450464, "step": 7288 }, { "epoch": 0.7979419250663674, "grad_norm": 1.2607090928678193, "learning_rate": 4.866956215599802e-06, "loss": 0.6704, "num_input_tokens_seen": 1316614432, "step": 7289 }, { "epoch": 0.7980513971373053, "grad_norm": 1.3834248280811925, "learning_rate": 4.861859816613981e-06, "loss": 0.8262, "num_input_tokens_seen": 1316775488, "step": 7290 }, { "epoch": 0.7981608692082433, "grad_norm": 1.274710785095958, "learning_rate": 4.856765799931648e-06, "loss": 1.0612, "num_input_tokens_seen": 1316976640, "step": 7291 }, { "epoch": 0.7982703412791812, "grad_norm": 1.205697319428583, "learning_rate": 4.851674166155412e-06, "loss": 0.7451, "num_input_tokens_seen": 1317163680, "step": 7292 }, { "epoch": 0.7983798133501191, "grad_norm": 1.026784534335199, "learning_rate": 4.846584915887597e-06, "loss": 0.7143, "num_input_tokens_seen": 1317356320, "step": 7293 }, { "epoch": 0.7984892854210569, "grad_norm": 1.277234553998017, "learning_rate": 4.8414980497302755e-06, "loss": 0.8668, "num_input_tokens_seen": 1317524320, "step": 7294 }, { "epoch": 0.7985987574919948, "grad_norm": 1.1892904035638623, "learning_rate": 4.836413568285183e-06, "loss": 0.799, "num_input_tokens_seen": 1317682464, "step": 7295 }, { "epoch": 0.7987082295629327, "grad_norm": 1.2734643664188066, "learning_rate": 4.831331472153828e-06, "loss": 0.8909, "num_input_tokens_seen": 1317851136, "step": 7296 }, { "epoch": 0.7988177016338707, "grad_norm": 1.2733464588783658, "learning_rate": 4.8262517619374e-06, "loss": 0.724, "num_input_tokens_seen": 1318050720, "step": 7297 }, { "epoch": 0.7989271737048086, "grad_norm": 1.2637909209647147, "learning_rate": 4.821174438236825e-06, "loss": 0.9177, "num_input_tokens_seen": 1318231936, "step": 7298 }, { "epoch": 0.7990366457757465, "grad_norm": 1.2320661076638175, "learning_rate": 4.816099501652741e-06, "loss": 0.8303, "num_input_tokens_seen": 1318403968, "step": 7299 }, { "epoch": 0.7991461178466843, "grad_norm": 1.3570963736869852, "learning_rate": 4.8110269527854965e-06, "loss": 0.8109, "num_input_tokens_seen": 1318550016, "step": 7300 }, { "epoch": 0.7992555899176222, "grad_norm": 1.3473428997686092, "learning_rate": 4.805956792235172e-06, "loss": 0.7822, "num_input_tokens_seen": 1318721600, "step": 7301 }, { "epoch": 0.7993650619885602, "grad_norm": 1.2866878948206433, "learning_rate": 4.800889020601548e-06, "loss": 1.0638, "num_input_tokens_seen": 1318866304, "step": 7302 }, { "epoch": 0.7994745340594981, "grad_norm": 1.3234478442295616, "learning_rate": 4.795823638484142e-06, "loss": 0.9173, "num_input_tokens_seen": 1319056480, "step": 7303 }, { "epoch": 0.799584006130436, "grad_norm": 1.189827946077665, "learning_rate": 4.790760646482178e-06, "loss": 0.8702, "num_input_tokens_seen": 1319217312, "step": 7304 }, { "epoch": 0.7996934782013738, "grad_norm": 1.1272937914273065, "learning_rate": 4.785700045194596e-06, "loss": 0.7576, "num_input_tokens_seen": 1319394048, "step": 7305 }, { "epoch": 0.7998029502723117, "grad_norm": 1.1147497831225905, "learning_rate": 4.7806418352200565e-06, "loss": 0.9075, "num_input_tokens_seen": 1319601024, "step": 7306 }, { "epoch": 0.7999124223432497, "grad_norm": 1.1080264101530337, "learning_rate": 4.775586017156936e-06, "loss": 0.8667, "num_input_tokens_seen": 1319786944, "step": 7307 }, { "epoch": 0.8000218944141876, "grad_norm": 1.201322022024568, "learning_rate": 4.770532591603324e-06, "loss": 0.8673, "num_input_tokens_seen": 1319966592, "step": 7308 }, { "epoch": 0.8001313664851255, "grad_norm": 1.2677962434369103, "learning_rate": 4.765481559157034e-06, "loss": 0.8826, "num_input_tokens_seen": 1320149152, "step": 7309 }, { "epoch": 0.8002408385560634, "grad_norm": 1.1283530493515839, "learning_rate": 4.760432920415589e-06, "loss": 0.7683, "num_input_tokens_seen": 1320353440, "step": 7310 }, { "epoch": 0.8003503106270012, "grad_norm": 1.178689041503209, "learning_rate": 4.755386675976245e-06, "loss": 0.7083, "num_input_tokens_seen": 1320536224, "step": 7311 }, { "epoch": 0.8004597826979392, "grad_norm": 1.1952117678015715, "learning_rate": 4.750342826435955e-06, "loss": 0.8471, "num_input_tokens_seen": 1320739392, "step": 7312 }, { "epoch": 0.8005692547688771, "grad_norm": 1.3379104946916405, "learning_rate": 4.745301372391397e-06, "loss": 1.229, "num_input_tokens_seen": 1320927328, "step": 7313 }, { "epoch": 0.800678726839815, "grad_norm": 1.2792887680146277, "learning_rate": 4.740262314438968e-06, "loss": 0.827, "num_input_tokens_seen": 1321123104, "step": 7314 }, { "epoch": 0.8007881989107529, "grad_norm": 1.3331864792695283, "learning_rate": 4.7352256531747766e-06, "loss": 0.8672, "num_input_tokens_seen": 1321284832, "step": 7315 }, { "epoch": 0.8008976709816908, "grad_norm": 1.072225212072136, "learning_rate": 4.730191389194652e-06, "loss": 0.6346, "num_input_tokens_seen": 1321465824, "step": 7316 }, { "epoch": 0.8010071430526287, "grad_norm": 1.0176523953972034, "learning_rate": 4.725159523094127e-06, "loss": 0.7188, "num_input_tokens_seen": 1321632256, "step": 7317 }, { "epoch": 0.8011166151235666, "grad_norm": 1.0315676691156332, "learning_rate": 4.720130055468488e-06, "loss": 0.7554, "num_input_tokens_seen": 1321836096, "step": 7318 }, { "epoch": 0.8012260871945045, "grad_norm": 1.0791246506650534, "learning_rate": 4.7151029869126784e-06, "loss": 0.7997, "num_input_tokens_seen": 1322027392, "step": 7319 }, { "epoch": 0.8013355592654424, "grad_norm": 1.1932773593121375, "learning_rate": 4.710078318021424e-06, "loss": 0.924, "num_input_tokens_seen": 1322213536, "step": 7320 }, { "epoch": 0.8014450313363803, "grad_norm": 1.3496136147726459, "learning_rate": 4.705056049389101e-06, "loss": 0.7621, "num_input_tokens_seen": 1322377056, "step": 7321 }, { "epoch": 0.8015545034073182, "grad_norm": 1.203070948984348, "learning_rate": 4.700036181609857e-06, "loss": 0.6781, "num_input_tokens_seen": 1322556704, "step": 7322 }, { "epoch": 0.8016639754782561, "grad_norm": 1.2202227081985708, "learning_rate": 4.695018715277527e-06, "loss": 0.6953, "num_input_tokens_seen": 1322729856, "step": 7323 }, { "epoch": 0.801773447549194, "grad_norm": 1.2451629989274537, "learning_rate": 4.690003650985658e-06, "loss": 1.0356, "num_input_tokens_seen": 1322934368, "step": 7324 }, { "epoch": 0.8018829196201319, "grad_norm": 1.1953807636789398, "learning_rate": 4.684990989327548e-06, "loss": 0.8542, "num_input_tokens_seen": 1323103712, "step": 7325 }, { "epoch": 0.8019923916910698, "grad_norm": 1.1208226211678827, "learning_rate": 4.679980730896153e-06, "loss": 0.8998, "num_input_tokens_seen": 1323295232, "step": 7326 }, { "epoch": 0.8021018637620078, "grad_norm": 1.3371762338383457, "learning_rate": 4.674972876284203e-06, "loss": 0.9954, "num_input_tokens_seen": 1323467712, "step": 7327 }, { "epoch": 0.8022113358329456, "grad_norm": 1.2359506714862905, "learning_rate": 4.6699674260840955e-06, "loss": 0.8045, "num_input_tokens_seen": 1323649600, "step": 7328 }, { "epoch": 0.8023208079038835, "grad_norm": 1.174535198806391, "learning_rate": 4.664964380887985e-06, "loss": 0.6878, "num_input_tokens_seen": 1323840448, "step": 7329 }, { "epoch": 0.8024302799748214, "grad_norm": 1.2560310780330977, "learning_rate": 4.6599637412877125e-06, "loss": 0.8504, "num_input_tokens_seen": 1324028160, "step": 7330 }, { "epoch": 0.8025397520457593, "grad_norm": 1.203613380292846, "learning_rate": 4.654965507874845e-06, "loss": 0.8893, "num_input_tokens_seen": 1324204672, "step": 7331 }, { "epoch": 0.8026492241166973, "grad_norm": 1.195845640874568, "learning_rate": 4.649969681240668e-06, "loss": 0.8834, "num_input_tokens_seen": 1324373120, "step": 7332 }, { "epoch": 0.8027586961876352, "grad_norm": 1.1607160149026923, "learning_rate": 4.644976261976172e-06, "loss": 1.0278, "num_input_tokens_seen": 1324556352, "step": 7333 }, { "epoch": 0.802868168258573, "grad_norm": 1.2515635724962, "learning_rate": 4.639985250672074e-06, "loss": 0.6972, "num_input_tokens_seen": 1324750784, "step": 7334 }, { "epoch": 0.8029776403295109, "grad_norm": 1.1850950583626834, "learning_rate": 4.634996647918791e-06, "loss": 0.8335, "num_input_tokens_seen": 1324914976, "step": 7335 }, { "epoch": 0.8030871124004488, "grad_norm": 1.1468530741162637, "learning_rate": 4.63001045430648e-06, "loss": 0.7898, "num_input_tokens_seen": 1325077376, "step": 7336 }, { "epoch": 0.8031965844713868, "grad_norm": 1.2732857923886594, "learning_rate": 4.625026670424992e-06, "loss": 0.9254, "num_input_tokens_seen": 1325234176, "step": 7337 }, { "epoch": 0.8033060565423247, "grad_norm": 1.2473685222441862, "learning_rate": 4.620045296863898e-06, "loss": 0.9004, "num_input_tokens_seen": 1325400160, "step": 7338 }, { "epoch": 0.8034155286132625, "grad_norm": 1.1603984130482965, "learning_rate": 4.615066334212487e-06, "loss": 0.8151, "num_input_tokens_seen": 1325586976, "step": 7339 }, { "epoch": 0.8035250006842004, "grad_norm": 1.1624931663394364, "learning_rate": 4.61008978305976e-06, "loss": 0.8776, "num_input_tokens_seen": 1325754080, "step": 7340 }, { "epoch": 0.8036344727551383, "grad_norm": 1.2337775414203553, "learning_rate": 4.605115643994429e-06, "loss": 1.0879, "num_input_tokens_seen": 1325964416, "step": 7341 }, { "epoch": 0.8037439448260763, "grad_norm": 1.1676941105398664, "learning_rate": 4.6001439176049325e-06, "loss": 0.8148, "num_input_tokens_seen": 1326135104, "step": 7342 }, { "epoch": 0.8038534168970142, "grad_norm": 1.2434707826587843, "learning_rate": 4.595174604479405e-06, "loss": 0.8462, "num_input_tokens_seen": 1326312064, "step": 7343 }, { "epoch": 0.8039628889679521, "grad_norm": 1.1142271607685628, "learning_rate": 4.590207705205718e-06, "loss": 0.8595, "num_input_tokens_seen": 1326497984, "step": 7344 }, { "epoch": 0.8040723610388899, "grad_norm": 1.1627934854496207, "learning_rate": 4.585243220371446e-06, "loss": 0.8105, "num_input_tokens_seen": 1326673824, "step": 7345 }, { "epoch": 0.8041818331098278, "grad_norm": 1.1280589900689495, "learning_rate": 4.580281150563873e-06, "loss": 0.9916, "num_input_tokens_seen": 1326889088, "step": 7346 }, { "epoch": 0.8042913051807657, "grad_norm": 1.093526817439467, "learning_rate": 4.575321496370005e-06, "loss": 0.815, "num_input_tokens_seen": 1327073216, "step": 7347 }, { "epoch": 0.8044007772517037, "grad_norm": 1.2166731818995806, "learning_rate": 4.570364258376558e-06, "loss": 0.6458, "num_input_tokens_seen": 1327245248, "step": 7348 }, { "epoch": 0.8045102493226416, "grad_norm": 1.1274090510731996, "learning_rate": 4.565409437169965e-06, "loss": 0.9682, "num_input_tokens_seen": 1327428256, "step": 7349 }, { "epoch": 0.8046197213935795, "grad_norm": 1.0768629786993276, "learning_rate": 4.560457033336365e-06, "loss": 0.8056, "num_input_tokens_seen": 1327626720, "step": 7350 }, { "epoch": 0.8047291934645173, "grad_norm": 0.9335784272408881, "learning_rate": 4.555507047461638e-06, "loss": 0.7252, "num_input_tokens_seen": 1327783072, "step": 7351 }, { "epoch": 0.8048386655354552, "grad_norm": 1.0347485009141317, "learning_rate": 4.550559480131328e-06, "loss": 0.8611, "num_input_tokens_seen": 1327957120, "step": 7352 }, { "epoch": 0.8049481376063932, "grad_norm": 1.2165580012946016, "learning_rate": 4.5456143319307475e-06, "loss": 0.953, "num_input_tokens_seen": 1328127360, "step": 7353 }, { "epoch": 0.8050576096773311, "grad_norm": 1.2749665798220304, "learning_rate": 4.5406716034448905e-06, "loss": 0.8742, "num_input_tokens_seen": 1328302080, "step": 7354 }, { "epoch": 0.805167081748269, "grad_norm": 1.2814804428162119, "learning_rate": 4.535731295258469e-06, "loss": 0.7185, "num_input_tokens_seen": 1328458208, "step": 7355 }, { "epoch": 0.8052765538192068, "grad_norm": 1.1541906524167311, "learning_rate": 4.530793407955913e-06, "loss": 0.8009, "num_input_tokens_seen": 1328626208, "step": 7356 }, { "epoch": 0.8053860258901447, "grad_norm": 1.2424951808076115, "learning_rate": 4.525857942121364e-06, "loss": 0.9275, "num_input_tokens_seen": 1328810336, "step": 7357 }, { "epoch": 0.8054954979610827, "grad_norm": 1.149414736634918, "learning_rate": 4.520924898338691e-06, "loss": 0.9113, "num_input_tokens_seen": 1328994240, "step": 7358 }, { "epoch": 0.8056049700320206, "grad_norm": 1.2355586786758583, "learning_rate": 4.51599427719144e-06, "loss": 0.9001, "num_input_tokens_seen": 1329153280, "step": 7359 }, { "epoch": 0.8057144421029585, "grad_norm": 1.2040675545969255, "learning_rate": 4.511066079262921e-06, "loss": 0.7351, "num_input_tokens_seen": 1329318368, "step": 7360 }, { "epoch": 0.8058239141738964, "grad_norm": 1.2925079077620774, "learning_rate": 4.506140305136103e-06, "loss": 1.0326, "num_input_tokens_seen": 1329524448, "step": 7361 }, { "epoch": 0.8059333862448342, "grad_norm": 1.0909378958280398, "learning_rate": 4.501216955393722e-06, "loss": 0.7761, "num_input_tokens_seen": 1329713504, "step": 7362 }, { "epoch": 0.8060428583157722, "grad_norm": 1.1045407555220093, "learning_rate": 4.496296030618177e-06, "loss": 0.7, "num_input_tokens_seen": 1329896736, "step": 7363 }, { "epoch": 0.8061523303867101, "grad_norm": 1.3074963423347734, "learning_rate": 4.491377531391619e-06, "loss": 0.7794, "num_input_tokens_seen": 1330080864, "step": 7364 }, { "epoch": 0.806261802457648, "grad_norm": 1.307313845422261, "learning_rate": 4.486461458295896e-06, "loss": 1.1626, "num_input_tokens_seen": 1330271264, "step": 7365 }, { "epoch": 0.8063712745285859, "grad_norm": 1.1121980450208797, "learning_rate": 4.4815478119125595e-06, "loss": 0.7979, "num_input_tokens_seen": 1330438816, "step": 7366 }, { "epoch": 0.8064807465995238, "grad_norm": 1.156230204644738, "learning_rate": 4.4766365928229054e-06, "loss": 0.7302, "num_input_tokens_seen": 1330600544, "step": 7367 }, { "epoch": 0.8065902186704617, "grad_norm": 1.2010782344883422, "learning_rate": 4.471727801607895e-06, "loss": 0.7553, "num_input_tokens_seen": 1330782208, "step": 7368 }, { "epoch": 0.8066996907413996, "grad_norm": 1.1941072957856727, "learning_rate": 4.466821438848254e-06, "loss": 0.6178, "num_input_tokens_seen": 1330932512, "step": 7369 }, { "epoch": 0.8068091628123375, "grad_norm": 1.088822026021829, "learning_rate": 4.461917505124375e-06, "loss": 0.5548, "num_input_tokens_seen": 1331111936, "step": 7370 }, { "epoch": 0.8069186348832754, "grad_norm": 1.1136249212551228, "learning_rate": 4.457016001016395e-06, "loss": 0.7824, "num_input_tokens_seen": 1331275680, "step": 7371 }, { "epoch": 0.8070281069542133, "grad_norm": 1.1218785851721473, "learning_rate": 4.452116927104152e-06, "loss": 0.7388, "num_input_tokens_seen": 1331448832, "step": 7372 }, { "epoch": 0.8071375790251512, "grad_norm": 1.1911062894839426, "learning_rate": 4.447220283967196e-06, "loss": 0.8116, "num_input_tokens_seen": 1331641024, "step": 7373 }, { "epoch": 0.8072470510960891, "grad_norm": 1.1189448799589243, "learning_rate": 4.442326072184791e-06, "loss": 0.8541, "num_input_tokens_seen": 1331842176, "step": 7374 }, { "epoch": 0.807356523167027, "grad_norm": 0.9672480599666651, "learning_rate": 4.4374342923359125e-06, "loss": 0.6279, "num_input_tokens_seen": 1332058560, "step": 7375 }, { "epoch": 0.8074659952379649, "grad_norm": 1.1565144726135879, "learning_rate": 4.4325449449992455e-06, "loss": 0.8353, "num_input_tokens_seen": 1332270464, "step": 7376 }, { "epoch": 0.8075754673089028, "grad_norm": 1.0910306640637413, "learning_rate": 4.42765803075319e-06, "loss": 0.7302, "num_input_tokens_seen": 1332467360, "step": 7377 }, { "epoch": 0.8076849393798408, "grad_norm": 1.1953143948808, "learning_rate": 4.4227735501758654e-06, "loss": 0.7016, "num_input_tokens_seen": 1332676576, "step": 7378 }, { "epoch": 0.8077944114507786, "grad_norm": 1.0523386478654386, "learning_rate": 4.417891503845095e-06, "loss": 0.7291, "num_input_tokens_seen": 1332868768, "step": 7379 }, { "epoch": 0.8079038835217165, "grad_norm": 1.1731924662144262, "learning_rate": 4.413011892338412e-06, "loss": 1.055, "num_input_tokens_seen": 1333081120, "step": 7380 }, { "epoch": 0.8080133555926544, "grad_norm": 1.3588690419063394, "learning_rate": 4.408134716233067e-06, "loss": 0.8997, "num_input_tokens_seen": 1333270400, "step": 7381 }, { "epoch": 0.8081228276635923, "grad_norm": 1.0482178231924881, "learning_rate": 4.403259976106019e-06, "loss": 0.7662, "num_input_tokens_seen": 1333459008, "step": 7382 }, { "epoch": 0.8082322997345303, "grad_norm": 1.0773490656665565, "learning_rate": 4.398387672533944e-06, "loss": 0.8401, "num_input_tokens_seen": 1333674272, "step": 7383 }, { "epoch": 0.8083417718054682, "grad_norm": 1.1404888646653768, "learning_rate": 4.393517806093219e-06, "loss": 0.8886, "num_input_tokens_seen": 1333882144, "step": 7384 }, { "epoch": 0.808451243876406, "grad_norm": 1.1708097575428487, "learning_rate": 4.388650377359943e-06, "loss": 0.9777, "num_input_tokens_seen": 1334082624, "step": 7385 }, { "epoch": 0.8085607159473439, "grad_norm": 1.2604357452630048, "learning_rate": 4.383785386909931e-06, "loss": 1.0062, "num_input_tokens_seen": 1334251072, "step": 7386 }, { "epoch": 0.8086701880182818, "grad_norm": 1.3047898760620935, "learning_rate": 4.378922835318694e-06, "loss": 0.9016, "num_input_tokens_seen": 1334385024, "step": 7387 }, { "epoch": 0.8087796600892198, "grad_norm": 1.1906607374700084, "learning_rate": 4.374062723161468e-06, "loss": 0.8015, "num_input_tokens_seen": 1334564672, "step": 7388 }, { "epoch": 0.8088891321601577, "grad_norm": 1.2988650209162669, "learning_rate": 4.369205051013189e-06, "loss": 0.9851, "num_input_tokens_seen": 1334707360, "step": 7389 }, { "epoch": 0.8089986042310956, "grad_norm": 1.1222745412385378, "learning_rate": 4.364349819448507e-06, "loss": 0.5972, "num_input_tokens_seen": 1334874016, "step": 7390 }, { "epoch": 0.8091080763020334, "grad_norm": 1.2402865181121672, "learning_rate": 4.359497029041807e-06, "loss": 1.1233, "num_input_tokens_seen": 1335070688, "step": 7391 }, { "epoch": 0.8092175483729713, "grad_norm": 1.0245059893066726, "learning_rate": 4.354646680367136e-06, "loss": 0.5571, "num_input_tokens_seen": 1335242272, "step": 7392 }, { "epoch": 0.8093270204439093, "grad_norm": 1.2433641247264613, "learning_rate": 4.34979877399831e-06, "loss": 0.784, "num_input_tokens_seen": 1335412288, "step": 7393 }, { "epoch": 0.8094364925148472, "grad_norm": 1.1517762419267816, "learning_rate": 4.3449533105087984e-06, "loss": 0.9471, "num_input_tokens_seen": 1335611424, "step": 7394 }, { "epoch": 0.8095459645857851, "grad_norm": 1.1999970436059773, "learning_rate": 4.3401102904718296e-06, "loss": 0.7161, "num_input_tokens_seen": 1335776288, "step": 7395 }, { "epoch": 0.8096554366567229, "grad_norm": 1.144642847238417, "learning_rate": 4.335269714460322e-06, "loss": 0.6695, "num_input_tokens_seen": 1335943616, "step": 7396 }, { "epoch": 0.8097649087276608, "grad_norm": 1.1035726616897077, "learning_rate": 4.3304315830468985e-06, "loss": 0.9346, "num_input_tokens_seen": 1336130880, "step": 7397 }, { "epoch": 0.8098743807985987, "grad_norm": 1.1082114444241555, "learning_rate": 4.325595896803908e-06, "loss": 0.7704, "num_input_tokens_seen": 1336346144, "step": 7398 }, { "epoch": 0.8099838528695367, "grad_norm": 1.2890324906988697, "learning_rate": 4.320762656303392e-06, "loss": 0.8845, "num_input_tokens_seen": 1336521984, "step": 7399 }, { "epoch": 0.8100933249404746, "grad_norm": 1.1746239527110762, "learning_rate": 4.315931862117137e-06, "loss": 0.8613, "num_input_tokens_seen": 1336681920, "step": 7400 }, { "epoch": 0.8102027970114125, "grad_norm": 1.3854642539587578, "learning_rate": 4.311103514816589e-06, "loss": 0.753, "num_input_tokens_seen": 1336815872, "step": 7401 }, { "epoch": 0.8103122690823503, "grad_norm": 1.1015329018803142, "learning_rate": 4.306277614972956e-06, "loss": 0.7864, "num_input_tokens_seen": 1337000896, "step": 7402 }, { "epoch": 0.8104217411532882, "grad_norm": 1.2392695481245941, "learning_rate": 4.3014541631571095e-06, "loss": 0.848, "num_input_tokens_seen": 1337183680, "step": 7403 }, { "epoch": 0.8105312132242262, "grad_norm": 1.1027420438255913, "learning_rate": 4.29663315993967e-06, "loss": 0.9624, "num_input_tokens_seen": 1337376768, "step": 7404 }, { "epoch": 0.8106406852951641, "grad_norm": 1.2937385134716866, "learning_rate": 4.291814605890954e-06, "loss": 0.9327, "num_input_tokens_seen": 1337541632, "step": 7405 }, { "epoch": 0.810750157366102, "grad_norm": 1.0958871941813197, "learning_rate": 4.28699850158098e-06, "loss": 0.6438, "num_input_tokens_seen": 1337694624, "step": 7406 }, { "epoch": 0.8108596294370399, "grad_norm": 1.1932116727685784, "learning_rate": 4.2821848475794875e-06, "loss": 1.0823, "num_input_tokens_seen": 1337878080, "step": 7407 }, { "epoch": 0.8109691015079777, "grad_norm": 1.146980866995143, "learning_rate": 4.277373644455915e-06, "loss": 0.8478, "num_input_tokens_seen": 1338059744, "step": 7408 }, { "epoch": 0.8110785735789157, "grad_norm": 1.1891802394888091, "learning_rate": 4.272564892779438e-06, "loss": 0.9285, "num_input_tokens_seen": 1338242304, "step": 7409 }, { "epoch": 0.8111880456498536, "grad_norm": 1.251418024646674, "learning_rate": 4.267758593118898e-06, "loss": 0.919, "num_input_tokens_seen": 1338426656, "step": 7410 }, { "epoch": 0.8112975177207915, "grad_norm": 1.0561735069069853, "learning_rate": 4.262954746042888e-06, "loss": 0.7394, "num_input_tokens_seen": 1338594656, "step": 7411 }, { "epoch": 0.8114069897917294, "grad_norm": 1.1365933693235442, "learning_rate": 4.258153352119693e-06, "loss": 0.821, "num_input_tokens_seen": 1338753024, "step": 7412 }, { "epoch": 0.8115164618626672, "grad_norm": 1.0400747660208227, "learning_rate": 4.253354411917302e-06, "loss": 0.7586, "num_input_tokens_seen": 1338927744, "step": 7413 }, { "epoch": 0.8116259339336052, "grad_norm": 1.1490979015051992, "learning_rate": 4.2485579260034215e-06, "loss": 0.6698, "num_input_tokens_seen": 1339084992, "step": 7414 }, { "epoch": 0.8117354060045431, "grad_norm": 1.27556110078121, "learning_rate": 4.243763894945471e-06, "loss": 0.8649, "num_input_tokens_seen": 1339261280, "step": 7415 }, { "epoch": 0.811844878075481, "grad_norm": 1.3438054893924298, "learning_rate": 4.238972319310572e-06, "loss": 0.893, "num_input_tokens_seen": 1339400384, "step": 7416 }, { "epoch": 0.8119543501464189, "grad_norm": 1.2202762925641437, "learning_rate": 4.234183199665559e-06, "loss": 1.0016, "num_input_tokens_seen": 1339573088, "step": 7417 }, { "epoch": 0.8120638222173568, "grad_norm": 1.0971311351669342, "learning_rate": 4.229396536576968e-06, "loss": 0.7836, "num_input_tokens_seen": 1339782976, "step": 7418 }, { "epoch": 0.8121732942882947, "grad_norm": 1.2551667244795988, "learning_rate": 4.224612330611069e-06, "loss": 0.6798, "num_input_tokens_seen": 1339929248, "step": 7419 }, { "epoch": 0.8122827663592326, "grad_norm": 1.1993083112741831, "learning_rate": 4.219830582333814e-06, "loss": 0.8248, "num_input_tokens_seen": 1340105536, "step": 7420 }, { "epoch": 0.8123922384301705, "grad_norm": 1.2423809068890384, "learning_rate": 4.215051292310876e-06, "loss": 0.8452, "num_input_tokens_seen": 1340273536, "step": 7421 }, { "epoch": 0.8125017105011084, "grad_norm": 1.1457173317653688, "learning_rate": 4.210274461107638e-06, "loss": 0.6397, "num_input_tokens_seen": 1340458560, "step": 7422 }, { "epoch": 0.8126111825720463, "grad_norm": 1.1431544228047459, "learning_rate": 4.205500089289185e-06, "loss": 0.9328, "num_input_tokens_seen": 1340648512, "step": 7423 }, { "epoch": 0.8127206546429843, "grad_norm": 1.0639828264205713, "learning_rate": 4.200728177420321e-06, "loss": 0.7542, "num_input_tokens_seen": 1340833312, "step": 7424 }, { "epoch": 0.8128301267139221, "grad_norm": 1.3497814725685302, "learning_rate": 4.1959587260655465e-06, "loss": 1.1945, "num_input_tokens_seen": 1341012512, "step": 7425 }, { "epoch": 0.81293959878486, "grad_norm": 1.063196402814476, "learning_rate": 4.191191735789096e-06, "loss": 0.7304, "num_input_tokens_seen": 1341217696, "step": 7426 }, { "epoch": 0.8130490708557979, "grad_norm": 1.1377562412433606, "learning_rate": 4.186427207154869e-06, "loss": 0.847, "num_input_tokens_seen": 1341394208, "step": 7427 }, { "epoch": 0.8131585429267358, "grad_norm": 1.2583137129628714, "learning_rate": 4.181665140726523e-06, "loss": 0.8174, "num_input_tokens_seen": 1341558176, "step": 7428 }, { "epoch": 0.8132680149976738, "grad_norm": 1.1074942644198937, "learning_rate": 4.176905537067394e-06, "loss": 0.8906, "num_input_tokens_seen": 1341716992, "step": 7429 }, { "epoch": 0.8133774870686116, "grad_norm": 1.1695300737373793, "learning_rate": 4.1721483967405305e-06, "loss": 0.8367, "num_input_tokens_seen": 1341893280, "step": 7430 }, { "epoch": 0.8134869591395495, "grad_norm": 1.1871793715642296, "learning_rate": 4.167393720308699e-06, "loss": 0.8482, "num_input_tokens_seen": 1342072032, "step": 7431 }, { "epoch": 0.8135964312104874, "grad_norm": 1.0969931340519987, "learning_rate": 4.162641508334355e-06, "loss": 0.7623, "num_input_tokens_seen": 1342274080, "step": 7432 }, { "epoch": 0.8137059032814253, "grad_norm": 1.1990558625475731, "learning_rate": 4.157891761379701e-06, "loss": 0.9178, "num_input_tokens_seen": 1342449920, "step": 7433 }, { "epoch": 0.8138153753523633, "grad_norm": 1.1242703084674366, "learning_rate": 4.153144480006593e-06, "loss": 0.9183, "num_input_tokens_seen": 1342636736, "step": 7434 }, { "epoch": 0.8139248474233012, "grad_norm": 1.3044426511248195, "learning_rate": 4.148399664776656e-06, "loss": 0.9112, "num_input_tokens_seen": 1342808544, "step": 7435 }, { "epoch": 0.814034319494239, "grad_norm": 1.1854599947572764, "learning_rate": 4.143657316251165e-06, "loss": 0.7983, "num_input_tokens_seen": 1342996704, "step": 7436 }, { "epoch": 0.8141437915651769, "grad_norm": 1.1471338463324003, "learning_rate": 4.1389174349911495e-06, "loss": 0.8805, "num_input_tokens_seen": 1343168736, "step": 7437 }, { "epoch": 0.8142532636361148, "grad_norm": 1.0993510564524787, "learning_rate": 4.1341800215573185e-06, "loss": 0.6404, "num_input_tokens_seen": 1343357344, "step": 7438 }, { "epoch": 0.8143627357070528, "grad_norm": 1.20561881277851, "learning_rate": 4.129445076510105e-06, "loss": 0.9419, "num_input_tokens_seen": 1343525792, "step": 7439 }, { "epoch": 0.8144722077779907, "grad_norm": 1.2458297306293633, "learning_rate": 4.124712600409638e-06, "loss": 0.7908, "num_input_tokens_seen": 1343701184, "step": 7440 }, { "epoch": 0.8145816798489286, "grad_norm": 1.143859770463256, "learning_rate": 4.119982593815761e-06, "loss": 0.9502, "num_input_tokens_seen": 1343877248, "step": 7441 }, { "epoch": 0.8146911519198664, "grad_norm": 1.0383646636591708, "learning_rate": 4.11525505728804e-06, "loss": 0.8549, "num_input_tokens_seen": 1344084448, "step": 7442 }, { "epoch": 0.8148006239908043, "grad_norm": 1.1706584974100211, "learning_rate": 4.110529991385706e-06, "loss": 0.9725, "num_input_tokens_seen": 1344243040, "step": 7443 }, { "epoch": 0.8149100960617423, "grad_norm": 1.0895119438829697, "learning_rate": 4.105807396667755e-06, "loss": 0.8335, "num_input_tokens_seen": 1344424928, "step": 7444 }, { "epoch": 0.8150195681326802, "grad_norm": 1.1968380038367676, "learning_rate": 4.10108727369283e-06, "loss": 1.1022, "num_input_tokens_seen": 1344641088, "step": 7445 }, { "epoch": 0.8151290402036181, "grad_norm": 1.2540540756600431, "learning_rate": 4.0963696230193385e-06, "loss": 0.9508, "num_input_tokens_seen": 1344801920, "step": 7446 }, { "epoch": 0.8152385122745559, "grad_norm": 1.2438634446897052, "learning_rate": 4.091654445205356e-06, "loss": 0.7839, "num_input_tokens_seen": 1344972160, "step": 7447 }, { "epoch": 0.8153479843454938, "grad_norm": 1.1621919490424408, "learning_rate": 4.086941740808686e-06, "loss": 0.7279, "num_input_tokens_seen": 1345161216, "step": 7448 }, { "epoch": 0.8154574564164317, "grad_norm": 1.1426591371687307, "learning_rate": 4.082231510386828e-06, "loss": 0.7182, "num_input_tokens_seen": 1345324960, "step": 7449 }, { "epoch": 0.8155669284873697, "grad_norm": 1.1652381643706193, "learning_rate": 4.077523754496987e-06, "loss": 0.9246, "num_input_tokens_seen": 1345540896, "step": 7450 }, { "epoch": 0.8156764005583076, "grad_norm": 1.3193086046709563, "learning_rate": 4.0728184736961025e-06, "loss": 0.7945, "num_input_tokens_seen": 1345674400, "step": 7451 }, { "epoch": 0.8157858726292455, "grad_norm": 1.298413203226722, "learning_rate": 4.068115668540776e-06, "loss": 0.9205, "num_input_tokens_seen": 1345828960, "step": 7452 }, { "epoch": 0.8158953447001833, "grad_norm": 1.084851057667262, "learning_rate": 4.063415339587354e-06, "loss": 0.8473, "num_input_tokens_seen": 1346009728, "step": 7453 }, { "epoch": 0.8160048167711212, "grad_norm": 1.1766758553863896, "learning_rate": 4.058717487391875e-06, "loss": 1.0731, "num_input_tokens_seen": 1346209536, "step": 7454 }, { "epoch": 0.8161142888420592, "grad_norm": 1.373309396578378, "learning_rate": 4.0540221125100835e-06, "loss": 0.9373, "num_input_tokens_seen": 1346372832, "step": 7455 }, { "epoch": 0.8162237609129971, "grad_norm": 1.0554817882215373, "learning_rate": 4.049329215497433e-06, "loss": 0.7965, "num_input_tokens_seen": 1346573984, "step": 7456 }, { "epoch": 0.816333232983935, "grad_norm": 1.1971650073538145, "learning_rate": 4.0446387969090865e-06, "loss": 0.8718, "num_input_tokens_seen": 1346783648, "step": 7457 }, { "epoch": 0.8164427050548729, "grad_norm": 1.1220027946430564, "learning_rate": 4.039950857299907e-06, "loss": 0.8114, "num_input_tokens_seen": 1346974048, "step": 7458 }, { "epoch": 0.8165521771258107, "grad_norm": 1.1759619589803112, "learning_rate": 4.0352653972244805e-06, "loss": 0.9372, "num_input_tokens_seen": 1347157504, "step": 7459 }, { "epoch": 0.8166616491967487, "grad_norm": 1.0631668018014666, "learning_rate": 4.030582417237069e-06, "loss": 0.8335, "num_input_tokens_seen": 1347339168, "step": 7460 }, { "epoch": 0.8167711212676866, "grad_norm": 1.1838062460727967, "learning_rate": 4.025901917891678e-06, "loss": 0.9939, "num_input_tokens_seen": 1347473792, "step": 7461 }, { "epoch": 0.8168805933386245, "grad_norm": 1.2341669607494827, "learning_rate": 4.021223899741993e-06, "loss": 0.9088, "num_input_tokens_seen": 1347679424, "step": 7462 }, { "epoch": 0.8169900654095624, "grad_norm": 1.1613951980925115, "learning_rate": 4.016548363341416e-06, "loss": 0.918, "num_input_tokens_seen": 1347882368, "step": 7463 }, { "epoch": 0.8170995374805002, "grad_norm": 1.2206236302240658, "learning_rate": 4.011875309243054e-06, "loss": 1.0119, "num_input_tokens_seen": 1348074784, "step": 7464 }, { "epoch": 0.8172090095514382, "grad_norm": 1.2581749624838803, "learning_rate": 4.0072047379997175e-06, "loss": 1.0402, "num_input_tokens_seen": 1348251072, "step": 7465 }, { "epoch": 0.8173184816223761, "grad_norm": 1.0911811088498513, "learning_rate": 4.002536650163938e-06, "loss": 0.674, "num_input_tokens_seen": 1348422656, "step": 7466 }, { "epoch": 0.817427953693314, "grad_norm": 1.1433175066602306, "learning_rate": 3.9978710462879206e-06, "loss": 0.8421, "num_input_tokens_seen": 1348630304, "step": 7467 }, { "epoch": 0.8175374257642519, "grad_norm": 1.160731779742193, "learning_rate": 3.993207926923623e-06, "loss": 0.9513, "num_input_tokens_seen": 1348832128, "step": 7468 }, { "epoch": 0.8176468978351898, "grad_norm": 1.0409705061018657, "learning_rate": 3.988547292622655e-06, "loss": 0.7267, "num_input_tokens_seen": 1348996544, "step": 7469 }, { "epoch": 0.8177563699061277, "grad_norm": 1.2540584959018035, "learning_rate": 3.9838891439363816e-06, "loss": 0.8311, "num_input_tokens_seen": 1349166784, "step": 7470 }, { "epoch": 0.8178658419770656, "grad_norm": 1.3087678708055968, "learning_rate": 3.979233481415848e-06, "loss": 0.9876, "num_input_tokens_seen": 1349322240, "step": 7471 }, { "epoch": 0.8179753140480035, "grad_norm": 1.4776667749611536, "learning_rate": 3.974580305611808e-06, "loss": 0.8015, "num_input_tokens_seen": 1349502784, "step": 7472 }, { "epoch": 0.8180847861189414, "grad_norm": 1.2487418518717017, "learning_rate": 3.9699296170747245e-06, "loss": 0.9212, "num_input_tokens_seen": 1349659808, "step": 7473 }, { "epoch": 0.8181942581898793, "grad_norm": 1.162498652549701, "learning_rate": 3.965281416354757e-06, "loss": 0.8054, "num_input_tokens_seen": 1349826464, "step": 7474 }, { "epoch": 0.8183037302608173, "grad_norm": 1.2687364643295687, "learning_rate": 3.9606357040018e-06, "loss": 0.9847, "num_input_tokens_seen": 1350000736, "step": 7475 }, { "epoch": 0.8184132023317551, "grad_norm": 1.2742080229606478, "learning_rate": 3.955992480565407e-06, "loss": 0.9934, "num_input_tokens_seen": 1350188448, "step": 7476 }, { "epoch": 0.818522674402693, "grad_norm": 1.0901545693230246, "learning_rate": 3.951351746594883e-06, "loss": 0.6949, "num_input_tokens_seen": 1350362272, "step": 7477 }, { "epoch": 0.8186321464736309, "grad_norm": 1.1472722064703915, "learning_rate": 3.9467135026392015e-06, "loss": 0.7585, "num_input_tokens_seen": 1350558496, "step": 7478 }, { "epoch": 0.8187416185445688, "grad_norm": 1.2094723605608741, "learning_rate": 3.94207774924707e-06, "loss": 1.0478, "num_input_tokens_seen": 1350728736, "step": 7479 }, { "epoch": 0.8188510906155068, "grad_norm": 1.2127146815950975, "learning_rate": 3.937444486966885e-06, "loss": 1.0816, "num_input_tokens_seen": 1350930112, "step": 7480 }, { "epoch": 0.8189605626864446, "grad_norm": 1.0379178618969513, "learning_rate": 3.932813716346751e-06, "loss": 0.5673, "num_input_tokens_seen": 1351106848, "step": 7481 }, { "epoch": 0.8190700347573825, "grad_norm": 1.1555161080188858, "learning_rate": 3.928185437934481e-06, "loss": 0.7269, "num_input_tokens_seen": 1351303296, "step": 7482 }, { "epoch": 0.8191795068283204, "grad_norm": 1.2486235192351498, "learning_rate": 3.923559652277586e-06, "loss": 0.9375, "num_input_tokens_seen": 1351471296, "step": 7483 }, { "epoch": 0.8192889788992583, "grad_norm": 1.3029907065884982, "learning_rate": 3.918936359923306e-06, "loss": 0.8935, "num_input_tokens_seen": 1351682976, "step": 7484 }, { "epoch": 0.8193984509701963, "grad_norm": 1.1359406000276056, "learning_rate": 3.914315561418541e-06, "loss": 0.7457, "num_input_tokens_seen": 1351848736, "step": 7485 }, { "epoch": 0.8195079230411342, "grad_norm": 1.2239697503172913, "learning_rate": 3.909697257309941e-06, "loss": 0.8354, "num_input_tokens_seen": 1352026816, "step": 7486 }, { "epoch": 0.819617395112072, "grad_norm": 1.0353003635495392, "learning_rate": 3.905081448143841e-06, "loss": 0.7392, "num_input_tokens_seen": 1352186080, "step": 7487 }, { "epoch": 0.8197268671830099, "grad_norm": 1.1695609578836101, "learning_rate": 3.9004681344662755e-06, "loss": 0.7189, "num_input_tokens_seen": 1352362368, "step": 7488 }, { "epoch": 0.8198363392539478, "grad_norm": 1.0511042442381984, "learning_rate": 3.895857316822996e-06, "loss": 0.8238, "num_input_tokens_seen": 1352541568, "step": 7489 }, { "epoch": 0.8199458113248858, "grad_norm": 1.2746954479376558, "learning_rate": 3.891248995759453e-06, "loss": 0.8071, "num_input_tokens_seen": 1352752576, "step": 7490 }, { "epoch": 0.8200552833958237, "grad_norm": 1.1962901586410795, "learning_rate": 3.886643171820797e-06, "loss": 0.8081, "num_input_tokens_seen": 1352907808, "step": 7491 }, { "epoch": 0.8201647554667616, "grad_norm": 1.154295790618542, "learning_rate": 3.882039845551888e-06, "loss": 0.8746, "num_input_tokens_seen": 1353098208, "step": 7492 }, { "epoch": 0.8202742275376994, "grad_norm": 1.3443750657596332, "learning_rate": 3.877439017497303e-06, "loss": 0.936, "num_input_tokens_seen": 1353287936, "step": 7493 }, { "epoch": 0.8203836996086373, "grad_norm": 1.402031016212137, "learning_rate": 3.872840688201299e-06, "loss": 0.8818, "num_input_tokens_seen": 1353456384, "step": 7494 }, { "epoch": 0.8204931716795753, "grad_norm": 1.1987601033760538, "learning_rate": 3.868244858207854e-06, "loss": 1.0529, "num_input_tokens_seen": 1353632224, "step": 7495 }, { "epoch": 0.8206026437505132, "grad_norm": 1.1743274711991698, "learning_rate": 3.863651528060647e-06, "loss": 0.793, "num_input_tokens_seen": 1353804480, "step": 7496 }, { "epoch": 0.8207121158214511, "grad_norm": 1.3670471991348636, "learning_rate": 3.859060698303058e-06, "loss": 1.0167, "num_input_tokens_seen": 1353968000, "step": 7497 }, { "epoch": 0.8208215878923889, "grad_norm": 1.2498497872696204, "learning_rate": 3.8544723694781706e-06, "loss": 0.5841, "num_input_tokens_seen": 1354139136, "step": 7498 }, { "epoch": 0.8209310599633268, "grad_norm": 1.260355067783969, "learning_rate": 3.849886542128784e-06, "loss": 0.9554, "num_input_tokens_seen": 1354338944, "step": 7499 }, { "epoch": 0.8210405320342647, "grad_norm": 1.3785069565623422, "learning_rate": 3.845303216797377e-06, "loss": 0.9102, "num_input_tokens_seen": 1354505152, "step": 7500 }, { "epoch": 0.8211500041052027, "grad_norm": 1.173677609357351, "learning_rate": 3.8407223940261725e-06, "loss": 0.885, "num_input_tokens_seen": 1354704512, "step": 7501 }, { "epoch": 0.8212594761761406, "grad_norm": 1.245037049351224, "learning_rate": 3.8361440743570456e-06, "loss": 0.9483, "num_input_tokens_seen": 1354901408, "step": 7502 }, { "epoch": 0.8213689482470785, "grad_norm": 1.1249995761446685, "learning_rate": 3.8315682583316224e-06, "loss": 0.8181, "num_input_tokens_seen": 1355066720, "step": 7503 }, { "epoch": 0.8214784203180163, "grad_norm": 1.10158793102155, "learning_rate": 3.826994946491208e-06, "loss": 0.9978, "num_input_tokens_seen": 1355258688, "step": 7504 }, { "epoch": 0.8215878923889542, "grad_norm": 1.025046677805898, "learning_rate": 3.822424139376815e-06, "loss": 0.6953, "num_input_tokens_seen": 1355456704, "step": 7505 }, { "epoch": 0.8216973644598922, "grad_norm": 1.1361065556425267, "learning_rate": 3.817855837529164e-06, "loss": 0.8468, "num_input_tokens_seen": 1355641280, "step": 7506 }, { "epoch": 0.8218068365308301, "grad_norm": 1.2509898081546513, "learning_rate": 3.8132900414886653e-06, "loss": 0.9035, "num_input_tokens_seen": 1355810848, "step": 7507 }, { "epoch": 0.821916308601768, "grad_norm": 1.1194947433109739, "learning_rate": 3.8087267517954633e-06, "loss": 0.8664, "num_input_tokens_seen": 1355988032, "step": 7508 }, { "epoch": 0.8220257806727059, "grad_norm": 1.0844075794437729, "learning_rate": 3.8041659689893677e-06, "loss": 0.8168, "num_input_tokens_seen": 1356180672, "step": 7509 }, { "epoch": 0.8221352527436437, "grad_norm": 1.2303496287980735, "learning_rate": 3.799607693609927e-06, "loss": 0.7194, "num_input_tokens_seen": 1356344416, "step": 7510 }, { "epoch": 0.8222447248145817, "grad_norm": 1.2282774289494762, "learning_rate": 3.795051926196358e-06, "loss": 0.7303, "num_input_tokens_seen": 1356494048, "step": 7511 }, { "epoch": 0.8223541968855196, "grad_norm": 1.1320372987405256, "learning_rate": 3.7904986672876146e-06, "loss": 1.0664, "num_input_tokens_seen": 1356677504, "step": 7512 }, { "epoch": 0.8224636689564575, "grad_norm": 1.1945470715330113, "learning_rate": 3.7859479174223333e-06, "loss": 0.8093, "num_input_tokens_seen": 1356860960, "step": 7513 }, { "epoch": 0.8225731410273954, "grad_norm": 1.097401816857671, "learning_rate": 3.7813996771388583e-06, "loss": 0.8858, "num_input_tokens_seen": 1357061888, "step": 7514 }, { "epoch": 0.8226826130983332, "grad_norm": 1.446058008874439, "learning_rate": 3.7768539469752397e-06, "loss": 1.0126, "num_input_tokens_seen": 1357222720, "step": 7515 }, { "epoch": 0.8227920851692712, "grad_norm": 1.2199262664745196, "learning_rate": 3.7723107274692193e-06, "loss": 0.8464, "num_input_tokens_seen": 1357406848, "step": 7516 }, { "epoch": 0.8229015572402091, "grad_norm": 1.2144790543579256, "learning_rate": 3.767770019158273e-06, "loss": 0.8982, "num_input_tokens_seen": 1357585824, "step": 7517 }, { "epoch": 0.823011029311147, "grad_norm": 1.1972024972524449, "learning_rate": 3.76323182257953e-06, "loss": 0.8991, "num_input_tokens_seen": 1357799968, "step": 7518 }, { "epoch": 0.8231205013820849, "grad_norm": 1.1588814150003455, "learning_rate": 3.758696138269874e-06, "loss": 0.839, "num_input_tokens_seen": 1357962816, "step": 7519 }, { "epoch": 0.8232299734530228, "grad_norm": 1.045309804049039, "learning_rate": 3.7541629667658564e-06, "loss": 0.6424, "num_input_tokens_seen": 1358142688, "step": 7520 }, { "epoch": 0.8233394455239607, "grad_norm": 1.2276047572945215, "learning_rate": 3.7496323086037456e-06, "loss": 0.8478, "num_input_tokens_seen": 1358314720, "step": 7521 }, { "epoch": 0.8234489175948986, "grad_norm": 1.1822351898730616, "learning_rate": 3.7451041643195074e-06, "loss": 0.8221, "num_input_tokens_seen": 1358465024, "step": 7522 }, { "epoch": 0.8235583896658365, "grad_norm": 1.1502822799049979, "learning_rate": 3.7405785344488157e-06, "loss": 0.9457, "num_input_tokens_seen": 1358640640, "step": 7523 }, { "epoch": 0.8236678617367744, "grad_norm": 1.1213989374082596, "learning_rate": 3.7360554195270403e-06, "loss": 0.75, "num_input_tokens_seen": 1358835520, "step": 7524 }, { "epoch": 0.8237773338077123, "grad_norm": 1.297478822524278, "learning_rate": 3.731534820089255e-06, "loss": 1.0283, "num_input_tokens_seen": 1359009792, "step": 7525 }, { "epoch": 0.8238868058786503, "grad_norm": 1.178231003340848, "learning_rate": 3.727016736670247e-06, "loss": 0.9679, "num_input_tokens_seen": 1359193472, "step": 7526 }, { "epoch": 0.8239962779495881, "grad_norm": 1.3158324035116191, "learning_rate": 3.722501169804493e-06, "loss": 1.0486, "num_input_tokens_seen": 1359382080, "step": 7527 }, { "epoch": 0.824105750020526, "grad_norm": 1.207392814333406, "learning_rate": 3.7179881200261753e-06, "loss": 0.8244, "num_input_tokens_seen": 1359566208, "step": 7528 }, { "epoch": 0.8242152220914639, "grad_norm": 1.2934516857666063, "learning_rate": 3.7134775878691767e-06, "loss": 0.8143, "num_input_tokens_seen": 1359730848, "step": 7529 }, { "epoch": 0.8243246941624018, "grad_norm": 1.0831747244610366, "learning_rate": 3.708969573867088e-06, "loss": 0.6925, "num_input_tokens_seen": 1359911392, "step": 7530 }, { "epoch": 0.8244341662333398, "grad_norm": 1.3644002786794314, "learning_rate": 3.7044640785531966e-06, "loss": 1.038, "num_input_tokens_seen": 1360111648, "step": 7531 }, { "epoch": 0.8245436383042776, "grad_norm": 1.209254270141078, "learning_rate": 3.699961102460495e-06, "loss": 0.6033, "num_input_tokens_seen": 1360298688, "step": 7532 }, { "epoch": 0.8246531103752155, "grad_norm": 1.24350217909367, "learning_rate": 3.6954606461216706e-06, "loss": 0.8414, "num_input_tokens_seen": 1360473856, "step": 7533 }, { "epoch": 0.8247625824461534, "grad_norm": 1.1892313131837362, "learning_rate": 3.6909627100691293e-06, "loss": 0.8728, "num_input_tokens_seen": 1360681952, "step": 7534 }, { "epoch": 0.8248720545170913, "grad_norm": 1.2382741741375367, "learning_rate": 3.686467294834964e-06, "loss": 0.9224, "num_input_tokens_seen": 1360843232, "step": 7535 }, { "epoch": 0.8249815265880293, "grad_norm": 1.11383530064265, "learning_rate": 3.6819744009509715e-06, "loss": 0.9103, "num_input_tokens_seen": 1360996448, "step": 7536 }, { "epoch": 0.8250909986589672, "grad_norm": 1.2221151501302638, "learning_rate": 3.677484028948658e-06, "loss": 0.816, "num_input_tokens_seen": 1361169600, "step": 7537 }, { "epoch": 0.825200470729905, "grad_norm": 1.2242672577713283, "learning_rate": 3.67299617935922e-06, "loss": 0.8759, "num_input_tokens_seen": 1361341856, "step": 7538 }, { "epoch": 0.8253099428008429, "grad_norm": 1.2778003588123208, "learning_rate": 3.6685108527135635e-06, "loss": 0.8793, "num_input_tokens_seen": 1361507168, "step": 7539 }, { "epoch": 0.8254194148717808, "grad_norm": 1.1705765206094438, "learning_rate": 3.664028049542287e-06, "loss": 0.8645, "num_input_tokens_seen": 1361678528, "step": 7540 }, { "epoch": 0.8255288869427188, "grad_norm": 1.15706744810324, "learning_rate": 3.659547770375718e-06, "loss": 0.9495, "num_input_tokens_seen": 1361865792, "step": 7541 }, { "epoch": 0.8256383590136567, "grad_norm": 1.2070652731634122, "learning_rate": 3.655070015743839e-06, "loss": 1.0012, "num_input_tokens_seen": 1362046336, "step": 7542 }, { "epoch": 0.8257478310845946, "grad_norm": 1.4438332075441662, "learning_rate": 3.6505947861763867e-06, "loss": 0.8556, "num_input_tokens_seen": 1362197984, "step": 7543 }, { "epoch": 0.8258573031555324, "grad_norm": 1.179950406923559, "learning_rate": 3.6461220822027437e-06, "loss": 0.8818, "num_input_tokens_seen": 1362373600, "step": 7544 }, { "epoch": 0.8259667752264703, "grad_norm": 1.0850411427515654, "learning_rate": 3.641651904352045e-06, "loss": 0.6813, "num_input_tokens_seen": 1362539136, "step": 7545 }, { "epoch": 0.8260762472974083, "grad_norm": 1.2525730829750672, "learning_rate": 3.637184253153095e-06, "loss": 1.0741, "num_input_tokens_seen": 1362695264, "step": 7546 }, { "epoch": 0.8261857193683462, "grad_norm": 1.09813541257608, "learning_rate": 3.6327191291344015e-06, "loss": 0.7019, "num_input_tokens_seen": 1362886112, "step": 7547 }, { "epoch": 0.8262951914392841, "grad_norm": 1.22211978343386, "learning_rate": 3.6282565328242007e-06, "loss": 0.9411, "num_input_tokens_seen": 1363078752, "step": 7548 }, { "epoch": 0.8264046635102219, "grad_norm": 1.1383176484896098, "learning_rate": 3.623796464750384e-06, "loss": 0.8995, "num_input_tokens_seen": 1363248320, "step": 7549 }, { "epoch": 0.8265141355811598, "grad_norm": 1.3055683292052607, "learning_rate": 3.6193389254405934e-06, "loss": 0.9078, "num_input_tokens_seen": 1363435808, "step": 7550 }, { "epoch": 0.8266236076520977, "grad_norm": 1.1847449014412719, "learning_rate": 3.6148839154221236e-06, "loss": 0.6517, "num_input_tokens_seen": 1363602688, "step": 7551 }, { "epoch": 0.8267330797230357, "grad_norm": 1.0955597754831998, "learning_rate": 3.610431435222017e-06, "loss": 0.7979, "num_input_tokens_seen": 1363788384, "step": 7552 }, { "epoch": 0.8268425517939736, "grad_norm": 1.1506671670276825, "learning_rate": 3.605981485366969e-06, "loss": 0.7443, "num_input_tokens_seen": 1363944288, "step": 7553 }, { "epoch": 0.8269520238649115, "grad_norm": 1.1903261385311827, "learning_rate": 3.601534066383419e-06, "loss": 0.9126, "num_input_tokens_seen": 1364135360, "step": 7554 }, { "epoch": 0.8270614959358493, "grad_norm": 1.187337814097345, "learning_rate": 3.597089178797483e-06, "loss": 0.761, "num_input_tokens_seen": 1364321280, "step": 7555 }, { "epoch": 0.8271709680067872, "grad_norm": 1.2002298770704303, "learning_rate": 3.5926468231349817e-06, "loss": 1.2141, "num_input_tokens_seen": 1364514816, "step": 7556 }, { "epoch": 0.8272804400777252, "grad_norm": 1.2425514024045414, "learning_rate": 3.5882069999214366e-06, "loss": 0.7947, "num_input_tokens_seen": 1364708800, "step": 7557 }, { "epoch": 0.8273899121486631, "grad_norm": 1.1230025572947244, "learning_rate": 3.583769709682064e-06, "loss": 0.8245, "num_input_tokens_seen": 1364902784, "step": 7558 }, { "epoch": 0.827499384219601, "grad_norm": 1.1899274810364422, "learning_rate": 3.579334952941807e-06, "loss": 0.8493, "num_input_tokens_seen": 1365069888, "step": 7559 }, { "epoch": 0.8276088562905389, "grad_norm": 1.1814761717302873, "learning_rate": 3.574902730225263e-06, "loss": 0.8438, "num_input_tokens_seen": 1365244608, "step": 7560 }, { "epoch": 0.8277183283614767, "grad_norm": 1.070288553457311, "learning_rate": 3.570473042056777e-06, "loss": 0.9277, "num_input_tokens_seen": 1365436800, "step": 7561 }, { "epoch": 0.8278278004324147, "grad_norm": 1.1234081450169957, "learning_rate": 3.5660458889603594e-06, "loss": 0.9307, "num_input_tokens_seen": 1365613984, "step": 7562 }, { "epoch": 0.8279372725033526, "grad_norm": 1.1394984286315448, "learning_rate": 3.561621271459742e-06, "loss": 0.8611, "num_input_tokens_seen": 1365812448, "step": 7563 }, { "epoch": 0.8280467445742905, "grad_norm": 1.1864424564057292, "learning_rate": 3.557199190078342e-06, "loss": 0.9351, "num_input_tokens_seen": 1365991872, "step": 7564 }, { "epoch": 0.8281562166452284, "grad_norm": 1.3386701331726978, "learning_rate": 3.5527796453392882e-06, "loss": 0.9575, "num_input_tokens_seen": 1366165248, "step": 7565 }, { "epoch": 0.8282656887161662, "grad_norm": 1.0889801103686436, "learning_rate": 3.548362637765401e-06, "loss": 0.9666, "num_input_tokens_seen": 1366350496, "step": 7566 }, { "epoch": 0.8283751607871042, "grad_norm": 1.214348891562154, "learning_rate": 3.543948167879202e-06, "loss": 0.8587, "num_input_tokens_seen": 1366517376, "step": 7567 }, { "epoch": 0.8284846328580421, "grad_norm": 1.1438252742156778, "learning_rate": 3.5395362362029198e-06, "loss": 0.7803, "num_input_tokens_seen": 1366671712, "step": 7568 }, { "epoch": 0.82859410492898, "grad_norm": 1.151525899028512, "learning_rate": 3.5351268432584796e-06, "loss": 0.6866, "num_input_tokens_seen": 1366827616, "step": 7569 }, { "epoch": 0.8287035769999179, "grad_norm": 1.2072039730598185, "learning_rate": 3.5307199895674963e-06, "loss": 0.9221, "num_input_tokens_seen": 1366995616, "step": 7570 }, { "epoch": 0.8288130490708558, "grad_norm": 1.2059530732749952, "learning_rate": 3.5263156756512983e-06, "loss": 0.9502, "num_input_tokens_seen": 1367181536, "step": 7571 }, { "epoch": 0.8289225211417937, "grad_norm": 1.2356864143665105, "learning_rate": 3.521913902030902e-06, "loss": 0.859, "num_input_tokens_seen": 1367358496, "step": 7572 }, { "epoch": 0.8290319932127316, "grad_norm": 1.2739168914683596, "learning_rate": 3.5175146692270344e-06, "loss": 0.8655, "num_input_tokens_seen": 1367540832, "step": 7573 }, { "epoch": 0.8291414652836695, "grad_norm": 1.1416433308761902, "learning_rate": 3.5131179777601136e-06, "loss": 0.808, "num_input_tokens_seen": 1367724960, "step": 7574 }, { "epoch": 0.8292509373546074, "grad_norm": 1.300266610829947, "learning_rate": 3.508723828150254e-06, "loss": 0.7754, "num_input_tokens_seen": 1367863616, "step": 7575 }, { "epoch": 0.8293604094255453, "grad_norm": 1.256203991251464, "learning_rate": 3.504332220917289e-06, "loss": 1.0484, "num_input_tokens_seen": 1368075072, "step": 7576 }, { "epoch": 0.8294698814964833, "grad_norm": 1.239946274824064, "learning_rate": 3.499943156580726e-06, "loss": 0.9036, "num_input_tokens_seen": 1368244192, "step": 7577 }, { "epoch": 0.8295793535674211, "grad_norm": 1.3706416936258736, "learning_rate": 3.4955566356597887e-06, "loss": 1.1906, "num_input_tokens_seen": 1368401888, "step": 7578 }, { "epoch": 0.829688825638359, "grad_norm": 1.2389938277736618, "learning_rate": 3.491172658673392e-06, "loss": 1.1065, "num_input_tokens_seen": 1368578848, "step": 7579 }, { "epoch": 0.8297982977092969, "grad_norm": 1.1583953749327245, "learning_rate": 3.4867912261401458e-06, "loss": 0.903, "num_input_tokens_seen": 1368767680, "step": 7580 }, { "epoch": 0.8299077697802348, "grad_norm": 1.1240409896103245, "learning_rate": 3.4824123385783807e-06, "loss": 0.7845, "num_input_tokens_seen": 1368932992, "step": 7581 }, { "epoch": 0.8300172418511728, "grad_norm": 1.1244304062750652, "learning_rate": 3.4780359965060934e-06, "loss": 0.7248, "num_input_tokens_seen": 1369096064, "step": 7582 }, { "epoch": 0.8301267139221106, "grad_norm": 1.1317849596209604, "learning_rate": 3.4736622004410136e-06, "loss": 0.8587, "num_input_tokens_seen": 1369294752, "step": 7583 }, { "epoch": 0.8302361859930485, "grad_norm": 1.2588399635600611, "learning_rate": 3.469290950900533e-06, "loss": 0.9185, "num_input_tokens_seen": 1369470816, "step": 7584 }, { "epoch": 0.8303456580639864, "grad_norm": 1.1608793015608292, "learning_rate": 3.4649222484017836e-06, "loss": 1.2163, "num_input_tokens_seen": 1369676448, "step": 7585 }, { "epoch": 0.8304551301349243, "grad_norm": 1.1534881598861597, "learning_rate": 3.460556093461556e-06, "loss": 0.8272, "num_input_tokens_seen": 1369842656, "step": 7586 }, { "epoch": 0.8305646022058623, "grad_norm": 1.1852097762611973, "learning_rate": 3.4561924865963685e-06, "loss": 0.8145, "num_input_tokens_seen": 1370022304, "step": 7587 }, { "epoch": 0.8306740742768002, "grad_norm": 1.1385397466356586, "learning_rate": 3.4518314283224275e-06, "loss": 1.0451, "num_input_tokens_seen": 1370218752, "step": 7588 }, { "epoch": 0.830783546347738, "grad_norm": 1.2272182582558844, "learning_rate": 3.447472919155628e-06, "loss": 0.6608, "num_input_tokens_seen": 1370383840, "step": 7589 }, { "epoch": 0.8308930184186759, "grad_norm": 1.2175704432010492, "learning_rate": 3.443116959611592e-06, "loss": 0.7695, "num_input_tokens_seen": 1370551616, "step": 7590 }, { "epoch": 0.8310024904896138, "grad_norm": 1.1058337683965482, "learning_rate": 3.438763550205601e-06, "loss": 0.7232, "num_input_tokens_seen": 1370696992, "step": 7591 }, { "epoch": 0.8311119625605518, "grad_norm": 1.064990378770665, "learning_rate": 3.4344126914526735e-06, "loss": 0.6909, "num_input_tokens_seen": 1370866560, "step": 7592 }, { "epoch": 0.8312214346314897, "grad_norm": 1.428587193061495, "learning_rate": 3.430064383867487e-06, "loss": 1.0863, "num_input_tokens_seen": 1371055392, "step": 7593 }, { "epoch": 0.8313309067024276, "grad_norm": 1.046842432227208, "learning_rate": 3.4257186279644554e-06, "loss": 0.8436, "num_input_tokens_seen": 1371222720, "step": 7594 }, { "epoch": 0.8314403787733654, "grad_norm": 1.1831421446930839, "learning_rate": 3.4213754242576668e-06, "loss": 0.8846, "num_input_tokens_seen": 1371439328, "step": 7595 }, { "epoch": 0.8315498508443033, "grad_norm": 1.1983826546385243, "learning_rate": 3.417034773260913e-06, "loss": 0.7815, "num_input_tokens_seen": 1371591200, "step": 7596 }, { "epoch": 0.8316593229152413, "grad_norm": 1.1995748680763878, "learning_rate": 3.4126966754876867e-06, "loss": 1.1292, "num_input_tokens_seen": 1371776224, "step": 7597 }, { "epoch": 0.8317687949861792, "grad_norm": 1.0134015908174017, "learning_rate": 3.4083611314511763e-06, "loss": 0.7694, "num_input_tokens_seen": 1371977600, "step": 7598 }, { "epoch": 0.8318782670571171, "grad_norm": 1.1250740132886825, "learning_rate": 3.4040281416642672e-06, "loss": 0.9288, "num_input_tokens_seen": 1372148288, "step": 7599 }, { "epoch": 0.8319877391280549, "grad_norm": 1.1019400632693346, "learning_rate": 3.3996977066395376e-06, "loss": 0.8855, "num_input_tokens_seen": 1372305536, "step": 7600 }, { "epoch": 0.8320972111989928, "grad_norm": 1.1207631118691566, "learning_rate": 3.3953698268892857e-06, "loss": 0.6935, "num_input_tokens_seen": 1372483840, "step": 7601 }, { "epoch": 0.8322066832699307, "grad_norm": 1.1567135474482089, "learning_rate": 3.391044502925478e-06, "loss": 0.8322, "num_input_tokens_seen": 1372691712, "step": 7602 }, { "epoch": 0.8323161553408687, "grad_norm": 1.2975882441771338, "learning_rate": 3.3867217352597984e-06, "loss": 0.9013, "num_input_tokens_seen": 1372867104, "step": 7603 }, { "epoch": 0.8324256274118066, "grad_norm": 1.241804675725415, "learning_rate": 3.3824015244036222e-06, "loss": 0.9737, "num_input_tokens_seen": 1373060192, "step": 7604 }, { "epoch": 0.8325350994827445, "grad_norm": 1.1630107044963918, "learning_rate": 3.3780838708680153e-06, "loss": 0.7662, "num_input_tokens_seen": 1373264704, "step": 7605 }, { "epoch": 0.8326445715536823, "grad_norm": 1.2872926526555384, "learning_rate": 3.373768775163755e-06, "loss": 0.831, "num_input_tokens_seen": 1373380736, "step": 7606 }, { "epoch": 0.8327540436246202, "grad_norm": 1.4090541009066762, "learning_rate": 3.3694562378013076e-06, "loss": 0.8734, "num_input_tokens_seen": 1373528576, "step": 7607 }, { "epoch": 0.8328635156955582, "grad_norm": 1.0294182432595917, "learning_rate": 3.3651462592908275e-06, "loss": 0.8528, "num_input_tokens_seen": 1373733312, "step": 7608 }, { "epoch": 0.8329729877664961, "grad_norm": 1.118145245389566, "learning_rate": 3.3608388401421943e-06, "loss": 0.7326, "num_input_tokens_seen": 1373911168, "step": 7609 }, { "epoch": 0.833082459837434, "grad_norm": 1.078615657446313, "learning_rate": 3.356533980864959e-06, "loss": 0.8214, "num_input_tokens_seen": 1374110080, "step": 7610 }, { "epoch": 0.8331919319083719, "grad_norm": 1.2361682469072992, "learning_rate": 3.352231681968379e-06, "loss": 0.9811, "num_input_tokens_seen": 1374275616, "step": 7611 }, { "epoch": 0.8333014039793097, "grad_norm": 1.1547892854709474, "learning_rate": 3.347931943961405e-06, "loss": 0.7587, "num_input_tokens_seen": 1374459296, "step": 7612 }, { "epoch": 0.8334108760502477, "grad_norm": 1.1140735684523138, "learning_rate": 3.3436347673526936e-06, "loss": 0.8577, "num_input_tokens_seen": 1374632672, "step": 7613 }, { "epoch": 0.8335203481211856, "grad_norm": 1.1456209795143018, "learning_rate": 3.3393401526505856e-06, "loss": 0.721, "num_input_tokens_seen": 1374798432, "step": 7614 }, { "epoch": 0.8336298201921235, "grad_norm": 1.159966655613695, "learning_rate": 3.335048100363125e-06, "loss": 1.0255, "num_input_tokens_seen": 1374968224, "step": 7615 }, { "epoch": 0.8337392922630614, "grad_norm": 1.2060639786458918, "learning_rate": 3.330758610998072e-06, "loss": 0.9368, "num_input_tokens_seen": 1375150112, "step": 7616 }, { "epoch": 0.8338487643339992, "grad_norm": 1.2357746832047667, "learning_rate": 3.3264716850628375e-06, "loss": 0.8971, "num_input_tokens_seen": 1375333792, "step": 7617 }, { "epoch": 0.8339582364049372, "grad_norm": 1.2590788159292015, "learning_rate": 3.322187323064574e-06, "loss": 1.0951, "num_input_tokens_seen": 1375522624, "step": 7618 }, { "epoch": 0.8340677084758751, "grad_norm": 1.0383035856877028, "learning_rate": 3.3179055255101096e-06, "loss": 0.8395, "num_input_tokens_seen": 1375707872, "step": 7619 }, { "epoch": 0.834177180546813, "grad_norm": 1.0255229174333005, "learning_rate": 3.3136262929059746e-06, "loss": 0.8208, "num_input_tokens_seen": 1375885056, "step": 7620 }, { "epoch": 0.8342866526177509, "grad_norm": 1.0084725517656863, "learning_rate": 3.30934962575839e-06, "loss": 0.8335, "num_input_tokens_seen": 1376069184, "step": 7621 }, { "epoch": 0.8343961246886888, "grad_norm": 1.1769095548559707, "learning_rate": 3.3050755245732758e-06, "loss": 0.8384, "num_input_tokens_seen": 1376243232, "step": 7622 }, { "epoch": 0.8345055967596267, "grad_norm": 1.131917297230173, "learning_rate": 3.3008039898562603e-06, "loss": 1.0023, "num_input_tokens_seen": 1376403616, "step": 7623 }, { "epoch": 0.8346150688305646, "grad_norm": 1.2162916870657332, "learning_rate": 3.296535022112643e-06, "loss": 1.0856, "num_input_tokens_seen": 1376597824, "step": 7624 }, { "epoch": 0.8347245409015025, "grad_norm": 1.1471995214187283, "learning_rate": 3.2922686218474524e-06, "loss": 0.8147, "num_input_tokens_seen": 1376777920, "step": 7625 }, { "epoch": 0.8348340129724404, "grad_norm": 1.1347445618977563, "learning_rate": 3.288004789565377e-06, "loss": 0.7421, "num_input_tokens_seen": 1376900672, "step": 7626 }, { "epoch": 0.8349434850433783, "grad_norm": 1.1867620283199924, "learning_rate": 3.2837435257708383e-06, "loss": 0.851, "num_input_tokens_seen": 1377093088, "step": 7627 }, { "epoch": 0.8350529571143163, "grad_norm": 1.114673640552939, "learning_rate": 3.2794848309679134e-06, "loss": 0.9608, "num_input_tokens_seen": 1377285280, "step": 7628 }, { "epoch": 0.8351624291852541, "grad_norm": 1.122620768772804, "learning_rate": 3.2752287056604187e-06, "loss": 0.7284, "num_input_tokens_seen": 1377472544, "step": 7629 }, { "epoch": 0.835271901256192, "grad_norm": 1.1477350175494945, "learning_rate": 3.270975150351835e-06, "loss": 0.8578, "num_input_tokens_seen": 1377643008, "step": 7630 }, { "epoch": 0.8353813733271299, "grad_norm": 1.2603387519567262, "learning_rate": 3.2667241655453485e-06, "loss": 0.9372, "num_input_tokens_seen": 1377822656, "step": 7631 }, { "epoch": 0.8354908453980678, "grad_norm": 1.2906893088365168, "learning_rate": 3.262475751743857e-06, "loss": 1.0047, "num_input_tokens_seen": 1377994016, "step": 7632 }, { "epoch": 0.8356003174690058, "grad_norm": 1.08424509023338, "learning_rate": 3.2582299094499168e-06, "loss": 0.6135, "num_input_tokens_seen": 1378154400, "step": 7633 }, { "epoch": 0.8357097895399436, "grad_norm": 1.1899273808544593, "learning_rate": 3.253986639165826e-06, "loss": 0.7317, "num_input_tokens_seen": 1378318368, "step": 7634 }, { "epoch": 0.8358192616108815, "grad_norm": 1.0526679589547683, "learning_rate": 3.2497459413935337e-06, "loss": 0.8143, "num_input_tokens_seen": 1378525568, "step": 7635 }, { "epoch": 0.8359287336818194, "grad_norm": 1.0825455748058266, "learning_rate": 3.2455078166347242e-06, "loss": 0.7541, "num_input_tokens_seen": 1378729408, "step": 7636 }, { "epoch": 0.8360382057527573, "grad_norm": 1.3421016163820731, "learning_rate": 3.241272265390752e-06, "loss": 0.7979, "num_input_tokens_seen": 1378898528, "step": 7637 }, { "epoch": 0.8361476778236953, "grad_norm": 1.0979422743028961, "learning_rate": 3.2370392881626743e-06, "loss": 0.8118, "num_input_tokens_seen": 1379094976, "step": 7638 }, { "epoch": 0.8362571498946332, "grad_norm": 1.212039126475949, "learning_rate": 3.232808885451244e-06, "loss": 0.7194, "num_input_tokens_seen": 1379291424, "step": 7639 }, { "epoch": 0.836366621965571, "grad_norm": 1.138523883919198, "learning_rate": 3.228581057756913e-06, "loss": 0.692, "num_input_tokens_seen": 1379436800, "step": 7640 }, { "epoch": 0.8364760940365089, "grad_norm": 1.123822708832833, "learning_rate": 3.2243558055798234e-06, "loss": 0.9853, "num_input_tokens_seen": 1379622272, "step": 7641 }, { "epoch": 0.8365855661074468, "grad_norm": 1.175990342720109, "learning_rate": 3.2201331294198057e-06, "loss": 0.606, "num_input_tokens_seen": 1379767424, "step": 7642 }, { "epoch": 0.8366950381783848, "grad_norm": 1.3468244998961476, "learning_rate": 3.21591302977641e-06, "loss": 1.0203, "num_input_tokens_seen": 1379947296, "step": 7643 }, { "epoch": 0.8368045102493227, "grad_norm": 1.2074163125143578, "learning_rate": 3.2116955071488597e-06, "loss": 0.9275, "num_input_tokens_seen": 1380128960, "step": 7644 }, { "epoch": 0.8369139823202606, "grad_norm": 1.1770988001974731, "learning_rate": 3.2074805620360775e-06, "loss": 1.1151, "num_input_tokens_seen": 1380315776, "step": 7645 }, { "epoch": 0.8370234543911984, "grad_norm": 1.0688790516710183, "learning_rate": 3.2032681949366845e-06, "loss": 1.0043, "num_input_tokens_seen": 1380523872, "step": 7646 }, { "epoch": 0.8371329264621363, "grad_norm": 1.2377783298567298, "learning_rate": 3.1990584063489955e-06, "loss": 1.0074, "num_input_tokens_seen": 1380703520, "step": 7647 }, { "epoch": 0.8372423985330743, "grad_norm": 1.2494199837643758, "learning_rate": 3.194851196771015e-06, "loss": 0.7592, "num_input_tokens_seen": 1380868160, "step": 7648 }, { "epoch": 0.8373518706040122, "grad_norm": 1.2382928023416169, "learning_rate": 3.190646566700464e-06, "loss": 0.9049, "num_input_tokens_seen": 1381070208, "step": 7649 }, { "epoch": 0.8374613426749501, "grad_norm": 1.2082013134799405, "learning_rate": 3.1864445166347235e-06, "loss": 1.0248, "num_input_tokens_seen": 1381256576, "step": 7650 }, { "epoch": 0.8375708147458879, "grad_norm": 1.2337730968288367, "learning_rate": 3.1822450470709003e-06, "loss": 0.9314, "num_input_tokens_seen": 1381438016, "step": 7651 }, { "epoch": 0.8376802868168258, "grad_norm": 1.247561985914542, "learning_rate": 3.178048158505778e-06, "loss": 0.8797, "num_input_tokens_seen": 1381624832, "step": 7652 }, { "epoch": 0.8377897588877637, "grad_norm": 1.1644023936679182, "learning_rate": 3.1738538514358457e-06, "loss": 0.8262, "num_input_tokens_seen": 1381786560, "step": 7653 }, { "epoch": 0.8378992309587017, "grad_norm": 1.2259153857972664, "learning_rate": 3.1696621263572755e-06, "loss": 0.8704, "num_input_tokens_seen": 1381950528, "step": 7654 }, { "epoch": 0.8380087030296396, "grad_norm": 1.186001886723322, "learning_rate": 3.165472983765938e-06, "loss": 0.7766, "num_input_tokens_seen": 1382095232, "step": 7655 }, { "epoch": 0.8381181751005775, "grad_norm": 1.0557145945605526, "learning_rate": 3.161286424157417e-06, "loss": 0.6433, "num_input_tokens_seen": 1382256960, "step": 7656 }, { "epoch": 0.8382276471715153, "grad_norm": 1.1704791084129615, "learning_rate": 3.1571024480269524e-06, "loss": 0.8679, "num_input_tokens_seen": 1382443104, "step": 7657 }, { "epoch": 0.8383371192424532, "grad_norm": 1.052082319857169, "learning_rate": 3.152921055869523e-06, "loss": 0.7664, "num_input_tokens_seen": 1382631488, "step": 7658 }, { "epoch": 0.8384465913133912, "grad_norm": 1.3005784251770482, "learning_rate": 3.1487422481797565e-06, "loss": 0.9303, "num_input_tokens_seen": 1382831520, "step": 7659 }, { "epoch": 0.8385560633843291, "grad_norm": 1.2437555265064235, "learning_rate": 3.1445660254520173e-06, "loss": 0.9343, "num_input_tokens_seen": 1383037824, "step": 7660 }, { "epoch": 0.838665535455267, "grad_norm": 1.1875976221213043, "learning_rate": 3.1403923881803354e-06, "loss": 0.7319, "num_input_tokens_seen": 1383221728, "step": 7661 }, { "epoch": 0.8387750075262049, "grad_norm": 1.2246398921978816, "learning_rate": 3.1362213368584442e-06, "loss": 1.0251, "num_input_tokens_seen": 1383397120, "step": 7662 }, { "epoch": 0.8388844795971427, "grad_norm": 1.311301592529163, "learning_rate": 3.132052871979774e-06, "loss": 0.9082, "num_input_tokens_seen": 1383594464, "step": 7663 }, { "epoch": 0.8389939516680807, "grad_norm": 1.2995532313500535, "learning_rate": 3.1278869940374378e-06, "loss": 1.176, "num_input_tokens_seen": 1383768064, "step": 7664 }, { "epoch": 0.8391034237390186, "grad_norm": 1.1292144460293891, "learning_rate": 3.12372370352427e-06, "loss": 0.7747, "num_input_tokens_seen": 1383945024, "step": 7665 }, { "epoch": 0.8392128958099565, "grad_norm": 1.0507275876094848, "learning_rate": 3.119563000932757e-06, "loss": 0.8138, "num_input_tokens_seen": 1384124896, "step": 7666 }, { "epoch": 0.8393223678808944, "grad_norm": 1.1886305697482404, "learning_rate": 3.115404886755122e-06, "loss": 0.813, "num_input_tokens_seen": 1384302976, "step": 7667 }, { "epoch": 0.8394318399518322, "grad_norm": 1.3704863504330598, "learning_rate": 3.1112493614832426e-06, "loss": 0.818, "num_input_tokens_seen": 1384450592, "step": 7668 }, { "epoch": 0.8395413120227702, "grad_norm": 1.157320351964761, "learning_rate": 3.107096425608727e-06, "loss": 0.9293, "num_input_tokens_seen": 1384660928, "step": 7669 }, { "epoch": 0.8396507840937081, "grad_norm": 1.1831201795670236, "learning_rate": 3.1029460796228483e-06, "loss": 0.7254, "num_input_tokens_seen": 1384825792, "step": 7670 }, { "epoch": 0.839760256164646, "grad_norm": 1.2775824554170288, "learning_rate": 3.0987983240165914e-06, "loss": 1.048, "num_input_tokens_seen": 1385018880, "step": 7671 }, { "epoch": 0.8398697282355839, "grad_norm": 1.1522532282043512, "learning_rate": 3.0946531592806222e-06, "loss": 0.721, "num_input_tokens_seen": 1385186208, "step": 7672 }, { "epoch": 0.8399792003065218, "grad_norm": 1.1375428977674509, "learning_rate": 3.0905105859053068e-06, "loss": 0.8112, "num_input_tokens_seen": 1385369888, "step": 7673 }, { "epoch": 0.8400886723774597, "grad_norm": 1.1433986228303354, "learning_rate": 3.0863706043807115e-06, "loss": 0.8217, "num_input_tokens_seen": 1385575520, "step": 7674 }, { "epoch": 0.8401981444483976, "grad_norm": 1.0908842418109137, "learning_rate": 3.0822332151965754e-06, "loss": 0.6689, "num_input_tokens_seen": 1385768384, "step": 7675 }, { "epoch": 0.8403076165193355, "grad_norm": 1.1738192895947117, "learning_rate": 3.078098418842354e-06, "loss": 0.8717, "num_input_tokens_seen": 1385946464, "step": 7676 }, { "epoch": 0.8404170885902734, "grad_norm": 1.046208867183993, "learning_rate": 3.073966215807181e-06, "loss": 0.7271, "num_input_tokens_seen": 1386109760, "step": 7677 }, { "epoch": 0.8405265606612113, "grad_norm": 1.0564881953434333, "learning_rate": 3.06983660657989e-06, "loss": 0.758, "num_input_tokens_seen": 1386275296, "step": 7678 }, { "epoch": 0.8406360327321493, "grad_norm": 1.1838393255607798, "learning_rate": 3.0657095916490046e-06, "loss": 0.8687, "num_input_tokens_seen": 1386454496, "step": 7679 }, { "epoch": 0.8407455048030871, "grad_norm": 1.2993737014242916, "learning_rate": 3.0615851715027426e-06, "loss": 0.821, "num_input_tokens_seen": 1386634592, "step": 7680 }, { "epoch": 0.840854976874025, "grad_norm": 1.195409334536316, "learning_rate": 3.0574633466290166e-06, "loss": 1.0898, "num_input_tokens_seen": 1386850752, "step": 7681 }, { "epoch": 0.8409644489449629, "grad_norm": 1.287851678168549, "learning_rate": 3.0533441175154305e-06, "loss": 1.0014, "num_input_tokens_seen": 1387013824, "step": 7682 }, { "epoch": 0.8410739210159008, "grad_norm": 1.1429186712885535, "learning_rate": 3.049227484649275e-06, "loss": 0.8438, "num_input_tokens_seen": 1387199968, "step": 7683 }, { "epoch": 0.8411833930868388, "grad_norm": 1.2408642708683537, "learning_rate": 3.04511344851755e-06, "loss": 0.872, "num_input_tokens_seen": 1387379392, "step": 7684 }, { "epoch": 0.8412928651577766, "grad_norm": 1.112069095783894, "learning_rate": 3.041002009606933e-06, "loss": 0.6675, "num_input_tokens_seen": 1387537088, "step": 7685 }, { "epoch": 0.8414023372287145, "grad_norm": 1.161249846262352, "learning_rate": 3.036893168403801e-06, "loss": 0.7315, "num_input_tokens_seen": 1387696352, "step": 7686 }, { "epoch": 0.8415118092996524, "grad_norm": 1.1904070056384837, "learning_rate": 3.0327869253942183e-06, "loss": 0.7395, "num_input_tokens_seen": 1387824032, "step": 7687 }, { "epoch": 0.8416212813705903, "grad_norm": 1.141403559603905, "learning_rate": 3.0286832810639515e-06, "loss": 0.6728, "num_input_tokens_seen": 1387993824, "step": 7688 }, { "epoch": 0.8417307534415283, "grad_norm": 1.2445164566453937, "learning_rate": 3.024582235898449e-06, "loss": 1.0413, "num_input_tokens_seen": 1388176384, "step": 7689 }, { "epoch": 0.8418402255124662, "grad_norm": 1.1029137514482015, "learning_rate": 3.0204837903828525e-06, "loss": 0.8173, "num_input_tokens_seen": 1388377984, "step": 7690 }, { "epoch": 0.841949697583404, "grad_norm": 1.1985343943670936, "learning_rate": 3.0163879450020166e-06, "loss": 0.908, "num_input_tokens_seen": 1388514176, "step": 7691 }, { "epoch": 0.8420591696543419, "grad_norm": 1.1923548689683585, "learning_rate": 3.0122947002404504e-06, "loss": 0.9092, "num_input_tokens_seen": 1388690464, "step": 7692 }, { "epoch": 0.8421686417252798, "grad_norm": 1.2494346293750886, "learning_rate": 3.008204056582392e-06, "loss": 1.1221, "num_input_tokens_seen": 1388879968, "step": 7693 }, { "epoch": 0.8422781137962178, "grad_norm": 1.3142628411559503, "learning_rate": 3.004116014511754e-06, "loss": 0.7336, "num_input_tokens_seen": 1389056256, "step": 7694 }, { "epoch": 0.8423875858671557, "grad_norm": 1.1714815623728845, "learning_rate": 3.0000305745121443e-06, "loss": 1.0401, "num_input_tokens_seen": 1389235456, "step": 7695 }, { "epoch": 0.8424970579380936, "grad_norm": 1.2021380748658514, "learning_rate": 2.995947737066859e-06, "loss": 0.9094, "num_input_tokens_seen": 1389449376, "step": 7696 }, { "epoch": 0.8426065300090314, "grad_norm": 1.2155042612871225, "learning_rate": 2.9918675026588876e-06, "loss": 0.8304, "num_input_tokens_seen": 1389636192, "step": 7697 }, { "epoch": 0.8427160020799693, "grad_norm": 1.2753620699479717, "learning_rate": 2.987789871770927e-06, "loss": 0.9536, "num_input_tokens_seen": 1389837120, "step": 7698 }, { "epoch": 0.8428254741509073, "grad_norm": 1.1340384328206243, "learning_rate": 2.9837148448853353e-06, "loss": 0.9262, "num_input_tokens_seen": 1390019904, "step": 7699 }, { "epoch": 0.8429349462218452, "grad_norm": 1.2911227783426285, "learning_rate": 2.979642422484197e-06, "loss": 0.9046, "num_input_tokens_seen": 1390229568, "step": 7700 }, { "epoch": 0.8430444182927831, "grad_norm": 1.0588501904674552, "learning_rate": 2.9755726050492566e-06, "loss": 0.6904, "num_input_tokens_seen": 1390430272, "step": 7701 }, { "epoch": 0.8431538903637209, "grad_norm": 1.2607162792046478, "learning_rate": 2.9715053930619798e-06, "loss": 1.0272, "num_input_tokens_seen": 1390609248, "step": 7702 }, { "epoch": 0.8432633624346588, "grad_norm": 1.2165321809217884, "learning_rate": 2.9674407870035004e-06, "loss": 1.0622, "num_input_tokens_seen": 1390772992, "step": 7703 }, { "epoch": 0.8433728345055967, "grad_norm": 1.1885808743767006, "learning_rate": 2.963378787354659e-06, "loss": 1.0109, "num_input_tokens_seen": 1390972352, "step": 7704 }, { "epoch": 0.8434823065765347, "grad_norm": 1.228299993807041, "learning_rate": 2.95931939459598e-06, "loss": 0.8237, "num_input_tokens_seen": 1391175968, "step": 7705 }, { "epoch": 0.8435917786474726, "grad_norm": 1.1009412055016818, "learning_rate": 2.9552626092076765e-06, "loss": 0.8984, "num_input_tokens_seen": 1391375552, "step": 7706 }, { "epoch": 0.8437012507184105, "grad_norm": 1.096172429453642, "learning_rate": 2.951208431669675e-06, "loss": 0.7692, "num_input_tokens_seen": 1391563488, "step": 7707 }, { "epoch": 0.8438107227893483, "grad_norm": 1.2243653560801908, "learning_rate": 2.9471568624615533e-06, "loss": 0.9528, "num_input_tokens_seen": 1391759936, "step": 7708 }, { "epoch": 0.8439201948602862, "grad_norm": 1.1998001826340068, "learning_rate": 2.9431079020626253e-06, "loss": 1.0567, "num_input_tokens_seen": 1391940032, "step": 7709 }, { "epoch": 0.8440296669312242, "grad_norm": 1.2327914160024245, "learning_rate": 2.939061550951863e-06, "loss": 0.8089, "num_input_tokens_seen": 1392120128, "step": 7710 }, { "epoch": 0.8441391390021621, "grad_norm": 1.1615460224435077, "learning_rate": 2.9350178096079486e-06, "loss": 0.9647, "num_input_tokens_seen": 1392308960, "step": 7711 }, { "epoch": 0.8442486110731, "grad_norm": 1.0716286245409585, "learning_rate": 2.930976678509245e-06, "loss": 0.8485, "num_input_tokens_seen": 1392500480, "step": 7712 }, { "epoch": 0.8443580831440379, "grad_norm": 1.0416220782591599, "learning_rate": 2.926938158133813e-06, "loss": 0.8408, "num_input_tokens_seen": 1392715072, "step": 7713 }, { "epoch": 0.8444675552149757, "grad_norm": 1.1628429501736224, "learning_rate": 2.9229022489594e-06, "loss": 0.7933, "num_input_tokens_seen": 1392900768, "step": 7714 }, { "epoch": 0.8445770272859137, "grad_norm": 1.1178266390961242, "learning_rate": 2.9188689514634408e-06, "loss": 0.601, "num_input_tokens_seen": 1393043232, "step": 7715 }, { "epoch": 0.8446864993568516, "grad_norm": 1.1054711763065523, "learning_rate": 2.9148382661230766e-06, "loss": 0.83, "num_input_tokens_seen": 1393228480, "step": 7716 }, { "epoch": 0.8447959714277895, "grad_norm": 1.1545673705344015, "learning_rate": 2.9108101934151285e-06, "loss": 0.7581, "num_input_tokens_seen": 1393394016, "step": 7717 }, { "epoch": 0.8449054434987274, "grad_norm": 1.1730346536777774, "learning_rate": 2.9067847338161063e-06, "loss": 0.7452, "num_input_tokens_seen": 1393559328, "step": 7718 }, { "epoch": 0.8450149155696652, "grad_norm": 1.2137404457761263, "learning_rate": 2.9027618878022134e-06, "loss": 0.8109, "num_input_tokens_seen": 1393730240, "step": 7719 }, { "epoch": 0.8451243876406032, "grad_norm": 1.191200113442789, "learning_rate": 2.898741655849349e-06, "loss": 0.7156, "num_input_tokens_seen": 1393877408, "step": 7720 }, { "epoch": 0.8452338597115411, "grad_norm": 1.1115276244759482, "learning_rate": 2.8947240384330945e-06, "loss": 0.8589, "num_input_tokens_seen": 1394059520, "step": 7721 }, { "epoch": 0.845343331782479, "grad_norm": 1.3359445939795889, "learning_rate": 2.89070903602873e-06, "loss": 0.9219, "num_input_tokens_seen": 1394254176, "step": 7722 }, { "epoch": 0.8454528038534169, "grad_norm": 1.141700655484619, "learning_rate": 2.8866966491112144e-06, "loss": 1.1178, "num_input_tokens_seen": 1394473248, "step": 7723 }, { "epoch": 0.8455622759243548, "grad_norm": 1.2154365883860718, "learning_rate": 2.8826868781552217e-06, "loss": 0.8521, "num_input_tokens_seen": 1394664768, "step": 7724 }, { "epoch": 0.8456717479952927, "grad_norm": 1.2217679925682225, "learning_rate": 2.8786797236350806e-06, "loss": 0.8521, "num_input_tokens_seen": 1394859424, "step": 7725 }, { "epoch": 0.8457812200662306, "grad_norm": 1.038501731591946, "learning_rate": 2.8746751860248415e-06, "loss": 0.6363, "num_input_tokens_seen": 1395046464, "step": 7726 }, { "epoch": 0.8458906921371685, "grad_norm": 1.235675271786272, "learning_rate": 2.8706732657982347e-06, "loss": 0.9084, "num_input_tokens_seen": 1395197440, "step": 7727 }, { "epoch": 0.8460001642081064, "grad_norm": 1.1708544038028297, "learning_rate": 2.866673963428676e-06, "loss": 0.8158, "num_input_tokens_seen": 1395364096, "step": 7728 }, { "epoch": 0.8461096362790443, "grad_norm": 1.037020404684819, "learning_rate": 2.862677279389275e-06, "loss": 1.0561, "num_input_tokens_seen": 1395572192, "step": 7729 }, { "epoch": 0.8462191083499823, "grad_norm": 1.168308476570461, "learning_rate": 2.85868321415283e-06, "loss": 0.8771, "num_input_tokens_seen": 1395741088, "step": 7730 }, { "epoch": 0.8463285804209201, "grad_norm": 1.1532169977308198, "learning_rate": 2.8546917681918417e-06, "loss": 0.8788, "num_input_tokens_seen": 1395931488, "step": 7731 }, { "epoch": 0.846438052491858, "grad_norm": 1.125817743433929, "learning_rate": 2.8507029419784696e-06, "loss": 0.7878, "num_input_tokens_seen": 1396131968, "step": 7732 }, { "epoch": 0.8465475245627959, "grad_norm": 1.3053716362034342, "learning_rate": 2.8467167359846115e-06, "loss": 1.047, "num_input_tokens_seen": 1396309600, "step": 7733 }, { "epoch": 0.8466569966337338, "grad_norm": 1.0998187501008634, "learning_rate": 2.842733150681803e-06, "loss": 1.0107, "num_input_tokens_seen": 1396525312, "step": 7734 }, { "epoch": 0.8467664687046718, "grad_norm": 1.368123415357457, "learning_rate": 2.83875218654131e-06, "loss": 0.7566, "num_input_tokens_seen": 1396678752, "step": 7735 }, { "epoch": 0.8468759407756096, "grad_norm": 1.1688373104673098, "learning_rate": 2.8347738440340663e-06, "loss": 0.9354, "num_input_tokens_seen": 1396863328, "step": 7736 }, { "epoch": 0.8469854128465475, "grad_norm": 1.3417739088822185, "learning_rate": 2.830798123630707e-06, "loss": 0.8837, "num_input_tokens_seen": 1397048576, "step": 7737 }, { "epoch": 0.8470948849174854, "grad_norm": 1.363125942532486, "learning_rate": 2.8268250258015467e-06, "loss": 0.9969, "num_input_tokens_seen": 1397210304, "step": 7738 }, { "epoch": 0.8472043569884233, "grad_norm": 1.1782477985202062, "learning_rate": 2.822854551016593e-06, "loss": 0.7809, "num_input_tokens_seen": 1397405408, "step": 7739 }, { "epoch": 0.8473138290593613, "grad_norm": 1.1527434092432327, "learning_rate": 2.8188866997455626e-06, "loss": 0.6858, "num_input_tokens_seen": 1397577440, "step": 7740 }, { "epoch": 0.8474233011302992, "grad_norm": 1.1306028717557888, "learning_rate": 2.814921472457821e-06, "loss": 0.7818, "num_input_tokens_seen": 1397793824, "step": 7741 }, { "epoch": 0.847532773201237, "grad_norm": 1.1797319113348443, "learning_rate": 2.810958869622471e-06, "loss": 0.9396, "num_input_tokens_seen": 1397962496, "step": 7742 }, { "epoch": 0.8476422452721749, "grad_norm": 1.1102066146733247, "learning_rate": 2.8069988917082566e-06, "loss": 0.8358, "num_input_tokens_seen": 1398127136, "step": 7743 }, { "epoch": 0.8477517173431128, "grad_norm": 1.0503058510331247, "learning_rate": 2.8030415391836513e-06, "loss": 0.7324, "num_input_tokens_seen": 1398318656, "step": 7744 }, { "epoch": 0.8478611894140508, "grad_norm": 1.0493637791409138, "learning_rate": 2.799086812516799e-06, "loss": 0.8542, "num_input_tokens_seen": 1398517344, "step": 7745 }, { "epoch": 0.8479706614849887, "grad_norm": 1.1896377695770508, "learning_rate": 2.7951347121755373e-06, "loss": 0.8449, "num_input_tokens_seen": 1398642112, "step": 7746 }, { "epoch": 0.8480801335559266, "grad_norm": 1.136371625091909, "learning_rate": 2.791185238627389e-06, "loss": 0.9099, "num_input_tokens_seen": 1398841696, "step": 7747 }, { "epoch": 0.8481896056268644, "grad_norm": 1.3205731828654577, "learning_rate": 2.7872383923395667e-06, "loss": 0.8627, "num_input_tokens_seen": 1399002080, "step": 7748 }, { "epoch": 0.8482990776978023, "grad_norm": 1.2010453320348324, "learning_rate": 2.7832941737789912e-06, "loss": 0.8029, "num_input_tokens_seen": 1399171648, "step": 7749 }, { "epoch": 0.8484085497687402, "grad_norm": 1.1605696022070704, "learning_rate": 2.7793525834122315e-06, "loss": 0.7536, "num_input_tokens_seen": 1399367648, "step": 7750 }, { "epoch": 0.8485180218396782, "grad_norm": 1.333957297635375, "learning_rate": 2.775413621705586e-06, "loss": 1.0358, "num_input_tokens_seen": 1399522208, "step": 7751 }, { "epoch": 0.8486274939106161, "grad_norm": 1.190571927467199, "learning_rate": 2.771477289125024e-06, "loss": 0.9609, "num_input_tokens_seen": 1399731648, "step": 7752 }, { "epoch": 0.8487369659815539, "grad_norm": 1.1561820035993824, "learning_rate": 2.7675435861362064e-06, "loss": 0.7147, "num_input_tokens_seen": 1399932352, "step": 7753 }, { "epoch": 0.8488464380524918, "grad_norm": 0.9995858705834809, "learning_rate": 2.7636125132044806e-06, "loss": 0.9408, "num_input_tokens_seen": 1400138432, "step": 7754 }, { "epoch": 0.8489559101234297, "grad_norm": 1.1450291094348855, "learning_rate": 2.759684070794885e-06, "loss": 1.0131, "num_input_tokens_seen": 1400311360, "step": 7755 }, { "epoch": 0.8490653821943677, "grad_norm": 1.1601329248825785, "learning_rate": 2.755758259372149e-06, "loss": 0.9525, "num_input_tokens_seen": 1400486304, "step": 7756 }, { "epoch": 0.8491748542653056, "grad_norm": 1.048799993894302, "learning_rate": 2.7518350794006804e-06, "loss": 0.7636, "num_input_tokens_seen": 1400690816, "step": 7757 }, { "epoch": 0.8492843263362435, "grad_norm": 1.0386254101159147, "learning_rate": 2.7479145313445974e-06, "loss": 0.9384, "num_input_tokens_seen": 1400886592, "step": 7758 }, { "epoch": 0.8493937984071813, "grad_norm": 1.1104113077078517, "learning_rate": 2.743996615667685e-06, "loss": 0.712, "num_input_tokens_seen": 1401071168, "step": 7759 }, { "epoch": 0.8495032704781192, "grad_norm": 1.0968499531290778, "learning_rate": 2.7400813328334273e-06, "loss": 0.8476, "num_input_tokens_seen": 1401262912, "step": 7760 }, { "epoch": 0.8496127425490572, "grad_norm": 1.3095030401077317, "learning_rate": 2.736168683304996e-06, "loss": 0.9655, "num_input_tokens_seen": 1401448832, "step": 7761 }, { "epoch": 0.8497222146199951, "grad_norm": 1.1187067971865055, "learning_rate": 2.7322586675452454e-06, "loss": 0.7751, "num_input_tokens_seen": 1401637216, "step": 7762 }, { "epoch": 0.849831686690933, "grad_norm": 1.3495628249535863, "learning_rate": 2.728351286016725e-06, "loss": 0.8916, "num_input_tokens_seen": 1401788416, "step": 7763 }, { "epoch": 0.8499411587618709, "grad_norm": 1.093780190187412, "learning_rate": 2.7244465391816742e-06, "loss": 0.7469, "num_input_tokens_seen": 1401946560, "step": 7764 }, { "epoch": 0.8500506308328087, "grad_norm": 1.051283932386556, "learning_rate": 2.720544427502009e-06, "loss": 0.6088, "num_input_tokens_seen": 1402127328, "step": 7765 }, { "epoch": 0.8501601029037467, "grad_norm": 1.032145949060671, "learning_rate": 2.7166449514393565e-06, "loss": 0.7795, "num_input_tokens_seen": 1402313472, "step": 7766 }, { "epoch": 0.8502695749746846, "grad_norm": 1.1243584711069632, "learning_rate": 2.7127481114549965e-06, "loss": 0.7013, "num_input_tokens_seen": 1402485952, "step": 7767 }, { "epoch": 0.8503790470456225, "grad_norm": 1.0858429792146025, "learning_rate": 2.708853908009934e-06, "loss": 0.9505, "num_input_tokens_seen": 1402691136, "step": 7768 }, { "epoch": 0.8504885191165604, "grad_norm": 1.2030011893636734, "learning_rate": 2.7049623415648427e-06, "loss": 1.0753, "num_input_tokens_seen": 1402875936, "step": 7769 }, { "epoch": 0.8505979911874982, "grad_norm": 1.2637704047301817, "learning_rate": 2.7010734125800824e-06, "loss": 0.9341, "num_input_tokens_seen": 1403061632, "step": 7770 }, { "epoch": 0.8507074632584362, "grad_norm": 1.3015893024373082, "learning_rate": 2.6971871215157126e-06, "loss": 0.9258, "num_input_tokens_seen": 1403248224, "step": 7771 }, { "epoch": 0.8508169353293741, "grad_norm": 1.2054108542126263, "learning_rate": 2.6933034688314624e-06, "loss": 0.9328, "num_input_tokens_seen": 1403426080, "step": 7772 }, { "epoch": 0.850926407400312, "grad_norm": 1.0687388525487433, "learning_rate": 2.6894224549867815e-06, "loss": 0.689, "num_input_tokens_seen": 1403594528, "step": 7773 }, { "epoch": 0.8510358794712499, "grad_norm": 1.0939085709201628, "learning_rate": 2.6855440804407635e-06, "loss": 0.8507, "num_input_tokens_seen": 1403793664, "step": 7774 }, { "epoch": 0.8511453515421878, "grad_norm": 1.1564846316255288, "learning_rate": 2.68166834565223e-06, "loss": 0.8813, "num_input_tokens_seen": 1403989440, "step": 7775 }, { "epoch": 0.8512548236131257, "grad_norm": 1.1950607034595808, "learning_rate": 2.6777952510796565e-06, "loss": 0.9282, "num_input_tokens_seen": 1404186336, "step": 7776 }, { "epoch": 0.8513642956840636, "grad_norm": 1.3478142687164383, "learning_rate": 2.6739247971812375e-06, "loss": 1.127, "num_input_tokens_seen": 1404369792, "step": 7777 }, { "epoch": 0.8514737677550015, "grad_norm": 1.1337597799667922, "learning_rate": 2.6700569844148372e-06, "loss": 1.0784, "num_input_tokens_seen": 1404575648, "step": 7778 }, { "epoch": 0.8515832398259394, "grad_norm": 1.173690762472288, "learning_rate": 2.666191813238006e-06, "loss": 0.8727, "num_input_tokens_seen": 1404734912, "step": 7779 }, { "epoch": 0.8516927118968773, "grad_norm": 1.3540435245961833, "learning_rate": 2.662329284107987e-06, "loss": 0.9576, "num_input_tokens_seen": 1404890592, "step": 7780 }, { "epoch": 0.8518021839678153, "grad_norm": 1.2944587351092482, "learning_rate": 2.6584693974817084e-06, "loss": 0.7759, "num_input_tokens_seen": 1405040896, "step": 7781 }, { "epoch": 0.8519116560387531, "grad_norm": 1.1818511523135964, "learning_rate": 2.6546121538157998e-06, "loss": 1.1778, "num_input_tokens_seen": 1405236448, "step": 7782 }, { "epoch": 0.852021128109691, "grad_norm": 1.0815518415444454, "learning_rate": 2.650757553566546e-06, "loss": 0.7133, "num_input_tokens_seen": 1405427296, "step": 7783 }, { "epoch": 0.8521306001806289, "grad_norm": 1.0886146971131485, "learning_rate": 2.6469055971899525e-06, "loss": 0.744, "num_input_tokens_seen": 1405610976, "step": 7784 }, { "epoch": 0.8522400722515668, "grad_norm": 1.5846870641720938, "learning_rate": 2.6430562851416983e-06, "loss": 1.1366, "num_input_tokens_seen": 1405772704, "step": 7785 }, { "epoch": 0.8523495443225048, "grad_norm": 1.1746601830807633, "learning_rate": 2.6392096178771447e-06, "loss": 0.9202, "num_input_tokens_seen": 1405972064, "step": 7786 }, { "epoch": 0.8524590163934426, "grad_norm": 1.1131907108366168, "learning_rate": 2.635365595851344e-06, "loss": 0.8187, "num_input_tokens_seen": 1406174336, "step": 7787 }, { "epoch": 0.8525684884643805, "grad_norm": 1.3257379959774374, "learning_rate": 2.6315242195190436e-06, "loss": 0.8218, "num_input_tokens_seen": 1406321504, "step": 7788 }, { "epoch": 0.8526779605353184, "grad_norm": 1.1690060911256481, "learning_rate": 2.6276854893346636e-06, "loss": 0.8718, "num_input_tokens_seen": 1406480992, "step": 7789 }, { "epoch": 0.8527874326062563, "grad_norm": 1.2748991645881464, "learning_rate": 2.6238494057523183e-06, "loss": 0.8997, "num_input_tokens_seen": 1406623904, "step": 7790 }, { "epoch": 0.8528969046771943, "grad_norm": 1.0352667407681513, "learning_rate": 2.6200159692258195e-06, "loss": 1.0995, "num_input_tokens_seen": 1406842528, "step": 7791 }, { "epoch": 0.8530063767481322, "grad_norm": 1.1825023693036787, "learning_rate": 2.616185180208644e-06, "loss": 1.093, "num_input_tokens_seen": 1407031360, "step": 7792 }, { "epoch": 0.85311584881907, "grad_norm": 1.3023520484847573, "learning_rate": 2.612357039153973e-06, "loss": 0.9311, "num_input_tokens_seen": 1407216832, "step": 7793 }, { "epoch": 0.8532253208900079, "grad_norm": 1.2167223667214961, "learning_rate": 2.608531546514667e-06, "loss": 0.8651, "num_input_tokens_seen": 1407421792, "step": 7794 }, { "epoch": 0.8533347929609458, "grad_norm": 1.2839511218289619, "learning_rate": 2.6047087027432746e-06, "loss": 0.9297, "num_input_tokens_seen": 1407629888, "step": 7795 }, { "epoch": 0.8534442650318838, "grad_norm": 1.3035791223302688, "learning_rate": 2.600888508292029e-06, "loss": 1.033, "num_input_tokens_seen": 1407827904, "step": 7796 }, { "epoch": 0.8535537371028217, "grad_norm": 1.1752587865368338, "learning_rate": 2.597070963612852e-06, "loss": 0.7996, "num_input_tokens_seen": 1408001728, "step": 7797 }, { "epoch": 0.8536632091737596, "grad_norm": 1.3542624610861367, "learning_rate": 2.5932560691573487e-06, "loss": 1.094, "num_input_tokens_seen": 1408196832, "step": 7798 }, { "epoch": 0.8537726812446974, "grad_norm": 1.0585862613427175, "learning_rate": 2.5894438253768223e-06, "loss": 0.6372, "num_input_tokens_seen": 1408365952, "step": 7799 }, { "epoch": 0.8538821533156353, "grad_norm": 1.1504666832746402, "learning_rate": 2.5856342327222505e-06, "loss": 0.8728, "num_input_tokens_seen": 1408534400, "step": 7800 }, { "epoch": 0.8539916253865732, "grad_norm": 1.0286105508024734, "learning_rate": 2.581827291644301e-06, "loss": 0.6661, "num_input_tokens_seen": 1408706432, "step": 7801 }, { "epoch": 0.8541010974575112, "grad_norm": 1.2467429165207695, "learning_rate": 2.5780230025933245e-06, "loss": 0.8886, "num_input_tokens_seen": 1408875552, "step": 7802 }, { "epoch": 0.8542105695284491, "grad_norm": 1.198345450356404, "learning_rate": 2.5742213660193637e-06, "loss": 0.8864, "num_input_tokens_seen": 1409063488, "step": 7803 }, { "epoch": 0.8543200415993869, "grad_norm": 1.0591795591944821, "learning_rate": 2.5704223823721453e-06, "loss": 0.9737, "num_input_tokens_seen": 1409255232, "step": 7804 }, { "epoch": 0.8544295136703248, "grad_norm": 1.3066437269325102, "learning_rate": 2.5666260521010758e-06, "loss": 0.899, "num_input_tokens_seen": 1409398816, "step": 7805 }, { "epoch": 0.8545389857412627, "grad_norm": 1.2660526447951128, "learning_rate": 2.562832375655269e-06, "loss": 1.0309, "num_input_tokens_seen": 1409565472, "step": 7806 }, { "epoch": 0.8546484578122007, "grad_norm": 1.089859500894245, "learning_rate": 2.5590413534834906e-06, "loss": 0.7464, "num_input_tokens_seen": 1409750720, "step": 7807 }, { "epoch": 0.8547579298831386, "grad_norm": 1.217990736551872, "learning_rate": 2.555252986034229e-06, "loss": 0.8388, "num_input_tokens_seen": 1409935520, "step": 7808 }, { "epoch": 0.8548674019540765, "grad_norm": 1.2941770871429235, "learning_rate": 2.55146727375562e-06, "loss": 1.0589, "num_input_tokens_seen": 1410129504, "step": 7809 }, { "epoch": 0.8549768740250143, "grad_norm": 1.0081830785000632, "learning_rate": 2.547684217095528e-06, "loss": 0.712, "num_input_tokens_seen": 1410342080, "step": 7810 }, { "epoch": 0.8550863460959522, "grad_norm": 1.1523658362389526, "learning_rate": 2.5439038165014666e-06, "loss": 1.0402, "num_input_tokens_seen": 1410527328, "step": 7811 }, { "epoch": 0.8551958181668902, "grad_norm": 0.9933770325561788, "learning_rate": 2.5401260724206537e-06, "loss": 0.5315, "num_input_tokens_seen": 1410716832, "step": 7812 }, { "epoch": 0.8553052902378281, "grad_norm": 1.2358405672008699, "learning_rate": 2.5363509852999983e-06, "loss": 1.1034, "num_input_tokens_seen": 1410928736, "step": 7813 }, { "epoch": 0.855414762308766, "grad_norm": 1.0805579666518306, "learning_rate": 2.532578555586068e-06, "loss": 0.959, "num_input_tokens_seen": 1411123168, "step": 7814 }, { "epoch": 0.8555242343797039, "grad_norm": 1.287128546749999, "learning_rate": 2.5288087837251564e-06, "loss": 0.9765, "num_input_tokens_seen": 1411292512, "step": 7815 }, { "epoch": 0.8556337064506417, "grad_norm": 1.2880302689028407, "learning_rate": 2.5250416701631976e-06, "loss": 0.9359, "num_input_tokens_seen": 1411463424, "step": 7816 }, { "epoch": 0.8557431785215797, "grad_norm": 1.248536492959785, "learning_rate": 2.521277215345852e-06, "loss": 0.7694, "num_input_tokens_seen": 1411622688, "step": 7817 }, { "epoch": 0.8558526505925176, "grad_norm": 1.102083502314554, "learning_rate": 2.517515419718433e-06, "loss": 0.9374, "num_input_tokens_seen": 1411810624, "step": 7818 }, { "epoch": 0.8559621226634555, "grad_norm": 1.1341791787859954, "learning_rate": 2.5137562837259626e-06, "loss": 0.6854, "num_input_tokens_seen": 1411989152, "step": 7819 }, { "epoch": 0.8560715947343934, "grad_norm": 1.1882296880185443, "learning_rate": 2.5099998078131376e-06, "loss": 0.9311, "num_input_tokens_seen": 1412194336, "step": 7820 }, { "epoch": 0.8561810668053312, "grad_norm": 1.2532710668047649, "learning_rate": 2.5062459924243442e-06, "loss": 0.867, "num_input_tokens_seen": 1412343072, "step": 7821 }, { "epoch": 0.8562905388762692, "grad_norm": 1.2863724817620739, "learning_rate": 2.5024948380036468e-06, "loss": 0.936, "num_input_tokens_seen": 1412488448, "step": 7822 }, { "epoch": 0.8564000109472071, "grad_norm": 1.203518295408937, "learning_rate": 2.4987463449947986e-06, "loss": 0.9399, "num_input_tokens_seen": 1412679296, "step": 7823 }, { "epoch": 0.856509483018145, "grad_norm": 1.3825019444595101, "learning_rate": 2.495000513841253e-06, "loss": 0.9686, "num_input_tokens_seen": 1412844160, "step": 7824 }, { "epoch": 0.8566189550890829, "grad_norm": 1.151490752470821, "learning_rate": 2.491257344986114e-06, "loss": 0.7123, "num_input_tokens_seen": 1412986400, "step": 7825 }, { "epoch": 0.8567284271600208, "grad_norm": 1.2481015570993417, "learning_rate": 2.4875168388722057e-06, "loss": 0.927, "num_input_tokens_seen": 1413198752, "step": 7826 }, { "epoch": 0.8568378992309587, "grad_norm": 1.0228195654522771, "learning_rate": 2.4837789959420184e-06, "loss": 0.7741, "num_input_tokens_seen": 1413380192, "step": 7827 }, { "epoch": 0.8569473713018966, "grad_norm": 1.191297532421212, "learning_rate": 2.4800438166377337e-06, "loss": 1.1526, "num_input_tokens_seen": 1413557824, "step": 7828 }, { "epoch": 0.8570568433728345, "grad_norm": 1.2731081296751547, "learning_rate": 2.4763113014012155e-06, "loss": 0.7669, "num_input_tokens_seen": 1413743072, "step": 7829 }, { "epoch": 0.8571663154437724, "grad_norm": 1.1208440522505048, "learning_rate": 2.472581450674011e-06, "loss": 0.9124, "num_input_tokens_seen": 1413950496, "step": 7830 }, { "epoch": 0.8572757875147103, "grad_norm": 1.1172634178989664, "learning_rate": 2.468854264897355e-06, "loss": 1.0738, "num_input_tokens_seen": 1414148064, "step": 7831 }, { "epoch": 0.8573852595856483, "grad_norm": 1.128198897036382, "learning_rate": 2.4651297445121625e-06, "loss": 0.8156, "num_input_tokens_seen": 1414321440, "step": 7832 }, { "epoch": 0.8574947316565861, "grad_norm": 1.1354826403474676, "learning_rate": 2.461407889959047e-06, "loss": 1.0631, "num_input_tokens_seen": 1414532448, "step": 7833 }, { "epoch": 0.857604203727524, "grad_norm": 1.2734236453186123, "learning_rate": 2.4576887016782927e-06, "loss": 0.9928, "num_input_tokens_seen": 1414705152, "step": 7834 }, { "epoch": 0.8577136757984619, "grad_norm": 1.0954043819183135, "learning_rate": 2.4539721801098704e-06, "loss": 0.7673, "num_input_tokens_seen": 1414876288, "step": 7835 }, { "epoch": 0.8578231478693998, "grad_norm": 1.2552783149848838, "learning_rate": 2.4502583256934388e-06, "loss": 1.0046, "num_input_tokens_seen": 1415042944, "step": 7836 }, { "epoch": 0.8579326199403378, "grad_norm": 1.167720296198978, "learning_rate": 2.4465471388683383e-06, "loss": 0.872, "num_input_tokens_seen": 1415205792, "step": 7837 }, { "epoch": 0.8580420920112756, "grad_norm": 1.0461894395407925, "learning_rate": 2.4428386200735924e-06, "loss": 0.6125, "num_input_tokens_seen": 1415408288, "step": 7838 }, { "epoch": 0.8581515640822135, "grad_norm": 1.1978188958558331, "learning_rate": 2.439132769747926e-06, "loss": 0.6882, "num_input_tokens_seen": 1415593536, "step": 7839 }, { "epoch": 0.8582610361531514, "grad_norm": 1.0400416414470666, "learning_rate": 2.435429588329716e-06, "loss": 0.9777, "num_input_tokens_seen": 1415807680, "step": 7840 }, { "epoch": 0.8583705082240893, "grad_norm": 1.1450040706412241, "learning_rate": 2.431729076257053e-06, "loss": 1.1333, "num_input_tokens_seen": 1415999200, "step": 7841 }, { "epoch": 0.8584799802950273, "grad_norm": 1.0723450011176996, "learning_rate": 2.4280312339676953e-06, "loss": 0.8651, "num_input_tokens_seen": 1416158688, "step": 7842 }, { "epoch": 0.8585894523659652, "grad_norm": 1.0662117805205096, "learning_rate": 2.4243360618990934e-06, "loss": 0.8586, "num_input_tokens_seen": 1416366336, "step": 7843 }, { "epoch": 0.858698924436903, "grad_norm": 1.3612219442063278, "learning_rate": 2.4206435604883782e-06, "loss": 0.9977, "num_input_tokens_seen": 1416551584, "step": 7844 }, { "epoch": 0.8588083965078409, "grad_norm": 1.1821080820107934, "learning_rate": 2.416953730172361e-06, "loss": 0.8665, "num_input_tokens_seen": 1416710176, "step": 7845 }, { "epoch": 0.8589178685787788, "grad_norm": 1.1445262505223572, "learning_rate": 2.4132665713875542e-06, "loss": 0.8248, "num_input_tokens_seen": 1416887808, "step": 7846 }, { "epoch": 0.8590273406497168, "grad_norm": 1.1545633953956045, "learning_rate": 2.409582084570125e-06, "loss": 1.0299, "num_input_tokens_seen": 1417063424, "step": 7847 }, { "epoch": 0.8591368127206547, "grad_norm": 1.0957713930346749, "learning_rate": 2.4059002701559587e-06, "loss": 0.7771, "num_input_tokens_seen": 1417249344, "step": 7848 }, { "epoch": 0.8592462847915926, "grad_norm": 1.1264511921108733, "learning_rate": 2.4022211285805895e-06, "loss": 1.0066, "num_input_tokens_seen": 1417431904, "step": 7849 }, { "epoch": 0.8593557568625304, "grad_norm": 1.1418750131958875, "learning_rate": 2.39854466027927e-06, "loss": 0.9284, "num_input_tokens_seen": 1417621184, "step": 7850 }, { "epoch": 0.8594652289334683, "grad_norm": 1.1064357773992, "learning_rate": 2.394870865686899e-06, "loss": 0.8133, "num_input_tokens_seen": 1417787840, "step": 7851 }, { "epoch": 0.8595747010044062, "grad_norm": 1.298114597828034, "learning_rate": 2.3911997452380987e-06, "loss": 0.8186, "num_input_tokens_seen": 1417950464, "step": 7852 }, { "epoch": 0.8596841730753442, "grad_norm": 1.0935403895478153, "learning_rate": 2.387531299367146e-06, "loss": 0.9724, "num_input_tokens_seen": 1418118464, "step": 7853 }, { "epoch": 0.8597936451462821, "grad_norm": 0.9940835452977472, "learning_rate": 2.3838655285080085e-06, "loss": 0.8241, "num_input_tokens_seen": 1418322976, "step": 7854 }, { "epoch": 0.8599031172172199, "grad_norm": 1.2309675870667496, "learning_rate": 2.3802024330943556e-06, "loss": 0.8793, "num_input_tokens_seen": 1418464544, "step": 7855 }, { "epoch": 0.8600125892881578, "grad_norm": 1.103745428142821, "learning_rate": 2.376542013559502e-06, "loss": 0.8008, "num_input_tokens_seen": 1418648224, "step": 7856 }, { "epoch": 0.8601220613590957, "grad_norm": 1.184563117072257, "learning_rate": 2.3728842703364894e-06, "loss": 0.7788, "num_input_tokens_seen": 1418841984, "step": 7857 }, { "epoch": 0.8602315334300337, "grad_norm": 1.2143554196666406, "learning_rate": 2.3692292038580006e-06, "loss": 0.8563, "num_input_tokens_seen": 1419022080, "step": 7858 }, { "epoch": 0.8603410055009716, "grad_norm": 1.0423282556836093, "learning_rate": 2.3655768145564416e-06, "loss": 0.692, "num_input_tokens_seen": 1419219648, "step": 7859 }, { "epoch": 0.8604504775719095, "grad_norm": 0.9527987250023262, "learning_rate": 2.361927102863873e-06, "loss": 0.655, "num_input_tokens_seen": 1419388768, "step": 7860 }, { "epoch": 0.8605599496428473, "grad_norm": 1.1932553809472413, "learning_rate": 2.3582800692120542e-06, "loss": 1.0623, "num_input_tokens_seen": 1419586112, "step": 7861 }, { "epoch": 0.8606694217137852, "grad_norm": 1.1410261520009832, "learning_rate": 2.354635714032419e-06, "loss": 0.8117, "num_input_tokens_seen": 1419757472, "step": 7862 }, { "epoch": 0.8607788937847232, "grad_norm": 1.2761207348467074, "learning_rate": 2.3509940377560878e-06, "loss": 1.0653, "num_input_tokens_seen": 1419934208, "step": 7863 }, { "epoch": 0.8608883658556611, "grad_norm": 1.3319540588744372, "learning_rate": 2.3473550408138645e-06, "loss": 1.0587, "num_input_tokens_seen": 1420104896, "step": 7864 }, { "epoch": 0.860997837926599, "grad_norm": 1.130358016595522, "learning_rate": 2.343718723636232e-06, "loss": 0.9916, "num_input_tokens_seen": 1420281408, "step": 7865 }, { "epoch": 0.8611073099975369, "grad_norm": 1.1975898241507752, "learning_rate": 2.3400850866533654e-06, "loss": 0.8977, "num_input_tokens_seen": 1420445600, "step": 7866 }, { "epoch": 0.8612167820684747, "grad_norm": 1.295943327688963, "learning_rate": 2.3364541302951154e-06, "loss": 0.902, "num_input_tokens_seen": 1420635104, "step": 7867 }, { "epoch": 0.8613262541394127, "grad_norm": 1.915003455734123, "learning_rate": 2.3328258549910166e-06, "loss": 0.8734, "num_input_tokens_seen": 1420809824, "step": 7868 }, { "epoch": 0.8614357262103506, "grad_norm": 1.193262074401238, "learning_rate": 2.3292002611702863e-06, "loss": 0.9852, "num_input_tokens_seen": 1420987232, "step": 7869 }, { "epoch": 0.8615451982812885, "grad_norm": 1.275993870469322, "learning_rate": 2.325577349261826e-06, "loss": 0.9151, "num_input_tokens_seen": 1421175840, "step": 7870 }, { "epoch": 0.8616546703522264, "grad_norm": 1.1264628859669539, "learning_rate": 2.321957119694221e-06, "loss": 0.9278, "num_input_tokens_seen": 1421374752, "step": 7871 }, { "epoch": 0.8617641424231642, "grad_norm": 1.1119848901867881, "learning_rate": 2.3183395728957334e-06, "loss": 1.0867, "num_input_tokens_seen": 1421580384, "step": 7872 }, { "epoch": 0.8618736144941022, "grad_norm": 1.1698584435896613, "learning_rate": 2.3147247092943107e-06, "loss": 0.8585, "num_input_tokens_seen": 1421784448, "step": 7873 }, { "epoch": 0.8619830865650401, "grad_norm": 1.2993237001715368, "learning_rate": 2.311112529317591e-06, "loss": 0.8153, "num_input_tokens_seen": 1421932960, "step": 7874 }, { "epoch": 0.862092558635978, "grad_norm": 1.1573631495738297, "learning_rate": 2.307503033392888e-06, "loss": 0.8775, "num_input_tokens_seen": 1422136128, "step": 7875 }, { "epoch": 0.8622020307069159, "grad_norm": 1.1371526250604556, "learning_rate": 2.303896221947194e-06, "loss": 0.8707, "num_input_tokens_seen": 1422313536, "step": 7876 }, { "epoch": 0.8623115027778538, "grad_norm": 1.1372007416501682, "learning_rate": 2.3002920954071916e-06, "loss": 0.9514, "num_input_tokens_seen": 1422473472, "step": 7877 }, { "epoch": 0.8624209748487917, "grad_norm": 1.0029166721290639, "learning_rate": 2.296690654199238e-06, "loss": 0.7847, "num_input_tokens_seen": 1422662080, "step": 7878 }, { "epoch": 0.8625304469197296, "grad_norm": 1.149950963509028, "learning_rate": 2.293091898749378e-06, "loss": 0.7288, "num_input_tokens_seen": 1422838816, "step": 7879 }, { "epoch": 0.8626399189906675, "grad_norm": 1.1532981411759409, "learning_rate": 2.2894958294833317e-06, "loss": 0.8464, "num_input_tokens_seen": 1423000768, "step": 7880 }, { "epoch": 0.8627493910616054, "grad_norm": 1.3390396418896193, "learning_rate": 2.2859024468265265e-06, "loss": 0.8793, "num_input_tokens_seen": 1423182880, "step": 7881 }, { "epoch": 0.8628588631325433, "grad_norm": 1.1295159617254922, "learning_rate": 2.2823117512040304e-06, "loss": 0.9295, "num_input_tokens_seen": 1423353792, "step": 7882 }, { "epoch": 0.8629683352034813, "grad_norm": 1.1190340480686902, "learning_rate": 2.2787237430406285e-06, "loss": 0.8576, "num_input_tokens_seen": 1423506560, "step": 7883 }, { "epoch": 0.8630778072744191, "grad_norm": 1.1318950752824528, "learning_rate": 2.2751384227607727e-06, "loss": 1.2177, "num_input_tokens_seen": 1423701440, "step": 7884 }, { "epoch": 0.863187279345357, "grad_norm": 1.3722929315757606, "learning_rate": 2.2715557907885986e-06, "loss": 0.971, "num_input_tokens_seen": 1423872800, "step": 7885 }, { "epoch": 0.8632967514162949, "grad_norm": 1.2873239526477664, "learning_rate": 2.2679758475479235e-06, "loss": 1.0315, "num_input_tokens_seen": 1424060512, "step": 7886 }, { "epoch": 0.8634062234872328, "grad_norm": 1.0350139016217574, "learning_rate": 2.264398593462247e-06, "loss": 0.9044, "num_input_tokens_seen": 1424288768, "step": 7887 }, { "epoch": 0.8635156955581708, "grad_norm": 1.2526920417315428, "learning_rate": 2.260824028954764e-06, "loss": 1.0692, "num_input_tokens_seen": 1424446016, "step": 7888 }, { "epoch": 0.8636251676291086, "grad_norm": 1.2105139699269252, "learning_rate": 2.2572521544483166e-06, "loss": 0.7548, "num_input_tokens_seen": 1424625440, "step": 7889 }, { "epoch": 0.8637346397000465, "grad_norm": 1.2445145887850868, "learning_rate": 2.2536829703654727e-06, "loss": 0.7942, "num_input_tokens_seen": 1424815168, "step": 7890 }, { "epoch": 0.8638441117709844, "grad_norm": 1.2119124886673907, "learning_rate": 2.2501164771284418e-06, "loss": 0.8447, "num_input_tokens_seen": 1425019456, "step": 7891 }, { "epoch": 0.8639535838419223, "grad_norm": 1.0671125036560036, "learning_rate": 2.246552675159147e-06, "loss": 0.6941, "num_input_tokens_seen": 1425190144, "step": 7892 }, { "epoch": 0.8640630559128603, "grad_norm": 1.2667389194477447, "learning_rate": 2.2429915648791684e-06, "loss": 0.9953, "num_input_tokens_seen": 1425405856, "step": 7893 }, { "epoch": 0.8641725279837982, "grad_norm": 1.165635447928314, "learning_rate": 2.239433146709785e-06, "loss": 0.8594, "num_input_tokens_seen": 1425579904, "step": 7894 }, { "epoch": 0.864282000054736, "grad_norm": 1.304273345451697, "learning_rate": 2.2358774210719523e-06, "loss": 0.9179, "num_input_tokens_seen": 1425729312, "step": 7895 }, { "epoch": 0.8643914721256739, "grad_norm": 1.1787333387570744, "learning_rate": 2.2323243883862976e-06, "loss": 0.8734, "num_input_tokens_seen": 1425897984, "step": 7896 }, { "epoch": 0.8645009441966118, "grad_norm": 1.0706788153186766, "learning_rate": 2.2287740490731514e-06, "loss": 0.9315, "num_input_tokens_seen": 1426101600, "step": 7897 }, { "epoch": 0.8646104162675498, "grad_norm": 1.1785845115893268, "learning_rate": 2.2252264035524968e-06, "loss": 0.7682, "num_input_tokens_seen": 1426287520, "step": 7898 }, { "epoch": 0.8647198883384877, "grad_norm": 1.1798230023909626, "learning_rate": 2.2216814522440233e-06, "loss": 0.8621, "num_input_tokens_seen": 1426484416, "step": 7899 }, { "epoch": 0.8648293604094256, "grad_norm": 1.1960808273006036, "learning_rate": 2.21813919556709e-06, "loss": 0.7531, "num_input_tokens_seen": 1426677056, "step": 7900 }, { "epoch": 0.8649388324803634, "grad_norm": 1.1171763492908064, "learning_rate": 2.214599633940739e-06, "loss": 0.8248, "num_input_tokens_seen": 1426873280, "step": 7901 }, { "epoch": 0.8650483045513013, "grad_norm": 1.2095310041562635, "learning_rate": 2.211062767783692e-06, "loss": 0.8414, "num_input_tokens_seen": 1427047328, "step": 7902 }, { "epoch": 0.8651577766222392, "grad_norm": 1.1648670462844286, "learning_rate": 2.207528597514355e-06, "loss": 0.7419, "num_input_tokens_seen": 1427194272, "step": 7903 }, { "epoch": 0.8652672486931772, "grad_norm": 1.0763653756037985, "learning_rate": 2.2039971235508135e-06, "loss": 0.7807, "num_input_tokens_seen": 1427386016, "step": 7904 }, { "epoch": 0.8653767207641151, "grad_norm": 1.0204523828958172, "learning_rate": 2.200468346310833e-06, "loss": 1.0509, "num_input_tokens_seen": 1427602176, "step": 7905 }, { "epoch": 0.8654861928350529, "grad_norm": 1.2366508556822164, "learning_rate": 2.1969422662118572e-06, "loss": 1.077, "num_input_tokens_seen": 1427741952, "step": 7906 }, { "epoch": 0.8655956649059908, "grad_norm": 1.1948981470730726, "learning_rate": 2.193418883671025e-06, "loss": 1.0904, "num_input_tokens_seen": 1427925184, "step": 7907 }, { "epoch": 0.8657051369769287, "grad_norm": 0.9960591927312086, "learning_rate": 2.189898199105139e-06, "loss": 0.6643, "num_input_tokens_seen": 1428103040, "step": 7908 }, { "epoch": 0.8658146090478667, "grad_norm": 1.2634439395473416, "learning_rate": 2.1863802129306886e-06, "loss": 0.7444, "num_input_tokens_seen": 1428278208, "step": 7909 }, { "epoch": 0.8659240811188046, "grad_norm": 1.2249058998711384, "learning_rate": 2.182864925563849e-06, "loss": 1.0175, "num_input_tokens_seen": 1428468608, "step": 7910 }, { "epoch": 0.8660335531897425, "grad_norm": 1.0630869646573786, "learning_rate": 2.1793523374204706e-06, "loss": 0.7739, "num_input_tokens_seen": 1428634816, "step": 7911 }, { "epoch": 0.8661430252606803, "grad_norm": 1.1072299259422036, "learning_rate": 2.175842448916085e-06, "loss": 0.5897, "num_input_tokens_seen": 1428819616, "step": 7912 }, { "epoch": 0.8662524973316182, "grad_norm": 1.227212575681055, "learning_rate": 2.1723352604658994e-06, "loss": 0.75, "num_input_tokens_seen": 1428977984, "step": 7913 }, { "epoch": 0.8663619694025562, "grad_norm": 1.036420920509867, "learning_rate": 2.1688307724848227e-06, "loss": 0.6415, "num_input_tokens_seen": 1429164128, "step": 7914 }, { "epoch": 0.8664714414734941, "grad_norm": 1.277267733770561, "learning_rate": 2.1653289853874103e-06, "loss": 0.9209, "num_input_tokens_seen": 1429324064, "step": 7915 }, { "epoch": 0.866580913544432, "grad_norm": 1.1827635418574862, "learning_rate": 2.161829899587933e-06, "loss": 0.8351, "num_input_tokens_seen": 1429507520, "step": 7916 }, { "epoch": 0.8666903856153699, "grad_norm": 1.2171008371785461, "learning_rate": 2.158333515500316e-06, "loss": 1.0325, "num_input_tokens_seen": 1429681792, "step": 7917 }, { "epoch": 0.8667998576863077, "grad_norm": 1.1361792683591982, "learning_rate": 2.1548398335381802e-06, "loss": 0.9532, "num_input_tokens_seen": 1429875776, "step": 7918 }, { "epoch": 0.8669093297572457, "grad_norm": 1.0646908555847083, "learning_rate": 2.151348854114821e-06, "loss": 0.7295, "num_input_tokens_seen": 1430077824, "step": 7919 }, { "epoch": 0.8670188018281836, "grad_norm": 1.2008484443721503, "learning_rate": 2.147860577643207e-06, "loss": 0.9101, "num_input_tokens_seen": 1430275840, "step": 7920 }, { "epoch": 0.8671282738991215, "grad_norm": 1.2755784835445778, "learning_rate": 2.144375004536012e-06, "loss": 1.249, "num_input_tokens_seen": 1430482144, "step": 7921 }, { "epoch": 0.8672377459700594, "grad_norm": 1.170535733739548, "learning_rate": 2.1408921352055496e-06, "loss": 0.8179, "num_input_tokens_seen": 1430691136, "step": 7922 }, { "epoch": 0.8673472180409972, "grad_norm": 1.1462653761636319, "learning_rate": 2.1374119700638575e-06, "loss": 0.8916, "num_input_tokens_seen": 1430857344, "step": 7923 }, { "epoch": 0.8674566901119352, "grad_norm": 1.04151083098451, "learning_rate": 2.1339345095226144e-06, "loss": 0.7561, "num_input_tokens_seen": 1431059168, "step": 7924 }, { "epoch": 0.8675661621828731, "grad_norm": 1.0728463578874872, "learning_rate": 2.1304597539932137e-06, "loss": 0.7718, "num_input_tokens_seen": 1431208800, "step": 7925 }, { "epoch": 0.867675634253811, "grad_norm": 1.0780837009646818, "learning_rate": 2.1269877038867013e-06, "loss": 0.8912, "num_input_tokens_seen": 1431385088, "step": 7926 }, { "epoch": 0.8677851063247489, "grad_norm": 0.9477757604202867, "learning_rate": 2.1235183596138214e-06, "loss": 0.941, "num_input_tokens_seen": 1431588704, "step": 7927 }, { "epoch": 0.8678945783956868, "grad_norm": 1.0611011047512835, "learning_rate": 2.120051721584984e-06, "loss": 0.7733, "num_input_tokens_seen": 1431794336, "step": 7928 }, { "epoch": 0.8680040504666247, "grad_norm": 1.2535960446550332, "learning_rate": 2.1165877902102867e-06, "loss": 1.0319, "num_input_tokens_seen": 1431978240, "step": 7929 }, { "epoch": 0.8681135225375626, "grad_norm": 1.2702417351600808, "learning_rate": 2.113126565899515e-06, "loss": 0.8809, "num_input_tokens_seen": 1432164384, "step": 7930 }, { "epoch": 0.8682229946085005, "grad_norm": 1.1563723215920139, "learning_rate": 2.1096680490621107e-06, "loss": 1.0229, "num_input_tokens_seen": 1432323872, "step": 7931 }, { "epoch": 0.8683324666794384, "grad_norm": 1.2383820886798917, "learning_rate": 2.106212240107225e-06, "loss": 0.8813, "num_input_tokens_seen": 1432488512, "step": 7932 }, { "epoch": 0.8684419387503763, "grad_norm": 1.0141837824868942, "learning_rate": 2.102759139443658e-06, "loss": 0.6869, "num_input_tokens_seen": 1432624928, "step": 7933 }, { "epoch": 0.8685514108213143, "grad_norm": 1.1704062349192497, "learning_rate": 2.0993087474799166e-06, "loss": 0.9822, "num_input_tokens_seen": 1432839520, "step": 7934 }, { "epoch": 0.8686608828922521, "grad_norm": 1.0869473489084056, "learning_rate": 2.0958610646241717e-06, "loss": 1.054, "num_input_tokens_seen": 1433056576, "step": 7935 }, { "epoch": 0.86877035496319, "grad_norm": 1.1470205167915948, "learning_rate": 2.09241609128428e-06, "loss": 0.7834, "num_input_tokens_seen": 1433224128, "step": 7936 }, { "epoch": 0.8688798270341279, "grad_norm": 1.1738279218879817, "learning_rate": 2.0889738278677686e-06, "loss": 0.887, "num_input_tokens_seen": 1433401984, "step": 7937 }, { "epoch": 0.8689892991050658, "grad_norm": 1.1341528493715518, "learning_rate": 2.085534274781853e-06, "loss": 0.7389, "num_input_tokens_seen": 1433591264, "step": 7938 }, { "epoch": 0.8690987711760038, "grad_norm": 1.1715697844719688, "learning_rate": 2.0820974324334356e-06, "loss": 0.8667, "num_input_tokens_seen": 1433762176, "step": 7939 }, { "epoch": 0.8692082432469416, "grad_norm": 1.310428847158777, "learning_rate": 2.0786633012290723e-06, "loss": 1.0546, "num_input_tokens_seen": 1433930848, "step": 7940 }, { "epoch": 0.8693177153178795, "grad_norm": 1.227295723211582, "learning_rate": 2.0752318815750265e-06, "loss": 0.8716, "num_input_tokens_seen": 1434077120, "step": 7941 }, { "epoch": 0.8694271873888174, "grad_norm": 1.0248067301458463, "learning_rate": 2.0718031738772265e-06, "loss": 0.7107, "num_input_tokens_seen": 1434270432, "step": 7942 }, { "epoch": 0.8695366594597553, "grad_norm": 1.088205288593136, "learning_rate": 2.068377178541275e-06, "loss": 0.6412, "num_input_tokens_seen": 1434432832, "step": 7943 }, { "epoch": 0.8696461315306933, "grad_norm": 1.147853261554649, "learning_rate": 2.0649538959724686e-06, "loss": 0.8234, "num_input_tokens_seen": 1434610240, "step": 7944 }, { "epoch": 0.8697556036016312, "grad_norm": 1.0977666398874213, "learning_rate": 2.0615333265757737e-06, "loss": 0.6637, "num_input_tokens_seen": 1434799744, "step": 7945 }, { "epoch": 0.869865075672569, "grad_norm": 1.1635810829437168, "learning_rate": 2.058115470755831e-06, "loss": 0.6997, "num_input_tokens_seen": 1434969088, "step": 7946 }, { "epoch": 0.8699745477435069, "grad_norm": 1.1543803176798708, "learning_rate": 2.0547003289169724e-06, "loss": 0.8498, "num_input_tokens_seen": 1435127008, "step": 7947 }, { "epoch": 0.8700840198144448, "grad_norm": 1.10288667562877, "learning_rate": 2.0512879014631976e-06, "loss": 0.8177, "num_input_tokens_seen": 1435333536, "step": 7948 }, { "epoch": 0.8701934918853828, "grad_norm": 1.1852799795463222, "learning_rate": 2.047878188798197e-06, "loss": 0.8377, "num_input_tokens_seen": 1435527296, "step": 7949 }, { "epoch": 0.8703029639563207, "grad_norm": 0.99063210710449, "learning_rate": 2.0444711913253312e-06, "loss": 0.7697, "num_input_tokens_seen": 1435710304, "step": 7950 }, { "epoch": 0.8704124360272586, "grad_norm": 1.2143777033078296, "learning_rate": 2.041066909447639e-06, "loss": 0.8658, "num_input_tokens_seen": 1435906752, "step": 7951 }, { "epoch": 0.8705219080981964, "grad_norm": 1.2270101714330532, "learning_rate": 2.0376653435678405e-06, "loss": 0.8749, "num_input_tokens_seen": 1436108352, "step": 7952 }, { "epoch": 0.8706313801691343, "grad_norm": 1.0669217383365888, "learning_rate": 2.0342664940883353e-06, "loss": 0.6855, "num_input_tokens_seen": 1436282400, "step": 7953 }, { "epoch": 0.8707408522400722, "grad_norm": 1.1790648073790069, "learning_rate": 2.030870361411202e-06, "loss": 1.1256, "num_input_tokens_seen": 1436473696, "step": 7954 }, { "epoch": 0.8708503243110102, "grad_norm": 1.3055444518534713, "learning_rate": 2.027476945938189e-06, "loss": 1.1362, "num_input_tokens_seen": 1436646400, "step": 7955 }, { "epoch": 0.8709597963819481, "grad_norm": 0.9880607570102203, "learning_rate": 2.0240862480707475e-06, "loss": 0.8049, "num_input_tokens_seen": 1436832992, "step": 7956 }, { "epoch": 0.8710692684528859, "grad_norm": 1.035859757810367, "learning_rate": 2.0206982682099723e-06, "loss": 1.0491, "num_input_tokens_seen": 1437024960, "step": 7957 }, { "epoch": 0.8711787405238238, "grad_norm": 1.0665957663238352, "learning_rate": 2.017313006756666e-06, "loss": 0.8016, "num_input_tokens_seen": 1437203040, "step": 7958 }, { "epoch": 0.8712882125947617, "grad_norm": 1.1780606097932205, "learning_rate": 2.0139304641112966e-06, "loss": 0.8894, "num_input_tokens_seen": 1437396800, "step": 7959 }, { "epoch": 0.8713976846656997, "grad_norm": 1.260277222835568, "learning_rate": 2.010550640674011e-06, "loss": 0.9047, "num_input_tokens_seen": 1437573088, "step": 7960 }, { "epoch": 0.8715071567366376, "grad_norm": 1.0131875941186086, "learning_rate": 2.0071735368446364e-06, "loss": 0.8187, "num_input_tokens_seen": 1437775360, "step": 7961 }, { "epoch": 0.8716166288075755, "grad_norm": 1.1522257598988999, "learning_rate": 2.003799153022673e-06, "loss": 0.8468, "num_input_tokens_seen": 1437974272, "step": 7962 }, { "epoch": 0.8717261008785133, "grad_norm": 1.1375222528817124, "learning_rate": 2.0004274896073176e-06, "loss": 0.7723, "num_input_tokens_seen": 1438170720, "step": 7963 }, { "epoch": 0.8718355729494512, "grad_norm": 1.0883573835460265, "learning_rate": 1.9970585469974127e-06, "loss": 0.97, "num_input_tokens_seen": 1438372320, "step": 7964 }, { "epoch": 0.8719450450203892, "grad_norm": 1.0204263316940678, "learning_rate": 1.9936923255915175e-06, "loss": 0.8117, "num_input_tokens_seen": 1438589376, "step": 7965 }, { "epoch": 0.8720545170913271, "grad_norm": 1.1330162654325313, "learning_rate": 1.9903288257878292e-06, "loss": 0.7077, "num_input_tokens_seen": 1438776416, "step": 7966 }, { "epoch": 0.872163989162265, "grad_norm": 1.1146244445539295, "learning_rate": 1.986968047984261e-06, "loss": 0.9362, "num_input_tokens_seen": 1438984064, "step": 7967 }, { "epoch": 0.8722734612332029, "grad_norm": 1.1739554693169623, "learning_rate": 1.983609992578375e-06, "loss": 0.9842, "num_input_tokens_seen": 1439177376, "step": 7968 }, { "epoch": 0.8723829333041407, "grad_norm": 1.166131708702199, "learning_rate": 1.9802546599674313e-06, "loss": 1.1032, "num_input_tokens_seen": 1439351424, "step": 7969 }, { "epoch": 0.8724924053750787, "grad_norm": 1.1597599675678787, "learning_rate": 1.9769020505483544e-06, "loss": 0.9115, "num_input_tokens_seen": 1439527040, "step": 7970 }, { "epoch": 0.8726018774460166, "grad_norm": 1.303656164726055, "learning_rate": 1.973552164717746e-06, "loss": 0.9054, "num_input_tokens_seen": 1439734912, "step": 7971 }, { "epoch": 0.8727113495169545, "grad_norm": 1.1008947526448938, "learning_rate": 1.9702050028719056e-06, "loss": 0.9975, "num_input_tokens_seen": 1439918816, "step": 7972 }, { "epoch": 0.8728208215878924, "grad_norm": 1.1970097080050561, "learning_rate": 1.9668605654067805e-06, "loss": 0.8377, "num_input_tokens_seen": 1440094208, "step": 7973 }, { "epoch": 0.8729302936588302, "grad_norm": 1.1772063480789323, "learning_rate": 1.9635188527180244e-06, "loss": 0.9173, "num_input_tokens_seen": 1440272512, "step": 7974 }, { "epoch": 0.8730397657297682, "grad_norm": 1.1611789601157378, "learning_rate": 1.960179865200948e-06, "loss": 0.7722, "num_input_tokens_seen": 1440415872, "step": 7975 }, { "epoch": 0.8731492378007061, "grad_norm": 1.129400758771471, "learning_rate": 1.9568436032505493e-06, "loss": 0.83, "num_input_tokens_seen": 1440602688, "step": 7976 }, { "epoch": 0.873258709871644, "grad_norm": 1.2654948226479166, "learning_rate": 1.953510067261499e-06, "loss": 0.897, "num_input_tokens_seen": 1440794656, "step": 7977 }, { "epoch": 0.8733681819425819, "grad_norm": 1.1922968803014928, "learning_rate": 1.950179257628154e-06, "loss": 1.0808, "num_input_tokens_seen": 1440986400, "step": 7978 }, { "epoch": 0.8734776540135198, "grad_norm": 1.3332134381668523, "learning_rate": 1.946851174744538e-06, "loss": 0.9389, "num_input_tokens_seen": 1441177024, "step": 7979 }, { "epoch": 0.8735871260844577, "grad_norm": 1.0544448573425775, "learning_rate": 1.943525819004352e-06, "loss": 0.8517, "num_input_tokens_seen": 1441377504, "step": 7980 }, { "epoch": 0.8736965981553956, "grad_norm": 1.0922893034677639, "learning_rate": 1.9402031908009904e-06, "loss": 0.9728, "num_input_tokens_seen": 1441543040, "step": 7981 }, { "epoch": 0.8738060702263335, "grad_norm": 1.2332705131632722, "learning_rate": 1.936883290527508e-06, "loss": 0.9247, "num_input_tokens_seen": 1441724256, "step": 7982 }, { "epoch": 0.8739155422972714, "grad_norm": 1.1144270505104512, "learning_rate": 1.9335661185766436e-06, "loss": 0.8652, "num_input_tokens_seen": 1441917344, "step": 7983 }, { "epoch": 0.8740250143682093, "grad_norm": 1.1309940343610096, "learning_rate": 1.9302516753408136e-06, "loss": 0.8265, "num_input_tokens_seen": 1442082432, "step": 7984 }, { "epoch": 0.8741344864391473, "grad_norm": 1.1770845205025326, "learning_rate": 1.926939961212107e-06, "loss": 0.8603, "num_input_tokens_seen": 1442282240, "step": 7985 }, { "epoch": 0.8742439585100851, "grad_norm": 1.2169893703388293, "learning_rate": 1.923630976582294e-06, "loss": 1.0881, "num_input_tokens_seen": 1442443296, "step": 7986 }, { "epoch": 0.874353430581023, "grad_norm": 1.1622806711332638, "learning_rate": 1.9203247218428226e-06, "loss": 1.0638, "num_input_tokens_seen": 1442633920, "step": 7987 }, { "epoch": 0.8744629026519609, "grad_norm": 1.2349090386688448, "learning_rate": 1.9170211973848106e-06, "loss": 1.1263, "num_input_tokens_seen": 1442801696, "step": 7988 }, { "epoch": 0.8745723747228988, "grad_norm": 1.034829545197875, "learning_rate": 1.9137204035990704e-06, "loss": 0.8317, "num_input_tokens_seen": 1442992544, "step": 7989 }, { "epoch": 0.8746818467938368, "grad_norm": 1.0948763905610122, "learning_rate": 1.9104223408760698e-06, "loss": 0.805, "num_input_tokens_seen": 1443156288, "step": 7990 }, { "epoch": 0.8747913188647746, "grad_norm": 1.3052907223080026, "learning_rate": 1.907127009605969e-06, "loss": 0.9913, "num_input_tokens_seen": 1443312416, "step": 7991 }, { "epoch": 0.8749007909357125, "grad_norm": 1.2560002795206962, "learning_rate": 1.9038344101785954e-06, "loss": 0.9264, "num_input_tokens_seen": 1443474368, "step": 7992 }, { "epoch": 0.8750102630066504, "grad_norm": 1.184384575958593, "learning_rate": 1.9005445429834595e-06, "loss": 0.8144, "num_input_tokens_seen": 1443650656, "step": 7993 }, { "epoch": 0.8751197350775883, "grad_norm": 1.0493638359416575, "learning_rate": 1.8972574084097472e-06, "loss": 1.0021, "num_input_tokens_seen": 1443844640, "step": 7994 }, { "epoch": 0.8752292071485263, "grad_norm": 1.1511206391229458, "learning_rate": 1.8939730068463114e-06, "loss": 0.955, "num_input_tokens_seen": 1444039744, "step": 7995 }, { "epoch": 0.8753386792194642, "grad_norm": 1.2266407352450133, "learning_rate": 1.8906913386817077e-06, "loss": 0.8502, "num_input_tokens_seen": 1444210880, "step": 7996 }, { "epoch": 0.875448151290402, "grad_norm": 1.0889683424146224, "learning_rate": 1.8874124043041314e-06, "loss": 0.8335, "num_input_tokens_seen": 1444397248, "step": 7997 }, { "epoch": 0.8755576233613399, "grad_norm": 0.9977786306480476, "learning_rate": 1.8841362041014944e-06, "loss": 0.6665, "num_input_tokens_seen": 1444570624, "step": 7998 }, { "epoch": 0.8756670954322778, "grad_norm": 1.1491373226484773, "learning_rate": 1.8808627384613448e-06, "loss": 1.1255, "num_input_tokens_seen": 1444762592, "step": 7999 }, { "epoch": 0.8757765675032158, "grad_norm": 1.1700819412294525, "learning_rate": 1.8775920077709397e-06, "loss": 1.0141, "num_input_tokens_seen": 1444942688, "step": 8000 }, { "epoch": 0.8758860395741537, "grad_norm": 1.08836318869385, "learning_rate": 1.8743240124172002e-06, "loss": 0.6989, "num_input_tokens_seen": 1445108672, "step": 8001 }, { "epoch": 0.8759955116450916, "grad_norm": 1.1750846142434654, "learning_rate": 1.8710587527867196e-06, "loss": 0.7678, "num_input_tokens_seen": 1445253152, "step": 8002 }, { "epoch": 0.8761049837160294, "grad_norm": 1.0846986359134796, "learning_rate": 1.8677962292657724e-06, "loss": 0.7638, "num_input_tokens_seen": 1445420032, "step": 8003 }, { "epoch": 0.8762144557869673, "grad_norm": 1.157264934177253, "learning_rate": 1.8645364422403083e-06, "loss": 0.7675, "num_input_tokens_seen": 1445598560, "step": 8004 }, { "epoch": 0.8763239278579052, "grad_norm": 1.3302316451015277, "learning_rate": 1.8612793920959632e-06, "loss": 0.9291, "num_input_tokens_seen": 1445749088, "step": 8005 }, { "epoch": 0.8764333999288432, "grad_norm": 1.0556453734939413, "learning_rate": 1.8580250792180232e-06, "loss": 0.7447, "num_input_tokens_seen": 1445935904, "step": 8006 }, { "epoch": 0.8765428719997811, "grad_norm": 1.1841564792156363, "learning_rate": 1.8547735039914859e-06, "loss": 1.0742, "num_input_tokens_seen": 1446120928, "step": 8007 }, { "epoch": 0.8766523440707189, "grad_norm": 1.2467862300775732, "learning_rate": 1.8515246668009883e-06, "loss": 0.9839, "num_input_tokens_seen": 1446295424, "step": 8008 }, { "epoch": 0.8767618161416568, "grad_norm": 1.1729508103075696, "learning_rate": 1.8482785680308728e-06, "loss": 0.8744, "num_input_tokens_seen": 1446488960, "step": 8009 }, { "epoch": 0.8768712882125947, "grad_norm": 1.1368726383664944, "learning_rate": 1.845035208065146e-06, "loss": 0.8698, "num_input_tokens_seen": 1446676000, "step": 8010 }, { "epoch": 0.8769807602835327, "grad_norm": 1.179958287164526, "learning_rate": 1.8417945872874875e-06, "loss": 0.8216, "num_input_tokens_seen": 1446831232, "step": 8011 }, { "epoch": 0.8770902323544706, "grad_norm": 1.039574747808217, "learning_rate": 1.8385567060812598e-06, "loss": 1.0143, "num_input_tokens_seen": 1447016928, "step": 8012 }, { "epoch": 0.8771997044254085, "grad_norm": 1.0674218452485438, "learning_rate": 1.8353215648294925e-06, "loss": 0.7935, "num_input_tokens_seen": 1447212928, "step": 8013 }, { "epoch": 0.8773091764963463, "grad_norm": 1.0561552219642647, "learning_rate": 1.8320891639149101e-06, "loss": 0.8023, "num_input_tokens_seen": 1447387424, "step": 8014 }, { "epoch": 0.8774186485672842, "grad_norm": 1.2228451908849138, "learning_rate": 1.828859503719879e-06, "loss": 0.8365, "num_input_tokens_seen": 1447562592, "step": 8015 }, { "epoch": 0.8775281206382222, "grad_norm": 1.0606581487291635, "learning_rate": 1.82563258462648e-06, "loss": 0.7126, "num_input_tokens_seen": 1447718720, "step": 8016 }, { "epoch": 0.8776375927091601, "grad_norm": 1.084488100425401, "learning_rate": 1.8224084070164405e-06, "loss": 0.7217, "num_input_tokens_seen": 1447912704, "step": 8017 }, { "epoch": 0.877747064780098, "grad_norm": 1.1510008665192604, "learning_rate": 1.8191869712711807e-06, "loss": 1.1072, "num_input_tokens_seen": 1448108928, "step": 8018 }, { "epoch": 0.8778565368510359, "grad_norm": 1.282124476788706, "learning_rate": 1.81596827777179e-06, "loss": 1.1257, "num_input_tokens_seen": 1448298880, "step": 8019 }, { "epoch": 0.8779660089219737, "grad_norm": 1.0958602710985912, "learning_rate": 1.8127523268990282e-06, "loss": 1.0175, "num_input_tokens_seen": 1448476064, "step": 8020 }, { "epoch": 0.8780754809929117, "grad_norm": 1.2285677318105495, "learning_rate": 1.8095391190333404e-06, "loss": 1.049, "num_input_tokens_seen": 1448653248, "step": 8021 }, { "epoch": 0.8781849530638496, "grad_norm": 1.1837723096862038, "learning_rate": 1.8063286545548398e-06, "loss": 0.6419, "num_input_tokens_seen": 1448834688, "step": 8022 }, { "epoch": 0.8782944251347875, "grad_norm": 1.1666068618069043, "learning_rate": 1.8031209338433246e-06, "loss": 1.2523, "num_input_tokens_seen": 1449035392, "step": 8023 }, { "epoch": 0.8784038972057254, "grad_norm": 1.1510585536264644, "learning_rate": 1.799915957278256e-06, "loss": 0.9236, "num_input_tokens_seen": 1449248640, "step": 8024 }, { "epoch": 0.8785133692766632, "grad_norm": 1.3345005271170223, "learning_rate": 1.79671372523878e-06, "loss": 1.3181, "num_input_tokens_seen": 1449445760, "step": 8025 }, { "epoch": 0.8786228413476012, "grad_norm": 1.2429702979365311, "learning_rate": 1.7935142381037135e-06, "loss": 0.7695, "num_input_tokens_seen": 1449598304, "step": 8026 }, { "epoch": 0.8787323134185391, "grad_norm": 1.0301201150355024, "learning_rate": 1.7903174962515478e-06, "loss": 0.6428, "num_input_tokens_seen": 1449779296, "step": 8027 }, { "epoch": 0.878841785489477, "grad_norm": 1.1388443412183622, "learning_rate": 1.7871235000604503e-06, "loss": 0.8285, "num_input_tokens_seen": 1449953344, "step": 8028 }, { "epoch": 0.8789512575604149, "grad_norm": 1.2664036651340205, "learning_rate": 1.7839322499082738e-06, "loss": 0.8732, "num_input_tokens_seen": 1450129184, "step": 8029 }, { "epoch": 0.8790607296313528, "grad_norm": 1.070845478201743, "learning_rate": 1.7807437461725252e-06, "loss": 0.9521, "num_input_tokens_seen": 1450331008, "step": 8030 }, { "epoch": 0.8791702017022907, "grad_norm": 1.2825548459199176, "learning_rate": 1.7775579892304051e-06, "loss": 1.0078, "num_input_tokens_seen": 1450483776, "step": 8031 }, { "epoch": 0.8792796737732286, "grad_norm": 1.2019359608473184, "learning_rate": 1.7743749794587817e-06, "loss": 0.8961, "num_input_tokens_seen": 1450654688, "step": 8032 }, { "epoch": 0.8793891458441665, "grad_norm": 1.1173147382505202, "learning_rate": 1.7711947172342009e-06, "loss": 0.7725, "num_input_tokens_seen": 1450849568, "step": 8033 }, { "epoch": 0.8794986179151044, "grad_norm": 0.9970377618779979, "learning_rate": 1.7680172029328757e-06, "loss": 0.7195, "num_input_tokens_seen": 1451050720, "step": 8034 }, { "epoch": 0.8796080899860423, "grad_norm": 1.1265221999918975, "learning_rate": 1.7648424369307e-06, "loss": 1.1587, "num_input_tokens_seen": 1451231264, "step": 8035 }, { "epoch": 0.8797175620569803, "grad_norm": 1.2542360531611485, "learning_rate": 1.7616704196032564e-06, "loss": 1.1482, "num_input_tokens_seen": 1451401504, "step": 8036 }, { "epoch": 0.8798270341279181, "grad_norm": 1.0193566066782882, "learning_rate": 1.758501151325767e-06, "loss": 0.7707, "num_input_tokens_seen": 1451575104, "step": 8037 }, { "epoch": 0.879936506198856, "grad_norm": 0.9785444365807299, "learning_rate": 1.7553346324731712e-06, "loss": 0.6864, "num_input_tokens_seen": 1451754528, "step": 8038 }, { "epoch": 0.8800459782697939, "grad_norm": 0.9873921914946849, "learning_rate": 1.7521708634200413e-06, "loss": 0.7789, "num_input_tokens_seen": 1451941568, "step": 8039 }, { "epoch": 0.8801554503407318, "grad_norm": 1.0770969741715037, "learning_rate": 1.7490098445406667e-06, "loss": 0.898, "num_input_tokens_seen": 1452157504, "step": 8040 }, { "epoch": 0.8802649224116698, "grad_norm": 1.1370240945133512, "learning_rate": 1.7458515762089706e-06, "loss": 0.7523, "num_input_tokens_seen": 1452342528, "step": 8041 }, { "epoch": 0.8803743944826076, "grad_norm": 1.170877413525155, "learning_rate": 1.742696058798582e-06, "loss": 0.9647, "num_input_tokens_seen": 1452514784, "step": 8042 }, { "epoch": 0.8804838665535455, "grad_norm": 1.310028564927927, "learning_rate": 1.7395432926827909e-06, "loss": 0.7674, "num_input_tokens_seen": 1452692192, "step": 8043 }, { "epoch": 0.8805933386244834, "grad_norm": 1.2201599377402874, "learning_rate": 1.7363932782345603e-06, "loss": 0.7177, "num_input_tokens_seen": 1452884384, "step": 8044 }, { "epoch": 0.8807028106954213, "grad_norm": 1.3100625065458247, "learning_rate": 1.7332460158265313e-06, "loss": 0.9239, "num_input_tokens_seen": 1453081504, "step": 8045 }, { "epoch": 0.8808122827663593, "grad_norm": 1.0254152131538572, "learning_rate": 1.7301015058310194e-06, "loss": 0.705, "num_input_tokens_seen": 1453268320, "step": 8046 }, { "epoch": 0.8809217548372972, "grad_norm": 1.0339276778497042, "learning_rate": 1.726959748620019e-06, "loss": 0.6399, "num_input_tokens_seen": 1453453792, "step": 8047 }, { "epoch": 0.881031226908235, "grad_norm": 1.1473676408215223, "learning_rate": 1.7238207445651855e-06, "loss": 0.8233, "num_input_tokens_seen": 1453638144, "step": 8048 }, { "epoch": 0.8811406989791729, "grad_norm": 1.143861385820952, "learning_rate": 1.7206844940378636e-06, "loss": 0.7879, "num_input_tokens_seen": 1453804800, "step": 8049 }, { "epoch": 0.8812501710501108, "grad_norm": 1.084327602175102, "learning_rate": 1.7175509974090647e-06, "loss": 0.9437, "num_input_tokens_seen": 1453989600, "step": 8050 }, { "epoch": 0.8813596431210488, "grad_norm": 1.2197747934913448, "learning_rate": 1.714420255049473e-06, "loss": 0.866, "num_input_tokens_seen": 1454163872, "step": 8051 }, { "epoch": 0.8814691151919867, "grad_norm": 1.1064092726195462, "learning_rate": 1.7112922673294507e-06, "loss": 0.9615, "num_input_tokens_seen": 1454343296, "step": 8052 }, { "epoch": 0.8815785872629246, "grad_norm": 1.0681881392529873, "learning_rate": 1.708167034619032e-06, "loss": 0.7319, "num_input_tokens_seen": 1454501888, "step": 8053 }, { "epoch": 0.8816880593338624, "grad_norm": 1.1710178037604457, "learning_rate": 1.705044557287927e-06, "loss": 0.9225, "num_input_tokens_seen": 1454681536, "step": 8054 }, { "epoch": 0.8817975314048003, "grad_norm": 1.139511296275289, "learning_rate": 1.701924835705515e-06, "loss": 0.8514, "num_input_tokens_seen": 1454873952, "step": 8055 }, { "epoch": 0.8819070034757382, "grad_norm": 1.202507702888038, "learning_rate": 1.6988078702408622e-06, "loss": 0.9843, "num_input_tokens_seen": 1455029408, "step": 8056 }, { "epoch": 0.8820164755466762, "grad_norm": 0.9977741204684214, "learning_rate": 1.6956936612626928e-06, "loss": 0.6851, "num_input_tokens_seen": 1455220704, "step": 8057 }, { "epoch": 0.8821259476176141, "grad_norm": 1.1790738056853078, "learning_rate": 1.6925822091394121e-06, "loss": 0.8278, "num_input_tokens_seen": 1455431936, "step": 8058 }, { "epoch": 0.8822354196885519, "grad_norm": 0.9925363182802512, "learning_rate": 1.689473514239101e-06, "loss": 0.8114, "num_input_tokens_seen": 1455650784, "step": 8059 }, { "epoch": 0.8823448917594898, "grad_norm": 1.201720817860744, "learning_rate": 1.6863675769295096e-06, "loss": 1.0355, "num_input_tokens_seen": 1455812288, "step": 8060 }, { "epoch": 0.8824543638304277, "grad_norm": 1.1161358558587393, "learning_rate": 1.683264397578066e-06, "loss": 0.6941, "num_input_tokens_seen": 1455980512, "step": 8061 }, { "epoch": 0.8825638359013657, "grad_norm": 1.0608780767010306, "learning_rate": 1.6801639765518712e-06, "loss": 0.7013, "num_input_tokens_seen": 1456156800, "step": 8062 }, { "epoch": 0.8826733079723036, "grad_norm": 1.1568711648524028, "learning_rate": 1.6770663142176957e-06, "loss": 0.9795, "num_input_tokens_seen": 1456328608, "step": 8063 }, { "epoch": 0.8827827800432415, "grad_norm": 1.2188892773983373, "learning_rate": 1.6739714109419907e-06, "loss": 0.8617, "num_input_tokens_seen": 1456512736, "step": 8064 }, { "epoch": 0.8828922521141793, "grad_norm": 1.1281504493442311, "learning_rate": 1.6708792670908746e-06, "loss": 0.8722, "num_input_tokens_seen": 1456709408, "step": 8065 }, { "epoch": 0.8830017241851172, "grad_norm": 1.1169324463691603, "learning_rate": 1.6677898830301463e-06, "loss": 0.8306, "num_input_tokens_seen": 1456896672, "step": 8066 }, { "epoch": 0.8831111962560552, "grad_norm": 1.1583645532823794, "learning_rate": 1.664703259125272e-06, "loss": 0.7984, "num_input_tokens_seen": 1457034208, "step": 8067 }, { "epoch": 0.8832206683269931, "grad_norm": 1.067102728809796, "learning_rate": 1.66161939574139e-06, "loss": 0.7835, "num_input_tokens_seen": 1457187648, "step": 8068 }, { "epoch": 0.883330140397931, "grad_norm": 1.150693584762331, "learning_rate": 1.6585382932433197e-06, "loss": 1.1522, "num_input_tokens_seen": 1457349376, "step": 8069 }, { "epoch": 0.8834396124688689, "grad_norm": 0.968599276969194, "learning_rate": 1.6554599519955417e-06, "loss": 1.0939, "num_input_tokens_seen": 1457573376, "step": 8070 }, { "epoch": 0.8835490845398067, "grad_norm": 1.2791306249322814, "learning_rate": 1.652384372362234e-06, "loss": 1.0179, "num_input_tokens_seen": 1457766240, "step": 8071 }, { "epoch": 0.8836585566107447, "grad_norm": 1.1772150061674587, "learning_rate": 1.649311554707214e-06, "loss": 0.7176, "num_input_tokens_seen": 1457947232, "step": 8072 }, { "epoch": 0.8837680286816826, "grad_norm": 1.0209628756066522, "learning_rate": 1.6462414993940023e-06, "loss": 0.8226, "num_input_tokens_seen": 1458161600, "step": 8073 }, { "epoch": 0.8838775007526205, "grad_norm": 1.201623698289508, "learning_rate": 1.6431742067857775e-06, "loss": 0.8731, "num_input_tokens_seen": 1458339232, "step": 8074 }, { "epoch": 0.8839869728235584, "grad_norm": 1.193628159142014, "learning_rate": 1.6401096772453912e-06, "loss": 0.6466, "num_input_tokens_seen": 1458492224, "step": 8075 }, { "epoch": 0.8840964448944962, "grad_norm": 1.2496417008917415, "learning_rate": 1.6370479111353754e-06, "loss": 0.8376, "num_input_tokens_seen": 1458677472, "step": 8076 }, { "epoch": 0.8842059169654342, "grad_norm": 1.1849772107472085, "learning_rate": 1.633988908817924e-06, "loss": 1.002, "num_input_tokens_seen": 1458847264, "step": 8077 }, { "epoch": 0.8843153890363721, "grad_norm": 1.0543091271880354, "learning_rate": 1.6309326706549221e-06, "loss": 0.8866, "num_input_tokens_seen": 1459009664, "step": 8078 }, { "epoch": 0.88442486110731, "grad_norm": 1.015359579763569, "learning_rate": 1.627879197007906e-06, "loss": 0.8243, "num_input_tokens_seen": 1459196928, "step": 8079 }, { "epoch": 0.8845343331782479, "grad_norm": 1.1254245698649685, "learning_rate": 1.6248284882381087e-06, "loss": 0.7807, "num_input_tokens_seen": 1459371648, "step": 8080 }, { "epoch": 0.8846438052491858, "grad_norm": 1.0895923611696656, "learning_rate": 1.6217805447064083e-06, "loss": 0.9257, "num_input_tokens_seen": 1459573248, "step": 8081 }, { "epoch": 0.8847532773201237, "grad_norm": 1.084134423120733, "learning_rate": 1.6187353667733856e-06, "loss": 0.8314, "num_input_tokens_seen": 1459762080, "step": 8082 }, { "epoch": 0.8848627493910616, "grad_norm": 1.1857602274689092, "learning_rate": 1.6156929547992638e-06, "loss": 1.12, "num_input_tokens_seen": 1459927840, "step": 8083 }, { "epoch": 0.8849722214619995, "grad_norm": 1.0923774826871921, "learning_rate": 1.6126533091439661e-06, "loss": 0.672, "num_input_tokens_seen": 1460118912, "step": 8084 }, { "epoch": 0.8850816935329374, "grad_norm": 1.4222258250286908, "learning_rate": 1.6096164301670712e-06, "loss": 0.7913, "num_input_tokens_seen": 1460306400, "step": 8085 }, { "epoch": 0.8851911656038753, "grad_norm": 1.0513681807523059, "learning_rate": 1.6065823182278366e-06, "loss": 0.722, "num_input_tokens_seen": 1460497248, "step": 8086 }, { "epoch": 0.8853006376748133, "grad_norm": 1.1549104715303113, "learning_rate": 1.6035509736851973e-06, "loss": 0.9942, "num_input_tokens_seen": 1460651808, "step": 8087 }, { "epoch": 0.8854101097457511, "grad_norm": 1.343205541270074, "learning_rate": 1.6005223968977468e-06, "loss": 0.9091, "num_input_tokens_seen": 1460810400, "step": 8088 }, { "epoch": 0.885519581816689, "grad_norm": 1.260488518592631, "learning_rate": 1.597496588223768e-06, "loss": 0.9068, "num_input_tokens_seen": 1460989152, "step": 8089 }, { "epoch": 0.8856290538876269, "grad_norm": 1.2248573356387886, "learning_rate": 1.5944735480212026e-06, "loss": 0.8857, "num_input_tokens_seen": 1461173056, "step": 8090 }, { "epoch": 0.8857385259585648, "grad_norm": 1.2308686106246651, "learning_rate": 1.5914532766476753e-06, "loss": 0.818, "num_input_tokens_seen": 1461334112, "step": 8091 }, { "epoch": 0.8858479980295028, "grad_norm": 1.310405240314844, "learning_rate": 1.5884357744604756e-06, "loss": 1.0169, "num_input_tokens_seen": 1461464928, "step": 8092 }, { "epoch": 0.8859574701004407, "grad_norm": 1.115203857004318, "learning_rate": 1.5854210418165678e-06, "loss": 1.1004, "num_input_tokens_seen": 1461685120, "step": 8093 }, { "epoch": 0.8860669421713785, "grad_norm": 1.1170864459663992, "learning_rate": 1.5824090790725887e-06, "loss": 0.6956, "num_input_tokens_seen": 1461862976, "step": 8094 }, { "epoch": 0.8861764142423164, "grad_norm": 0.9880486316353108, "learning_rate": 1.579399886584848e-06, "loss": 0.8613, "num_input_tokens_seen": 1462030752, "step": 8095 }, { "epoch": 0.8862858863132543, "grad_norm": 1.087784493164446, "learning_rate": 1.5763934647093275e-06, "loss": 0.8494, "num_input_tokens_seen": 1462192256, "step": 8096 }, { "epoch": 0.8863953583841923, "grad_norm": 1.4268200578108003, "learning_rate": 1.5733898138016845e-06, "loss": 1.0877, "num_input_tokens_seen": 1462351744, "step": 8097 }, { "epoch": 0.8865048304551302, "grad_norm": 1.0647046833090466, "learning_rate": 1.5703889342172401e-06, "loss": 0.8029, "num_input_tokens_seen": 1462551776, "step": 8098 }, { "epoch": 0.886614302526068, "grad_norm": 1.1013667155841669, "learning_rate": 1.5673908263109966e-06, "loss": 0.859, "num_input_tokens_seen": 1462700288, "step": 8099 }, { "epoch": 0.8867237745970059, "grad_norm": 1.217953201400237, "learning_rate": 1.564395490437623e-06, "loss": 0.963, "num_input_tokens_seen": 1462899872, "step": 8100 }, { "epoch": 0.8868332466679438, "grad_norm": 1.0502610744881298, "learning_rate": 1.5614029269514634e-06, "loss": 0.9545, "num_input_tokens_seen": 1463092736, "step": 8101 }, { "epoch": 0.8869427187388818, "grad_norm": 1.1916432129390342, "learning_rate": 1.558413136206527e-06, "loss": 0.9477, "num_input_tokens_seen": 1463301728, "step": 8102 }, { "epoch": 0.8870521908098197, "grad_norm": 1.211505977954356, "learning_rate": 1.5554261185565022e-06, "loss": 0.6839, "num_input_tokens_seen": 1463452928, "step": 8103 }, { "epoch": 0.8871616628807576, "grad_norm": 1.1289152257225898, "learning_rate": 1.552441874354757e-06, "loss": 0.8789, "num_input_tokens_seen": 1463607488, "step": 8104 }, { "epoch": 0.8872711349516954, "grad_norm": 1.1810399065438046, "learning_rate": 1.5494604039543058e-06, "loss": 0.6964, "num_input_tokens_seen": 1463797888, "step": 8105 }, { "epoch": 0.8873806070226333, "grad_norm": 1.1637977964786503, "learning_rate": 1.5464817077078614e-06, "loss": 0.8435, "num_input_tokens_seen": 1463946624, "step": 8106 }, { "epoch": 0.8874900790935712, "grad_norm": 1.2264699718782133, "learning_rate": 1.543505785967797e-06, "loss": 0.9518, "num_input_tokens_seen": 1464126720, "step": 8107 }, { "epoch": 0.8875995511645092, "grad_norm": 1.0085664634647986, "learning_rate": 1.5405326390861562e-06, "loss": 0.6915, "num_input_tokens_seen": 1464311968, "step": 8108 }, { "epoch": 0.8877090232354471, "grad_norm": 1.1545367048235102, "learning_rate": 1.5375622674146577e-06, "loss": 0.9181, "num_input_tokens_seen": 1464499680, "step": 8109 }, { "epoch": 0.887818495306385, "grad_norm": 1.1772352587574286, "learning_rate": 1.5345946713046872e-06, "loss": 0.9051, "num_input_tokens_seen": 1464691424, "step": 8110 }, { "epoch": 0.8879279673773228, "grad_norm": 1.3691434982639699, "learning_rate": 1.5316298511073164e-06, "loss": 0.7612, "num_input_tokens_seen": 1464858304, "step": 8111 }, { "epoch": 0.8880374394482607, "grad_norm": 1.196457906212925, "learning_rate": 1.528667807173262e-06, "loss": 1.09, "num_input_tokens_seen": 1465057888, "step": 8112 }, { "epoch": 0.8881469115191987, "grad_norm": 1.022606257301523, "learning_rate": 1.5257085398529436e-06, "loss": 0.6979, "num_input_tokens_seen": 1465237536, "step": 8113 }, { "epoch": 0.8882563835901366, "grad_norm": 1.100340848913434, "learning_rate": 1.5227520494964232e-06, "loss": 0.8239, "num_input_tokens_seen": 1465443392, "step": 8114 }, { "epoch": 0.8883658556610745, "grad_norm": 1.1962556297145859, "learning_rate": 1.5197983364534597e-06, "loss": 0.7103, "num_input_tokens_seen": 1465636704, "step": 8115 }, { "epoch": 0.8884753277320123, "grad_norm": 1.062989066267797, "learning_rate": 1.5168474010734622e-06, "loss": 0.6803, "num_input_tokens_seen": 1465819488, "step": 8116 }, { "epoch": 0.8885847998029502, "grad_norm": 1.1989500260362194, "learning_rate": 1.5138992437055299e-06, "loss": 0.8589, "num_input_tokens_seen": 1466013472, "step": 8117 }, { "epoch": 0.8886942718738882, "grad_norm": 1.1359480509733677, "learning_rate": 1.5109538646984167e-06, "loss": 0.8557, "num_input_tokens_seen": 1466190656, "step": 8118 }, { "epoch": 0.8888037439448261, "grad_norm": 1.1076548497808245, "learning_rate": 1.5080112644005523e-06, "loss": 1.0078, "num_input_tokens_seen": 1466368736, "step": 8119 }, { "epoch": 0.888913216015764, "grad_norm": 1.106145644788579, "learning_rate": 1.5050714431600554e-06, "loss": 0.888, "num_input_tokens_seen": 1466570336, "step": 8120 }, { "epoch": 0.8890226880867019, "grad_norm": 1.190638560620877, "learning_rate": 1.502134401324687e-06, "loss": 0.85, "num_input_tokens_seen": 1466713696, "step": 8121 }, { "epoch": 0.8891321601576397, "grad_norm": 1.1796165091267716, "learning_rate": 1.499200139241902e-06, "loss": 0.9244, "num_input_tokens_seen": 1466895360, "step": 8122 }, { "epoch": 0.8892416322285777, "grad_norm": 1.0973275666562383, "learning_rate": 1.4962686572588091e-06, "loss": 0.9121, "num_input_tokens_seen": 1467075008, "step": 8123 }, { "epoch": 0.8893511042995156, "grad_norm": 1.064999613336484, "learning_rate": 1.493339955722206e-06, "loss": 0.8043, "num_input_tokens_seen": 1467251744, "step": 8124 }, { "epoch": 0.8894605763704535, "grad_norm": 1.013216596318676, "learning_rate": 1.4904140349785488e-06, "loss": 1.006, "num_input_tokens_seen": 1467444608, "step": 8125 }, { "epoch": 0.8895700484413914, "grad_norm": 1.1488572475940717, "learning_rate": 1.4874908953739691e-06, "loss": 0.882, "num_input_tokens_seen": 1467621344, "step": 8126 }, { "epoch": 0.8896795205123293, "grad_norm": 1.1341019756376958, "learning_rate": 1.4845705372542707e-06, "loss": 0.6848, "num_input_tokens_seen": 1467790240, "step": 8127 }, { "epoch": 0.8897889925832672, "grad_norm": 1.1128468519274008, "learning_rate": 1.481652960964916e-06, "loss": 0.7481, "num_input_tokens_seen": 1467948384, "step": 8128 }, { "epoch": 0.8898984646542051, "grad_norm": 1.0113784040828226, "learning_rate": 1.4787381668510653e-06, "loss": 0.7216, "num_input_tokens_seen": 1468117056, "step": 8129 }, { "epoch": 0.890007936725143, "grad_norm": 1.1743622895791865, "learning_rate": 1.4758261552575175e-06, "loss": 0.7021, "num_input_tokens_seen": 1468303424, "step": 8130 }, { "epoch": 0.8901174087960809, "grad_norm": 1.2101229974008845, "learning_rate": 1.4729169265287695e-06, "loss": 0.712, "num_input_tokens_seen": 1468495392, "step": 8131 }, { "epoch": 0.8902268808670188, "grad_norm": 1.2072060961455016, "learning_rate": 1.470010481008971e-06, "loss": 0.8649, "num_input_tokens_seen": 1468663392, "step": 8132 }, { "epoch": 0.8903363529379567, "grad_norm": 1.0295970848108917, "learning_rate": 1.4671068190419524e-06, "loss": 0.7878, "num_input_tokens_seen": 1468858720, "step": 8133 }, { "epoch": 0.8904458250088946, "grad_norm": 1.0753425961545042, "learning_rate": 1.4642059409712082e-06, "loss": 0.8735, "num_input_tokens_seen": 1469031424, "step": 8134 }, { "epoch": 0.8905552970798325, "grad_norm": 1.066712723174782, "learning_rate": 1.461307847139909e-06, "loss": 0.7509, "num_input_tokens_seen": 1469213312, "step": 8135 }, { "epoch": 0.8906647691507704, "grad_norm": 1.1990656055989832, "learning_rate": 1.4584125378908935e-06, "loss": 1.0316, "num_input_tokens_seen": 1469415808, "step": 8136 }, { "epoch": 0.8907742412217083, "grad_norm": 1.3168439731645238, "learning_rate": 1.455520013566672e-06, "loss": 0.8483, "num_input_tokens_seen": 1469572832, "step": 8137 }, { "epoch": 0.8908837132926463, "grad_norm": 1.2938965295196863, "learning_rate": 1.45263027450942e-06, "loss": 1.114, "num_input_tokens_seen": 1469761216, "step": 8138 }, { "epoch": 0.8909931853635841, "grad_norm": 1.212956769187025, "learning_rate": 1.4497433210609923e-06, "loss": 0.8024, "num_input_tokens_seen": 1469909056, "step": 8139 }, { "epoch": 0.891102657434522, "grad_norm": 1.2974597118348352, "learning_rate": 1.4468591535629127e-06, "loss": 0.8456, "num_input_tokens_seen": 1470105280, "step": 8140 }, { "epoch": 0.8912121295054599, "grad_norm": 1.3492276967276067, "learning_rate": 1.443977772356367e-06, "loss": 1.0542, "num_input_tokens_seen": 1470271488, "step": 8141 }, { "epoch": 0.8913216015763978, "grad_norm": 1.1929443436819687, "learning_rate": 1.4410991777822209e-06, "loss": 0.7493, "num_input_tokens_seen": 1470434784, "step": 8142 }, { "epoch": 0.8914310736473358, "grad_norm": 1.0597974401943768, "learning_rate": 1.4382233701810022e-06, "loss": 0.8244, "num_input_tokens_seen": 1470615776, "step": 8143 }, { "epoch": 0.8915405457182737, "grad_norm": 1.0486140823041465, "learning_rate": 1.4353503498929193e-06, "loss": 1.0386, "num_input_tokens_seen": 1470816928, "step": 8144 }, { "epoch": 0.8916500177892115, "grad_norm": 1.0536862742566617, "learning_rate": 1.4324801172578366e-06, "loss": 0.878, "num_input_tokens_seen": 1471008896, "step": 8145 }, { "epoch": 0.8917594898601494, "grad_norm": 1.0323623668942945, "learning_rate": 1.4296126726153102e-06, "loss": 0.9795, "num_input_tokens_seen": 1471199072, "step": 8146 }, { "epoch": 0.8918689619310873, "grad_norm": 1.106906185475494, "learning_rate": 1.4267480163045384e-06, "loss": 0.7253, "num_input_tokens_seen": 1471373120, "step": 8147 }, { "epoch": 0.8919784340020253, "grad_norm": 1.1135824766373907, "learning_rate": 1.4238861486644162e-06, "loss": 1.068, "num_input_tokens_seen": 1471572256, "step": 8148 }, { "epoch": 0.8920879060729632, "grad_norm": 1.1606020087130575, "learning_rate": 1.4210270700334927e-06, "loss": 0.6819, "num_input_tokens_seen": 1471724128, "step": 8149 }, { "epoch": 0.892197378143901, "grad_norm": 1.1194901644507655, "learning_rate": 1.4181707807499917e-06, "loss": 0.907, "num_input_tokens_seen": 1471910496, "step": 8150 }, { "epoch": 0.8923068502148389, "grad_norm": 1.208621560721811, "learning_rate": 1.4153172811518067e-06, "loss": 1.1784, "num_input_tokens_seen": 1472081632, "step": 8151 }, { "epoch": 0.8924163222857768, "grad_norm": 1.166725338868331, "learning_rate": 1.4124665715764957e-06, "loss": 0.9356, "num_input_tokens_seen": 1472245376, "step": 8152 }, { "epoch": 0.8925257943567148, "grad_norm": 1.1422378946398841, "learning_rate": 1.4096186523613052e-06, "loss": 0.8015, "num_input_tokens_seen": 1472442496, "step": 8153 }, { "epoch": 0.8926352664276527, "grad_norm": 1.2218212164629834, "learning_rate": 1.4067735238431245e-06, "loss": 0.7774, "num_input_tokens_seen": 1472630432, "step": 8154 }, { "epoch": 0.8927447384985906, "grad_norm": 1.143086429568857, "learning_rate": 1.4039311863585425e-06, "loss": 0.9688, "num_input_tokens_seen": 1472784544, "step": 8155 }, { "epoch": 0.8928542105695284, "grad_norm": 1.0175451467372338, "learning_rate": 1.4010916402437845e-06, "loss": 0.7513, "num_input_tokens_seen": 1472992640, "step": 8156 }, { "epoch": 0.8929636826404663, "grad_norm": 1.090341597846942, "learning_rate": 1.3982548858347738e-06, "loss": 0.933, "num_input_tokens_seen": 1473193568, "step": 8157 }, { "epoch": 0.8930731547114042, "grad_norm": 1.2458562833953732, "learning_rate": 1.3954209234670917e-06, "loss": 0.9874, "num_input_tokens_seen": 1473410624, "step": 8158 }, { "epoch": 0.8931826267823422, "grad_norm": 0.9757510056095869, "learning_rate": 1.3925897534759925e-06, "loss": 0.5702, "num_input_tokens_seen": 1473609760, "step": 8159 }, { "epoch": 0.8932920988532801, "grad_norm": 1.0927569649744275, "learning_rate": 1.389761376196394e-06, "loss": 0.7562, "num_input_tokens_seen": 1473795680, "step": 8160 }, { "epoch": 0.893401570924218, "grad_norm": 1.0403869906587795, "learning_rate": 1.3869357919628845e-06, "loss": 0.7635, "num_input_tokens_seen": 1473973536, "step": 8161 }, { "epoch": 0.8935110429951558, "grad_norm": 1.1558170023348646, "learning_rate": 1.3841130011097408e-06, "loss": 1.0854, "num_input_tokens_seen": 1474168640, "step": 8162 }, { "epoch": 0.8936205150660937, "grad_norm": 1.1068558364837666, "learning_rate": 1.3812930039708738e-06, "loss": 0.8992, "num_input_tokens_seen": 1474343808, "step": 8163 }, { "epoch": 0.8937299871370317, "grad_norm": 1.1322117231843292, "learning_rate": 1.378475800879897e-06, "loss": 0.8204, "num_input_tokens_seen": 1474531296, "step": 8164 }, { "epoch": 0.8938394592079696, "grad_norm": 1.0560487228120694, "learning_rate": 1.3756613921700774e-06, "loss": 0.7473, "num_input_tokens_seen": 1474721472, "step": 8165 }, { "epoch": 0.8939489312789075, "grad_norm": 1.112510226770608, "learning_rate": 1.372849778174351e-06, "loss": 0.7822, "num_input_tokens_seen": 1474903360, "step": 8166 }, { "epoch": 0.8940584033498453, "grad_norm": 1.2053798501837771, "learning_rate": 1.3700409592253299e-06, "loss": 0.844, "num_input_tokens_seen": 1475066656, "step": 8167 }, { "epoch": 0.8941678754207832, "grad_norm": 1.1261647340969567, "learning_rate": 1.3672349356552899e-06, "loss": 0.7506, "num_input_tokens_seen": 1475230848, "step": 8168 }, { "epoch": 0.8942773474917212, "grad_norm": 1.030477176426467, "learning_rate": 1.3644317077961794e-06, "loss": 0.6415, "num_input_tokens_seen": 1475406464, "step": 8169 }, { "epoch": 0.8943868195626591, "grad_norm": 1.0950443783132553, "learning_rate": 1.3616312759796079e-06, "loss": 0.885, "num_input_tokens_seen": 1475626656, "step": 8170 }, { "epoch": 0.894496291633597, "grad_norm": 1.0962747045188739, "learning_rate": 1.3588336405368745e-06, "loss": 0.5948, "num_input_tokens_seen": 1475775840, "step": 8171 }, { "epoch": 0.8946057637045349, "grad_norm": 1.0725488618688621, "learning_rate": 1.3560388017989256e-06, "loss": 0.8246, "num_input_tokens_seen": 1475989536, "step": 8172 }, { "epoch": 0.8947152357754727, "grad_norm": 1.2764640835536571, "learning_rate": 1.3532467600963883e-06, "loss": 0.9701, "num_input_tokens_seen": 1476180608, "step": 8173 }, { "epoch": 0.8948247078464107, "grad_norm": 1.0999624115850615, "learning_rate": 1.350457515759554e-06, "loss": 0.9495, "num_input_tokens_seen": 1476354432, "step": 8174 }, { "epoch": 0.8949341799173486, "grad_norm": 1.1210724831933225, "learning_rate": 1.3476710691183837e-06, "loss": 0.7738, "num_input_tokens_seen": 1476556032, "step": 8175 }, { "epoch": 0.8950436519882865, "grad_norm": 1.0147335541814662, "learning_rate": 1.3448874205025137e-06, "loss": 0.7371, "num_input_tokens_seen": 1476760544, "step": 8176 }, { "epoch": 0.8951531240592244, "grad_norm": 1.0238080700624268, "learning_rate": 1.342106570241239e-06, "loss": 0.8386, "num_input_tokens_seen": 1476972448, "step": 8177 }, { "epoch": 0.8952625961301623, "grad_norm": 1.2798802567143146, "learning_rate": 1.3393285186635268e-06, "loss": 0.7986, "num_input_tokens_seen": 1477148288, "step": 8178 }, { "epoch": 0.8953720682011002, "grad_norm": 1.2818953586786273, "learning_rate": 1.3365532660980256e-06, "loss": 0.878, "num_input_tokens_seen": 1477313152, "step": 8179 }, { "epoch": 0.8954815402720381, "grad_norm": 1.1119388986960068, "learning_rate": 1.333780812873031e-06, "loss": 0.6281, "num_input_tokens_seen": 1477464352, "step": 8180 }, { "epoch": 0.895591012342976, "grad_norm": 1.0771620499692331, "learning_rate": 1.3310111593165254e-06, "loss": 0.892, "num_input_tokens_seen": 1477651168, "step": 8181 }, { "epoch": 0.8957004844139139, "grad_norm": 1.370658174975199, "learning_rate": 1.3282443057561545e-06, "loss": 0.9016, "num_input_tokens_seen": 1477836416, "step": 8182 }, { "epoch": 0.8958099564848518, "grad_norm": 1.1216161487722829, "learning_rate": 1.3254802525192289e-06, "loss": 0.9771, "num_input_tokens_seen": 1478016960, "step": 8183 }, { "epoch": 0.8959194285557897, "grad_norm": 1.1152932173242318, "learning_rate": 1.3227189999327316e-06, "loss": 0.9035, "num_input_tokens_seen": 1478196608, "step": 8184 }, { "epoch": 0.8960289006267276, "grad_norm": 1.0325991157637302, "learning_rate": 1.3199605483233096e-06, "loss": 0.843, "num_input_tokens_seen": 1478363936, "step": 8185 }, { "epoch": 0.8961383726976655, "grad_norm": 1.110810331853855, "learning_rate": 1.3172048980172935e-06, "loss": 0.8142, "num_input_tokens_seen": 1478557024, "step": 8186 }, { "epoch": 0.8962478447686034, "grad_norm": 1.1033641609963776, "learning_rate": 1.314452049340656e-06, "loss": 1.0974, "num_input_tokens_seen": 1478722560, "step": 8187 }, { "epoch": 0.8963573168395413, "grad_norm": 1.3064061797011637, "learning_rate": 1.3117020026190696e-06, "loss": 1.0359, "num_input_tokens_seen": 1478919680, "step": 8188 }, { "epoch": 0.8964667889104793, "grad_norm": 1.0119455444174519, "learning_rate": 1.3089547581778438e-06, "loss": 0.8444, "num_input_tokens_seen": 1479116800, "step": 8189 }, { "epoch": 0.8965762609814171, "grad_norm": 1.1709610490388178, "learning_rate": 1.3062103163419853e-06, "loss": 0.7548, "num_input_tokens_seen": 1479304288, "step": 8190 }, { "epoch": 0.896685733052355, "grad_norm": 1.1643832999981472, "learning_rate": 1.303468677436151e-06, "loss": 0.8489, "num_input_tokens_seen": 1479482816, "step": 8191 }, { "epoch": 0.8967952051232929, "grad_norm": 1.087949466997373, "learning_rate": 1.3007298417846731e-06, "loss": 0.775, "num_input_tokens_seen": 1479678592, "step": 8192 }, { "epoch": 0.8969046771942308, "grad_norm": 1.1274853908835731, "learning_rate": 1.2979938097115507e-06, "loss": 1.0827, "num_input_tokens_seen": 1479872352, "step": 8193 }, { "epoch": 0.8970141492651688, "grad_norm": 1.0605030928930408, "learning_rate": 1.2952605815404445e-06, "loss": 1.1926, "num_input_tokens_seen": 1480076416, "step": 8194 }, { "epoch": 0.8971236213361067, "grad_norm": 1.1031666431725728, "learning_rate": 1.2925301575947013e-06, "loss": 0.8696, "num_input_tokens_seen": 1480251808, "step": 8195 }, { "epoch": 0.8972330934070445, "grad_norm": 1.125601236959915, "learning_rate": 1.2898025381973155e-06, "loss": 0.6899, "num_input_tokens_seen": 1480390240, "step": 8196 }, { "epoch": 0.8973425654779824, "grad_norm": 1.0603271593991224, "learning_rate": 1.287077723670968e-06, "loss": 0.8551, "num_input_tokens_seen": 1480592064, "step": 8197 }, { "epoch": 0.8974520375489203, "grad_norm": 1.038777018171583, "learning_rate": 1.2843557143379897e-06, "loss": 0.7425, "num_input_tokens_seen": 1480778432, "step": 8198 }, { "epoch": 0.8975615096198583, "grad_norm": 1.228568411028125, "learning_rate": 1.2816365105203953e-06, "loss": 0.9109, "num_input_tokens_seen": 1480961216, "step": 8199 }, { "epoch": 0.8976709816907962, "grad_norm": 1.295531024607014, "learning_rate": 1.278920112539861e-06, "loss": 1.1402, "num_input_tokens_seen": 1481145568, "step": 8200 }, { "epoch": 0.897780453761734, "grad_norm": 1.0644289906416817, "learning_rate": 1.2762065207177292e-06, "loss": 0.7234, "num_input_tokens_seen": 1481329696, "step": 8201 }, { "epoch": 0.8978899258326719, "grad_norm": 1.25094220890757, "learning_rate": 1.273495735375016e-06, "loss": 0.9701, "num_input_tokens_seen": 1481488736, "step": 8202 }, { "epoch": 0.8979993979036098, "grad_norm": 1.078810763984929, "learning_rate": 1.270787756832395e-06, "loss": 0.8281, "num_input_tokens_seen": 1481657408, "step": 8203 }, { "epoch": 0.8981088699745478, "grad_norm": 1.2709127568091474, "learning_rate": 1.2680825854102268e-06, "loss": 0.7454, "num_input_tokens_seen": 1481832800, "step": 8204 }, { "epoch": 0.8982183420454857, "grad_norm": 1.1602778001811203, "learning_rate": 1.2653802214285137e-06, "loss": 0.8103, "num_input_tokens_seen": 1481989152, "step": 8205 }, { "epoch": 0.8983278141164236, "grad_norm": 1.2078664414770668, "learning_rate": 1.2626806652069501e-06, "loss": 1.1708, "num_input_tokens_seen": 1482159392, "step": 8206 }, { "epoch": 0.8984372861873614, "grad_norm": 1.0587735181806344, "learning_rate": 1.259983917064886e-06, "loss": 0.7148, "num_input_tokens_seen": 1482316192, "step": 8207 }, { "epoch": 0.8985467582582993, "grad_norm": 1.074637646928936, "learning_rate": 1.2572899773213437e-06, "loss": 0.9127, "num_input_tokens_seen": 1482507264, "step": 8208 }, { "epoch": 0.8986562303292372, "grad_norm": 1.0573879567664874, "learning_rate": 1.2545988462950077e-06, "loss": 0.9315, "num_input_tokens_seen": 1482684448, "step": 8209 }, { "epoch": 0.8987657024001752, "grad_norm": 1.0738223922537613, "learning_rate": 1.251910524304234e-06, "loss": 0.6121, "num_input_tokens_seen": 1482849760, "step": 8210 }, { "epoch": 0.8988751744711131, "grad_norm": 1.2441174850827141, "learning_rate": 1.249225011667046e-06, "loss": 1.0885, "num_input_tokens_seen": 1483023136, "step": 8211 }, { "epoch": 0.898984646542051, "grad_norm": 0.967356325368795, "learning_rate": 1.246542308701132e-06, "loss": 0.7088, "num_input_tokens_seen": 1483213088, "step": 8212 }, { "epoch": 0.8990941186129888, "grad_norm": 1.1006894811818522, "learning_rate": 1.2438624157238593e-06, "loss": 0.7588, "num_input_tokens_seen": 1483377728, "step": 8213 }, { "epoch": 0.8992035906839267, "grad_norm": 1.1868816572809155, "learning_rate": 1.2411853330522472e-06, "loss": 0.8855, "num_input_tokens_seen": 1483546400, "step": 8214 }, { "epoch": 0.8993130627548647, "grad_norm": 1.3188983182981562, "learning_rate": 1.238511061002992e-06, "loss": 0.9485, "num_input_tokens_seen": 1483706112, "step": 8215 }, { "epoch": 0.8994225348258026, "grad_norm": 1.1429157508144587, "learning_rate": 1.235839599892455e-06, "loss": 0.9716, "num_input_tokens_seen": 1483898528, "step": 8216 }, { "epoch": 0.8995320068967405, "grad_norm": 1.1453251607697317, "learning_rate": 1.2331709500366606e-06, "loss": 0.9124, "num_input_tokens_seen": 1484089600, "step": 8217 }, { "epoch": 0.8996414789676783, "grad_norm": 1.0580802882797826, "learning_rate": 1.2305051117513067e-06, "loss": 0.8838, "num_input_tokens_seen": 1484239008, "step": 8218 }, { "epoch": 0.8997509510386162, "grad_norm": 1.2153285491268164, "learning_rate": 1.2278420853517658e-06, "loss": 0.9587, "num_input_tokens_seen": 1484414400, "step": 8219 }, { "epoch": 0.8998604231095542, "grad_norm": 1.0176930426727964, "learning_rate": 1.2251818711530556e-06, "loss": 0.8158, "num_input_tokens_seen": 1484593376, "step": 8220 }, { "epoch": 0.8999698951804921, "grad_norm": 1.0404871302280323, "learning_rate": 1.222524469469885e-06, "loss": 0.831, "num_input_tokens_seen": 1484776160, "step": 8221 }, { "epoch": 0.90007936725143, "grad_norm": 1.2062133684673995, "learning_rate": 1.2198698806166086e-06, "loss": 0.8508, "num_input_tokens_seen": 1484975744, "step": 8222 }, { "epoch": 0.9001888393223679, "grad_norm": 1.0918788708518317, "learning_rate": 1.2172181049072695e-06, "loss": 0.9728, "num_input_tokens_seen": 1485174208, "step": 8223 }, { "epoch": 0.9002983113933057, "grad_norm": 1.1519808957648356, "learning_rate": 1.214569142655564e-06, "loss": 0.839, "num_input_tokens_seen": 1485350272, "step": 8224 }, { "epoch": 0.9004077834642437, "grad_norm": 1.1765326446757574, "learning_rate": 1.211922994174855e-06, "loss": 1.0387, "num_input_tokens_seen": 1485539776, "step": 8225 }, { "epoch": 0.9005172555351816, "grad_norm": 1.1819756151256195, "learning_rate": 1.209279659778187e-06, "loss": 0.7375, "num_input_tokens_seen": 1485720096, "step": 8226 }, { "epoch": 0.9006267276061195, "grad_norm": 1.1748936239747645, "learning_rate": 1.2066391397782484e-06, "loss": 0.8056, "num_input_tokens_seen": 1485908480, "step": 8227 }, { "epoch": 0.9007361996770574, "grad_norm": 1.1020621390849705, "learning_rate": 1.204001434487423e-06, "loss": 0.776, "num_input_tokens_seen": 1486050272, "step": 8228 }, { "epoch": 0.9008456717479953, "grad_norm": 1.0232973075535288, "learning_rate": 1.2013665442177275e-06, "loss": 0.8227, "num_input_tokens_seen": 1486258592, "step": 8229 }, { "epoch": 0.9009551438189332, "grad_norm": 1.1499136954555131, "learning_rate": 1.1987344692808849e-06, "loss": 0.7831, "num_input_tokens_seen": 1486437792, "step": 8230 }, { "epoch": 0.9010646158898711, "grad_norm": 1.1957300491566807, "learning_rate": 1.1961052099882435e-06, "loss": 0.748, "num_input_tokens_seen": 1486608704, "step": 8231 }, { "epoch": 0.901174087960809, "grad_norm": 1.0815568565699771, "learning_rate": 1.1934787666508573e-06, "loss": 0.8954, "num_input_tokens_seen": 1486783648, "step": 8232 }, { "epoch": 0.9012835600317469, "grad_norm": 1.0229370990481326, "learning_rate": 1.190855139579422e-06, "loss": 0.7173, "num_input_tokens_seen": 1486965312, "step": 8233 }, { "epoch": 0.9013930321026848, "grad_norm": 1.2127113885561969, "learning_rate": 1.1882343290843063e-06, "loss": 1.1024, "num_input_tokens_seen": 1487155936, "step": 8234 }, { "epoch": 0.9015025041736227, "grad_norm": 1.3405283521836149, "learning_rate": 1.1856163354755505e-06, "loss": 1.03, "num_input_tokens_seen": 1487315872, "step": 8235 }, { "epoch": 0.9016119762445606, "grad_norm": 1.1237094682591118, "learning_rate": 1.1830011590628547e-06, "loss": 1.0913, "num_input_tokens_seen": 1487497984, "step": 8236 }, { "epoch": 0.9017214483154985, "grad_norm": 0.9768684823373398, "learning_rate": 1.1803888001555963e-06, "loss": 0.7215, "num_input_tokens_seen": 1487675840, "step": 8237 }, { "epoch": 0.9018309203864364, "grad_norm": 1.331142005659181, "learning_rate": 1.1777792590628028e-06, "loss": 0.958, "num_input_tokens_seen": 1487861088, "step": 8238 }, { "epoch": 0.9019403924573743, "grad_norm": 1.1512031211793485, "learning_rate": 1.175172536093183e-06, "loss": 0.822, "num_input_tokens_seen": 1488042528, "step": 8239 }, { "epoch": 0.9020498645283123, "grad_norm": 1.2411452425326874, "learning_rate": 1.1725686315551099e-06, "loss": 0.9403, "num_input_tokens_seen": 1488231808, "step": 8240 }, { "epoch": 0.9021593365992501, "grad_norm": 1.0232987054957368, "learning_rate": 1.1699675457566144e-06, "loss": 1.0121, "num_input_tokens_seen": 1488421984, "step": 8241 }, { "epoch": 0.902268808670188, "grad_norm": 1.3386567755260281, "learning_rate": 1.1673692790054063e-06, "loss": 0.8738, "num_input_tokens_seen": 1488556160, "step": 8242 }, { "epoch": 0.9023782807411259, "grad_norm": 1.1592958387304306, "learning_rate": 1.1647738316088508e-06, "loss": 0.8242, "num_input_tokens_seen": 1488752160, "step": 8243 }, { "epoch": 0.9024877528120638, "grad_norm": 1.0144446923834878, "learning_rate": 1.1621812038739855e-06, "loss": 0.7347, "num_input_tokens_seen": 1488958688, "step": 8244 }, { "epoch": 0.9025972248830018, "grad_norm": 1.1530372730989737, "learning_rate": 1.1595913961075094e-06, "loss": 1.034, "num_input_tokens_seen": 1489147968, "step": 8245 }, { "epoch": 0.9027066969539397, "grad_norm": 1.0271052674794385, "learning_rate": 1.1570044086158e-06, "loss": 0.8037, "num_input_tokens_seen": 1489328288, "step": 8246 }, { "epoch": 0.9028161690248775, "grad_norm": 0.9707368349788366, "learning_rate": 1.15442024170489e-06, "loss": 0.516, "num_input_tokens_seen": 1489507488, "step": 8247 }, { "epoch": 0.9029256410958154, "grad_norm": 1.2018424294631445, "learning_rate": 1.1518388956804793e-06, "loss": 0.8438, "num_input_tokens_seen": 1489695200, "step": 8248 }, { "epoch": 0.9030351131667533, "grad_norm": 1.0897508260291047, "learning_rate": 1.149260370847935e-06, "loss": 0.9425, "num_input_tokens_seen": 1489855360, "step": 8249 }, { "epoch": 0.9031445852376913, "grad_norm": 1.2323871976444367, "learning_rate": 1.1466846675122988e-06, "loss": 0.7014, "num_input_tokens_seen": 1489975872, "step": 8250 }, { "epoch": 0.9032540573086292, "grad_norm": 1.3657320395274504, "learning_rate": 1.1441117859782636e-06, "loss": 0.942, "num_input_tokens_seen": 1490164032, "step": 8251 }, { "epoch": 0.903363529379567, "grad_norm": 0.9926039354976134, "learning_rate": 1.1415417265501993e-06, "loss": 0.7039, "num_input_tokens_seen": 1490330688, "step": 8252 }, { "epoch": 0.9034730014505049, "grad_norm": 1.1005387114146714, "learning_rate": 1.138974489532138e-06, "loss": 0.8314, "num_input_tokens_seen": 1490529824, "step": 8253 }, { "epoch": 0.9035824735214428, "grad_norm": 0.9848423711157928, "learning_rate": 1.1364100752277812e-06, "loss": 0.7096, "num_input_tokens_seen": 1490724256, "step": 8254 }, { "epoch": 0.9036919455923808, "grad_norm": 1.106846358770366, "learning_rate": 1.1338484839404944e-06, "loss": 1.0336, "num_input_tokens_seen": 1490889792, "step": 8255 }, { "epoch": 0.9038014176633187, "grad_norm": 1.081715451645606, "learning_rate": 1.131289715973305e-06, "loss": 0.6998, "num_input_tokens_seen": 1491058016, "step": 8256 }, { "epoch": 0.9039108897342566, "grad_norm": 1.2188887883903703, "learning_rate": 1.1287337716289149e-06, "loss": 0.9641, "num_input_tokens_seen": 1491242816, "step": 8257 }, { "epoch": 0.9040203618051944, "grad_norm": 1.2409639390383063, "learning_rate": 1.1261806512096878e-06, "loss": 0.9602, "num_input_tokens_seen": 1491435904, "step": 8258 }, { "epoch": 0.9041298338761323, "grad_norm": 1.1850946560006654, "learning_rate": 1.1236303550176463e-06, "loss": 1.0269, "num_input_tokens_seen": 1491595616, "step": 8259 }, { "epoch": 0.9042393059470702, "grad_norm": 1.0271031783368139, "learning_rate": 1.1210828833544907e-06, "loss": 0.933, "num_input_tokens_seen": 1491778624, "step": 8260 }, { "epoch": 0.9043487780180082, "grad_norm": 1.0624737455826805, "learning_rate": 1.1185382365215853e-06, "loss": 0.777, "num_input_tokens_seen": 1491972160, "step": 8261 }, { "epoch": 0.9044582500889461, "grad_norm": 1.1533993815838945, "learning_rate": 1.1159964148199475e-06, "loss": 1.0945, "num_input_tokens_seen": 1492178912, "step": 8262 }, { "epoch": 0.904567722159884, "grad_norm": 1.0937355585507262, "learning_rate": 1.1134574185502816e-06, "loss": 0.9078, "num_input_tokens_seen": 1492392160, "step": 8263 }, { "epoch": 0.9046771942308218, "grad_norm": 1.1360939117586821, "learning_rate": 1.1109212480129334e-06, "loss": 0.7378, "num_input_tokens_seen": 1492574944, "step": 8264 }, { "epoch": 0.9047866663017597, "grad_norm": 1.0099088172543553, "learning_rate": 1.1083879035079349e-06, "loss": 0.7589, "num_input_tokens_seen": 1492760864, "step": 8265 }, { "epoch": 0.9048961383726977, "grad_norm": 1.0885423115754231, "learning_rate": 1.105857385334977e-06, "loss": 0.9072, "num_input_tokens_seen": 1492913632, "step": 8266 }, { "epoch": 0.9050056104436356, "grad_norm": 1.0158997384184507, "learning_rate": 1.1033296937934061e-06, "loss": 0.9772, "num_input_tokens_seen": 1493112096, "step": 8267 }, { "epoch": 0.9051150825145735, "grad_norm": 1.167684003637182, "learning_rate": 1.1008048291822588e-06, "loss": 0.8122, "num_input_tokens_seen": 1493247168, "step": 8268 }, { "epoch": 0.9052245545855113, "grad_norm": 1.089390704971363, "learning_rate": 1.0982827918002065e-06, "loss": 0.8768, "num_input_tokens_seen": 1493450560, "step": 8269 }, { "epoch": 0.9053340266564492, "grad_norm": 1.1449779380938387, "learning_rate": 1.0957635819456135e-06, "loss": 1.0532, "num_input_tokens_seen": 1493625280, "step": 8270 }, { "epoch": 0.9054434987273872, "grad_norm": 1.0853727230516903, "learning_rate": 1.0932471999164835e-06, "loss": 0.8289, "num_input_tokens_seen": 1493830688, "step": 8271 }, { "epoch": 0.9055529707983251, "grad_norm": 1.1612475878276758, "learning_rate": 1.0907336460105166e-06, "loss": 1.2016, "num_input_tokens_seen": 1494045728, "step": 8272 }, { "epoch": 0.905662442869263, "grad_norm": 1.1825374510240008, "learning_rate": 1.088222920525045e-06, "loss": 0.9235, "num_input_tokens_seen": 1494203200, "step": 8273 }, { "epoch": 0.9057719149402009, "grad_norm": 1.1481585001535783, "learning_rate": 1.0857150237570946e-06, "loss": 1.0415, "num_input_tokens_seen": 1494382848, "step": 8274 }, { "epoch": 0.9058813870111387, "grad_norm": 1.0581745852159443, "learning_rate": 1.0832099560033394e-06, "loss": 0.9335, "num_input_tokens_seen": 1494586240, "step": 8275 }, { "epoch": 0.9059908590820767, "grad_norm": 1.1862857533444258, "learning_rate": 1.0807077175601255e-06, "loss": 0.8872, "num_input_tokens_seen": 1494782464, "step": 8276 }, { "epoch": 0.9061003311530146, "grad_norm": 1.0481398613364652, "learning_rate": 1.0782083087234608e-06, "loss": 0.7234, "num_input_tokens_seen": 1494966816, "step": 8277 }, { "epoch": 0.9062098032239525, "grad_norm": 1.2929783846677712, "learning_rate": 1.0757117297890224e-06, "loss": 0.7995, "num_input_tokens_seen": 1495166624, "step": 8278 }, { "epoch": 0.9063192752948904, "grad_norm": 1.2206979492077772, "learning_rate": 1.073217981052152e-06, "loss": 1.1398, "num_input_tokens_seen": 1495342688, "step": 8279 }, { "epoch": 0.9064287473658283, "grad_norm": 1.0603042240646194, "learning_rate": 1.0707270628078552e-06, "loss": 0.8177, "num_input_tokens_seen": 1495539584, "step": 8280 }, { "epoch": 0.9065382194367662, "grad_norm": 1.3377442662731605, "learning_rate": 1.0682389753508021e-06, "loss": 1.036, "num_input_tokens_seen": 1495686080, "step": 8281 }, { "epoch": 0.9066476915077041, "grad_norm": 1.1356243095332867, "learning_rate": 1.065753718975329e-06, "loss": 1.1312, "num_input_tokens_seen": 1495872000, "step": 8282 }, { "epoch": 0.906757163578642, "grad_norm": 1.2310982683285008, "learning_rate": 1.0632712939754347e-06, "loss": 0.8706, "num_input_tokens_seen": 1496014912, "step": 8283 }, { "epoch": 0.9068666356495799, "grad_norm": 1.1376493125424227, "learning_rate": 1.0607917006447865e-06, "loss": 0.9748, "num_input_tokens_seen": 1496203520, "step": 8284 }, { "epoch": 0.9069761077205178, "grad_norm": 1.3074893675275183, "learning_rate": 1.0583149392767195e-06, "loss": 1.209, "num_input_tokens_seen": 1496410272, "step": 8285 }, { "epoch": 0.9070855797914557, "grad_norm": 1.1075303766291242, "learning_rate": 1.0558410101642213e-06, "loss": 1.0296, "num_input_tokens_seen": 1496605600, "step": 8286 }, { "epoch": 0.9071950518623936, "grad_norm": 1.2126779170116502, "learning_rate": 1.0533699135999608e-06, "loss": 0.95, "num_input_tokens_seen": 1496776736, "step": 8287 }, { "epoch": 0.9073045239333315, "grad_norm": 1.1398379079291203, "learning_rate": 1.0509016498762625e-06, "loss": 0.8804, "num_input_tokens_seen": 1496968480, "step": 8288 }, { "epoch": 0.9074139960042694, "grad_norm": 1.0209879206691046, "learning_rate": 1.0484362192851149e-06, "loss": 0.6753, "num_input_tokens_seen": 1497154848, "step": 8289 }, { "epoch": 0.9075234680752073, "grad_norm": 1.1973647407962773, "learning_rate": 1.0459736221181766e-06, "loss": 0.8413, "num_input_tokens_seen": 1497342336, "step": 8290 }, { "epoch": 0.9076329401461453, "grad_norm": 1.2386115082003961, "learning_rate": 1.0435138586667641e-06, "loss": 0.847, "num_input_tokens_seen": 1497511008, "step": 8291 }, { "epoch": 0.9077424122170831, "grad_norm": 1.028273187410667, "learning_rate": 1.0410569292218676e-06, "loss": 0.8252, "num_input_tokens_seen": 1497671616, "step": 8292 }, { "epoch": 0.907851884288021, "grad_norm": 1.1716579490561598, "learning_rate": 1.038602834074129e-06, "loss": 0.8048, "num_input_tokens_seen": 1497853280, "step": 8293 }, { "epoch": 0.9079613563589589, "grad_norm": 1.2073329313970185, "learning_rate": 1.0361515735138772e-06, "loss": 0.9138, "num_input_tokens_seen": 1498044800, "step": 8294 }, { "epoch": 0.9080708284298968, "grad_norm": 1.145857683558811, "learning_rate": 1.0337031478310749e-06, "loss": 0.7592, "num_input_tokens_seen": 1498220416, "step": 8295 }, { "epoch": 0.9081803005008348, "grad_norm": 1.186675890009822, "learning_rate": 1.0312575573153792e-06, "loss": 0.7531, "num_input_tokens_seen": 1498384608, "step": 8296 }, { "epoch": 0.9082897725717727, "grad_norm": 1.1442127500692165, "learning_rate": 1.0288148022560923e-06, "loss": 0.8312, "num_input_tokens_seen": 1498575680, "step": 8297 }, { "epoch": 0.9083992446427105, "grad_norm": 0.9363785712220238, "learning_rate": 1.026374882942191e-06, "loss": 0.7952, "num_input_tokens_seen": 1498764736, "step": 8298 }, { "epoch": 0.9085087167136484, "grad_norm": 1.1397009986975488, "learning_rate": 1.0239377996623112e-06, "loss": 1.1108, "num_input_tokens_seen": 1498947968, "step": 8299 }, { "epoch": 0.9086181887845863, "grad_norm": 1.1579597307460567, "learning_rate": 1.02150355270475e-06, "loss": 0.9321, "num_input_tokens_seen": 1499139040, "step": 8300 }, { "epoch": 0.9087276608555243, "grad_norm": 1.082490513696137, "learning_rate": 1.0190721423574884e-06, "loss": 0.8652, "num_input_tokens_seen": 1499316224, "step": 8301 }, { "epoch": 0.9088371329264622, "grad_norm": 1.1785142634924135, "learning_rate": 1.0166435689081404e-06, "loss": 0.9271, "num_input_tokens_seen": 1499499680, "step": 8302 }, { "epoch": 0.9089466049974, "grad_norm": 1.2287962188255228, "learning_rate": 1.0142178326440155e-06, "loss": 0.8369, "num_input_tokens_seen": 1499658272, "step": 8303 }, { "epoch": 0.9090560770683379, "grad_norm": 1.1672404388310669, "learning_rate": 1.0117949338520645e-06, "loss": 0.9242, "num_input_tokens_seen": 1499815520, "step": 8304 }, { "epoch": 0.9091655491392758, "grad_norm": 1.087578939444823, "learning_rate": 1.009374872818919e-06, "loss": 0.6883, "num_input_tokens_seen": 1499959104, "step": 8305 }, { "epoch": 0.9092750212102138, "grad_norm": 1.2044528222985253, "learning_rate": 1.0069576498308587e-06, "loss": 0.9726, "num_input_tokens_seen": 1500167648, "step": 8306 }, { "epoch": 0.9093844932811517, "grad_norm": 1.3158773837085613, "learning_rate": 1.0045432651738434e-06, "loss": 1.0508, "num_input_tokens_seen": 1500309216, "step": 8307 }, { "epoch": 0.9094939653520896, "grad_norm": 1.0625791520200065, "learning_rate": 1.0021317191334895e-06, "loss": 0.693, "num_input_tokens_seen": 1500491328, "step": 8308 }, { "epoch": 0.9096034374230274, "grad_norm": 1.101391880538574, "learning_rate": 9.99723011995074e-07, "loss": 0.8125, "num_input_tokens_seen": 1500638496, "step": 8309 }, { "epoch": 0.9097129094939653, "grad_norm": 1.202216114708137, "learning_rate": 9.973171440435524e-07, "loss": 0.9299, "num_input_tokens_seen": 1500813888, "step": 8310 }, { "epoch": 0.9098223815649032, "grad_norm": 1.1088290953351392, "learning_rate": 9.949141155635194e-07, "loss": 0.9402, "num_input_tokens_seen": 1500988608, "step": 8311 }, { "epoch": 0.9099318536358412, "grad_norm": 1.0810839089955897, "learning_rate": 9.925139268392614e-07, "loss": 0.9988, "num_input_tokens_seen": 1501179008, "step": 8312 }, { "epoch": 0.9100413257067791, "grad_norm": 1.0366522577230914, "learning_rate": 9.901165781547096e-07, "loss": 0.9574, "num_input_tokens_seen": 1501369184, "step": 8313 }, { "epoch": 0.910150797777717, "grad_norm": 1.113323599026625, "learning_rate": 9.877220697934674e-07, "loss": 1.0071, "num_input_tokens_seen": 1501565632, "step": 8314 }, { "epoch": 0.9102602698486548, "grad_norm": 1.0802601666032787, "learning_rate": 9.853304020388005e-07, "loss": 1.0336, "num_input_tokens_seen": 1501753792, "step": 8315 }, { "epoch": 0.9103697419195927, "grad_norm": 0.9999907910399797, "learning_rate": 9.829415751736404e-07, "loss": 0.8269, "num_input_tokens_seen": 1501953376, "step": 8316 }, { "epoch": 0.9104792139905307, "grad_norm": 1.0767061599971883, "learning_rate": 9.805555894805778e-07, "loss": 1.1037, "num_input_tokens_seen": 1502156768, "step": 8317 }, { "epoch": 0.9105886860614686, "grad_norm": 1.0975236371390729, "learning_rate": 9.781724452418733e-07, "loss": 0.8832, "num_input_tokens_seen": 1502339552, "step": 8318 }, { "epoch": 0.9106981581324065, "grad_norm": 1.3447311722642483, "learning_rate": 9.757921427394457e-07, "loss": 0.8312, "num_input_tokens_seen": 1502492320, "step": 8319 }, { "epoch": 0.9108076302033443, "grad_norm": 1.0807158258679443, "learning_rate": 9.73414682254878e-07, "loss": 0.7666, "num_input_tokens_seen": 1502681376, "step": 8320 }, { "epoch": 0.9109171022742822, "grad_norm": 1.1012757381241631, "learning_rate": 9.710400640694228e-07, "loss": 0.8382, "num_input_tokens_seen": 1502850944, "step": 8321 }, { "epoch": 0.9110265743452202, "grad_norm": 0.9554209961580272, "learning_rate": 9.686682884639948e-07, "loss": 0.7403, "num_input_tokens_seen": 1503009984, "step": 8322 }, { "epoch": 0.9111360464161581, "grad_norm": 1.0119926052539534, "learning_rate": 9.662993557191691e-07, "loss": 0.7701, "num_input_tokens_seen": 1503198368, "step": 8323 }, { "epoch": 0.911245518487096, "grad_norm": 1.2269151022872447, "learning_rate": 9.639332661151856e-07, "loss": 0.942, "num_input_tokens_seen": 1503374880, "step": 8324 }, { "epoch": 0.9113549905580339, "grad_norm": 1.017129048486036, "learning_rate": 9.615700199319455e-07, "loss": 0.7965, "num_input_tokens_seen": 1503576256, "step": 8325 }, { "epoch": 0.9114644626289717, "grad_norm": 1.0749391671644717, "learning_rate": 9.592096174490195e-07, "loss": 0.8966, "num_input_tokens_seen": 1503744256, "step": 8326 }, { "epoch": 0.9115739346999097, "grad_norm": 1.1189009857586603, "learning_rate": 9.56852058945637e-07, "loss": 0.8747, "num_input_tokens_seen": 1503922784, "step": 8327 }, { "epoch": 0.9116834067708476, "grad_norm": 1.242827484523957, "learning_rate": 9.544973447006888e-07, "loss": 0.9247, "num_input_tokens_seen": 1504076896, "step": 8328 }, { "epoch": 0.9117928788417855, "grad_norm": 1.1129464164612786, "learning_rate": 9.521454749927411e-07, "loss": 1.1011, "num_input_tokens_seen": 1504278048, "step": 8329 }, { "epoch": 0.9119023509127234, "grad_norm": 1.063046931697608, "learning_rate": 9.497964501000128e-07, "loss": 0.8944, "num_input_tokens_seen": 1504480320, "step": 8330 }, { "epoch": 0.9120118229836613, "grad_norm": 1.3718389241334787, "learning_rate": 9.474502703003901e-07, "loss": 1.0168, "num_input_tokens_seen": 1504642944, "step": 8331 }, { "epoch": 0.9121212950545992, "grad_norm": 1.1361647366803977, "learning_rate": 9.451069358714177e-07, "loss": 1.0183, "num_input_tokens_seen": 1504822368, "step": 8332 }, { "epoch": 0.9122307671255371, "grad_norm": 1.1059087410125106, "learning_rate": 9.427664470903097e-07, "loss": 0.8239, "num_input_tokens_seen": 1505012320, "step": 8333 }, { "epoch": 0.912340239196475, "grad_norm": 1.152709902765688, "learning_rate": 9.40428804233942e-07, "loss": 1.2347, "num_input_tokens_seen": 1505191072, "step": 8334 }, { "epoch": 0.9124497112674129, "grad_norm": 1.0458370586090815, "learning_rate": 9.380940075788491e-07, "loss": 0.6422, "num_input_tokens_seen": 1505385952, "step": 8335 }, { "epoch": 0.9125591833383508, "grad_norm": 1.2553270792951363, "learning_rate": 9.357620574012432e-07, "loss": 1.0457, "num_input_tokens_seen": 1505547680, "step": 8336 }, { "epoch": 0.9126686554092887, "grad_norm": 1.0457977901868003, "learning_rate": 9.33432953976976e-07, "loss": 0.9652, "num_input_tokens_seen": 1505712992, "step": 8337 }, { "epoch": 0.9127781274802266, "grad_norm": 1.1272626481521537, "learning_rate": 9.311066975815852e-07, "loss": 0.9688, "num_input_tokens_seen": 1505925344, "step": 8338 }, { "epoch": 0.9128875995511645, "grad_norm": 1.0508503377104825, "learning_rate": 9.28783288490262e-07, "loss": 0.7433, "num_input_tokens_seen": 1506120672, "step": 8339 }, { "epoch": 0.9129970716221024, "grad_norm": 1.4019737074205503, "learning_rate": 9.264627269778586e-07, "loss": 1.1451, "num_input_tokens_seen": 1506277920, "step": 8340 }, { "epoch": 0.9131065436930403, "grad_norm": 1.1767462638826467, "learning_rate": 9.241450133188944e-07, "loss": 0.9105, "num_input_tokens_seen": 1506451968, "step": 8341 }, { "epoch": 0.9132160157639783, "grad_norm": 1.087986666208494, "learning_rate": 9.218301477875474e-07, "loss": 0.8518, "num_input_tokens_seen": 1506626016, "step": 8342 }, { "epoch": 0.9133254878349161, "grad_norm": 1.062641583153429, "learning_rate": 9.195181306576678e-07, "loss": 0.8852, "num_input_tokens_seen": 1506797824, "step": 8343 }, { "epoch": 0.913434959905854, "grad_norm": 1.1822549531364621, "learning_rate": 9.172089622027563e-07, "loss": 0.8781, "num_input_tokens_seen": 1507004352, "step": 8344 }, { "epoch": 0.9135444319767919, "grad_norm": 1.1017642546602175, "learning_rate": 9.149026426959889e-07, "loss": 0.9618, "num_input_tokens_seen": 1507185568, "step": 8345 }, { "epoch": 0.9136539040477298, "grad_norm": 1.1028662467262427, "learning_rate": 9.125991724101918e-07, "loss": 0.8454, "num_input_tokens_seen": 1507356256, "step": 8346 }, { "epoch": 0.9137633761186678, "grad_norm": 1.0927313829772267, "learning_rate": 9.102985516178692e-07, "loss": 0.9125, "num_input_tokens_seen": 1507563456, "step": 8347 }, { "epoch": 0.9138728481896057, "grad_norm": 1.123754500654838, "learning_rate": 9.080007805911728e-07, "loss": 1.083, "num_input_tokens_seen": 1507779392, "step": 8348 }, { "epoch": 0.9139823202605435, "grad_norm": 1.091428363551446, "learning_rate": 9.057058596019325e-07, "loss": 1.0887, "num_input_tokens_seen": 1507956128, "step": 8349 }, { "epoch": 0.9140917923314814, "grad_norm": 1.1487051723230608, "learning_rate": 9.034137889216255e-07, "loss": 0.9039, "num_input_tokens_seen": 1508125472, "step": 8350 }, { "epoch": 0.9142012644024193, "grad_norm": 1.1003581287703124, "learning_rate": 9.011245688214015e-07, "loss": 0.9551, "num_input_tokens_seen": 1508293696, "step": 8351 }, { "epoch": 0.9143107364733573, "grad_norm": 1.1185383132900408, "learning_rate": 8.988381995720746e-07, "loss": 0.9495, "num_input_tokens_seen": 1508488800, "step": 8352 }, { "epoch": 0.9144202085442952, "grad_norm": 1.086720301108004, "learning_rate": 8.965546814441117e-07, "loss": 0.8581, "num_input_tokens_seen": 1508669120, "step": 8353 }, { "epoch": 0.914529680615233, "grad_norm": 1.1398979378069942, "learning_rate": 8.942740147076551e-07, "loss": 0.8787, "num_input_tokens_seen": 1508819200, "step": 8354 }, { "epoch": 0.9146391526861709, "grad_norm": 1.1763814110476343, "learning_rate": 8.919961996324999e-07, "loss": 1.0021, "num_input_tokens_seen": 1509002656, "step": 8355 }, { "epoch": 0.9147486247571088, "grad_norm": 1.0843632766133888, "learning_rate": 8.897212364881058e-07, "loss": 0.7413, "num_input_tokens_seen": 1509179616, "step": 8356 }, { "epoch": 0.9148580968280468, "grad_norm": 1.1699014958053928, "learning_rate": 8.87449125543599e-07, "loss": 1.1203, "num_input_tokens_seen": 1509385920, "step": 8357 }, { "epoch": 0.9149675688989847, "grad_norm": 1.1102488125487868, "learning_rate": 8.851798670677674e-07, "loss": 0.9004, "num_input_tokens_seen": 1509549216, "step": 8358 }, { "epoch": 0.9150770409699226, "grad_norm": 1.093714195755347, "learning_rate": 8.829134613290574e-07, "loss": 0.8603, "num_input_tokens_seen": 1509726624, "step": 8359 }, { "epoch": 0.9151865130408604, "grad_norm": 1.2067857017254207, "learning_rate": 8.806499085955794e-07, "loss": 0.8909, "num_input_tokens_seen": 1509904480, "step": 8360 }, { "epoch": 0.9152959851117983, "grad_norm": 1.2577463926393615, "learning_rate": 8.783892091351053e-07, "loss": 0.8662, "num_input_tokens_seen": 1510045152, "step": 8361 }, { "epoch": 0.9154054571827362, "grad_norm": 1.0761862752127527, "learning_rate": 8.761313632150797e-07, "loss": 0.9722, "num_input_tokens_seen": 1510227936, "step": 8362 }, { "epoch": 0.9155149292536742, "grad_norm": 1.195162495693904, "learning_rate": 8.738763711025971e-07, "loss": 0.9622, "num_input_tokens_seen": 1510399968, "step": 8363 }, { "epoch": 0.9156244013246121, "grad_norm": 1.065424260895753, "learning_rate": 8.716242330644164e-07, "loss": 0.576, "num_input_tokens_seen": 1510596192, "step": 8364 }, { "epoch": 0.91573387339555, "grad_norm": 1.2143990539901426, "learning_rate": 8.693749493669662e-07, "loss": 0.9965, "num_input_tokens_seen": 1510784800, "step": 8365 }, { "epoch": 0.9158433454664878, "grad_norm": 1.1442969280885418, "learning_rate": 8.671285202763252e-07, "loss": 0.8559, "num_input_tokens_seen": 1510939584, "step": 8366 }, { "epoch": 0.9159528175374257, "grad_norm": 1.4199711556258627, "learning_rate": 8.648849460582503e-07, "loss": 0.8519, "num_input_tokens_seen": 1511079808, "step": 8367 }, { "epoch": 0.9160622896083637, "grad_norm": 1.2467281436721653, "learning_rate": 8.626442269781432e-07, "loss": 1.038, "num_input_tokens_seen": 1511282976, "step": 8368 }, { "epoch": 0.9161717616793016, "grad_norm": 1.0987276434766693, "learning_rate": 8.604063633010862e-07, "loss": 0.8502, "num_input_tokens_seen": 1511478304, "step": 8369 }, { "epoch": 0.9162812337502395, "grad_norm": 1.0747520648788276, "learning_rate": 8.581713552918064e-07, "loss": 0.8015, "num_input_tokens_seen": 1511651456, "step": 8370 }, { "epoch": 0.9163907058211773, "grad_norm": 1.2313835497621186, "learning_rate": 8.559392032147034e-07, "loss": 1.0392, "num_input_tokens_seen": 1511812512, "step": 8371 }, { "epoch": 0.9165001778921152, "grad_norm": 1.1347380485401142, "learning_rate": 8.537099073338384e-07, "loss": 0.8304, "num_input_tokens_seen": 1511992384, "step": 8372 }, { "epoch": 0.9166096499630532, "grad_norm": 1.204553276530628, "learning_rate": 8.514834679129336e-07, "loss": 1.1033, "num_input_tokens_seen": 1512172480, "step": 8373 }, { "epoch": 0.9167191220339911, "grad_norm": 1.2098886192698972, "learning_rate": 8.492598852153672e-07, "loss": 0.6975, "num_input_tokens_seen": 1512342272, "step": 8374 }, { "epoch": 0.916828594104929, "grad_norm": 1.165289929376735, "learning_rate": 8.470391595041871e-07, "loss": 1.029, "num_input_tokens_seen": 1512541632, "step": 8375 }, { "epoch": 0.9169380661758669, "grad_norm": 1.0994207244091108, "learning_rate": 8.448212910421055e-07, "loss": 0.9035, "num_input_tokens_seen": 1512737856, "step": 8376 }, { "epoch": 0.9170475382468047, "grad_norm": 1.301585822108259, "learning_rate": 8.426062800914846e-07, "loss": 0.9798, "num_input_tokens_seen": 1512936096, "step": 8377 }, { "epoch": 0.9171570103177427, "grad_norm": 1.133752156921152, "learning_rate": 8.40394126914365e-07, "loss": 0.8594, "num_input_tokens_seen": 1513101632, "step": 8378 }, { "epoch": 0.9172664823886806, "grad_norm": 1.166872307274219, "learning_rate": 8.381848317724289e-07, "loss": 0.9631, "num_input_tokens_seen": 1513272544, "step": 8379 }, { "epoch": 0.9173759544596185, "grad_norm": 1.1958590485912608, "learning_rate": 8.359783949270394e-07, "loss": 0.9114, "num_input_tokens_seen": 1513452416, "step": 8380 }, { "epoch": 0.9174854265305564, "grad_norm": 1.0371675923992254, "learning_rate": 8.337748166392129e-07, "loss": 0.5996, "num_input_tokens_seen": 1513616160, "step": 8381 }, { "epoch": 0.9175948986014943, "grad_norm": 1.065696340281619, "learning_rate": 8.315740971696295e-07, "loss": 0.9963, "num_input_tokens_seen": 1513805216, "step": 8382 }, { "epoch": 0.9177043706724322, "grad_norm": 1.2497253116153915, "learning_rate": 8.293762367786257e-07, "loss": 0.8812, "num_input_tokens_seen": 1513984416, "step": 8383 }, { "epoch": 0.9178138427433701, "grad_norm": 1.2360171247795935, "learning_rate": 8.271812357262043e-07, "loss": 0.717, "num_input_tokens_seen": 1514143232, "step": 8384 }, { "epoch": 0.917923314814308, "grad_norm": 1.1408595536374289, "learning_rate": 8.249890942720384e-07, "loss": 0.7321, "num_input_tokens_seen": 1514313024, "step": 8385 }, { "epoch": 0.9180327868852459, "grad_norm": 1.1288243564350342, "learning_rate": 8.227998126754427e-07, "loss": 0.8186, "num_input_tokens_seen": 1514509248, "step": 8386 }, { "epoch": 0.9181422589561838, "grad_norm": 1.2897332498224707, "learning_rate": 8.206133911954156e-07, "loss": 0.908, "num_input_tokens_seen": 1514681952, "step": 8387 }, { "epoch": 0.9182517310271217, "grad_norm": 1.4186030038648807, "learning_rate": 8.184298300905946e-07, "loss": 1.1958, "num_input_tokens_seen": 1514897440, "step": 8388 }, { "epoch": 0.9183612030980596, "grad_norm": 1.02453635824913, "learning_rate": 8.162491296193009e-07, "loss": 0.9813, "num_input_tokens_seen": 1515083136, "step": 8389 }, { "epoch": 0.9184706751689975, "grad_norm": 1.2189008668297288, "learning_rate": 8.140712900395031e-07, "loss": 0.8221, "num_input_tokens_seen": 1515242848, "step": 8390 }, { "epoch": 0.9185801472399354, "grad_norm": 1.2371595819979513, "learning_rate": 8.118963116088369e-07, "loss": 0.781, "num_input_tokens_seen": 1515410624, "step": 8391 }, { "epoch": 0.9186896193108733, "grad_norm": 1.3360549696646606, "learning_rate": 8.097241945845962e-07, "loss": 0.9616, "num_input_tokens_seen": 1515586464, "step": 8392 }, { "epoch": 0.9187990913818113, "grad_norm": 1.1861492807220901, "learning_rate": 8.075549392237369e-07, "loss": 0.9637, "num_input_tokens_seen": 1515743488, "step": 8393 }, { "epoch": 0.9189085634527491, "grad_norm": 1.1146777044731577, "learning_rate": 8.053885457828869e-07, "loss": 0.7242, "num_input_tokens_seen": 1515936576, "step": 8394 }, { "epoch": 0.919018035523687, "grad_norm": 1.0294774745757551, "learning_rate": 8.032250145183134e-07, "loss": 0.6952, "num_input_tokens_seen": 1516131232, "step": 8395 }, { "epoch": 0.9191275075946249, "grad_norm": 1.0287656046354805, "learning_rate": 8.010643456859645e-07, "loss": 0.7914, "num_input_tokens_seen": 1516335296, "step": 8396 }, { "epoch": 0.9192369796655628, "grad_norm": 1.1357758271868121, "learning_rate": 7.989065395414468e-07, "loss": 0.8502, "num_input_tokens_seen": 1516523456, "step": 8397 }, { "epoch": 0.9193464517365008, "grad_norm": 1.1020559193327, "learning_rate": 7.967515963400202e-07, "loss": 0.6692, "num_input_tokens_seen": 1516700416, "step": 8398 }, { "epoch": 0.9194559238074387, "grad_norm": 1.4233209153972963, "learning_rate": 7.945995163366083e-07, "loss": 1.1064, "num_input_tokens_seen": 1516869088, "step": 8399 }, { "epoch": 0.9195653958783765, "grad_norm": 1.0805473205287472, "learning_rate": 7.924502997858018e-07, "loss": 0.7076, "num_input_tokens_seen": 1517019616, "step": 8400 }, { "epoch": 0.9196748679493144, "grad_norm": 1.118146844589734, "learning_rate": 7.903039469418506e-07, "loss": 1.1123, "num_input_tokens_seen": 1517222112, "step": 8401 }, { "epoch": 0.9197843400202523, "grad_norm": 1.2416528954905006, "learning_rate": 7.881604580586593e-07, "loss": 0.7529, "num_input_tokens_seen": 1517400640, "step": 8402 }, { "epoch": 0.9198938120911903, "grad_norm": 1.0872830623986243, "learning_rate": 7.860198333897978e-07, "loss": 1.0415, "num_input_tokens_seen": 1517576704, "step": 8403 }, { "epoch": 0.9200032841621282, "grad_norm": 1.259566795567805, "learning_rate": 7.838820731885021e-07, "loss": 0.8858, "num_input_tokens_seen": 1517748064, "step": 8404 }, { "epoch": 0.920112756233066, "grad_norm": 1.0468331513651925, "learning_rate": 7.817471777076673e-07, "loss": 1.0054, "num_input_tokens_seen": 1517969376, "step": 8405 }, { "epoch": 0.9202222283040039, "grad_norm": 1.0849219314334548, "learning_rate": 7.796151471998414e-07, "loss": 0.9971, "num_input_tokens_seen": 1518148352, "step": 8406 }, { "epoch": 0.9203317003749418, "grad_norm": 1.111533308613753, "learning_rate": 7.77485981917242e-07, "loss": 1.0098, "num_input_tokens_seen": 1518339424, "step": 8407 }, { "epoch": 0.9204411724458798, "grad_norm": 1.1500430140536413, "learning_rate": 7.753596821117426e-07, "loss": 0.908, "num_input_tokens_seen": 1518530272, "step": 8408 }, { "epoch": 0.9205506445168177, "grad_norm": 1.0431664097396536, "learning_rate": 7.732362480348892e-07, "loss": 0.8615, "num_input_tokens_seen": 1518710816, "step": 8409 }, { "epoch": 0.9206601165877556, "grad_norm": 1.0997449882724388, "learning_rate": 7.7111567993787e-07, "loss": 0.7845, "num_input_tokens_seen": 1518867616, "step": 8410 }, { "epoch": 0.9207695886586934, "grad_norm": 1.262343733152816, "learning_rate": 7.689979780715534e-07, "loss": 0.999, "num_input_tokens_seen": 1519025984, "step": 8411 }, { "epoch": 0.9208790607296313, "grad_norm": 1.1638965872482423, "learning_rate": 7.668831426864448e-07, "loss": 0.8872, "num_input_tokens_seen": 1519221984, "step": 8412 }, { "epoch": 0.9209885328005692, "grad_norm": 1.1240332476256578, "learning_rate": 7.647711740327412e-07, "loss": 0.7102, "num_input_tokens_seen": 1519394464, "step": 8413 }, { "epoch": 0.9210980048715072, "grad_norm": 1.059995841791886, "learning_rate": 7.626620723602762e-07, "loss": 0.9296, "num_input_tokens_seen": 1519583520, "step": 8414 }, { "epoch": 0.9212074769424451, "grad_norm": 1.3315794953392144, "learning_rate": 7.605558379185556e-07, "loss": 1.0464, "num_input_tokens_seen": 1519741888, "step": 8415 }, { "epoch": 0.921316949013383, "grad_norm": 1.015392100700051, "learning_rate": 7.584524709567386e-07, "loss": 0.9576, "num_input_tokens_seen": 1519928032, "step": 8416 }, { "epoch": 0.9214264210843208, "grad_norm": 1.1414371890993107, "learning_rate": 7.563519717236511e-07, "loss": 0.9107, "num_input_tokens_seen": 1520122912, "step": 8417 }, { "epoch": 0.9215358931552587, "grad_norm": 1.0971271692354303, "learning_rate": 7.54254340467786e-07, "loss": 0.7846, "num_input_tokens_seen": 1520287552, "step": 8418 }, { "epoch": 0.9216453652261967, "grad_norm": 1.2708543661075946, "learning_rate": 7.521595774372752e-07, "loss": 1.1156, "num_input_tokens_seen": 1520464960, "step": 8419 }, { "epoch": 0.9217548372971346, "grad_norm": 1.051379462490605, "learning_rate": 7.500676828799402e-07, "loss": 0.7549, "num_input_tokens_seen": 1520622208, "step": 8420 }, { "epoch": 0.9218643093680725, "grad_norm": 1.6370570304364749, "learning_rate": 7.479786570432329e-07, "loss": 1.1153, "num_input_tokens_seen": 1520815744, "step": 8421 }, { "epoch": 0.9219737814390103, "grad_norm": 1.0660806235587574, "learning_rate": 7.458925001742917e-07, "loss": 0.8271, "num_input_tokens_seen": 1520996288, "step": 8422 }, { "epoch": 0.9220832535099482, "grad_norm": 0.9394786617115419, "learning_rate": 7.438092125199025e-07, "loss": 0.7357, "num_input_tokens_seen": 1521183776, "step": 8423 }, { "epoch": 0.9221927255808862, "grad_norm": 1.0531955480292399, "learning_rate": 7.417287943265128e-07, "loss": 1.0724, "num_input_tokens_seen": 1521362528, "step": 8424 }, { "epoch": 0.9223021976518241, "grad_norm": 1.012800956078842, "learning_rate": 7.39651245840231e-07, "loss": 0.6984, "num_input_tokens_seen": 1521540832, "step": 8425 }, { "epoch": 0.922411669722762, "grad_norm": 1.1932159187761453, "learning_rate": 7.375765673068275e-07, "loss": 0.9409, "num_input_tokens_seen": 1521728544, "step": 8426 }, { "epoch": 0.9225211417936999, "grad_norm": 1.0485216541882947, "learning_rate": 7.355047589717418e-07, "loss": 0.7307, "num_input_tokens_seen": 1521912896, "step": 8427 }, { "epoch": 0.9226306138646377, "grad_norm": 1.0584216664213726, "learning_rate": 7.334358210800473e-07, "loss": 1.0393, "num_input_tokens_seen": 1522110240, "step": 8428 }, { "epoch": 0.9227400859355757, "grad_norm": 1.0575367623109946, "learning_rate": 7.313697538765124e-07, "loss": 0.7603, "num_input_tokens_seen": 1522277120, "step": 8429 }, { "epoch": 0.9228495580065136, "grad_norm": 1.0720849501628127, "learning_rate": 7.293065576055386e-07, "loss": 0.7412, "num_input_tokens_seen": 1522459456, "step": 8430 }, { "epoch": 0.9229590300774515, "grad_norm": 1.1551956318236423, "learning_rate": 7.272462325112056e-07, "loss": 0.9742, "num_input_tokens_seen": 1522648736, "step": 8431 }, { "epoch": 0.9230685021483894, "grad_norm": 1.1191296002708773, "learning_rate": 7.25188778837238e-07, "loss": 0.8604, "num_input_tokens_seen": 1522807328, "step": 8432 }, { "epoch": 0.9231779742193273, "grad_norm": 1.0601998282958638, "learning_rate": 7.231341968270328e-07, "loss": 0.9193, "num_input_tokens_seen": 1523008704, "step": 8433 }, { "epoch": 0.9232874462902652, "grad_norm": 1.1381302292864437, "learning_rate": 7.210824867236427e-07, "loss": 0.863, "num_input_tokens_seen": 1523223744, "step": 8434 }, { "epoch": 0.9233969183612031, "grad_norm": 1.0568564823813598, "learning_rate": 7.190336487697791e-07, "loss": 1.314, "num_input_tokens_seen": 1523444608, "step": 8435 }, { "epoch": 0.923506390432141, "grad_norm": 1.0883924330242587, "learning_rate": 7.169876832078204e-07, "loss": 0.7338, "num_input_tokens_seen": 1523643296, "step": 8436 }, { "epoch": 0.9236158625030789, "grad_norm": 1.1905928539832555, "learning_rate": 7.14944590279798e-07, "loss": 0.623, "num_input_tokens_seen": 1523815552, "step": 8437 }, { "epoch": 0.9237253345740168, "grad_norm": 0.9327033996678763, "learning_rate": 7.129043702274018e-07, "loss": 0.7945, "num_input_tokens_seen": 1524033056, "step": 8438 }, { "epoch": 0.9238348066449547, "grad_norm": 1.2284234865990977, "learning_rate": 7.108670232919946e-07, "loss": 0.8372, "num_input_tokens_seen": 1524196352, "step": 8439 }, { "epoch": 0.9239442787158926, "grad_norm": 1.4216521371424684, "learning_rate": 7.088325497145832e-07, "loss": 0.873, "num_input_tokens_seen": 1524360096, "step": 8440 }, { "epoch": 0.9240537507868305, "grad_norm": 1.1303063392629464, "learning_rate": 7.068009497358446e-07, "loss": 0.9152, "num_input_tokens_seen": 1524548256, "step": 8441 }, { "epoch": 0.9241632228577684, "grad_norm": 1.0301548315542122, "learning_rate": 7.047722235961119e-07, "loss": 0.917, "num_input_tokens_seen": 1524739328, "step": 8442 }, { "epoch": 0.9242726949287063, "grad_norm": 1.2644690898256274, "learning_rate": 7.027463715353789e-07, "loss": 0.882, "num_input_tokens_seen": 1524908224, "step": 8443 }, { "epoch": 0.9243821669996443, "grad_norm": 1.021839670859624, "learning_rate": 7.007233937933067e-07, "loss": 0.672, "num_input_tokens_seen": 1525072640, "step": 8444 }, { "epoch": 0.9244916390705821, "grad_norm": 1.1256268662135929, "learning_rate": 6.987032906091983e-07, "loss": 0.8003, "num_input_tokens_seen": 1525254976, "step": 8445 }, { "epoch": 0.92460111114152, "grad_norm": 1.1537938708295474, "learning_rate": 6.966860622220378e-07, "loss": 1.0907, "num_input_tokens_seen": 1525446272, "step": 8446 }, { "epoch": 0.9247105832124579, "grad_norm": 1.0354518252466487, "learning_rate": 6.946717088704563e-07, "loss": 0.9342, "num_input_tokens_seen": 1525654144, "step": 8447 }, { "epoch": 0.9248200552833958, "grad_norm": 1.0727492390089641, "learning_rate": 6.926602307927494e-07, "loss": 0.7745, "num_input_tokens_seen": 1525812960, "step": 8448 }, { "epoch": 0.9249295273543338, "grad_norm": 1.0826023949347672, "learning_rate": 6.906516282268682e-07, "loss": 0.825, "num_input_tokens_seen": 1525965056, "step": 8449 }, { "epoch": 0.9250389994252717, "grad_norm": 1.1183971978232952, "learning_rate": 6.88645901410423e-07, "loss": 0.8161, "num_input_tokens_seen": 1526169568, "step": 8450 }, { "epoch": 0.9251484714962095, "grad_norm": 1.0503825172119914, "learning_rate": 6.866430505807014e-07, "loss": 0.7019, "num_input_tokens_seen": 1526341152, "step": 8451 }, { "epoch": 0.9252579435671474, "grad_norm": 1.0482952675228323, "learning_rate": 6.846430759746198e-07, "loss": 0.6988, "num_input_tokens_seen": 1526513856, "step": 8452 }, { "epoch": 0.9253674156380853, "grad_norm": 0.9695068294821688, "learning_rate": 6.826459778287858e-07, "loss": 0.5826, "num_input_tokens_seen": 1526686784, "step": 8453 }, { "epoch": 0.9254768877090233, "grad_norm": 1.181825229316986, "learning_rate": 6.806517563794385e-07, "loss": 1.0091, "num_input_tokens_seen": 1526872256, "step": 8454 }, { "epoch": 0.9255863597799612, "grad_norm": 1.0719912096204045, "learning_rate": 6.786604118625029e-07, "loss": 0.7468, "num_input_tokens_seen": 1527026592, "step": 8455 }, { "epoch": 0.925695831850899, "grad_norm": 1.1305794641235423, "learning_rate": 6.766719445135434e-07, "loss": 0.8087, "num_input_tokens_seen": 1527206240, "step": 8456 }, { "epoch": 0.9258053039218369, "grad_norm": 1.1465337635430601, "learning_rate": 6.746863545677967e-07, "loss": 0.9421, "num_input_tokens_seen": 1527348032, "step": 8457 }, { "epoch": 0.9259147759927748, "grad_norm": 1.0417571727852708, "learning_rate": 6.727036422601529e-07, "loss": 0.7819, "num_input_tokens_seen": 1527527232, "step": 8458 }, { "epoch": 0.9260242480637128, "grad_norm": 0.999379024819577, "learning_rate": 6.707238078251576e-07, "loss": 0.8923, "num_input_tokens_seen": 1527710688, "step": 8459 }, { "epoch": 0.9261337201346507, "grad_norm": 1.0636864937496733, "learning_rate": 6.687468514970319e-07, "loss": 0.9081, "num_input_tokens_seen": 1527886080, "step": 8460 }, { "epoch": 0.9262431922055886, "grad_norm": 1.0455459014835597, "learning_rate": 6.667727735096357e-07, "loss": 0.7566, "num_input_tokens_seen": 1528058784, "step": 8461 }, { "epoch": 0.9263526642765264, "grad_norm": 1.1880880706682673, "learning_rate": 6.648015740965074e-07, "loss": 0.8383, "num_input_tokens_seen": 1528244928, "step": 8462 }, { "epoch": 0.9264621363474643, "grad_norm": 1.1854213289907758, "learning_rate": 6.628332534908272e-07, "loss": 0.7559, "num_input_tokens_seen": 1528400832, "step": 8463 }, { "epoch": 0.9265716084184022, "grad_norm": 1.1329303022496813, "learning_rate": 6.608678119254502e-07, "loss": 1.1642, "num_input_tokens_seen": 1528612960, "step": 8464 }, { "epoch": 0.9266810804893402, "grad_norm": 1.1502346069028933, "learning_rate": 6.589052496328824e-07, "loss": 0.7349, "num_input_tokens_seen": 1528769760, "step": 8465 }, { "epoch": 0.9267905525602781, "grad_norm": 0.949244463536993, "learning_rate": 6.569455668452934e-07, "loss": 0.8504, "num_input_tokens_seen": 1528971360, "step": 8466 }, { "epoch": 0.926900024631216, "grad_norm": 1.136522833336753, "learning_rate": 6.549887637945063e-07, "loss": 0.8098, "num_input_tokens_seen": 1529166240, "step": 8467 }, { "epoch": 0.9270094967021538, "grad_norm": 1.1002415521874231, "learning_rate": 6.530348407120052e-07, "loss": 0.7526, "num_input_tokens_seen": 1529336032, "step": 8468 }, { "epoch": 0.9271189687730917, "grad_norm": 1.1321155377186636, "learning_rate": 6.510837978289414e-07, "loss": 0.8906, "num_input_tokens_seen": 1529540544, "step": 8469 }, { "epoch": 0.9272284408440297, "grad_norm": 1.1556708973445058, "learning_rate": 6.491356353761191e-07, "loss": 0.9147, "num_input_tokens_seen": 1529755136, "step": 8470 }, { "epoch": 0.9273379129149676, "grad_norm": 1.2718931791988675, "learning_rate": 6.471903535839985e-07, "loss": 0.8517, "num_input_tokens_seen": 1529933664, "step": 8471 }, { "epoch": 0.9274473849859055, "grad_norm": 1.0929007366578989, "learning_rate": 6.452479526827065e-07, "loss": 0.821, "num_input_tokens_seen": 1530105472, "step": 8472 }, { "epoch": 0.9275568570568433, "grad_norm": 1.00095893658262, "learning_rate": 6.433084329020233e-07, "loss": 0.9283, "num_input_tokens_seen": 1530281312, "step": 8473 }, { "epoch": 0.9276663291277812, "grad_norm": 1.2068609222428663, "learning_rate": 6.413717944713876e-07, "loss": 0.7355, "num_input_tokens_seen": 1530406080, "step": 8474 }, { "epoch": 0.9277758011987192, "grad_norm": 1.150462797585491, "learning_rate": 6.39438037619905e-07, "loss": 0.6925, "num_input_tokens_seen": 1530575424, "step": 8475 }, { "epoch": 0.9278852732696571, "grad_norm": 1.085116178735323, "learning_rate": 6.375071625763285e-07, "loss": 0.7625, "num_input_tokens_seen": 1530733344, "step": 8476 }, { "epoch": 0.927994745340595, "grad_norm": 1.1591580396139678, "learning_rate": 6.355791695690866e-07, "loss": 1.1852, "num_input_tokens_seen": 1530910976, "step": 8477 }, { "epoch": 0.9281042174115329, "grad_norm": 1.1469940144583084, "learning_rate": 6.336540588262496e-07, "loss": 0.8084, "num_input_tokens_seen": 1531099136, "step": 8478 }, { "epoch": 0.9282136894824707, "grad_norm": 1.1337201921441276, "learning_rate": 6.317318305755604e-07, "loss": 0.793, "num_input_tokens_seen": 1531284384, "step": 8479 }, { "epoch": 0.9283231615534087, "grad_norm": 1.1804571104454136, "learning_rate": 6.298124850444093e-07, "loss": 0.9664, "num_input_tokens_seen": 1531469856, "step": 8480 }, { "epoch": 0.9284326336243466, "grad_norm": 1.166369411655849, "learning_rate": 6.278960224598507e-07, "loss": 0.8822, "num_input_tokens_seen": 1531652640, "step": 8481 }, { "epoch": 0.9285421056952845, "grad_norm": 1.0547464424899717, "learning_rate": 6.259824430486061e-07, "loss": 0.8781, "num_input_tokens_seen": 1531842368, "step": 8482 }, { "epoch": 0.9286515777662224, "grad_norm": 1.2292699878177795, "learning_rate": 6.240717470370361e-07, "loss": 0.8605, "num_input_tokens_seen": 1532052032, "step": 8483 }, { "epoch": 0.9287610498371603, "grad_norm": 1.0867594620121834, "learning_rate": 6.221639346511876e-07, "loss": 0.8176, "num_input_tokens_seen": 1532221376, "step": 8484 }, { "epoch": 0.9288705219080982, "grad_norm": 1.0920074751112525, "learning_rate": 6.202590061167385e-07, "loss": 0.8043, "num_input_tokens_seen": 1532422304, "step": 8485 }, { "epoch": 0.9289799939790361, "grad_norm": 0.947209873721816, "learning_rate": 6.183569616590446e-07, "loss": 0.8283, "num_input_tokens_seen": 1532637120, "step": 8486 }, { "epoch": 0.929089466049974, "grad_norm": 1.0918883147189382, "learning_rate": 6.164578015031092e-07, "loss": 0.7343, "num_input_tokens_seen": 1532801984, "step": 8487 }, { "epoch": 0.9291989381209119, "grad_norm": 1.1857289106957496, "learning_rate": 6.145615258736054e-07, "loss": 1.006, "num_input_tokens_seen": 1532994848, "step": 8488 }, { "epoch": 0.9293084101918498, "grad_norm": 1.1567381266653811, "learning_rate": 6.126681349948565e-07, "loss": 1.0277, "num_input_tokens_seen": 1533185024, "step": 8489 }, { "epoch": 0.9294178822627877, "grad_norm": 1.255867680689754, "learning_rate": 6.107776290908418e-07, "loss": 1.0418, "num_input_tokens_seen": 1533360864, "step": 8490 }, { "epoch": 0.9295273543337256, "grad_norm": 1.1744639980016112, "learning_rate": 6.088900083852184e-07, "loss": 0.7971, "num_input_tokens_seen": 1533542976, "step": 8491 }, { "epoch": 0.9296368264046635, "grad_norm": 1.3475259026778508, "learning_rate": 6.070052731012688e-07, "loss": 1.0393, "num_input_tokens_seen": 1533717248, "step": 8492 }, { "epoch": 0.9297462984756014, "grad_norm": 1.1447738546571455, "learning_rate": 6.051234234619729e-07, "loss": 0.9276, "num_input_tokens_seen": 1533922208, "step": 8493 }, { "epoch": 0.9298557705465393, "grad_norm": 1.0942867596844321, "learning_rate": 6.032444596899333e-07, "loss": 0.6549, "num_input_tokens_seen": 1534081696, "step": 8494 }, { "epoch": 0.9299652426174773, "grad_norm": 1.1698720981917743, "learning_rate": 6.013683820074418e-07, "loss": 0.8835, "num_input_tokens_seen": 1534246784, "step": 8495 }, { "epoch": 0.9300747146884151, "grad_norm": 0.9425586595671412, "learning_rate": 5.99495190636426e-07, "loss": 0.7314, "num_input_tokens_seen": 1534415904, "step": 8496 }, { "epoch": 0.930184186759353, "grad_norm": 1.1184791620706789, "learning_rate": 5.976248857984812e-07, "loss": 0.7582, "num_input_tokens_seen": 1534620192, "step": 8497 }, { "epoch": 0.9302936588302909, "grad_norm": 1.0440231879425237, "learning_rate": 5.957574677148664e-07, "loss": 0.7716, "num_input_tokens_seen": 1534799616, "step": 8498 }, { "epoch": 0.9304031309012288, "grad_norm": 1.0536228599187178, "learning_rate": 5.938929366064882e-07, "loss": 0.7782, "num_input_tokens_seen": 1534978816, "step": 8499 }, { "epoch": 0.9305126029721668, "grad_norm": 1.1533910614975706, "learning_rate": 5.920312926939203e-07, "loss": 0.8257, "num_input_tokens_seen": 1535190048, "step": 8500 }, { "epoch": 0.9306220750431047, "grad_norm": 1.0426570126419101, "learning_rate": 5.901725361973864e-07, "loss": 0.7596, "num_input_tokens_seen": 1535391648, "step": 8501 }, { "epoch": 0.9307315471140425, "grad_norm": 1.12043195058419, "learning_rate": 5.883166673367829e-07, "loss": 1.1427, "num_input_tokens_seen": 1535575104, "step": 8502 }, { "epoch": 0.9308410191849804, "grad_norm": 1.008588152337294, "learning_rate": 5.864636863316453e-07, "loss": 0.8961, "num_input_tokens_seen": 1535766400, "step": 8503 }, { "epoch": 0.9309504912559183, "grad_norm": 1.1074600347709198, "learning_rate": 5.84613593401187e-07, "loss": 0.9153, "num_input_tokens_seen": 1535944928, "step": 8504 }, { "epoch": 0.9310599633268563, "grad_norm": 1.027720799169766, "learning_rate": 5.827663887642665e-07, "loss": 1.0147, "num_input_tokens_seen": 1536123232, "step": 8505 }, { "epoch": 0.9311694353977942, "grad_norm": 1.0771078204108264, "learning_rate": 5.809220726394032e-07, "loss": 0.9718, "num_input_tokens_seen": 1536305120, "step": 8506 }, { "epoch": 0.931278907468732, "grad_norm": 1.1070620105160636, "learning_rate": 5.790806452447756e-07, "loss": 0.7181, "num_input_tokens_seen": 1536491040, "step": 8507 }, { "epoch": 0.9313883795396699, "grad_norm": 1.0839617202728358, "learning_rate": 5.772421067982259e-07, "loss": 0.7907, "num_input_tokens_seen": 1536681216, "step": 8508 }, { "epoch": 0.9314978516106078, "grad_norm": 1.0602806136432297, "learning_rate": 5.754064575172441e-07, "loss": 0.9292, "num_input_tokens_seen": 1536875648, "step": 8509 }, { "epoch": 0.9316073236815458, "grad_norm": 1.1914962484418339, "learning_rate": 5.735736976189871e-07, "loss": 1.0131, "num_input_tokens_seen": 1537057984, "step": 8510 }, { "epoch": 0.9317167957524837, "grad_norm": 1.1330340464763398, "learning_rate": 5.717438273202674e-07, "loss": 0.7645, "num_input_tokens_seen": 1537246816, "step": 8511 }, { "epoch": 0.9318262678234216, "grad_norm": 1.1446301935021237, "learning_rate": 5.699168468375538e-07, "loss": 0.909, "num_input_tokens_seen": 1537409440, "step": 8512 }, { "epoch": 0.9319357398943594, "grad_norm": 1.094667268043923, "learning_rate": 5.680927563869731e-07, "loss": 0.8172, "num_input_tokens_seen": 1537586400, "step": 8513 }, { "epoch": 0.9320452119652973, "grad_norm": 1.1564085568148734, "learning_rate": 5.662715561843141e-07, "loss": 1.0101, "num_input_tokens_seen": 1537789120, "step": 8514 }, { "epoch": 0.9321546840362352, "grad_norm": 1.1129381688500986, "learning_rate": 5.644532464450237e-07, "loss": 0.8299, "num_input_tokens_seen": 1537951744, "step": 8515 }, { "epoch": 0.9322641561071732, "grad_norm": 1.203906016315732, "learning_rate": 5.626378273841965e-07, "loss": 0.9206, "num_input_tokens_seen": 1538158048, "step": 8516 }, { "epoch": 0.9323736281781111, "grad_norm": 0.9708541967472281, "learning_rate": 5.608252992166024e-07, "loss": 0.7052, "num_input_tokens_seen": 1538376000, "step": 8517 }, { "epoch": 0.932483100249049, "grad_norm": 1.1743678218506457, "learning_rate": 5.590156621566506e-07, "loss": 1.1166, "num_input_tokens_seen": 1538572224, "step": 8518 }, { "epoch": 0.9325925723199868, "grad_norm": 1.0110370003318567, "learning_rate": 5.572089164184253e-07, "loss": 0.6626, "num_input_tokens_seen": 1538771136, "step": 8519 }, { "epoch": 0.9327020443909247, "grad_norm": 1.1829143122566408, "learning_rate": 5.554050622156609e-07, "loss": 0.9293, "num_input_tokens_seen": 1538905088, "step": 8520 }, { "epoch": 0.9328115164618627, "grad_norm": 1.058319675677826, "learning_rate": 5.536040997617453e-07, "loss": 0.8047, "num_input_tokens_seen": 1539074208, "step": 8521 }, { "epoch": 0.9329209885328006, "grad_norm": 1.1064844217147554, "learning_rate": 5.518060292697302e-07, "loss": 0.875, "num_input_tokens_seen": 1539268192, "step": 8522 }, { "epoch": 0.9330304606037385, "grad_norm": 1.138100220495849, "learning_rate": 5.50010850952326e-07, "loss": 0.8767, "num_input_tokens_seen": 1539418272, "step": 8523 }, { "epoch": 0.9331399326746763, "grad_norm": 0.9687319415470499, "learning_rate": 5.482185650218991e-07, "loss": 0.8176, "num_input_tokens_seen": 1539624128, "step": 8524 }, { "epoch": 0.9332494047456142, "grad_norm": 1.1531556970495696, "learning_rate": 5.464291716904684e-07, "loss": 1.0462, "num_input_tokens_seen": 1539795712, "step": 8525 }, { "epoch": 0.9333588768165522, "grad_norm": 0.9929835512752363, "learning_rate": 5.446426711697233e-07, "loss": 0.8196, "num_input_tokens_seen": 1539988800, "step": 8526 }, { "epoch": 0.9334683488874901, "grad_norm": 1.2267960247130862, "learning_rate": 5.428590636709973e-07, "loss": 0.7238, "num_input_tokens_seen": 1540177184, "step": 8527 }, { "epoch": 0.933577820958428, "grad_norm": 1.1547760202961812, "learning_rate": 5.41078349405294e-07, "loss": 0.9734, "num_input_tokens_seen": 1540381472, "step": 8528 }, { "epoch": 0.9336872930293659, "grad_norm": 1.0734236115247082, "learning_rate": 5.393005285832586e-07, "loss": 0.7023, "num_input_tokens_seen": 1540566048, "step": 8529 }, { "epoch": 0.9337967651003037, "grad_norm": 0.9992874408223208, "learning_rate": 5.375256014152119e-07, "loss": 0.7538, "num_input_tokens_seen": 1540751968, "step": 8530 }, { "epoch": 0.9339062371712417, "grad_norm": 1.1348054914972916, "learning_rate": 5.35753568111122e-07, "loss": 0.9848, "num_input_tokens_seen": 1540927360, "step": 8531 }, { "epoch": 0.9340157092421796, "grad_norm": 1.1420636967099667, "learning_rate": 5.339844288806156e-07, "loss": 1.1922, "num_input_tokens_seen": 1541126496, "step": 8532 }, { "epoch": 0.9341251813131175, "grad_norm": 1.0820569748006723, "learning_rate": 5.322181839329865e-07, "loss": 0.8993, "num_input_tokens_seen": 1541313984, "step": 8533 }, { "epoch": 0.9342346533840554, "grad_norm": 1.2383609589635238, "learning_rate": 5.304548334771648e-07, "loss": 0.8487, "num_input_tokens_seen": 1541473920, "step": 8534 }, { "epoch": 0.9343441254549933, "grad_norm": 1.1765553407083964, "learning_rate": 5.28694377721764e-07, "loss": 0.9311, "num_input_tokens_seen": 1541658720, "step": 8535 }, { "epoch": 0.9344535975259312, "grad_norm": 1.1103527971594664, "learning_rate": 5.269368168750316e-07, "loss": 0.9159, "num_input_tokens_seen": 1541818880, "step": 8536 }, { "epoch": 0.9345630695968691, "grad_norm": 1.1364828697420666, "learning_rate": 5.251821511448928e-07, "loss": 0.876, "num_input_tokens_seen": 1541990464, "step": 8537 }, { "epoch": 0.934672541667807, "grad_norm": 1.0630740129824228, "learning_rate": 5.234303807389151e-07, "loss": 0.6961, "num_input_tokens_seen": 1542149728, "step": 8538 }, { "epoch": 0.9347820137387449, "grad_norm": 1.0524486939708069, "learning_rate": 5.216815058643353e-07, "loss": 0.8403, "num_input_tokens_seen": 1542329152, "step": 8539 }, { "epoch": 0.9348914858096828, "grad_norm": 1.041512776769392, "learning_rate": 5.199355267280382e-07, "loss": 0.6809, "num_input_tokens_seen": 1542488416, "step": 8540 }, { "epoch": 0.9350009578806207, "grad_norm": 1.1939623319316721, "learning_rate": 5.181924435365693e-07, "loss": 0.9135, "num_input_tokens_seen": 1542682176, "step": 8541 }, { "epoch": 0.9351104299515586, "grad_norm": 1.2494242295779856, "learning_rate": 5.164522564961332e-07, "loss": 0.9344, "num_input_tokens_seen": 1542872352, "step": 8542 }, { "epoch": 0.9352199020224965, "grad_norm": 1.1764733188260872, "learning_rate": 5.147149658125877e-07, "loss": 0.8241, "num_input_tokens_seen": 1543026240, "step": 8543 }, { "epoch": 0.9353293740934344, "grad_norm": 1.1199851053814265, "learning_rate": 5.129805716914571e-07, "loss": 0.9263, "num_input_tokens_seen": 1543232544, "step": 8544 }, { "epoch": 0.9354388461643723, "grad_norm": 1.1855937816509567, "learning_rate": 5.112490743379133e-07, "loss": 1.0197, "num_input_tokens_seen": 1543410624, "step": 8545 }, { "epoch": 0.9355483182353103, "grad_norm": 1.0537072606769435, "learning_rate": 5.095204739567899e-07, "loss": 0.863, "num_input_tokens_seen": 1543624544, "step": 8546 }, { "epoch": 0.9356577903062481, "grad_norm": 1.135374500063297, "learning_rate": 5.07794770752576e-07, "loss": 0.8846, "num_input_tokens_seen": 1543827712, "step": 8547 }, { "epoch": 0.935767262377186, "grad_norm": 1.102457102591942, "learning_rate": 5.060719649294194e-07, "loss": 1.1799, "num_input_tokens_seen": 1543996160, "step": 8548 }, { "epoch": 0.9358767344481239, "grad_norm": 1.0586699285933385, "learning_rate": 5.043520566911264e-07, "loss": 0.8553, "num_input_tokens_seen": 1544201344, "step": 8549 }, { "epoch": 0.9359862065190618, "grad_norm": 1.0547646953447847, "learning_rate": 5.026350462411567e-07, "loss": 0.9294, "num_input_tokens_seen": 1544402720, "step": 8550 }, { "epoch": 0.9360956785899998, "grad_norm": 1.1244943330044428, "learning_rate": 5.009209337826254e-07, "loss": 0.9125, "num_input_tokens_seen": 1544595584, "step": 8551 }, { "epoch": 0.9362051506609377, "grad_norm": 1.1104541955919716, "learning_rate": 4.992097195183176e-07, "loss": 0.7121, "num_input_tokens_seen": 1544752832, "step": 8552 }, { "epoch": 0.9363146227318755, "grad_norm": 1.081040848365151, "learning_rate": 4.975014036506631e-07, "loss": 0.7835, "num_input_tokens_seen": 1544917696, "step": 8553 }, { "epoch": 0.9364240948028134, "grad_norm": 1.2847091221642675, "learning_rate": 4.957959863817502e-07, "loss": 0.9602, "num_input_tokens_seen": 1545098240, "step": 8554 }, { "epoch": 0.9365335668737513, "grad_norm": 1.1156388332674023, "learning_rate": 4.940934679133286e-07, "loss": 0.9899, "num_input_tokens_seen": 1545311712, "step": 8555 }, { "epoch": 0.9366430389446893, "grad_norm": 1.2267444257001245, "learning_rate": 4.923938484468038e-07, "loss": 0.8513, "num_input_tokens_seen": 1545507936, "step": 8556 }, { "epoch": 0.9367525110156272, "grad_norm": 1.0461585596704452, "learning_rate": 4.906971281832346e-07, "loss": 0.8305, "num_input_tokens_seen": 1545675712, "step": 8557 }, { "epoch": 0.936861983086565, "grad_norm": 1.0694095673316657, "learning_rate": 4.890033073233408e-07, "loss": 0.723, "num_input_tokens_seen": 1545842368, "step": 8558 }, { "epoch": 0.9369714551575029, "grad_norm": 0.9780928256072136, "learning_rate": 4.87312386067501e-07, "loss": 0.882, "num_input_tokens_seen": 1546033888, "step": 8559 }, { "epoch": 0.9370809272284408, "grad_norm": 1.0765641011750782, "learning_rate": 4.856243646157415e-07, "loss": 1.0436, "num_input_tokens_seen": 1546230784, "step": 8560 }, { "epoch": 0.9371903992993788, "grad_norm": 1.1009773702383334, "learning_rate": 4.83939243167758e-07, "loss": 1.016, "num_input_tokens_seen": 1546413120, "step": 8561 }, { "epoch": 0.9372998713703167, "grad_norm": 1.0315505659030746, "learning_rate": 4.822570219228967e-07, "loss": 0.7022, "num_input_tokens_seen": 1546574400, "step": 8562 }, { "epoch": 0.9374093434412546, "grad_norm": 1.1767668790982413, "learning_rate": 4.805777010801593e-07, "loss": 0.9933, "num_input_tokens_seen": 1546740832, "step": 8563 }, { "epoch": 0.9375188155121924, "grad_norm": 1.1057275799335458, "learning_rate": 4.789012808382065e-07, "loss": 0.7225, "num_input_tokens_seen": 1546906144, "step": 8564 }, { "epoch": 0.9376282875831303, "grad_norm": 1.1541470141522219, "learning_rate": 4.772277613953546e-07, "loss": 0.9981, "num_input_tokens_seen": 1547086912, "step": 8565 }, { "epoch": 0.9377377596540682, "grad_norm": 1.0556553673540494, "learning_rate": 4.7555714294958144e-07, "loss": 0.8358, "num_input_tokens_seen": 1547263424, "step": 8566 }, { "epoch": 0.9378472317250062, "grad_norm": 1.1941357473686214, "learning_rate": 4.738894256985121e-07, "loss": 0.8621, "num_input_tokens_seen": 1547436128, "step": 8567 }, { "epoch": 0.9379567037959441, "grad_norm": 1.0173650068107956, "learning_rate": 4.722246098394417e-07, "loss": 0.7605, "num_input_tokens_seen": 1547629888, "step": 8568 }, { "epoch": 0.938066175866882, "grad_norm": 1.1454743545472128, "learning_rate": 4.705626955693071e-07, "loss": 0.8189, "num_input_tokens_seen": 1547815584, "step": 8569 }, { "epoch": 0.9381756479378198, "grad_norm": 1.1284332868591946, "learning_rate": 4.689036830847177e-07, "loss": 0.6934, "num_input_tokens_seen": 1547976640, "step": 8570 }, { "epoch": 0.9382851200087577, "grad_norm": 1.1278804883919553, "learning_rate": 4.67247572581922e-07, "loss": 0.8402, "num_input_tokens_seen": 1548135680, "step": 8571 }, { "epoch": 0.9383945920796957, "grad_norm": 1.146368745499972, "learning_rate": 4.655943642568411e-07, "loss": 0.7004, "num_input_tokens_seen": 1548304352, "step": 8572 }, { "epoch": 0.9385040641506336, "grad_norm": 1.1031193114396085, "learning_rate": 4.639440583050464e-07, "loss": 1.0581, "num_input_tokens_seen": 1548506624, "step": 8573 }, { "epoch": 0.9386135362215715, "grad_norm": 1.1804054046337789, "learning_rate": 4.622966549217622e-07, "loss": 1.0687, "num_input_tokens_seen": 1548682688, "step": 8574 }, { "epoch": 0.9387230082925093, "grad_norm": 1.0187060645055033, "learning_rate": 4.606521543018799e-07, "loss": 0.7917, "num_input_tokens_seen": 1548874208, "step": 8575 }, { "epoch": 0.9388324803634472, "grad_norm": 1.0519616966309129, "learning_rate": 4.5901055663993274e-07, "loss": 0.8699, "num_input_tokens_seen": 1549049600, "step": 8576 }, { "epoch": 0.9389419524343852, "grad_norm": 1.274889720560592, "learning_rate": 4.573718621301265e-07, "loss": 0.8874, "num_input_tokens_seen": 1549249632, "step": 8577 }, { "epoch": 0.9390514245053231, "grad_norm": 1.3564368756487082, "learning_rate": 4.557360709663061e-07, "loss": 0.9138, "num_input_tokens_seen": 1549429280, "step": 8578 }, { "epoch": 0.939160896576261, "grad_norm": 1.154032307365958, "learning_rate": 4.5410318334199175e-07, "loss": 1.1141, "num_input_tokens_seen": 1549606912, "step": 8579 }, { "epoch": 0.9392703686471989, "grad_norm": 0.9339972545799895, "learning_rate": 4.524731994503456e-07, "loss": 0.7208, "num_input_tokens_seen": 1549812544, "step": 8580 }, { "epoch": 0.9393798407181367, "grad_norm": 1.0363542641895824, "learning_rate": 4.50846119484194e-07, "loss": 0.669, "num_input_tokens_seen": 1550011456, "step": 8581 }, { "epoch": 0.9394893127890747, "grad_norm": 1.1811426547892192, "learning_rate": 4.4922194363601343e-07, "loss": 0.7336, "num_input_tokens_seen": 1550208128, "step": 8582 }, { "epoch": 0.9395987848600126, "grad_norm": 1.0307193893783617, "learning_rate": 4.476006720979475e-07, "loss": 0.7707, "num_input_tokens_seen": 1550384864, "step": 8583 }, { "epoch": 0.9397082569309505, "grad_norm": 1.0348734343353676, "learning_rate": 4.459823050617845e-07, "loss": 0.8883, "num_input_tokens_seen": 1550559584, "step": 8584 }, { "epoch": 0.9398177290018884, "grad_norm": 1.2247325099140849, "learning_rate": 4.44366842718974e-07, "loss": 0.9643, "num_input_tokens_seen": 1550698912, "step": 8585 }, { "epoch": 0.9399272010728263, "grad_norm": 1.0076029360037473, "learning_rate": 4.4275428526062425e-07, "loss": 0.6882, "num_input_tokens_seen": 1550888192, "step": 8586 }, { "epoch": 0.9400366731437642, "grad_norm": 1.0562013287588443, "learning_rate": 4.411446328774993e-07, "loss": 0.926, "num_input_tokens_seen": 1551073888, "step": 8587 }, { "epoch": 0.9401461452147021, "grad_norm": 1.1119904111819252, "learning_rate": 4.3953788576001353e-07, "loss": 0.7265, "num_input_tokens_seen": 1551250624, "step": 8588 }, { "epoch": 0.94025561728564, "grad_norm": 1.1243329189668088, "learning_rate": 4.3793404409824546e-07, "loss": 0.7611, "num_input_tokens_seen": 1551449088, "step": 8589 }, { "epoch": 0.9403650893565779, "grad_norm": 1.1752236396934723, "learning_rate": 4.3633310808192385e-07, "loss": 0.8881, "num_input_tokens_seen": 1551615072, "step": 8590 }, { "epoch": 0.9404745614275158, "grad_norm": 1.0030531171195494, "learning_rate": 4.347350779004389e-07, "loss": 0.757, "num_input_tokens_seen": 1551807936, "step": 8591 }, { "epoch": 0.9405840334984537, "grad_norm": 1.1099569916340375, "learning_rate": 4.331399537428338e-07, "loss": 0.6877, "num_input_tokens_seen": 1551994080, "step": 8592 }, { "epoch": 0.9406935055693916, "grad_norm": 1.136986664860748, "learning_rate": 4.3154773579780483e-07, "loss": 0.9182, "num_input_tokens_seen": 1552151328, "step": 8593 }, { "epoch": 0.9408029776403295, "grad_norm": 0.9944622366454297, "learning_rate": 4.2995842425371524e-07, "loss": 0.8199, "num_input_tokens_seen": 1552328512, "step": 8594 }, { "epoch": 0.9409124497112674, "grad_norm": 1.1591320204796278, "learning_rate": 4.283720192985757e-07, "loss": 0.8618, "num_input_tokens_seen": 1552496064, "step": 8595 }, { "epoch": 0.9410219217822053, "grad_norm": 1.075058376590212, "learning_rate": 4.267885211200501e-07, "loss": 1.074, "num_input_tokens_seen": 1552695424, "step": 8596 }, { "epoch": 0.9411313938531433, "grad_norm": 2.701939670230885, "learning_rate": 4.25207929905469e-07, "loss": 0.919, "num_input_tokens_seen": 1552878880, "step": 8597 }, { "epoch": 0.9412408659240811, "grad_norm": 1.1410154954491647, "learning_rate": 4.236302458418051e-07, "loss": 0.6893, "num_input_tokens_seen": 1553065248, "step": 8598 }, { "epoch": 0.941350337995019, "grad_norm": 1.1087027648704133, "learning_rate": 4.2205546911570913e-07, "loss": 0.8205, "num_input_tokens_seen": 1553225408, "step": 8599 }, { "epoch": 0.9414598100659569, "grad_norm": 1.0472903779806497, "learning_rate": 4.2048359991345986e-07, "loss": 0.8021, "num_input_tokens_seen": 1553411552, "step": 8600 }, { "epoch": 0.9415692821368948, "grad_norm": 1.1908275261789205, "learning_rate": 4.1891463842101685e-07, "loss": 0.8877, "num_input_tokens_seen": 1553583584, "step": 8601 }, { "epoch": 0.9416787542078328, "grad_norm": 1.0281192767786895, "learning_rate": 4.173485848239761e-07, "loss": 0.7356, "num_input_tokens_seen": 1553795712, "step": 8602 }, { "epoch": 0.9417882262787707, "grad_norm": 1.1847177336710764, "learning_rate": 4.157854393076088e-07, "loss": 0.8512, "num_input_tokens_seen": 1553959456, "step": 8603 }, { "epoch": 0.9418976983497085, "grad_norm": 0.9630088191970455, "learning_rate": 4.1422520205682547e-07, "loss": 0.5796, "num_input_tokens_seen": 1554131488, "step": 8604 }, { "epoch": 0.9420071704206464, "grad_norm": 1.0406813093650444, "learning_rate": 4.126678732562006e-07, "loss": 0.7157, "num_input_tokens_seen": 1554267904, "step": 8605 }, { "epoch": 0.9421166424915843, "grad_norm": 1.1036395248129574, "learning_rate": 4.1111345308996185e-07, "loss": 0.794, "num_input_tokens_seen": 1554447328, "step": 8606 }, { "epoch": 0.9422261145625223, "grad_norm": 1.121841978474401, "learning_rate": 4.095619417419955e-07, "loss": 0.7521, "num_input_tokens_seen": 1554598528, "step": 8607 }, { "epoch": 0.9423355866334602, "grad_norm": 1.2232250803848037, "learning_rate": 4.080133393958463e-07, "loss": 1.0468, "num_input_tokens_seen": 1554799232, "step": 8608 }, { "epoch": 0.942445058704398, "grad_norm": 1.1419798760152544, "learning_rate": 4.0646764623470113e-07, "loss": 1.1265, "num_input_tokens_seen": 1554991872, "step": 8609 }, { "epoch": 0.9425545307753359, "grad_norm": 1.290681550493475, "learning_rate": 4.049248624414248e-07, "loss": 0.9153, "num_input_tokens_seen": 1555152032, "step": 8610 }, { "epoch": 0.9426640028462738, "grad_norm": 1.1419129611816357, "learning_rate": 4.0338498819851577e-07, "loss": 1.1393, "num_input_tokens_seen": 1555347360, "step": 8611 }, { "epoch": 0.9427734749172118, "grad_norm": 1.102890026365019, "learning_rate": 4.018480236881422e-07, "loss": 0.9882, "num_input_tokens_seen": 1555545152, "step": 8612 }, { "epoch": 0.9428829469881497, "grad_norm": 1.0548270804266642, "learning_rate": 4.003139690921254e-07, "loss": 0.7905, "num_input_tokens_seen": 1555699040, "step": 8613 }, { "epoch": 0.9429924190590876, "grad_norm": 1.2223519138442442, "learning_rate": 3.987828245919367e-07, "loss": 1.1261, "num_input_tokens_seen": 1555886528, "step": 8614 }, { "epoch": 0.9431018911300254, "grad_norm": 1.1356032623578731, "learning_rate": 3.972545903687119e-07, "loss": 0.9953, "num_input_tokens_seen": 1556065952, "step": 8615 }, { "epoch": 0.9432113632009633, "grad_norm": 1.0520832829742923, "learning_rate": 3.9572926660323695e-07, "loss": 0.6505, "num_input_tokens_seen": 1556208416, "step": 8616 }, { "epoch": 0.9433208352719012, "grad_norm": 1.1573662910928924, "learning_rate": 3.9420685347595634e-07, "loss": 0.8938, "num_input_tokens_seen": 1556410464, "step": 8617 }, { "epoch": 0.9434303073428392, "grad_norm": 1.0806546043810814, "learning_rate": 3.926873511669621e-07, "loss": 0.8711, "num_input_tokens_seen": 1556560992, "step": 8618 }, { "epoch": 0.9435397794137771, "grad_norm": 1.1784353115695732, "learning_rate": 3.91170759856016e-07, "loss": 0.7365, "num_input_tokens_seen": 1556734368, "step": 8619 }, { "epoch": 0.943649251484715, "grad_norm": 1.175013446730919, "learning_rate": 3.896570797225246e-07, "loss": 0.8661, "num_input_tokens_seen": 1556896320, "step": 8620 }, { "epoch": 0.9437587235556528, "grad_norm": 1.1103775974197352, "learning_rate": 3.88146310945553e-07, "loss": 0.8052, "num_input_tokens_seen": 1557033856, "step": 8621 }, { "epoch": 0.9438681956265907, "grad_norm": 1.0638617596488693, "learning_rate": 3.86638453703822e-07, "loss": 0.8734, "num_input_tokens_seen": 1557225376, "step": 8622 }, { "epoch": 0.9439776676975287, "grad_norm": 1.1592022090996121, "learning_rate": 3.8513350817571124e-07, "loss": 1.0547, "num_input_tokens_seen": 1557420928, "step": 8623 }, { "epoch": 0.9440871397684666, "grad_norm": 1.1875858777766017, "learning_rate": 3.836314745392505e-07, "loss": 0.8674, "num_input_tokens_seen": 1557622528, "step": 8624 }, { "epoch": 0.9441966118394045, "grad_norm": 0.9690981208678344, "learning_rate": 3.8213235297212823e-07, "loss": 0.9329, "num_input_tokens_seen": 1557815168, "step": 8625 }, { "epoch": 0.9443060839103423, "grad_norm": 1.0216732980991066, "learning_rate": 3.806361436516831e-07, "loss": 0.7863, "num_input_tokens_seen": 1558003104, "step": 8626 }, { "epoch": 0.9444155559812802, "grad_norm": 1.244267479673933, "learning_rate": 3.7914284675492075e-07, "loss": 0.768, "num_input_tokens_seen": 1558161696, "step": 8627 }, { "epoch": 0.9445250280522182, "grad_norm": 1.015712968977967, "learning_rate": 3.7765246245849426e-07, "loss": 0.8753, "num_input_tokens_seen": 1558373600, "step": 8628 }, { "epoch": 0.9446345001231561, "grad_norm": 0.9491483246223065, "learning_rate": 3.761649909387099e-07, "loss": 0.8266, "num_input_tokens_seen": 1558547872, "step": 8629 }, { "epoch": 0.944743972194094, "grad_norm": 1.1233634170914755, "learning_rate": 3.746804323715353e-07, "loss": 0.837, "num_input_tokens_seen": 1558713856, "step": 8630 }, { "epoch": 0.9448534442650319, "grad_norm": 1.182662849584252, "learning_rate": 3.731987869325881e-07, "loss": 1.302, "num_input_tokens_seen": 1558902240, "step": 8631 }, { "epoch": 0.9449629163359697, "grad_norm": 1.183898030565056, "learning_rate": 3.7172005479714777e-07, "loss": 0.6936, "num_input_tokens_seen": 1559067552, "step": 8632 }, { "epoch": 0.9450723884069077, "grad_norm": 1.022185571820905, "learning_rate": 3.7024423614014094e-07, "loss": 0.9548, "num_input_tokens_seen": 1559262880, "step": 8633 }, { "epoch": 0.9451818604778456, "grad_norm": 1.2606676760756392, "learning_rate": 3.6877133113616123e-07, "loss": 1.3285, "num_input_tokens_seen": 1559447456, "step": 8634 }, { "epoch": 0.9452913325487835, "grad_norm": 1.3355559311654834, "learning_rate": 3.673013399594444e-07, "loss": 0.8459, "num_input_tokens_seen": 1559633376, "step": 8635 }, { "epoch": 0.9454008046197214, "grad_norm": 1.352154464300495, "learning_rate": 3.658342627838873e-07, "loss": 0.929, "num_input_tokens_seen": 1559782336, "step": 8636 }, { "epoch": 0.9455102766906593, "grad_norm": 1.178255690148315, "learning_rate": 3.643700997830457e-07, "loss": 0.9585, "num_input_tokens_seen": 1559946080, "step": 8637 }, { "epoch": 0.9456197487615972, "grad_norm": 1.2499835966941304, "learning_rate": 3.6290885113012816e-07, "loss": 1.0131, "num_input_tokens_seen": 1560093920, "step": 8638 }, { "epoch": 0.9457292208325351, "grad_norm": 1.1457041696517036, "learning_rate": 3.614505169979909e-07, "loss": 0.8744, "num_input_tokens_seen": 1560278272, "step": 8639 }, { "epoch": 0.945838692903473, "grad_norm": 1.1624925510619042, "learning_rate": 3.5999509755915985e-07, "loss": 0.8874, "num_input_tokens_seen": 1560461728, "step": 8640 }, { "epoch": 0.9459481649744109, "grad_norm": 1.0503589107539129, "learning_rate": 3.585425929858055e-07, "loss": 0.7381, "num_input_tokens_seen": 1560644960, "step": 8641 }, { "epoch": 0.9460576370453488, "grad_norm": 1.0461861350963715, "learning_rate": 3.570930034497516e-07, "loss": 0.732, "num_input_tokens_seen": 1560846784, "step": 8642 }, { "epoch": 0.9461671091162867, "grad_norm": 1.20239156190471, "learning_rate": 3.556463291224915e-07, "loss": 0.7702, "num_input_tokens_seen": 1561011872, "step": 8643 }, { "epoch": 0.9462765811872246, "grad_norm": 1.0963509832171374, "learning_rate": 3.54202570175155e-07, "loss": 1.0645, "num_input_tokens_seen": 1561211904, "step": 8644 }, { "epoch": 0.9463860532581625, "grad_norm": 1.0587187408192344, "learning_rate": 3.527617267785416e-07, "loss": 0.6722, "num_input_tokens_seen": 1561375200, "step": 8645 }, { "epoch": 0.9464955253291004, "grad_norm": 1.294886984146507, "learning_rate": 3.513237991030982e-07, "loss": 0.9221, "num_input_tokens_seen": 1561535136, "step": 8646 }, { "epoch": 0.9466049974000383, "grad_norm": 1.235080710410839, "learning_rate": 3.498887873189277e-07, "loss": 1.0339, "num_input_tokens_seen": 1561729792, "step": 8647 }, { "epoch": 0.9467144694709763, "grad_norm": 1.1376390958985123, "learning_rate": 3.484566915957943e-07, "loss": 0.7969, "num_input_tokens_seen": 1561928256, "step": 8648 }, { "epoch": 0.9468239415419141, "grad_norm": 1.2156963239207084, "learning_rate": 3.470275121031041e-07, "loss": 0.9274, "num_input_tokens_seen": 1562086848, "step": 8649 }, { "epoch": 0.946933413612852, "grad_norm": 1.0695363592857554, "learning_rate": 3.4560124900993305e-07, "loss": 0.988, "num_input_tokens_seen": 1562287552, "step": 8650 }, { "epoch": 0.9470428856837899, "grad_norm": 1.0719818128743994, "learning_rate": 3.441779024850017e-07, "loss": 0.9888, "num_input_tokens_seen": 1562464960, "step": 8651 }, { "epoch": 0.9471523577547278, "grad_norm": 1.1891786355688518, "learning_rate": 3.4275747269669203e-07, "loss": 0.7119, "num_input_tokens_seen": 1562619520, "step": 8652 }, { "epoch": 0.9472618298256658, "grad_norm": 1.042754990749813, "learning_rate": 3.4133995981303624e-07, "loss": 1.0552, "num_input_tokens_seen": 1562840832, "step": 8653 }, { "epoch": 0.9473713018966037, "grad_norm": 1.3663991780695128, "learning_rate": 3.3992536400172246e-07, "loss": 1.2645, "num_input_tokens_seen": 1563020256, "step": 8654 }, { "epoch": 0.9474807739675415, "grad_norm": 1.0570518271733442, "learning_rate": 3.3851368543009745e-07, "loss": 0.7241, "num_input_tokens_seen": 1563197888, "step": 8655 }, { "epoch": 0.9475902460384794, "grad_norm": 1.200713080889189, "learning_rate": 3.3710492426515804e-07, "loss": 0.8746, "num_input_tokens_seen": 1563376192, "step": 8656 }, { "epoch": 0.9476997181094173, "grad_norm": 1.0237602132769863, "learning_rate": 3.3569908067355993e-07, "loss": 0.7682, "num_input_tokens_seen": 1563557184, "step": 8657 }, { "epoch": 0.9478091901803553, "grad_norm": 1.2249882794812093, "learning_rate": 3.3429615482160893e-07, "loss": 0.7507, "num_input_tokens_seen": 1563735488, "step": 8658 }, { "epoch": 0.9479186622512932, "grad_norm": 1.2093791656028117, "learning_rate": 3.328961468752695e-07, "loss": 0.7345, "num_input_tokens_seen": 1563909536, "step": 8659 }, { "epoch": 0.948028134322231, "grad_norm": 1.147202950552547, "learning_rate": 3.3149905700016193e-07, "loss": 0.8257, "num_input_tokens_seen": 1564088288, "step": 8660 }, { "epoch": 0.9481376063931689, "grad_norm": 1.1140769925452205, "learning_rate": 3.301048853615568e-07, "loss": 1.1699, "num_input_tokens_seen": 1564279808, "step": 8661 }, { "epoch": 0.9482470784641068, "grad_norm": 0.9447633393459609, "learning_rate": 3.2871363212438613e-07, "loss": 0.6865, "num_input_tokens_seen": 1564473120, "step": 8662 }, { "epoch": 0.9483565505350448, "grad_norm": 1.123516588252425, "learning_rate": 3.2732529745322647e-07, "loss": 1.1145, "num_input_tokens_seen": 1564659488, "step": 8663 }, { "epoch": 0.9484660226059827, "grad_norm": 1.1915370181500649, "learning_rate": 3.2593988151231603e-07, "loss": 1.0402, "num_input_tokens_seen": 1564862208, "step": 8664 }, { "epoch": 0.9485754946769206, "grad_norm": 1.1064405718951842, "learning_rate": 3.245573844655514e-07, "loss": 0.8883, "num_input_tokens_seen": 1565052384, "step": 8665 }, { "epoch": 0.9486849667478584, "grad_norm": 1.1014627722649586, "learning_rate": 3.231778064764768e-07, "loss": 0.752, "num_input_tokens_seen": 1565214336, "step": 8666 }, { "epoch": 0.9487944388187963, "grad_norm": 1.2041064136218682, "learning_rate": 3.2180114770829495e-07, "loss": 0.8407, "num_input_tokens_seen": 1565400928, "step": 8667 }, { "epoch": 0.9489039108897342, "grad_norm": 1.1379500599145633, "learning_rate": 3.204274083238562e-07, "loss": 0.8355, "num_input_tokens_seen": 1565542496, "step": 8668 }, { "epoch": 0.9490133829606722, "grad_norm": 1.1066364820128778, "learning_rate": 3.1905658848567774e-07, "loss": 0.932, "num_input_tokens_seen": 1565724384, "step": 8669 }, { "epoch": 0.9491228550316101, "grad_norm": 1.0109207364185606, "learning_rate": 3.1768868835592434e-07, "loss": 0.983, "num_input_tokens_seen": 1565927776, "step": 8670 }, { "epoch": 0.949232327102548, "grad_norm": 1.0356577108462715, "learning_rate": 3.1632370809641376e-07, "loss": 0.7766, "num_input_tokens_seen": 1566093984, "step": 8671 }, { "epoch": 0.9493417991734858, "grad_norm": 1.1626970103690568, "learning_rate": 3.149616478686196e-07, "loss": 0.8213, "num_input_tokens_seen": 1566259744, "step": 8672 }, { "epoch": 0.9494512712444237, "grad_norm": 1.193789240806284, "learning_rate": 3.1360250783367406e-07, "loss": 0.9684, "num_input_tokens_seen": 1566412512, "step": 8673 }, { "epoch": 0.9495607433153617, "grad_norm": 1.210709827308789, "learning_rate": 3.122462881523625e-07, "loss": 0.7546, "num_input_tokens_seen": 1566602240, "step": 8674 }, { "epoch": 0.9496702153862996, "grad_norm": 1.1984845128592512, "learning_rate": 3.1089298898511476e-07, "loss": 0.9811, "num_input_tokens_seen": 1566831616, "step": 8675 }, { "epoch": 0.9497796874572375, "grad_norm": 1.2424409235950244, "learning_rate": 3.095426104920335e-07, "loss": 0.9962, "num_input_tokens_seen": 1566999168, "step": 8676 }, { "epoch": 0.9498891595281753, "grad_norm": 1.1748998639880635, "learning_rate": 3.081951528328575e-07, "loss": 0.6982, "num_input_tokens_seen": 1567184192, "step": 8677 }, { "epoch": 0.9499986315991132, "grad_norm": 1.1280268639652744, "learning_rate": 3.0685061616699263e-07, "loss": 0.8442, "num_input_tokens_seen": 1567353088, "step": 8678 }, { "epoch": 0.9501081036700512, "grad_norm": 1.227537654369096, "learning_rate": 3.0550900065349774e-07, "loss": 0.8111, "num_input_tokens_seen": 1567517056, "step": 8679 }, { "epoch": 0.9502175757409891, "grad_norm": 1.2350992902618882, "learning_rate": 3.0417030645107924e-07, "loss": 0.8162, "num_input_tokens_seen": 1567705664, "step": 8680 }, { "epoch": 0.950327047811927, "grad_norm": 1.0620166576752972, "learning_rate": 3.028345337181021e-07, "loss": 0.8566, "num_input_tokens_seen": 1567878816, "step": 8681 }, { "epoch": 0.9504365198828649, "grad_norm": 1.1423361497578457, "learning_rate": 3.015016826125844e-07, "loss": 0.9804, "num_input_tokens_seen": 1568039872, "step": 8682 }, { "epoch": 0.9505459919538027, "grad_norm": 1.1287278823121896, "learning_rate": 3.001717532922055e-07, "loss": 0.7235, "num_input_tokens_seen": 1568210560, "step": 8683 }, { "epoch": 0.9506554640247407, "grad_norm": 1.154458436454847, "learning_rate": 2.988447459142868e-07, "loss": 1.2196, "num_input_tokens_seen": 1568414400, "step": 8684 }, { "epoch": 0.9507649360956786, "grad_norm": 1.0584657598731824, "learning_rate": 2.975206606358194e-07, "loss": 0.9516, "num_input_tokens_seen": 1568601888, "step": 8685 }, { "epoch": 0.9508744081666165, "grad_norm": 1.0263941912814498, "learning_rate": 2.961994976134308e-07, "loss": 0.6294, "num_input_tokens_seen": 1568796768, "step": 8686 }, { "epoch": 0.9509838802375544, "grad_norm": 1.1080031711071645, "learning_rate": 2.948812570034154e-07, "loss": 0.8852, "num_input_tokens_seen": 1568982688, "step": 8687 }, { "epoch": 0.9510933523084923, "grad_norm": 1.2259891409089678, "learning_rate": 2.9356593896172066e-07, "loss": 0.8671, "num_input_tokens_seen": 1569148896, "step": 8688 }, { "epoch": 0.9512028243794302, "grad_norm": 1.1675503086466232, "learning_rate": 2.9225354364394444e-07, "loss": 0.6928, "num_input_tokens_seen": 1569332352, "step": 8689 }, { "epoch": 0.9513122964503681, "grad_norm": 1.0656777153512293, "learning_rate": 2.9094407120534295e-07, "loss": 0.7127, "num_input_tokens_seen": 1569519168, "step": 8690 }, { "epoch": 0.951421768521306, "grad_norm": 1.46678834143136, "learning_rate": 2.896375218008174e-07, "loss": 1.0312, "num_input_tokens_seen": 1569713600, "step": 8691 }, { "epoch": 0.9515312405922439, "grad_norm": 1.0839805809160796, "learning_rate": 2.883338955849385e-07, "loss": 0.9164, "num_input_tokens_seen": 1569909152, "step": 8692 }, { "epoch": 0.9516407126631818, "grad_norm": 0.9268148262004509, "learning_rate": 2.870331927119163e-07, "loss": 0.7892, "num_input_tokens_seen": 1570100896, "step": 8693 }, { "epoch": 0.9517501847341197, "grad_norm": 1.1443868812931786, "learning_rate": 2.857354133356277e-07, "loss": 0.9148, "num_input_tokens_seen": 1570284352, "step": 8694 }, { "epoch": 0.9518596568050576, "grad_norm": 1.0298202892114692, "learning_rate": 2.8444055760959154e-07, "loss": 0.8886, "num_input_tokens_seen": 1570481920, "step": 8695 }, { "epoch": 0.9519691288759955, "grad_norm": 1.2607090455891519, "learning_rate": 2.8314862568699087e-07, "loss": 1.0138, "num_input_tokens_seen": 1570658208, "step": 8696 }, { "epoch": 0.9520786009469334, "grad_norm": 1.1424856295193135, "learning_rate": 2.8185961772065616e-07, "loss": 1.0857, "num_input_tokens_seen": 1570848832, "step": 8697 }, { "epoch": 0.9521880730178713, "grad_norm": 1.0624339700264664, "learning_rate": 2.8057353386307663e-07, "loss": 0.8204, "num_input_tokens_seen": 1571013696, "step": 8698 }, { "epoch": 0.9522975450888093, "grad_norm": 1.1527744328911191, "learning_rate": 2.792903742663916e-07, "loss": 0.8364, "num_input_tokens_seen": 1571216640, "step": 8699 }, { "epoch": 0.9524070171597471, "grad_norm": 1.200459835243316, "learning_rate": 2.7801013908239636e-07, "loss": 1.0535, "num_input_tokens_seen": 1571405696, "step": 8700 }, { "epoch": 0.952516489230685, "grad_norm": 1.1101194758166633, "learning_rate": 2.76732828462542e-07, "loss": 0.8625, "num_input_tokens_seen": 1571594304, "step": 8701 }, { "epoch": 0.9526259613016229, "grad_norm": 1.201260496762527, "learning_rate": 2.7545844255793263e-07, "loss": 0.7619, "num_input_tokens_seen": 1571790976, "step": 8702 }, { "epoch": 0.9527354333725608, "grad_norm": 1.0932623184699952, "learning_rate": 2.741869815193226e-07, "loss": 0.7626, "num_input_tokens_seen": 1571943744, "step": 8703 }, { "epoch": 0.9528449054434988, "grad_norm": 1.0926559425985263, "learning_rate": 2.729184454971251e-07, "loss": 0.7584, "num_input_tokens_seen": 1572130112, "step": 8704 }, { "epoch": 0.9529543775144367, "grad_norm": 1.1701224892231439, "learning_rate": 2.71652834641406e-07, "loss": 0.7514, "num_input_tokens_seen": 1572308864, "step": 8705 }, { "epoch": 0.9530638495853745, "grad_norm": 1.0421990115146198, "learning_rate": 2.7039014910188455e-07, "loss": 0.6886, "num_input_tokens_seen": 1572494336, "step": 8706 }, { "epoch": 0.9531733216563124, "grad_norm": 1.3112905924780547, "learning_rate": 2.691303890279301e-07, "loss": 0.999, "num_input_tokens_seen": 1572638592, "step": 8707 }, { "epoch": 0.9532827937272503, "grad_norm": 1.0103490092379661, "learning_rate": 2.678735545685762e-07, "loss": 1.2156, "num_input_tokens_seen": 1572849376, "step": 8708 }, { "epoch": 0.9533922657981883, "grad_norm": 1.219671878856383, "learning_rate": 2.666196458725012e-07, "loss": 0.9521, "num_input_tokens_seen": 1573016256, "step": 8709 }, { "epoch": 0.9535017378691262, "grad_norm": 1.015568775674799, "learning_rate": 2.653686630880392e-07, "loss": 0.6871, "num_input_tokens_seen": 1573197024, "step": 8710 }, { "epoch": 0.953611209940064, "grad_norm": 1.1123731155045187, "learning_rate": 2.641206063631774e-07, "loss": 0.7378, "num_input_tokens_seen": 1573388544, "step": 8711 }, { "epoch": 0.9537206820110019, "grad_norm": 1.1122629963309947, "learning_rate": 2.628754758455643e-07, "loss": 0.8983, "num_input_tokens_seen": 1573538848, "step": 8712 }, { "epoch": 0.9538301540819398, "grad_norm": 1.2051783292833245, "learning_rate": 2.616332716824932e-07, "loss": 0.9111, "num_input_tokens_seen": 1573701920, "step": 8713 }, { "epoch": 0.9539396261528778, "grad_norm": 1.0639050113683655, "learning_rate": 2.6039399402091324e-07, "loss": 0.9241, "num_input_tokens_seen": 1573896128, "step": 8714 }, { "epoch": 0.9540490982238157, "grad_norm": 1.1585079004399472, "learning_rate": 2.591576430074266e-07, "loss": 0.8482, "num_input_tokens_seen": 1574088768, "step": 8715 }, { "epoch": 0.9541585702947536, "grad_norm": 1.1055494622734072, "learning_rate": 2.5792421878829965e-07, "loss": 0.7391, "num_input_tokens_seen": 1574259008, "step": 8716 }, { "epoch": 0.9542680423656914, "grad_norm": 1.1458865008879262, "learning_rate": 2.5669372150943505e-07, "loss": 0.7435, "num_input_tokens_seen": 1574444256, "step": 8717 }, { "epoch": 0.9543775144366293, "grad_norm": 1.1569646740188269, "learning_rate": 2.554661513164053e-07, "loss": 1.035, "num_input_tokens_seen": 1574628160, "step": 8718 }, { "epoch": 0.9544869865075672, "grad_norm": 0.9774678115715826, "learning_rate": 2.5424150835442193e-07, "loss": 0.7947, "num_input_tokens_seen": 1574789216, "step": 8719 }, { "epoch": 0.9545964585785052, "grad_norm": 1.1597010172100102, "learning_rate": 2.530197927683664e-07, "loss": 0.8687, "num_input_tokens_seen": 1574937728, "step": 8720 }, { "epoch": 0.9547059306494431, "grad_norm": 1.135899986266821, "learning_rate": 2.5180100470275916e-07, "loss": 1.0996, "num_input_tokens_seen": 1575109312, "step": 8721 }, { "epoch": 0.954815402720381, "grad_norm": 1.1623166196015462, "learning_rate": 2.5058514430178205e-07, "loss": 1.0317, "num_input_tokens_seen": 1575283584, "step": 8722 }, { "epoch": 0.9549248747913188, "grad_norm": 1.0038484786810855, "learning_rate": 2.4937221170927007e-07, "loss": 0.7477, "num_input_tokens_seen": 1575429184, "step": 8723 }, { "epoch": 0.9550343468622567, "grad_norm": 1.133146986414037, "learning_rate": 2.481622070687112e-07, "loss": 1.013, "num_input_tokens_seen": 1575613760, "step": 8724 }, { "epoch": 0.9551438189331947, "grad_norm": 1.22498253790418, "learning_rate": 2.469551305232465e-07, "loss": 1.0867, "num_input_tokens_seen": 1575788256, "step": 8725 }, { "epoch": 0.9552532910041326, "grad_norm": 1.1108686036798967, "learning_rate": 2.457509822156673e-07, "loss": 0.874, "num_input_tokens_seen": 1575955360, "step": 8726 }, { "epoch": 0.9553627630750705, "grad_norm": 1.195701785057237, "learning_rate": 2.445497622884263e-07, "loss": 0.8812, "num_input_tokens_seen": 1576140608, "step": 8727 }, { "epoch": 0.9554722351460083, "grad_norm": 1.15294442765967, "learning_rate": 2.4335147088362377e-07, "loss": 0.8874, "num_input_tokens_seen": 1576344672, "step": 8728 }, { "epoch": 0.9555817072169462, "grad_norm": 1.1531536295161613, "learning_rate": 2.421561081430157e-07, "loss": 0.8185, "num_input_tokens_seen": 1576519392, "step": 8729 }, { "epoch": 0.9556911792878842, "grad_norm": 1.3631903937813954, "learning_rate": 2.409636742080112e-07, "loss": 1.0725, "num_input_tokens_seen": 1576664544, "step": 8730 }, { "epoch": 0.9558006513588221, "grad_norm": 1.0364734833854503, "learning_rate": 2.3977416921967256e-07, "loss": 0.9885, "num_input_tokens_seen": 1576849120, "step": 8731 }, { "epoch": 0.95591012342976, "grad_norm": 1.1335904837100546, "learning_rate": 2.385875933187176e-07, "loss": 0.9752, "num_input_tokens_seen": 1577000096, "step": 8732 }, { "epoch": 0.9560195955006979, "grad_norm": 1.2512790811457546, "learning_rate": 2.374039466455119e-07, "loss": 0.8625, "num_input_tokens_seen": 1577177504, "step": 8733 }, { "epoch": 0.9561290675716357, "grad_norm": 1.239097494663036, "learning_rate": 2.3622322934008235e-07, "loss": 0.8287, "num_input_tokens_seen": 1577353344, "step": 8734 }, { "epoch": 0.9562385396425737, "grad_norm": 1.2244370139960614, "learning_rate": 2.350454415421033e-07, "loss": 1.0082, "num_input_tokens_seen": 1577537472, "step": 8735 }, { "epoch": 0.9563480117135116, "grad_norm": 1.2162660570643138, "learning_rate": 2.3387058339090773e-07, "loss": 0.9232, "num_input_tokens_seen": 1577727200, "step": 8736 }, { "epoch": 0.9564574837844495, "grad_norm": 1.0808500046180831, "learning_rate": 2.3269865502547894e-07, "loss": 1.0253, "num_input_tokens_seen": 1577936640, "step": 8737 }, { "epoch": 0.9565669558553874, "grad_norm": 1.1385042515383392, "learning_rate": 2.3152965658445046e-07, "loss": 0.89, "num_input_tokens_seen": 1578147872, "step": 8738 }, { "epoch": 0.9566764279263253, "grad_norm": 1.1162707962713239, "learning_rate": 2.3036358820611448e-07, "loss": 0.8703, "num_input_tokens_seen": 1578315872, "step": 8739 }, { "epoch": 0.9567858999972632, "grad_norm": 1.131083833493162, "learning_rate": 2.2920045002841338e-07, "loss": 1.0554, "num_input_tokens_seen": 1578467968, "step": 8740 }, { "epoch": 0.9568953720682011, "grad_norm": 0.9859748434258306, "learning_rate": 2.280402421889455e-07, "loss": 0.8738, "num_input_tokens_seen": 1578645152, "step": 8741 }, { "epoch": 0.957004844139139, "grad_norm": 1.0719118073040987, "learning_rate": 2.2688296482496208e-07, "loss": 0.8372, "num_input_tokens_seen": 1578822560, "step": 8742 }, { "epoch": 0.9571143162100769, "grad_norm": 1.0548852240696085, "learning_rate": 2.2572861807336477e-07, "loss": 0.7412, "num_input_tokens_seen": 1578988096, "step": 8743 }, { "epoch": 0.9572237882810148, "grad_norm": 1.119879832973839, "learning_rate": 2.2457720207071098e-07, "loss": 1.0477, "num_input_tokens_seen": 1579175360, "step": 8744 }, { "epoch": 0.9573332603519527, "grad_norm": 1.223946131652182, "learning_rate": 2.23428716953214e-07, "loss": 0.9589, "num_input_tokens_seen": 1579333280, "step": 8745 }, { "epoch": 0.9574427324228906, "grad_norm": 1.1143727088686444, "learning_rate": 2.2228316285673456e-07, "loss": 0.9437, "num_input_tokens_seen": 1579485824, "step": 8746 }, { "epoch": 0.9575522044938285, "grad_norm": 1.289589005654493, "learning_rate": 2.211405399167893e-07, "loss": 1.0804, "num_input_tokens_seen": 1579650688, "step": 8747 }, { "epoch": 0.9576616765647664, "grad_norm": 1.2053189276636862, "learning_rate": 2.2000084826854784e-07, "loss": 1.0293, "num_input_tokens_seen": 1579818240, "step": 8748 }, { "epoch": 0.9577711486357043, "grad_norm": 1.2098307812801283, "learning_rate": 2.1886408804683568e-07, "loss": 0.9951, "num_input_tokens_seen": 1580026336, "step": 8749 }, { "epoch": 0.9578806207066423, "grad_norm": 1.2441186828115622, "learning_rate": 2.1773025938612856e-07, "loss": 0.8404, "num_input_tokens_seen": 1580196352, "step": 8750 }, { "epoch": 0.9579900927775801, "grad_norm": 1.2400078033386315, "learning_rate": 2.1659936242055811e-07, "loss": 0.7377, "num_input_tokens_seen": 1580331424, "step": 8751 }, { "epoch": 0.958099564848518, "grad_norm": 1.4599468932225834, "learning_rate": 2.1547139728390064e-07, "loss": 0.8792, "num_input_tokens_seen": 1580488896, "step": 8752 }, { "epoch": 0.9582090369194559, "grad_norm": 1.085110246357428, "learning_rate": 2.143463641095994e-07, "loss": 1.1186, "num_input_tokens_seen": 1580682208, "step": 8753 }, { "epoch": 0.9583185089903938, "grad_norm": 1.2795369396206664, "learning_rate": 2.1322426303074238e-07, "loss": 0.9549, "num_input_tokens_seen": 1580854464, "step": 8754 }, { "epoch": 0.9584279810613318, "grad_norm": 1.1375992763081058, "learning_rate": 2.1210509418006785e-07, "loss": 0.8644, "num_input_tokens_seen": 1581057856, "step": 8755 }, { "epoch": 0.9585374531322697, "grad_norm": 1.1122669618821863, "learning_rate": 2.1098885768997824e-07, "loss": 1.1488, "num_input_tokens_seen": 1581234816, "step": 8756 }, { "epoch": 0.9586469252032075, "grad_norm": 1.1481986283983, "learning_rate": 2.098755536925151e-07, "loss": 0.8268, "num_input_tokens_seen": 1581386912, "step": 8757 }, { "epoch": 0.9587563972741454, "grad_norm": 1.1544353576313144, "learning_rate": 2.0876518231938426e-07, "loss": 0.9307, "num_input_tokens_seen": 1581565664, "step": 8758 }, { "epoch": 0.9588658693450833, "grad_norm": 1.0598451881899462, "learning_rate": 2.0765774370193892e-07, "loss": 0.6979, "num_input_tokens_seen": 1581752256, "step": 8759 }, { "epoch": 0.9589753414160213, "grad_norm": 1.1484257703947658, "learning_rate": 2.0655323797119098e-07, "loss": 0.85, "num_input_tokens_seen": 1581951616, "step": 8760 }, { "epoch": 0.9590848134869592, "grad_norm": 1.083536856435934, "learning_rate": 2.0545166525779147e-07, "loss": 1.1693, "num_input_tokens_seen": 1582157024, "step": 8761 }, { "epoch": 0.959194285557897, "grad_norm": 1.0424942480244204, "learning_rate": 2.0435302569206672e-07, "loss": 0.7726, "num_input_tokens_seen": 1582367136, "step": 8762 }, { "epoch": 0.9593037576288349, "grad_norm": 1.1792197910828404, "learning_rate": 2.0325731940397386e-07, "loss": 0.9017, "num_input_tokens_seen": 1582514528, "step": 8763 }, { "epoch": 0.9594132296997728, "grad_norm": 1.0591699362363456, "learning_rate": 2.0216454652313976e-07, "loss": 0.977, "num_input_tokens_seen": 1582714560, "step": 8764 }, { "epoch": 0.9595227017707108, "grad_norm": 1.0785576325511372, "learning_rate": 2.0107470717883326e-07, "loss": 1.1677, "num_input_tokens_seen": 1582898464, "step": 8765 }, { "epoch": 0.9596321738416487, "grad_norm": 0.9713854166821567, "learning_rate": 1.9998780149997898e-07, "loss": 0.8775, "num_input_tokens_seen": 1583079456, "step": 8766 }, { "epoch": 0.9597416459125866, "grad_norm": 1.1062235058559107, "learning_rate": 1.9890382961516295e-07, "loss": 1.007, "num_input_tokens_seen": 1583267168, "step": 8767 }, { "epoch": 0.9598511179835244, "grad_norm": 1.2767530936343163, "learning_rate": 1.9782279165260765e-07, "loss": 0.8749, "num_input_tokens_seen": 1583458912, "step": 8768 }, { "epoch": 0.9599605900544623, "grad_norm": 1.1025239252448469, "learning_rate": 1.9674468774020516e-07, "loss": 1.0305, "num_input_tokens_seen": 1583650208, "step": 8769 }, { "epoch": 0.9600700621254002, "grad_norm": 1.1352430380024123, "learning_rate": 1.956695180054896e-07, "loss": 0.9036, "num_input_tokens_seen": 1583807232, "step": 8770 }, { "epoch": 0.9601795341963382, "grad_norm": 1.0059274002331355, "learning_rate": 1.9459728257565367e-07, "loss": 0.9012, "num_input_tokens_seen": 1583978368, "step": 8771 }, { "epoch": 0.9602890062672761, "grad_norm": 1.0155963013335612, "learning_rate": 1.935279815775376e-07, "loss": 0.8983, "num_input_tokens_seen": 1584161824, "step": 8772 }, { "epoch": 0.960398478338214, "grad_norm": 1.1288606311068212, "learning_rate": 1.9246161513764015e-07, "loss": 1.0054, "num_input_tokens_seen": 1584365216, "step": 8773 }, { "epoch": 0.9605079504091518, "grad_norm": 1.2378586971731367, "learning_rate": 1.9139818338211047e-07, "loss": 0.9221, "num_input_tokens_seen": 1584538816, "step": 8774 }, { "epoch": 0.9606174224800897, "grad_norm": 1.0309426976882605, "learning_rate": 1.903376864367451e-07, "loss": 0.8978, "num_input_tokens_seen": 1584693600, "step": 8775 }, { "epoch": 0.9607268945510277, "grad_norm": 1.237113281475099, "learning_rate": 1.892801244270076e-07, "loss": 0.9562, "num_input_tokens_seen": 1584897664, "step": 8776 }, { "epoch": 0.9608363666219656, "grad_norm": 1.2055694716002443, "learning_rate": 1.8822549747800066e-07, "loss": 0.802, "num_input_tokens_seen": 1585075072, "step": 8777 }, { "epoch": 0.9609458386929035, "grad_norm": 1.0095988565705984, "learning_rate": 1.8717380571448562e-07, "loss": 0.7554, "num_input_tokens_seen": 1585218656, "step": 8778 }, { "epoch": 0.9610553107638413, "grad_norm": 0.991303087282525, "learning_rate": 1.8612504926087405e-07, "loss": 0.8497, "num_input_tokens_seen": 1585403904, "step": 8779 }, { "epoch": 0.9611647828347792, "grad_norm": 1.0720557613709216, "learning_rate": 1.8507922824123614e-07, "loss": 0.9242, "num_input_tokens_seen": 1585567200, "step": 8780 }, { "epoch": 0.9612742549057172, "grad_norm": 1.1427392047246163, "learning_rate": 1.8403634277928407e-07, "loss": 0.8591, "num_input_tokens_seen": 1585759168, "step": 8781 }, { "epoch": 0.9613837269766551, "grad_norm": 1.300611559364423, "learning_rate": 1.829963929983941e-07, "loss": 1.0846, "num_input_tokens_seen": 1585928064, "step": 8782 }, { "epoch": 0.961493199047593, "grad_norm": 1.2572032803638613, "learning_rate": 1.8195937902158732e-07, "loss": 0.9992, "num_input_tokens_seen": 1586079264, "step": 8783 }, { "epoch": 0.9616026711185309, "grad_norm": 1.1976018187812953, "learning_rate": 1.8092530097154337e-07, "loss": 0.921, "num_input_tokens_seen": 1586261376, "step": 8784 }, { "epoch": 0.9617121431894687, "grad_norm": 1.1469055649844806, "learning_rate": 1.7989415897058938e-07, "loss": 0.9677, "num_input_tokens_seen": 1586453120, "step": 8785 }, { "epoch": 0.9618216152604067, "grad_norm": 1.2066955099469776, "learning_rate": 1.7886595314070832e-07, "loss": 0.8934, "num_input_tokens_seen": 1586608576, "step": 8786 }, { "epoch": 0.9619310873313446, "grad_norm": 1.1613764655621612, "learning_rate": 1.7784068360353623e-07, "loss": 0.9203, "num_input_tokens_seen": 1586790912, "step": 8787 }, { "epoch": 0.9620405594022825, "grad_norm": 1.1093445894613243, "learning_rate": 1.7681835048035944e-07, "loss": 0.7854, "num_input_tokens_seen": 1586973248, "step": 8788 }, { "epoch": 0.9621500314732204, "grad_norm": 1.2453434996533712, "learning_rate": 1.7579895389211732e-07, "loss": 0.9243, "num_input_tokens_seen": 1587143936, "step": 8789 }, { "epoch": 0.9622595035441583, "grad_norm": 1.1016291063882104, "learning_rate": 1.7478249395940227e-07, "loss": 1.003, "num_input_tokens_seen": 1587316640, "step": 8790 }, { "epoch": 0.9623689756150962, "grad_norm": 1.1518205909898143, "learning_rate": 1.7376897080246257e-07, "loss": 1.0206, "num_input_tokens_seen": 1587473440, "step": 8791 }, { "epoch": 0.9624784476860341, "grad_norm": 1.0912568150468103, "learning_rate": 1.727583845411912e-07, "loss": 1.0641, "num_input_tokens_seen": 1587673472, "step": 8792 }, { "epoch": 0.962587919756972, "grad_norm": 1.0409122723488764, "learning_rate": 1.717507352951453e-07, "loss": 0.8737, "num_input_tokens_seen": 1587836768, "step": 8793 }, { "epoch": 0.9626973918279099, "grad_norm": 1.0683654003914869, "learning_rate": 1.707460231835184e-07, "loss": 0.905, "num_input_tokens_seen": 1588030752, "step": 8794 }, { "epoch": 0.9628068638988478, "grad_norm": 1.228621340328692, "learning_rate": 1.6974424832517654e-07, "loss": 0.9741, "num_input_tokens_seen": 1588196064, "step": 8795 }, { "epoch": 0.9629163359697858, "grad_norm": 1.0590836070758216, "learning_rate": 1.687454108386194e-07, "loss": 1.1864, "num_input_tokens_seen": 1588396096, "step": 8796 }, { "epoch": 0.9630258080407236, "grad_norm": 1.2006365319670733, "learning_rate": 1.6774951084201073e-07, "loss": 0.9868, "num_input_tokens_seen": 1588575296, "step": 8797 }, { "epoch": 0.9631352801116615, "grad_norm": 1.0421128207649581, "learning_rate": 1.6675654845316746e-07, "loss": 0.7224, "num_input_tokens_seen": 1588732768, "step": 8798 }, { "epoch": 0.9632447521825994, "grad_norm": 1.0421641242795805, "learning_rate": 1.657665237895484e-07, "loss": 0.8894, "num_input_tokens_seen": 1588898752, "step": 8799 }, { "epoch": 0.9633542242535373, "grad_norm": 1.3123241488408492, "learning_rate": 1.6477943696827647e-07, "loss": 0.7983, "num_input_tokens_seen": 1589081312, "step": 8800 }, { "epoch": 0.9634636963244753, "grad_norm": 0.9704536409051004, "learning_rate": 1.6379528810611666e-07, "loss": 0.5396, "num_input_tokens_seen": 1589277760, "step": 8801 }, { "epoch": 0.9635731683954131, "grad_norm": 1.2334423646330797, "learning_rate": 1.6281407731949805e-07, "loss": 0.7957, "num_input_tokens_seen": 1589457184, "step": 8802 }, { "epoch": 0.963682640466351, "grad_norm": 1.1084251635854123, "learning_rate": 1.6183580472449444e-07, "loss": 0.8748, "num_input_tokens_seen": 1589677824, "step": 8803 }, { "epoch": 0.9637921125372889, "grad_norm": 1.0701899771625776, "learning_rate": 1.6086047043682994e-07, "loss": 0.6863, "num_input_tokens_seen": 1589867328, "step": 8804 }, { "epoch": 0.9639015846082268, "grad_norm": 1.215632044911503, "learning_rate": 1.5988807457189003e-07, "loss": 0.9494, "num_input_tokens_seen": 1590070720, "step": 8805 }, { "epoch": 0.9640110566791648, "grad_norm": 1.0797880862868077, "learning_rate": 1.5891861724470214e-07, "loss": 0.9283, "num_input_tokens_seen": 1590242528, "step": 8806 }, { "epoch": 0.9641205287501027, "grad_norm": 1.129266067765538, "learning_rate": 1.5795209856995507e-07, "loss": 0.978, "num_input_tokens_seen": 1590418592, "step": 8807 }, { "epoch": 0.9642300008210405, "grad_norm": 1.1297497089822035, "learning_rate": 1.5698851866198516e-07, "loss": 0.9938, "num_input_tokens_seen": 1590605184, "step": 8808 }, { "epoch": 0.9643394728919784, "grad_norm": 1.2031512071802, "learning_rate": 1.5602787763478177e-07, "loss": 1.0023, "num_input_tokens_seen": 1590773632, "step": 8809 }, { "epoch": 0.9644489449629163, "grad_norm": 1.0661494466362895, "learning_rate": 1.5507017560198457e-07, "loss": 0.7938, "num_input_tokens_seen": 1590953280, "step": 8810 }, { "epoch": 0.9645584170338543, "grad_norm": 1.055109914324063, "learning_rate": 1.5411541267689178e-07, "loss": 0.8998, "num_input_tokens_seen": 1591138304, "step": 8811 }, { "epoch": 0.9646678891047922, "grad_norm": 1.0267075574047397, "learning_rate": 1.531635889724492e-07, "loss": 0.9486, "num_input_tokens_seen": 1591343488, "step": 8812 }, { "epoch": 0.9647773611757301, "grad_norm": 1.1289354472361048, "learning_rate": 1.5221470460125565e-07, "loss": 1.0323, "num_input_tokens_seen": 1591530304, "step": 8813 }, { "epoch": 0.9648868332466679, "grad_norm": 1.1729030931318647, "learning_rate": 1.512687596755602e-07, "loss": 1.1449, "num_input_tokens_seen": 1591717568, "step": 8814 }, { "epoch": 0.9649963053176058, "grad_norm": 1.078877449065571, "learning_rate": 1.5032575430726782e-07, "loss": 1.0165, "num_input_tokens_seen": 1591910880, "step": 8815 }, { "epoch": 0.9651057773885438, "grad_norm": 1.3567769885762515, "learning_rate": 1.4938568860793367e-07, "loss": 0.9565, "num_input_tokens_seen": 1592096352, "step": 8816 }, { "epoch": 0.9652152494594817, "grad_norm": 1.1491439099971266, "learning_rate": 1.4844856268876607e-07, "loss": 0.8753, "num_input_tokens_seen": 1592276224, "step": 8817 }, { "epoch": 0.9653247215304196, "grad_norm": 1.1452277346022375, "learning_rate": 1.475143766606263e-07, "loss": 0.9768, "num_input_tokens_seen": 1592430784, "step": 8818 }, { "epoch": 0.9654341936013574, "grad_norm": 1.1093719912206117, "learning_rate": 1.4658313063402595e-07, "loss": 0.9454, "num_input_tokens_seen": 1592606848, "step": 8819 }, { "epoch": 0.9655436656722953, "grad_norm": 1.2166402602705007, "learning_rate": 1.4565482471912971e-07, "loss": 0.9764, "num_input_tokens_seen": 1592765216, "step": 8820 }, { "epoch": 0.9656531377432332, "grad_norm": 1.0628385565417395, "learning_rate": 1.447294590257553e-07, "loss": 0.684, "num_input_tokens_seen": 1592955840, "step": 8821 }, { "epoch": 0.9657626098141712, "grad_norm": 1.0949127556172364, "learning_rate": 1.438070336633679e-07, "loss": 0.7183, "num_input_tokens_seen": 1593144896, "step": 8822 }, { "epoch": 0.9658720818851091, "grad_norm": 1.2620022576277137, "learning_rate": 1.4288754874109134e-07, "loss": 0.9583, "num_input_tokens_seen": 1593338432, "step": 8823 }, { "epoch": 0.965981553956047, "grad_norm": 1.1321625522157088, "learning_rate": 1.419710043677025e-07, "loss": 1.0413, "num_input_tokens_seen": 1593528384, "step": 8824 }, { "epoch": 0.9660910260269848, "grad_norm": 1.209814178211806, "learning_rate": 1.410574006516202e-07, "loss": 1.2107, "num_input_tokens_seen": 1593729312, "step": 8825 }, { "epoch": 0.9662004980979227, "grad_norm": 1.158574474171796, "learning_rate": 1.4014673770092746e-07, "loss": 0.8511, "num_input_tokens_seen": 1593912992, "step": 8826 }, { "epoch": 0.9663099701688607, "grad_norm": 1.1458353678367355, "learning_rate": 1.3923901562334917e-07, "loss": 0.7478, "num_input_tokens_seen": 1594051872, "step": 8827 }, { "epoch": 0.9664194422397986, "grad_norm": 1.0563910395792924, "learning_rate": 1.383342345262717e-07, "loss": 0.7839, "num_input_tokens_seen": 1594242272, "step": 8828 }, { "epoch": 0.9665289143107365, "grad_norm": 1.1323796991111172, "learning_rate": 1.3743239451672608e-07, "loss": 0.9916, "num_input_tokens_seen": 1594425952, "step": 8829 }, { "epoch": 0.9666383863816744, "grad_norm": 1.1504131114167448, "learning_rate": 1.3653349570139918e-07, "loss": 0.7943, "num_input_tokens_seen": 1594588576, "step": 8830 }, { "epoch": 0.9667478584526122, "grad_norm": 1.0970408930950304, "learning_rate": 1.3563753818663093e-07, "loss": 0.6632, "num_input_tokens_seen": 1594774720, "step": 8831 }, { "epoch": 0.9668573305235502, "grad_norm": 1.1080488956177208, "learning_rate": 1.3474452207840605e-07, "loss": 0.7547, "num_input_tokens_seen": 1594959520, "step": 8832 }, { "epoch": 0.9669668025944881, "grad_norm": 1.2269603788543384, "learning_rate": 1.3385444748237053e-07, "loss": 0.7651, "num_input_tokens_seen": 1595127296, "step": 8833 }, { "epoch": 0.967076274665426, "grad_norm": 1.120325602903049, "learning_rate": 1.3296731450381795e-07, "loss": 0.8201, "num_input_tokens_seen": 1595276032, "step": 8834 }, { "epoch": 0.9671857467363639, "grad_norm": 1.302661213114388, "learning_rate": 1.3208312324769766e-07, "loss": 0.9111, "num_input_tokens_seen": 1595450080, "step": 8835 }, { "epoch": 0.9672952188073017, "grad_norm": 1.187550192324329, "learning_rate": 1.3120187381859826e-07, "loss": 0.8124, "num_input_tokens_seen": 1595620096, "step": 8836 }, { "epoch": 0.9674046908782397, "grad_norm": 1.123308393463777, "learning_rate": 1.303235663207808e-07, "loss": 0.9544, "num_input_tokens_seen": 1595820352, "step": 8837 }, { "epoch": 0.9675141629491776, "grad_norm": 1.0869724638433236, "learning_rate": 1.2944820085814268e-07, "loss": 0.6601, "num_input_tokens_seen": 1595978720, "step": 8838 }, { "epoch": 0.9676236350201155, "grad_norm": 1.0574213834718451, "learning_rate": 1.2857577753423444e-07, "loss": 0.8659, "num_input_tokens_seen": 1596122752, "step": 8839 }, { "epoch": 0.9677331070910534, "grad_norm": 1.1831986173768698, "learning_rate": 1.2770629645226796e-07, "loss": 0.8747, "num_input_tokens_seen": 1596258496, "step": 8840 }, { "epoch": 0.9678425791619913, "grad_norm": 1.2512885604694004, "learning_rate": 1.2683975771509982e-07, "loss": 1.0417, "num_input_tokens_seen": 1596404320, "step": 8841 }, { "epoch": 0.9679520512329292, "grad_norm": 1.0520666832440453, "learning_rate": 1.2597616142523973e-07, "loss": 0.929, "num_input_tokens_seen": 1596595168, "step": 8842 }, { "epoch": 0.9680615233038671, "grad_norm": 1.1454150852614766, "learning_rate": 1.251155076848448e-07, "loss": 0.9454, "num_input_tokens_seen": 1596775488, "step": 8843 }, { "epoch": 0.968170995374805, "grad_norm": 1.07433348996592, "learning_rate": 1.2425779659573368e-07, "loss": 0.8901, "num_input_tokens_seen": 1596957152, "step": 8844 }, { "epoch": 0.9682804674457429, "grad_norm": 1.163181201739234, "learning_rate": 1.2340302825937232e-07, "loss": 0.6821, "num_input_tokens_seen": 1597095360, "step": 8845 }, { "epoch": 0.9683899395166808, "grad_norm": 1.1745051051938704, "learning_rate": 1.2255120277687714e-07, "loss": 0.9196, "num_input_tokens_seen": 1597272320, "step": 8846 }, { "epoch": 0.9684994115876188, "grad_norm": 1.3447476609070166, "learning_rate": 1.2170232024901473e-07, "loss": 0.964, "num_input_tokens_seen": 1597433600, "step": 8847 }, { "epoch": 0.9686088836585566, "grad_norm": 1.1011618027458163, "learning_rate": 1.208563807762103e-07, "loss": 1.1229, "num_input_tokens_seen": 1597638784, "step": 8848 }, { "epoch": 0.9687183557294945, "grad_norm": 1.0391868000685471, "learning_rate": 1.2001338445853382e-07, "loss": 0.8259, "num_input_tokens_seen": 1597847328, "step": 8849 }, { "epoch": 0.9688278278004324, "grad_norm": 1.1893951451995486, "learning_rate": 1.1917333139571385e-07, "loss": 1.1402, "num_input_tokens_seen": 1598021152, "step": 8850 }, { "epoch": 0.9689372998713703, "grad_norm": 1.1672095954162016, "learning_rate": 1.1833622168712366e-07, "loss": 0.7415, "num_input_tokens_seen": 1598197888, "step": 8851 }, { "epoch": 0.9690467719423083, "grad_norm": 1.2316215322498536, "learning_rate": 1.1750205543179239e-07, "loss": 1.1279, "num_input_tokens_seen": 1598365888, "step": 8852 }, { "epoch": 0.9691562440132461, "grad_norm": 1.174283718360024, "learning_rate": 1.1667083272840218e-07, "loss": 0.6788, "num_input_tokens_seen": 1598546880, "step": 8853 }, { "epoch": 0.969265716084184, "grad_norm": 1.0298116073720287, "learning_rate": 1.1584255367528274e-07, "loss": 0.7425, "num_input_tokens_seen": 1598715328, "step": 8854 }, { "epoch": 0.9693751881551219, "grad_norm": 1.2548330809242123, "learning_rate": 1.1501721837041679e-07, "loss": 0.8326, "num_input_tokens_seen": 1598913568, "step": 8855 }, { "epoch": 0.9694846602260598, "grad_norm": 1.242725279895808, "learning_rate": 1.141948269114429e-07, "loss": 0.915, "num_input_tokens_seen": 1599093216, "step": 8856 }, { "epoch": 0.9695941322969978, "grad_norm": 1.2014234822069725, "learning_rate": 1.133753793956499e-07, "loss": 0.6336, "num_input_tokens_seen": 1599248896, "step": 8857 }, { "epoch": 0.9697036043679357, "grad_norm": 1.160811011507582, "learning_rate": 1.1255887591997138e-07, "loss": 0.7579, "num_input_tokens_seen": 1599431456, "step": 8858 }, { "epoch": 0.9698130764388735, "grad_norm": 1.1235446523358747, "learning_rate": 1.1174531658100229e-07, "loss": 1.0034, "num_input_tokens_seen": 1599616032, "step": 8859 }, { "epoch": 0.9699225485098114, "grad_norm": 1.0973370179484891, "learning_rate": 1.1093470147498231e-07, "loss": 0.9299, "num_input_tokens_seen": 1599818528, "step": 8860 }, { "epoch": 0.9700320205807493, "grad_norm": 1.2779600657251844, "learning_rate": 1.1012703069780972e-07, "loss": 1.0742, "num_input_tokens_seen": 1599987872, "step": 8861 }, { "epoch": 0.9701414926516873, "grad_norm": 1.1827162205805657, "learning_rate": 1.0932230434502755e-07, "loss": 0.9106, "num_input_tokens_seen": 1600174912, "step": 8862 }, { "epoch": 0.9702509647226252, "grad_norm": 1.1950632471242162, "learning_rate": 1.0852052251183187e-07, "loss": 0.9124, "num_input_tokens_seen": 1600316928, "step": 8863 }, { "epoch": 0.9703604367935631, "grad_norm": 1.1094480678512126, "learning_rate": 1.0772168529307736e-07, "loss": 1.0668, "num_input_tokens_seen": 1600525248, "step": 8864 }, { "epoch": 0.9704699088645009, "grad_norm": 1.170466173826333, "learning_rate": 1.0692579278325788e-07, "loss": 0.8373, "num_input_tokens_seen": 1600703552, "step": 8865 }, { "epoch": 0.9705793809354388, "grad_norm": 1.129608305586223, "learning_rate": 1.061328450765342e-07, "loss": 0.9818, "num_input_tokens_seen": 1600892384, "step": 8866 }, { "epoch": 0.9706888530063768, "grad_norm": 1.1697678504392244, "learning_rate": 1.0534284226670077e-07, "loss": 0.9934, "num_input_tokens_seen": 1601065088, "step": 8867 }, { "epoch": 0.9707983250773147, "grad_norm": 1.159034109253856, "learning_rate": 1.045557844472217e-07, "loss": 0.9454, "num_input_tokens_seen": 1601226592, "step": 8868 }, { "epoch": 0.9709077971482526, "grad_norm": 1.0726260498418716, "learning_rate": 1.0377167171120028e-07, "loss": 1.0661, "num_input_tokens_seen": 1601423488, "step": 8869 }, { "epoch": 0.9710172692191904, "grad_norm": 1.2633419402105264, "learning_rate": 1.0299050415139844e-07, "loss": 1.0512, "num_input_tokens_seen": 1601592608, "step": 8870 }, { "epoch": 0.9711267412901283, "grad_norm": 1.0965507614972885, "learning_rate": 1.022122818602228e-07, "loss": 0.9743, "num_input_tokens_seen": 1601750080, "step": 8871 }, { "epoch": 0.9712362133610662, "grad_norm": 1.1501191989655783, "learning_rate": 1.0143700492973862e-07, "loss": 1.4801, "num_input_tokens_seen": 1601954368, "step": 8872 }, { "epoch": 0.9713456854320042, "grad_norm": 1.215833794086274, "learning_rate": 1.0066467345165864e-07, "loss": 0.9686, "num_input_tokens_seen": 1602149472, "step": 8873 }, { "epoch": 0.9714551575029421, "grad_norm": 1.2759542577999188, "learning_rate": 9.989528751734867e-08, "loss": 0.9571, "num_input_tokens_seen": 1602308736, "step": 8874 }, { "epoch": 0.97156462957388, "grad_norm": 0.9950076596046719, "learning_rate": 9.912884721782478e-08, "loss": 0.9825, "num_input_tokens_seen": 1602486368, "step": 8875 }, { "epoch": 0.9716741016448178, "grad_norm": 1.1947815657135752, "learning_rate": 9.836535264375613e-08, "loss": 0.6396, "num_input_tokens_seen": 1602684832, "step": 8876 }, { "epoch": 0.9717835737157557, "grad_norm": 1.0972774843123025, "learning_rate": 9.760480388546211e-08, "loss": 1.0107, "num_input_tokens_seen": 1602867392, "step": 8877 }, { "epoch": 0.9718930457866937, "grad_norm": 1.1685155398764187, "learning_rate": 9.684720103291522e-08, "loss": 0.9433, "num_input_tokens_seen": 1603068768, "step": 8878 }, { "epoch": 0.9720025178576316, "grad_norm": 1.180965514465262, "learning_rate": 9.609254417573543e-08, "loss": 0.8469, "num_input_tokens_seen": 1603235648, "step": 8879 }, { "epoch": 0.9721119899285695, "grad_norm": 1.1630571875874445, "learning_rate": 9.534083340320132e-08, "loss": 0.7047, "num_input_tokens_seen": 1603398944, "step": 8880 }, { "epoch": 0.9722214619995074, "grad_norm": 1.2154187868448745, "learning_rate": 9.459206880423621e-08, "loss": 0.8611, "num_input_tokens_seen": 1603575008, "step": 8881 }, { "epoch": 0.9723309340704452, "grad_norm": 0.97931122558207, "learning_rate": 9.384625046741924e-08, "loss": 0.9154, "num_input_tokens_seen": 1603734944, "step": 8882 }, { "epoch": 0.9724404061413832, "grad_norm": 1.0601419199099438, "learning_rate": 9.310337848097705e-08, "loss": 1.0286, "num_input_tokens_seen": 1603910560, "step": 8883 }, { "epoch": 0.9725498782123211, "grad_norm": 1.1148650564979308, "learning_rate": 9.236345293279492e-08, "loss": 0.6788, "num_input_tokens_seen": 1604103200, "step": 8884 }, { "epoch": 0.972659350283259, "grad_norm": 1.060541704421673, "learning_rate": 9.162647391039724e-08, "loss": 0.8142, "num_input_tokens_seen": 1604284640, "step": 8885 }, { "epoch": 0.9727688223541969, "grad_norm": 1.1709321361588911, "learning_rate": 9.089244150097265e-08, "loss": 0.8404, "num_input_tokens_seen": 1604438976, "step": 8886 }, { "epoch": 0.9728782944251347, "grad_norm": 1.1751471041858719, "learning_rate": 9.016135579135165e-08, "loss": 0.9308, "num_input_tokens_seen": 1604619520, "step": 8887 }, { "epoch": 0.9729877664960727, "grad_norm": 1.1477577637194016, "learning_rate": 8.943321686802619e-08, "loss": 0.8302, "num_input_tokens_seen": 1604815072, "step": 8888 }, { "epoch": 0.9730972385670106, "grad_norm": 1.0174131058276432, "learning_rate": 8.870802481712736e-08, "loss": 0.7656, "num_input_tokens_seen": 1604983520, "step": 8889 }, { "epoch": 0.9732067106379485, "grad_norm": 1.0873003853188081, "learning_rate": 8.798577972445043e-08, "loss": 0.853, "num_input_tokens_seen": 1605153984, "step": 8890 }, { "epoch": 0.9733161827088864, "grad_norm": 1.246212561991054, "learning_rate": 8.726648167542706e-08, "loss": 0.9289, "num_input_tokens_seen": 1605291968, "step": 8891 }, { "epoch": 0.9734256547798243, "grad_norm": 1.1786737191203593, "learning_rate": 8.65501307551586e-08, "loss": 0.8955, "num_input_tokens_seen": 1605467808, "step": 8892 }, { "epoch": 0.9735351268507622, "grad_norm": 1.080758733996031, "learning_rate": 8.583672704838008e-08, "loss": 1.1198, "num_input_tokens_seen": 1605672768, "step": 8893 }, { "epoch": 0.9736445989217001, "grad_norm": 0.9626472273844774, "learning_rate": 8.512627063949064e-08, "loss": 0.832, "num_input_tokens_seen": 1605858016, "step": 8894 }, { "epoch": 0.973754070992638, "grad_norm": 1.1334377799337465, "learning_rate": 8.441876161253414e-08, "loss": 0.9512, "num_input_tokens_seen": 1606034752, "step": 8895 }, { "epoch": 0.9738635430635759, "grad_norm": 1.064615891637444, "learning_rate": 8.371420005120756e-08, "loss": 0.7818, "num_input_tokens_seen": 1606222912, "step": 8896 }, { "epoch": 0.9739730151345138, "grad_norm": 0.9928135136018885, "learning_rate": 8.301258603885808e-08, "loss": 0.6377, "num_input_tokens_seen": 1606407040, "step": 8897 }, { "epoch": 0.9740824872054518, "grad_norm": 1.1242173969613232, "learning_rate": 8.231391965848601e-08, "loss": 0.9861, "num_input_tokens_seen": 1606579296, "step": 8898 }, { "epoch": 0.9741919592763896, "grad_norm": 1.2863800344178673, "learning_rate": 8.161820099274464e-08, "loss": 0.9617, "num_input_tokens_seen": 1606779776, "step": 8899 }, { "epoch": 0.9743014313473275, "grad_norm": 1.2490425257532813, "learning_rate": 8.092543012393483e-08, "loss": 0.9095, "num_input_tokens_seen": 1606950464, "step": 8900 }, { "epoch": 0.9744109034182654, "grad_norm": 1.3786131463561155, "learning_rate": 8.023560713400769e-08, "loss": 0.9943, "num_input_tokens_seen": 1607079040, "step": 8901 }, { "epoch": 0.9745203754892033, "grad_norm": 1.0972817213019272, "learning_rate": 7.954873210457015e-08, "loss": 0.9535, "num_input_tokens_seen": 1607285792, "step": 8902 }, { "epoch": 0.9746298475601413, "grad_norm": 1.1345878107209293, "learning_rate": 7.886480511687666e-08, "loss": 0.8621, "num_input_tokens_seen": 1607454912, "step": 8903 }, { "epoch": 0.9747393196310791, "grad_norm": 1.088825803235421, "learning_rate": 7.81838262518375e-08, "loss": 1.0413, "num_input_tokens_seen": 1607654496, "step": 8904 }, { "epoch": 0.974848791702017, "grad_norm": 1.1197944049653867, "learning_rate": 7.75057955900077e-08, "loss": 1.0182, "num_input_tokens_seen": 1607828768, "step": 8905 }, { "epoch": 0.9749582637729549, "grad_norm": 1.015281208245332, "learning_rate": 7.683071321160085e-08, "loss": 0.7526, "num_input_tokens_seen": 1608007744, "step": 8906 }, { "epoch": 0.9750677358438928, "grad_norm": 1.1436200990545806, "learning_rate": 7.615857919647252e-08, "loss": 1.0368, "num_input_tokens_seen": 1608198144, "step": 8907 }, { "epoch": 0.9751772079148308, "grad_norm": 1.1459779416935587, "learning_rate": 7.548939362414243e-08, "loss": 0.9094, "num_input_tokens_seen": 1608370848, "step": 8908 }, { "epoch": 0.9752866799857687, "grad_norm": 1.1236681471940975, "learning_rate": 7.482315657376394e-08, "loss": 1.0805, "num_input_tokens_seen": 1608565056, "step": 8909 }, { "epoch": 0.9753961520567065, "grad_norm": 1.1395255760512955, "learning_rate": 7.41598681241601e-08, "loss": 1.0569, "num_input_tokens_seen": 1608769568, "step": 8910 }, { "epoch": 0.9755056241276444, "grad_norm": 1.0041213343242463, "learning_rate": 7.349952835379592e-08, "loss": 1.0158, "num_input_tokens_seen": 1608961088, "step": 8911 }, { "epoch": 0.9756150961985823, "grad_norm": 1.0822762993366222, "learning_rate": 7.284213734078394e-08, "loss": 0.9854, "num_input_tokens_seen": 1609147008, "step": 8912 }, { "epoch": 0.9757245682695203, "grad_norm": 1.0758130813355056, "learning_rate": 7.218769516289247e-08, "loss": 0.9069, "num_input_tokens_seen": 1609344576, "step": 8913 }, { "epoch": 0.9758340403404582, "grad_norm": 1.0938907532724234, "learning_rate": 7.153620189754573e-08, "loss": 0.8174, "num_input_tokens_seen": 1609543040, "step": 8914 }, { "epoch": 0.9759435124113961, "grad_norm": 1.1722624074808499, "learning_rate": 7.088765762180982e-08, "loss": 0.9677, "num_input_tokens_seen": 1609726048, "step": 8915 }, { "epoch": 0.9760529844823339, "grad_norm": 1.123756197953662, "learning_rate": 7.024206241240671e-08, "loss": 0.8634, "num_input_tokens_seen": 1609911520, "step": 8916 }, { "epoch": 0.9761624565532718, "grad_norm": 1.2052874267438844, "learning_rate": 6.959941634571143e-08, "loss": 1.1983, "num_input_tokens_seen": 1610100128, "step": 8917 }, { "epoch": 0.9762719286242098, "grad_norm": 1.2047043874623182, "learning_rate": 6.895971949774649e-08, "loss": 0.7806, "num_input_tokens_seen": 1610260288, "step": 8918 }, { "epoch": 0.9763814006951477, "grad_norm": 1.109026518340415, "learning_rate": 6.832297194418746e-08, "loss": 0.8743, "num_input_tokens_seen": 1610456960, "step": 8919 }, { "epoch": 0.9764908727660856, "grad_norm": 1.0993241639823823, "learning_rate": 6.768917376035744e-08, "loss": 0.9147, "num_input_tokens_seen": 1610629888, "step": 8920 }, { "epoch": 0.9766003448370234, "grad_norm": 1.1912892768845411, "learning_rate": 6.70583250212381e-08, "loss": 0.8439, "num_input_tokens_seen": 1610801248, "step": 8921 }, { "epoch": 0.9767098169079613, "grad_norm": 1.1213042696234077, "learning_rate": 6.643042580145309e-08, "loss": 1.0172, "num_input_tokens_seen": 1611002624, "step": 8922 }, { "epoch": 0.9768192889788992, "grad_norm": 1.3031348852427587, "learning_rate": 6.580547617528465e-08, "loss": 1.2454, "num_input_tokens_seen": 1611194592, "step": 8923 }, { "epoch": 0.9769287610498372, "grad_norm": 1.1297849515508664, "learning_rate": 6.518347621666255e-08, "loss": 0.8735, "num_input_tokens_seen": 1611380736, "step": 8924 }, { "epoch": 0.9770382331207751, "grad_norm": 1.0580401222178573, "learning_rate": 6.456442599916679e-08, "loss": 0.8455, "num_input_tokens_seen": 1611552768, "step": 8925 }, { "epoch": 0.977147705191713, "grad_norm": 1.121242765830698, "learning_rate": 6.394832559603048e-08, "loss": 0.7613, "num_input_tokens_seen": 1611695008, "step": 8926 }, { "epoch": 0.9772571772626508, "grad_norm": 1.2314442960536793, "learning_rate": 6.333517508013975e-08, "loss": 1.1046, "num_input_tokens_seen": 1611891008, "step": 8927 }, { "epoch": 0.9773666493335887, "grad_norm": 1.179572447162082, "learning_rate": 6.272497452402548e-08, "loss": 0.8751, "num_input_tokens_seen": 1612056320, "step": 8928 }, { "epoch": 0.9774761214045267, "grad_norm": 1.2976400577607452, "learning_rate": 6.211772399987715e-08, "loss": 0.88, "num_input_tokens_seen": 1612241120, "step": 8929 }, { "epoch": 0.9775855934754646, "grad_norm": 1.306391169024472, "learning_rate": 6.151342357952617e-08, "loss": 1.1554, "num_input_tokens_seen": 1612416288, "step": 8930 }, { "epoch": 0.9776950655464025, "grad_norm": 1.1897944820270328, "learning_rate": 6.091207333446259e-08, "loss": 1.2414, "num_input_tokens_seen": 1612573760, "step": 8931 }, { "epoch": 0.9778045376173404, "grad_norm": 1.1206594017862928, "learning_rate": 6.031367333582949e-08, "loss": 0.9695, "num_input_tokens_seen": 1612730112, "step": 8932 }, { "epoch": 0.9779140096882782, "grad_norm": 1.0855687565089267, "learning_rate": 5.971822365440639e-08, "loss": 0.6102, "num_input_tokens_seen": 1612899456, "step": 8933 }, { "epoch": 0.9780234817592162, "grad_norm": 1.0532935644340151, "learning_rate": 5.912572436064523e-08, "loss": 1.0883, "num_input_tokens_seen": 1613075968, "step": 8934 }, { "epoch": 0.9781329538301541, "grad_norm": 1.1092339009562588, "learning_rate": 5.853617552462887e-08, "loss": 0.6987, "num_input_tokens_seen": 1613227392, "step": 8935 }, { "epoch": 0.978242425901092, "grad_norm": 1.0135924321026832, "learning_rate": 5.794957721610428e-08, "loss": 0.7384, "num_input_tokens_seen": 1613399200, "step": 8936 }, { "epoch": 0.9783518979720299, "grad_norm": 1.2386459150574425, "learning_rate": 5.7365929504460404e-08, "loss": 1.0244, "num_input_tokens_seen": 1613564960, "step": 8937 }, { "epoch": 0.9784613700429677, "grad_norm": 1.1805637970181688, "learning_rate": 5.678523245874756e-08, "loss": 0.9379, "num_input_tokens_seen": 1613755136, "step": 8938 }, { "epoch": 0.9785708421139057, "grad_norm": 1.114432559406915, "learning_rate": 5.620748614765803e-08, "loss": 0.9636, "num_input_tokens_seen": 1613927616, "step": 8939 }, { "epoch": 0.9786803141848436, "grad_norm": 1.3355390166335694, "learning_rate": 5.563269063953991e-08, "loss": 0.8928, "num_input_tokens_seen": 1614135712, "step": 8940 }, { "epoch": 0.9787897862557815, "grad_norm": 0.9660557844589118, "learning_rate": 5.506084600238881e-08, "loss": 0.6238, "num_input_tokens_seen": 1614321408, "step": 8941 }, { "epoch": 0.9788992583267194, "grad_norm": 1.2701255463699503, "learning_rate": 5.4491952303850624e-08, "loss": 1.0358, "num_input_tokens_seen": 1614481344, "step": 8942 }, { "epoch": 0.9790087303976573, "grad_norm": 1.1330636107894718, "learning_rate": 5.392600961122707e-08, "loss": 1.0387, "num_input_tokens_seen": 1614679808, "step": 8943 }, { "epoch": 0.9791182024685952, "grad_norm": 0.9989341479659726, "learning_rate": 5.3363017991470145e-08, "loss": 0.7523, "num_input_tokens_seen": 1614847360, "step": 8944 }, { "epoch": 0.9792276745395331, "grad_norm": 1.066538987614492, "learning_rate": 5.280297751117658e-08, "loss": 1.0129, "num_input_tokens_seen": 1615044032, "step": 8945 }, { "epoch": 0.979337146610471, "grad_norm": 1.134989678592217, "learning_rate": 5.224588823659893e-08, "loss": 0.9199, "num_input_tokens_seen": 1615236448, "step": 8946 }, { "epoch": 0.9794466186814089, "grad_norm": 1.2177588516255098, "learning_rate": 5.169175023364003e-08, "loss": 0.9877, "num_input_tokens_seen": 1615457536, "step": 8947 }, { "epoch": 0.9795560907523468, "grad_norm": 1.1515102152037944, "learning_rate": 5.114056356785857e-08, "loss": 0.8683, "num_input_tokens_seen": 1615602912, "step": 8948 }, { "epoch": 0.9796655628232848, "grad_norm": 1.1488249248523623, "learning_rate": 5.05923283044496e-08, "loss": 0.9934, "num_input_tokens_seen": 1615769568, "step": 8949 }, { "epoch": 0.9797750348942226, "grad_norm": 0.9954411481899545, "learning_rate": 5.004704450827513e-08, "loss": 0.743, "num_input_tokens_seen": 1615955936, "step": 8950 }, { "epoch": 0.9798845069651605, "grad_norm": 1.1599816084916517, "learning_rate": 4.9504712243839126e-08, "loss": 0.9042, "num_input_tokens_seen": 1616162240, "step": 8951 }, { "epoch": 0.9799939790360984, "grad_norm": 1.1848841013345044, "learning_rate": 4.896533157529859e-08, "loss": 0.9349, "num_input_tokens_seen": 1616347712, "step": 8952 }, { "epoch": 0.9801034511070363, "grad_norm": 1.1480218044164177, "learning_rate": 4.842890256646082e-08, "loss": 0.8569, "num_input_tokens_seen": 1616497120, "step": 8953 }, { "epoch": 0.9802129231779743, "grad_norm": 1.1961962356364009, "learning_rate": 4.789542528078339e-08, "loss": 1.0539, "num_input_tokens_seen": 1616643168, "step": 8954 }, { "epoch": 0.9803223952489121, "grad_norm": 1.1895360056497586, "learning_rate": 4.73648997813797e-08, "loss": 0.8197, "num_input_tokens_seen": 1616805120, "step": 8955 }, { "epoch": 0.98043186731985, "grad_norm": 1.199097667632715, "learning_rate": 4.6837326131002336e-08, "loss": 1.0513, "num_input_tokens_seen": 1617007616, "step": 8956 }, { "epoch": 0.9805413393907879, "grad_norm": 1.1039595259697015, "learning_rate": 4.63127043920708e-08, "loss": 1.0188, "num_input_tokens_seen": 1617166208, "step": 8957 }, { "epoch": 0.9806508114617258, "grad_norm": 1.163117197739282, "learning_rate": 4.579103462664103e-08, "loss": 1.1022, "num_input_tokens_seen": 1617345856, "step": 8958 }, { "epoch": 0.9807602835326638, "grad_norm": 1.1458037401481547, "learning_rate": 4.52723168964303e-08, "loss": 1.048, "num_input_tokens_seen": 1617523712, "step": 8959 }, { "epoch": 0.9808697556036017, "grad_norm": 1.0226063155885154, "learning_rate": 4.4756551262795096e-08, "loss": 0.6062, "num_input_tokens_seen": 1617704480, "step": 8960 }, { "epoch": 0.9809792276745395, "grad_norm": 1.1826767595377272, "learning_rate": 4.424373778675606e-08, "loss": 0.794, "num_input_tokens_seen": 1617854112, "step": 8961 }, { "epoch": 0.9810886997454774, "grad_norm": 1.1338735412615186, "learning_rate": 4.373387652897576e-08, "loss": 0.8626, "num_input_tokens_seen": 1618029952, "step": 8962 }, { "epoch": 0.9811981718164153, "grad_norm": 1.0537312446454634, "learning_rate": 4.3226967549769845e-08, "loss": 0.8156, "num_input_tokens_seen": 1618221920, "step": 8963 }, { "epoch": 0.9813076438873533, "grad_norm": 1.0578452979994342, "learning_rate": 4.2723010909104244e-08, "loss": 1.0326, "num_input_tokens_seen": 1618427328, "step": 8964 }, { "epoch": 0.9814171159582912, "grad_norm": 0.8912036421437721, "learning_rate": 4.222200666659515e-08, "loss": 0.8037, "num_input_tokens_seen": 1618609888, "step": 8965 }, { "epoch": 0.9815265880292291, "grad_norm": 1.258247584564419, "learning_rate": 4.1723954881511816e-08, "loss": 1.0293, "num_input_tokens_seen": 1618783936, "step": 8966 }, { "epoch": 0.9816360601001669, "grad_norm": 1.2795895307252727, "learning_rate": 4.122885561277101e-08, "loss": 0.9095, "num_input_tokens_seen": 1618977696, "step": 8967 }, { "epoch": 0.9817455321711048, "grad_norm": 0.9832771779583288, "learning_rate": 4.073670891894532e-08, "loss": 0.886, "num_input_tokens_seen": 1619176832, "step": 8968 }, { "epoch": 0.9818550042420428, "grad_norm": 1.0342457347668508, "learning_rate": 4.0247514858252065e-08, "loss": 0.9492, "num_input_tokens_seen": 1619348416, "step": 8969 }, { "epoch": 0.9819644763129807, "grad_norm": 1.028006278375089, "learning_rate": 3.97612734885644e-08, "loss": 0.7255, "num_input_tokens_seen": 1619550464, "step": 8970 }, { "epoch": 0.9820739483839186, "grad_norm": 1.2876421414771075, "learning_rate": 3.9277984867400196e-08, "loss": 0.8936, "num_input_tokens_seen": 1619718016, "step": 8971 }, { "epoch": 0.9821834204548564, "grad_norm": 1.2042760915877626, "learning_rate": 3.879764905193595e-08, "loss": 0.9367, "num_input_tokens_seen": 1619916480, "step": 8972 }, { "epoch": 0.9822928925257943, "grad_norm": 1.1246594867360282, "learning_rate": 3.832026609899009e-08, "loss": 0.8873, "num_input_tokens_seen": 1620096128, "step": 8973 }, { "epoch": 0.9824023645967322, "grad_norm": 1.1017686366925854, "learning_rate": 3.7845836065039664e-08, "loss": 0.8523, "num_input_tokens_seen": 1620272640, "step": 8974 }, { "epoch": 0.9825118366676702, "grad_norm": 1.2519406989041826, "learning_rate": 3.737435900620645e-08, "loss": 1.1305, "num_input_tokens_seen": 1620433024, "step": 8975 }, { "epoch": 0.9826213087386081, "grad_norm": 1.1602817557423004, "learning_rate": 3.690583497826528e-08, "loss": 0.789, "num_input_tokens_seen": 1620584000, "step": 8976 }, { "epoch": 0.982730780809546, "grad_norm": 1.174457755672512, "learning_rate": 3.644026403664402e-08, "loss": 1.2681, "num_input_tokens_seen": 1620780448, "step": 8977 }, { "epoch": 0.9828402528804838, "grad_norm": 1.0756893564728751, "learning_rate": 3.5977646236415306e-08, "loss": 0.8091, "num_input_tokens_seen": 1620955392, "step": 8978 }, { "epoch": 0.9829497249514217, "grad_norm": 1.0583191124769142, "learning_rate": 3.551798163231035e-08, "loss": 0.792, "num_input_tokens_seen": 1621118016, "step": 8979 }, { "epoch": 0.9830591970223597, "grad_norm": 1.0583686166924255, "learning_rate": 3.506127027870232e-08, "loss": 0.8992, "num_input_tokens_seen": 1621302144, "step": 8980 }, { "epoch": 0.9831686690932976, "grad_norm": 1.1694936334731074, "learning_rate": 3.4607512229622993e-08, "loss": 0.9606, "num_input_tokens_seen": 1621499264, "step": 8981 }, { "epoch": 0.9832781411642355, "grad_norm": 1.0514490777753347, "learning_rate": 3.415670753874889e-08, "loss": 0.9206, "num_input_tokens_seen": 1621696384, "step": 8982 }, { "epoch": 0.9833876132351734, "grad_norm": 1.2004812845044672, "learning_rate": 3.370885625940956e-08, "loss": 1.0507, "num_input_tokens_seen": 1621890144, "step": 8983 }, { "epoch": 0.9834970853061112, "grad_norm": 1.0165063408514532, "learning_rate": 3.3263958444582076e-08, "loss": 0.7291, "num_input_tokens_seen": 1622071808, "step": 8984 }, { "epoch": 0.9836065573770492, "grad_norm": 1.180730548056305, "learning_rate": 3.2822014146902114e-08, "loss": 1.017, "num_input_tokens_seen": 1622241376, "step": 8985 }, { "epoch": 0.9837160294479871, "grad_norm": 1.061269047075317, "learning_rate": 3.2383023418650074e-08, "loss": 1.0597, "num_input_tokens_seen": 1622440288, "step": 8986 }, { "epoch": 0.983825501518925, "grad_norm": 1.0590878280262925, "learning_rate": 3.1946986311756634e-08, "loss": 1.0105, "num_input_tokens_seen": 1622629792, "step": 8987 }, { "epoch": 0.9839349735898629, "grad_norm": 1.1621051179182793, "learning_rate": 3.151390287780276e-08, "loss": 1.0486, "num_input_tokens_seen": 1622814592, "step": 8988 }, { "epoch": 0.9840444456608007, "grad_norm": 1.0239728735780405, "learning_rate": 3.108377316801969e-08, "loss": 0.9022, "num_input_tokens_seen": 1623006560, "step": 8989 }, { "epoch": 0.9841539177317387, "grad_norm": 1.225764361156383, "learning_rate": 3.065659723329728e-08, "loss": 1.0511, "num_input_tokens_seen": 1623185088, "step": 8990 }, { "epoch": 0.9842633898026766, "grad_norm": 1.0912128995376245, "learning_rate": 3.023237512416455e-08, "loss": 0.7805, "num_input_tokens_seen": 1623339872, "step": 8991 }, { "epoch": 0.9843728618736145, "grad_norm": 1.1827106769631053, "learning_rate": 2.981110689080913e-08, "loss": 1.0692, "num_input_tokens_seen": 1623551552, "step": 8992 }, { "epoch": 0.9844823339445524, "grad_norm": 1.210821133836766, "learning_rate": 2.9392792583066154e-08, "loss": 1.0629, "num_input_tokens_seen": 1623748896, "step": 8993 }, { "epoch": 0.9845918060154903, "grad_norm": 1.155566400060345, "learning_rate": 2.8977432250418267e-08, "loss": 1.0581, "num_input_tokens_seen": 1623929664, "step": 8994 }, { "epoch": 0.9847012780864282, "grad_norm": 1.1856930185452683, "learning_rate": 2.8565025942001166e-08, "loss": 0.9712, "num_input_tokens_seen": 1624095648, "step": 8995 }, { "epoch": 0.9848107501573661, "grad_norm": 1.0830858510333417, "learning_rate": 2.8155573706609152e-08, "loss": 0.9379, "num_input_tokens_seen": 1624291200, "step": 8996 }, { "epoch": 0.984920222228304, "grad_norm": 0.9664338326122153, "learning_rate": 2.7749075592670148e-08, "loss": 0.6978, "num_input_tokens_seen": 1624448224, "step": 8997 }, { "epoch": 0.9850296942992419, "grad_norm": 1.032838465007866, "learning_rate": 2.734553164827902e-08, "loss": 0.9129, "num_input_tokens_seen": 1624633024, "step": 8998 }, { "epoch": 0.9851391663701798, "grad_norm": 1.1296642886741135, "learning_rate": 2.6944941921172585e-08, "loss": 0.8035, "num_input_tokens_seen": 1624819840, "step": 8999 }, { "epoch": 0.9852486384411178, "grad_norm": 1.1917238906423375, "learning_rate": 2.654730645873793e-08, "loss": 1.0822, "num_input_tokens_seen": 1624968800, "step": 9000 }, { "epoch": 0.9853581105120556, "grad_norm": 0.9603760792781908, "learning_rate": 2.6152625308015212e-08, "loss": 0.8484, "num_input_tokens_seen": 1625164352, "step": 9001 }, { "epoch": 0.9854675825829935, "grad_norm": 1.2578581807625917, "learning_rate": 2.576089851569763e-08, "loss": 0.8313, "num_input_tokens_seen": 1625326304, "step": 9002 }, { "epoch": 0.9855770546539314, "grad_norm": 1.1673146333102566, "learning_rate": 2.5372126128120345e-08, "loss": 1.1665, "num_input_tokens_seen": 1625534176, "step": 9003 }, { "epoch": 0.9856865267248693, "grad_norm": 1.0920626567604024, "learning_rate": 2.4986308191277118e-08, "loss": 0.9605, "num_input_tokens_seen": 1625736672, "step": 9004 }, { "epoch": 0.9857959987958073, "grad_norm": 1.1859568055281575, "learning_rate": 2.4603444750811998e-08, "loss": 0.7496, "num_input_tokens_seen": 1625883840, "step": 9005 }, { "epoch": 0.9859054708667451, "grad_norm": 1.042145021593787, "learning_rate": 2.4223535852010983e-08, "loss": 0.7944, "num_input_tokens_seen": 1626046240, "step": 9006 }, { "epoch": 0.986014942937683, "grad_norm": 1.107089468771851, "learning_rate": 2.384658153982422e-08, "loss": 0.8618, "num_input_tokens_seen": 1626240224, "step": 9007 }, { "epoch": 0.9861244150086209, "grad_norm": 1.192836665727467, "learning_rate": 2.347258185883827e-08, "loss": 1.0287, "num_input_tokens_seen": 1626426144, "step": 9008 }, { "epoch": 0.9862338870795588, "grad_norm": 1.1041138774320671, "learning_rate": 2.31015368532983e-08, "loss": 0.7897, "num_input_tokens_seen": 1626586976, "step": 9009 }, { "epoch": 0.9863433591504968, "grad_norm": 1.0893205049752388, "learning_rate": 2.2733446567099747e-08, "loss": 0.9115, "num_input_tokens_seen": 1626755872, "step": 9010 }, { "epoch": 0.9864528312214347, "grad_norm": 1.0244887100238087, "learning_rate": 2.236831104378556e-08, "loss": 0.6701, "num_input_tokens_seen": 1626938432, "step": 9011 }, { "epoch": 0.9865623032923725, "grad_norm": 1.2379885065750023, "learning_rate": 2.2006130326551745e-08, "loss": 0.9218, "num_input_tokens_seen": 1627142272, "step": 9012 }, { "epoch": 0.9866717753633104, "grad_norm": 1.0995492596904328, "learning_rate": 2.16469044582418e-08, "loss": 0.6784, "num_input_tokens_seen": 1627328416, "step": 9013 }, { "epoch": 0.9867812474342483, "grad_norm": 1.107156604031069, "learning_rate": 2.129063348135507e-08, "loss": 0.8195, "num_input_tokens_seen": 1627513664, "step": 9014 }, { "epoch": 0.9868907195051863, "grad_norm": 1.0544324213208296, "learning_rate": 2.0937317438032844e-08, "loss": 1.1298, "num_input_tokens_seen": 1627709216, "step": 9015 }, { "epoch": 0.9870001915761242, "grad_norm": 1.0374980995436578, "learning_rate": 2.0586956370075018e-08, "loss": 0.7539, "num_input_tokens_seen": 1627878112, "step": 9016 }, { "epoch": 0.9871096636470621, "grad_norm": 1.097658367765834, "learning_rate": 2.0239550318926215e-08, "loss": 1.1311, "num_input_tokens_seen": 1628060896, "step": 9017 }, { "epoch": 0.9872191357179999, "grad_norm": 1.1463748288173712, "learning_rate": 1.9895099325686894e-08, "loss": 1.0293, "num_input_tokens_seen": 1628242784, "step": 9018 }, { "epoch": 0.9873286077889378, "grad_norm": 1.0757178371691423, "learning_rate": 1.955360343110224e-08, "loss": 0.8484, "num_input_tokens_seen": 1628446624, "step": 9019 }, { "epoch": 0.9874380798598758, "grad_norm": 1.2063091303113889, "learning_rate": 1.921506267557327e-08, "loss": 0.8816, "num_input_tokens_seen": 1628632768, "step": 9020 }, { "epoch": 0.9875475519308137, "grad_norm": 1.1661033405684977, "learning_rate": 1.8879477099145726e-08, "loss": 1.1487, "num_input_tokens_seen": 1628825856, "step": 9021 }, { "epoch": 0.9876570240017516, "grad_norm": 1.049395530277197, "learning_rate": 1.8546846741521184e-08, "loss": 0.8833, "num_input_tokens_seen": 1628998112, "step": 9022 }, { "epoch": 0.9877664960726894, "grad_norm": 1.1242257208803546, "learning_rate": 1.8217171642048726e-08, "loss": 0.9563, "num_input_tokens_seen": 1629189632, "step": 9023 }, { "epoch": 0.9878759681436273, "grad_norm": 1.3069997616091265, "learning_rate": 1.7890451839727707e-08, "loss": 1.1399, "num_input_tokens_seen": 1629393696, "step": 9024 }, { "epoch": 0.9879854402145652, "grad_norm": 1.1702244133388082, "learning_rate": 1.756668737320777e-08, "loss": 0.8137, "num_input_tokens_seen": 1629550496, "step": 9025 }, { "epoch": 0.9880949122855032, "grad_norm": 1.0393953865699053, "learning_rate": 1.7245878280791606e-08, "loss": 0.86, "num_input_tokens_seen": 1629728128, "step": 9026 }, { "epoch": 0.9882043843564411, "grad_norm": 1.0331978175814769, "learning_rate": 1.692802460042664e-08, "loss": 0.6698, "num_input_tokens_seen": 1629894784, "step": 9027 }, { "epoch": 0.988313856427379, "grad_norm": 1.046976283853226, "learning_rate": 1.66131263697189e-08, "loss": 0.7303, "num_input_tokens_seen": 1630068384, "step": 9028 }, { "epoch": 0.9884233284983168, "grad_norm": 1.0500224610605668, "learning_rate": 1.630118362591915e-08, "loss": 0.8252, "num_input_tokens_seen": 1630201440, "step": 9029 }, { "epoch": 0.9885328005692547, "grad_norm": 1.0812769715577926, "learning_rate": 1.5992196405925642e-08, "loss": 0.7654, "num_input_tokens_seen": 1630383776, "step": 9030 }, { "epoch": 0.9886422726401927, "grad_norm": 1.0489859293565849, "learning_rate": 1.568616474629525e-08, "loss": 0.6067, "num_input_tokens_seen": 1630543040, "step": 9031 }, { "epoch": 0.9887517447111306, "grad_norm": 1.0485410954649246, "learning_rate": 1.5383088683229574e-08, "loss": 0.7099, "num_input_tokens_seen": 1630716416, "step": 9032 }, { "epoch": 0.9888612167820685, "grad_norm": 1.1583714483471552, "learning_rate": 1.5082968252583263e-08, "loss": 0.8025, "num_input_tokens_seen": 1630883520, "step": 9033 }, { "epoch": 0.9889706888530064, "grad_norm": 1.0597550894652603, "learning_rate": 1.4785803489858474e-08, "loss": 0.8531, "num_input_tokens_seen": 1631082880, "step": 9034 }, { "epoch": 0.9890801609239442, "grad_norm": 1.2639654605619053, "learning_rate": 1.4491594430207645e-08, "loss": 0.973, "num_input_tokens_seen": 1631262976, "step": 9035 }, { "epoch": 0.9891896329948822, "grad_norm": 1.0900464706839004, "learning_rate": 1.4200341108439042e-08, "loss": 0.9446, "num_input_tokens_seen": 1631457632, "step": 9036 }, { "epoch": 0.9892991050658201, "grad_norm": 1.1797893555986951, "learning_rate": 1.3912043559005661e-08, "loss": 0.9806, "num_input_tokens_seen": 1631667296, "step": 9037 }, { "epoch": 0.989408577136758, "grad_norm": 1.1190518914907384, "learning_rate": 1.3626701816010778e-08, "loss": 0.871, "num_input_tokens_seen": 1631859040, "step": 9038 }, { "epoch": 0.9895180492076959, "grad_norm": 1.1688181872544903, "learning_rate": 1.3344315913210725e-08, "loss": 0.7274, "num_input_tokens_seen": 1632046304, "step": 9039 }, { "epoch": 0.9896275212786337, "grad_norm": 1.112413302268717, "learning_rate": 1.3064885884012112e-08, "loss": 0.8064, "num_input_tokens_seen": 1632251040, "step": 9040 }, { "epoch": 0.9897369933495717, "grad_norm": 1.0829283923377453, "learning_rate": 1.278841176147183e-08, "loss": 0.9352, "num_input_tokens_seen": 1632424864, "step": 9041 }, { "epoch": 0.9898464654205096, "grad_norm": 1.2836828162601288, "learning_rate": 1.2514893578294274e-08, "loss": 0.9728, "num_input_tokens_seen": 1632613248, "step": 9042 }, { "epoch": 0.9899559374914475, "grad_norm": 1.0495957845383694, "learning_rate": 1.2244331366836892e-08, "loss": 0.773, "num_input_tokens_seen": 1632790656, "step": 9043 }, { "epoch": 0.9900654095623854, "grad_norm": 1.1046752238367128, "learning_rate": 1.1976725159107415e-08, "loss": 0.7169, "num_input_tokens_seen": 1632921248, "step": 9044 }, { "epoch": 0.9901748816333233, "grad_norm": 1.1900864592166263, "learning_rate": 1.1712074986761079e-08, "loss": 0.9976, "num_input_tokens_seen": 1633111648, "step": 9045 }, { "epoch": 0.9902843537042612, "grad_norm": 1.1837316250205756, "learning_rate": 1.1450380881106171e-08, "loss": 0.863, "num_input_tokens_seen": 1633312800, "step": 9046 }, { "epoch": 0.9903938257751991, "grad_norm": 1.1222227572344354, "learning_rate": 1.1191642873104036e-08, "loss": 0.6596, "num_input_tokens_seen": 1633488864, "step": 9047 }, { "epoch": 0.990503297846137, "grad_norm": 1.2175018081024969, "learning_rate": 1.0935860993357971e-08, "loss": 0.798, "num_input_tokens_seen": 1633666720, "step": 9048 }, { "epoch": 0.9906127699170749, "grad_norm": 1.2251650874166145, "learning_rate": 1.0683035272127107e-08, "loss": 0.9445, "num_input_tokens_seen": 1633856224, "step": 9049 }, { "epoch": 0.9907222419880128, "grad_norm": 1.026917750002146, "learning_rate": 1.0433165739323625e-08, "loss": 0.7118, "num_input_tokens_seen": 1634033408, "step": 9050 }, { "epoch": 0.9908317140589508, "grad_norm": 1.123587516350417, "learning_rate": 1.0186252424504439e-08, "loss": 0.6755, "num_input_tokens_seen": 1634234784, "step": 9051 }, { "epoch": 0.9909411861298886, "grad_norm": 1.1359007208955447, "learning_rate": 9.942295356879517e-09, "loss": 1.0072, "num_input_tokens_seen": 1634433472, "step": 9052 }, { "epoch": 0.9910506582008265, "grad_norm": 1.0660909668719627, "learning_rate": 9.701294565309105e-09, "loss": 1.1832, "num_input_tokens_seen": 1634616256, "step": 9053 }, { "epoch": 0.9911601302717644, "grad_norm": 1.2904690553604985, "learning_rate": 9.463250078300955e-09, "loss": 0.7886, "num_input_tokens_seen": 1634806208, "step": 9054 }, { "epoch": 0.9912696023427023, "grad_norm": 1.2301981794997183, "learning_rate": 9.228161924015877e-09, "loss": 0.9664, "num_input_tokens_seen": 1634977120, "step": 9055 }, { "epoch": 0.9913790744136403, "grad_norm": 1.0998162029342977, "learning_rate": 8.99603013026773e-09, "loss": 0.9962, "num_input_tokens_seen": 1635175136, "step": 9056 }, { "epoch": 0.9914885464845781, "grad_norm": 1.0568604866353266, "learning_rate": 8.766854724509555e-09, "loss": 0.9536, "num_input_tokens_seen": 1635353664, "step": 9057 }, { "epoch": 0.991598018555516, "grad_norm": 1.0919223774684914, "learning_rate": 8.540635733861325e-09, "loss": 0.7651, "num_input_tokens_seen": 1635531968, "step": 9058 }, { "epoch": 0.9917074906264539, "grad_norm": 1.1286453420757967, "learning_rate": 8.317373185079413e-09, "loss": 0.9625, "num_input_tokens_seen": 1635738048, "step": 9059 }, { "epoch": 0.9918169626973918, "grad_norm": 1.1745987836171872, "learning_rate": 8.097067104576029e-09, "loss": 1.0214, "num_input_tokens_seen": 1635904928, "step": 9060 }, { "epoch": 0.9919264347683298, "grad_norm": 1.1876608087175904, "learning_rate": 7.879717518413654e-09, "loss": 0.8122, "num_input_tokens_seen": 1636081440, "step": 9061 }, { "epoch": 0.9920359068392677, "grad_norm": 1.122744736870827, "learning_rate": 7.66532445230228e-09, "loss": 0.9507, "num_input_tokens_seen": 1636274976, "step": 9062 }, { "epoch": 0.9921453789102055, "grad_norm": 1.0914958613968595, "learning_rate": 7.453887931607728e-09, "loss": 0.9778, "num_input_tokens_seen": 1636476800, "step": 9063 }, { "epoch": 0.9922548509811434, "grad_norm": 1.0073730930263458, "learning_rate": 7.2454079813405465e-09, "loss": 0.9354, "num_input_tokens_seen": 1636672576, "step": 9064 }, { "epoch": 0.9923643230520813, "grad_norm": 1.242094936160963, "learning_rate": 7.039884626164339e-09, "loss": 0.9738, "num_input_tokens_seen": 1636825344, "step": 9065 }, { "epoch": 0.9924737951230193, "grad_norm": 1.079626116408539, "learning_rate": 6.83731789038744e-09, "loss": 1.1323, "num_input_tokens_seen": 1637027840, "step": 9066 }, { "epoch": 0.9925832671939572, "grad_norm": 1.141262555591253, "learning_rate": 6.637707797979564e-09, "loss": 0.8698, "num_input_tokens_seen": 1637210848, "step": 9067 }, { "epoch": 0.9926927392648951, "grad_norm": 1.1826750963997272, "learning_rate": 6.44105437255238e-09, "loss": 0.8906, "num_input_tokens_seen": 1637398112, "step": 9068 }, { "epoch": 0.9928022113358329, "grad_norm": 1.2686622344982152, "learning_rate": 6.247357637367834e-09, "loss": 0.8269, "num_input_tokens_seen": 1637601280, "step": 9069 }, { "epoch": 0.9929116834067708, "grad_norm": 1.0586297849032849, "learning_rate": 6.056617615340931e-09, "loss": 0.8737, "num_input_tokens_seen": 1637776896, "step": 9070 }, { "epoch": 0.9930211554777088, "grad_norm": 1.2573847546865098, "learning_rate": 5.868834329036954e-09, "loss": 0.8278, "num_input_tokens_seen": 1637931232, "step": 9071 }, { "epoch": 0.9931306275486467, "grad_norm": 1.2977802380437988, "learning_rate": 5.684007800668689e-09, "loss": 0.9059, "num_input_tokens_seen": 1638106400, "step": 9072 }, { "epoch": 0.9932400996195846, "grad_norm": 1.173935210917825, "learning_rate": 5.50213805210198e-09, "loss": 1.0028, "num_input_tokens_seen": 1638288512, "step": 9073 }, { "epoch": 0.9933495716905224, "grad_norm": 1.1063636419687082, "learning_rate": 5.3232251048473956e-09, "loss": 0.7726, "num_input_tokens_seen": 1638480032, "step": 9074 }, { "epoch": 0.9934590437614603, "grad_norm": 0.9626945930551019, "learning_rate": 5.147268980076891e-09, "loss": 0.8973, "num_input_tokens_seen": 1638698880, "step": 9075 }, { "epoch": 0.9935685158323982, "grad_norm": 1.1528212253140053, "learning_rate": 4.974269698601597e-09, "loss": 0.841, "num_input_tokens_seen": 1638887712, "step": 9076 }, { "epoch": 0.9936779879033362, "grad_norm": 1.275434227471194, "learning_rate": 4.804227280888473e-09, "loss": 1.1056, "num_input_tokens_seen": 1639076096, "step": 9077 }, { "epoch": 0.9937874599742741, "grad_norm": 1.004309072461239, "learning_rate": 4.637141747051987e-09, "loss": 0.7488, "num_input_tokens_seen": 1639248128, "step": 9078 }, { "epoch": 0.993896932045212, "grad_norm": 1.101647448138852, "learning_rate": 4.473013116859659e-09, "loss": 0.796, "num_input_tokens_seen": 1639449504, "step": 9079 }, { "epoch": 0.9940064041161498, "grad_norm": 1.2643344563024033, "learning_rate": 4.311841409723738e-09, "loss": 0.9079, "num_input_tokens_seen": 1639632960, "step": 9080 }, { "epoch": 0.9941158761870877, "grad_norm": 1.1411886520860446, "learning_rate": 4.153626644715081e-09, "loss": 0.8292, "num_input_tokens_seen": 1639814848, "step": 9081 }, { "epoch": 0.9942253482580257, "grad_norm": 1.1035185417204263, "learning_rate": 3.998368840549271e-09, "loss": 1.1794, "num_input_tokens_seen": 1640018240, "step": 9082 }, { "epoch": 0.9943348203289636, "grad_norm": 1.146233032333937, "learning_rate": 3.8460680155921746e-09, "loss": 0.7232, "num_input_tokens_seen": 1640209088, "step": 9083 }, { "epoch": 0.9944442923999015, "grad_norm": 1.1280025574235806, "learning_rate": 3.6967241878599347e-09, "loss": 1.1266, "num_input_tokens_seen": 1640398144, "step": 9084 }, { "epoch": 0.9945537644708394, "grad_norm": 1.2502053568954563, "learning_rate": 3.550337375018975e-09, "loss": 1.0705, "num_input_tokens_seen": 1640599968, "step": 9085 }, { "epoch": 0.9946632365417772, "grad_norm": 1.0722808003003168, "learning_rate": 3.406907594388775e-09, "loss": 0.822, "num_input_tokens_seen": 1640798656, "step": 9086 }, { "epoch": 0.9947727086127152, "grad_norm": 1.109627278615991, "learning_rate": 3.2664348629363183e-09, "loss": 0.8315, "num_input_tokens_seen": 1640993760, "step": 9087 }, { "epoch": 0.9948821806836531, "grad_norm": 1.076279927279576, "learning_rate": 3.1289191972816435e-09, "loss": 0.7504, "num_input_tokens_seen": 1641179456, "step": 9088 }, { "epoch": 0.994991652754591, "grad_norm": 1.084873474056728, "learning_rate": 2.994360613686742e-09, "loss": 1.1492, "num_input_tokens_seen": 1641367168, "step": 9089 }, { "epoch": 0.9951011248255289, "grad_norm": 1.0061069220948307, "learning_rate": 2.862759128072212e-09, "loss": 0.7467, "num_input_tokens_seen": 1641579520, "step": 9090 }, { "epoch": 0.9952105968964667, "grad_norm": 1.0809088990308806, "learning_rate": 2.734114756008932e-09, "loss": 1.2505, "num_input_tokens_seen": 1641769024, "step": 9091 }, { "epoch": 0.9953200689674047, "grad_norm": 1.1115907918457382, "learning_rate": 2.6084275127125078e-09, "loss": 1.1067, "num_input_tokens_seen": 1641945312, "step": 9092 }, { "epoch": 0.9954295410383426, "grad_norm": 1.3079723192138242, "learning_rate": 2.485697413051602e-09, "loss": 0.8964, "num_input_tokens_seen": 1642090016, "step": 9093 }, { "epoch": 0.9955390131092805, "grad_norm": 1.0486596112407007, "learning_rate": 2.365924471547931e-09, "loss": 0.8095, "num_input_tokens_seen": 1642273248, "step": 9094 }, { "epoch": 0.9956484851802184, "grad_norm": 1.0390263601342524, "learning_rate": 2.2491087023651657e-09, "loss": 0.7469, "num_input_tokens_seen": 1642460512, "step": 9095 }, { "epoch": 0.9957579572511563, "grad_norm": 1.0597224675638346, "learning_rate": 2.1352501193255824e-09, "loss": 1.0862, "num_input_tokens_seen": 1642660096, "step": 9096 }, { "epoch": 0.9958674293220942, "grad_norm": 1.1144072075605937, "learning_rate": 2.0243487358989623e-09, "loss": 0.8892, "num_input_tokens_seen": 1642850272, "step": 9097 }, { "epoch": 0.9959769013930321, "grad_norm": 1.1112169904064941, "learning_rate": 1.9164045652053655e-09, "loss": 0.8041, "num_input_tokens_seen": 1643012000, "step": 9098 }, { "epoch": 0.99608637346397, "grad_norm": 1.2759732701661852, "learning_rate": 1.8114176200123567e-09, "loss": 1.183, "num_input_tokens_seen": 1643208224, "step": 9099 }, { "epoch": 0.9961958455349079, "grad_norm": 1.114889061373885, "learning_rate": 1.709387912737781e-09, "loss": 0.8781, "num_input_tokens_seen": 1643364800, "step": 9100 }, { "epoch": 0.9963053176058458, "grad_norm": 1.2247596660667852, "learning_rate": 1.6103154554553135e-09, "loss": 0.9975, "num_input_tokens_seen": 1643532352, "step": 9101 }, { "epoch": 0.9964147896767838, "grad_norm": 1.064373720339657, "learning_rate": 1.5142002598833581e-09, "loss": 1.0536, "num_input_tokens_seen": 1643704832, "step": 9102 }, { "epoch": 0.9965242617477216, "grad_norm": 1.2032410701893521, "learning_rate": 1.4210423373933746e-09, "loss": 0.8358, "num_input_tokens_seen": 1643816160, "step": 9103 }, { "epoch": 0.9966337338186595, "grad_norm": 1.3002393813914435, "learning_rate": 1.330841699004326e-09, "loss": 1.0104, "num_input_tokens_seen": 1643993568, "step": 9104 }, { "epoch": 0.9967432058895974, "grad_norm": 1.1209253058343207, "learning_rate": 1.2435983553882314e-09, "loss": 0.856, "num_input_tokens_seen": 1644159328, "step": 9105 }, { "epoch": 0.9968526779605353, "grad_norm": 1.110767779685141, "learning_rate": 1.1593123168646137e-09, "loss": 0.8713, "num_input_tokens_seen": 1644339200, "step": 9106 }, { "epoch": 0.9969621500314733, "grad_norm": 1.007227469091209, "learning_rate": 1.0779835934032755e-09, "loss": 0.9679, "num_input_tokens_seen": 1644517952, "step": 9107 }, { "epoch": 0.9970716221024111, "grad_norm": 1.235404394173523, "learning_rate": 9.996121946270753e-10, "loss": 0.9056, "num_input_tokens_seen": 1644713504, "step": 9108 }, { "epoch": 0.997181094173349, "grad_norm": 1.2521571143442787, "learning_rate": 9.241981298091506e-10, "loss": 1.0041, "num_input_tokens_seen": 1644880832, "step": 9109 }, { "epoch": 0.9972905662442869, "grad_norm": 1.2700881910115858, "learning_rate": 8.517414078645925e-10, "loss": 0.7762, "num_input_tokens_seen": 1645044576, "step": 9110 }, { "epoch": 0.9974000383152248, "grad_norm": 1.2472499637355252, "learning_rate": 7.822420373726491e-10, "loss": 0.9477, "num_input_tokens_seen": 1645214592, "step": 9111 }, { "epoch": 0.9975095103861628, "grad_norm": 1.2347735473803452, "learning_rate": 7.157000265489711e-10, "loss": 1.1317, "num_input_tokens_seen": 1645404096, "step": 9112 }, { "epoch": 0.9976189824571007, "grad_norm": 1.0770493268978474, "learning_rate": 6.521153832678151e-10, "loss": 0.7154, "num_input_tokens_seen": 1645558880, "step": 9113 }, { "epoch": 0.9977284545280385, "grad_norm": 1.1807734562605396, "learning_rate": 5.914881150509422e-10, "loss": 0.8393, "num_input_tokens_seen": 1645694848, "step": 9114 }, { "epoch": 0.9978379265989764, "grad_norm": 1.0546046471724693, "learning_rate": 5.33818229070393e-10, "loss": 0.9212, "num_input_tokens_seen": 1645879648, "step": 9115 }, { "epoch": 0.9979473986699143, "grad_norm": 1.3080337920563039, "learning_rate": 4.791057321484882e-10, "loss": 0.9202, "num_input_tokens_seen": 1646073856, "step": 9116 }, { "epoch": 0.9980568707408523, "grad_norm": 1.225030756583732, "learning_rate": 4.273506307550523e-10, "loss": 0.8831, "num_input_tokens_seen": 1646248576, "step": 9117 }, { "epoch": 0.9981663428117902, "grad_norm": 1.079834067171953, "learning_rate": 3.785529310185165e-10, "loss": 0.9692, "num_input_tokens_seen": 1646420160, "step": 9118 }, { "epoch": 0.9982758148827281, "grad_norm": 1.1065163652681058, "learning_rate": 3.327126387064894e-10, "loss": 0.8842, "num_input_tokens_seen": 1646585696, "step": 9119 }, { "epoch": 0.9983852869536659, "grad_norm": 1.146186386972348, "learning_rate": 2.898297592424104e-10, "loss": 0.828, "num_input_tokens_seen": 1646765792, "step": 9120 }, { "epoch": 0.9984947590246038, "grad_norm": 1.0614178139112662, "learning_rate": 2.499042976999988e-10, "loss": 0.7106, "num_input_tokens_seen": 1646930656, "step": 9121 }, { "epoch": 0.9986042310955418, "grad_norm": 1.2714912660473545, "learning_rate": 2.1293625880325352e-10, "loss": 0.8705, "num_input_tokens_seen": 1647114560, "step": 9122 }, { "epoch": 0.9987137031664797, "grad_norm": 1.1913594721381673, "learning_rate": 1.7892564692367775e-10, "loss": 0.8217, "num_input_tokens_seen": 1647317504, "step": 9123 }, { "epoch": 0.9988231752374176, "grad_norm": 1.2194616734111414, "learning_rate": 1.478724660886055e-10, "loss": 0.8254, "num_input_tokens_seen": 1647472512, "step": 9124 }, { "epoch": 0.9989326473083554, "grad_norm": 1.146847928842468, "learning_rate": 1.1977671996732388e-10, "loss": 0.9125, "num_input_tokens_seen": 1647639840, "step": 9125 }, { "epoch": 0.9990421193792933, "grad_norm": 1.0425556522032668, "learning_rate": 9.463841188217527e-11, "loss": 0.8639, "num_input_tokens_seen": 1647810976, "step": 9126 }, { "epoch": 0.9991515914502312, "grad_norm": 1.0787739113890433, "learning_rate": 7.245754481133294e-11, "loss": 1.132, "num_input_tokens_seen": 1648004064, "step": 9127 }, { "epoch": 0.9992610635211692, "grad_norm": 0.9974236380716206, "learning_rate": 5.3234121377698785e-11, "loss": 0.7971, "num_input_tokens_seen": 1648192000, "step": 9128 }, { "epoch": 0.9993705355921071, "grad_norm": 0.9546933637523226, "learning_rate": 3.696814385445446e-11, "loss": 0.6711, "num_input_tokens_seen": 1648376800, "step": 9129 }, { "epoch": 0.999480007663045, "grad_norm": 1.0958920892162078, "learning_rate": 2.365961416506135e-11, "loss": 1.0707, "num_input_tokens_seen": 1648559808, "step": 9130 }, { "epoch": 0.9995894797339828, "grad_norm": 1.231024722485542, "learning_rate": 1.3308533886036145e-11, "loss": 0.8996, "num_input_tokens_seen": 1648714144, "step": 9131 }, { "epoch": 0.9996989518049207, "grad_norm": 1.0017347785739568, "learning_rate": 5.914904241399732e-12, "loss": 0.7928, "num_input_tokens_seen": 1648911264, "step": 9132 }, { "epoch": 0.9998084238758587, "grad_norm": 1.2572936889497326, "learning_rate": 1.4787261026771859e-12, "loss": 0.8593, "num_input_tokens_seen": 1649101216, "step": 9133 }, { "epoch": 0.9999178959467966, "grad_norm": 0.9670324481679939, "learning_rate": 0.0, "loss": 0.7529, "num_input_tokens_seen": 1649269888, "step": 9134 }, { "epoch": 0.9999178959467966, "num_input_tokens_seen": 1649269888, "step": 9134, "total_flos": 3298213988794368.0, "train_loss": 0.7717254485568724, "train_runtime": 600800.1758, "train_samples_per_second": 3.406, "train_steps_per_second": 0.015 } ], "logging_steps": 1.0, "max_steps": 9134, "num_input_tokens_seen": 1649269888, "num_train_epochs": 1, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3298213988794368.0, "train_batch_size": 28, "trial_name": null, "trial_params": null }